xref: /titanic_44/usr/src/uts/common/inet/udp/udp.c (revision 753d2d2e8e7fd0c9bcf736d9bf2f2faf4d6234cc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 const char udp_version[] = "%Z%%M%	%I%	%E% SMI";
30 
31 #include <sys/types.h>
32 #include <sys/stream.h>
33 #include <sys/dlpi.h>
34 #include <sys/pattr.h>
35 #include <sys/stropts.h>
36 #include <sys/strlog.h>
37 #include <sys/strsun.h>
38 #include <sys/time.h>
39 #define	_SUN_TPI_VERSION 2
40 #include <sys/tihdr.h>
41 #include <sys/timod.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/strsubr.h>
45 #include <sys/suntpi.h>
46 #include <sys/xti_inet.h>
47 #include <sys/cmn_err.h>
48 #include <sys/kmem.h>
49 #include <sys/policy.h>
50 #include <sys/ucred.h>
51 #include <sys/zone.h>
52 
53 #include <sys/socket.h>
54 #include <sys/sockio.h>
55 #include <sys/vtrace.h>
56 #include <sys/debug.h>
57 #include <sys/isa_defs.h>
58 #include <sys/random.h>
59 #include <netinet/in.h>
60 #include <netinet/ip6.h>
61 #include <netinet/icmp6.h>
62 #include <netinet/udp.h>
63 #include <net/if.h>
64 #include <net/route.h>
65 
66 #include <inet/common.h>
67 #include <inet/ip.h>
68 #include <inet/ip_impl.h>
69 #include <inet/ip6.h>
70 #include <inet/ip_ire.h>
71 #include <inet/ip_if.h>
72 #include <inet/ip_multi.h>
73 #include <inet/ip_ndp.h>
74 #include <inet/mi.h>
75 #include <inet/mib2.h>
76 #include <inet/nd.h>
77 #include <inet/optcom.h>
78 #include <inet/snmpcom.h>
79 #include <inet/kstatcom.h>
80 #include <inet/udp_impl.h>
81 #include <inet/ipclassifier.h>
82 #include <inet/ipsec_impl.h>
83 #include <inet/ipp_common.h>
84 
85 /*
86  * The ipsec_info.h header file is here since it has the definition for the
87  * M_CTL message types used by IP to convey information to the ULP. The
88  * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence.
89  */
90 #include <net/pfkeyv2.h>
91 #include <inet/ipsec_info.h>
92 
93 #include <sys/tsol/label.h>
94 #include <sys/tsol/tnet.h>
95 #include <rpc/pmap_prot.h>
96 
97 /*
98  * Synchronization notes:
99  *
100  * UDP uses a combination of its internal perimeter, a global lock and
101  * a set of bind hash locks to protect its data structures.  Please see
102  * the note above udp_mode_assertions for details about the internal
103  * perimeter.
104  *
105  * When a UDP endpoint is bound to a local port, it is inserted into
106  * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
107  * The size of the array is controlled by the udp_bind_fanout_size variable.
108  * This variable can be changed in /etc/system if the default value is
109  * not large enough.  Each bind hash bucket is protected by a per bucket
110  * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
111  * structure.  An UDP endpoint is removed from the bind hash list only
112  * when it is being unbound or being closed.  The per bucket lock also
113  * protects a UDP endpoint's state changes.
114  *
115  * Plumbing notes:
116  *
117  * Both udp and ip are merged, but the streams plumbing is kept unchanged
118  * in that udp is always pushed atop /dev/ip.  This is done to preserve
119  * backwards compatibility for certain applications which rely on such
120  * plumbing geometry to do things such as issuing I_POP on the stream
121  * in order to obtain direct access to /dev/ip, etc.
122  *
123  * All UDP processings happen in the /dev/ip instance; the udp module
124  * instance does not possess any state about the endpoint, and merely
125  * acts as a dummy module whose presence is to keep the streams plumbing
126  * appearance unchanged.  At open time /dev/ip allocates a conn_t that
127  * happens to embed a udp_t.  This stays dormant until the time udp is
128  * pushed, which indicates to /dev/ip that it must convert itself from
129  * an IP to a UDP endpoint.
130  *
131  * We only allow for the following plumbing cases:
132  *
133  * Normal:
134  *	/dev/ip is first opened and later udp is pushed directly on top.
135  *	This is the default action that happens when a udp socket or
136  *	/dev/udp is opened.  The conn_t created by /dev/ip instance is
137  *	now shared and is marked with IPCL_UDP.
138  *
139  * SNMP-only:
140  *	udp is pushed on top of a module other than /dev/ip.  When this
141  *	happens it will support only SNMP semantics.  A new conn_t is
142  *	allocated and marked with IPCL_UDPMOD.
143  *
144  * The above cases imply that we don't support any intermediate module to
145  * reside in between /dev/ip and udp -- in fact, we never supported such
146  * scenario in the past as the inter-layer communication semantics have
147  * always been private.  Also note that the normal case allows for SNMP
148  * requests to be processed in addition to the rest of UDP operations.
149  *
150  * The normal case plumbing is depicted by the following diagram:
151  *
152  *	+---------------+---------------+
153  *	|		|		| udp
154  *	|     udp_wq	|    udp_rq	|
155  *	|		|    UDP_RD	|
156  *	|		|		|
157  *	+---------------+---------------+
158  *		|		^
159  *		v		|
160  *	+---------------+---------------+
161  *	|		|		| /dev/ip
162  *	|     ip_wq	|     ip_rq	| conn_t
163  *	|     UDP_WR	|		|
164  *	|		|		|
165  *	+---------------+---------------+
166  *
167  * Messages arriving at udp_wq from above will end up in ip_wq before
168  * it gets processed, i.e. udp write entry points will advance udp_wq
169  * and use its q_next value as ip_wq in order to use the conn_t that
170  * is stored in its q_ptr.  Likewise, messages generated by ip to the
171  * module above udp will appear as if they are originated from udp_rq,
172  * i.e. putnext() calls to the module above udp is done using the
173  * udp_rq instead of ip_rq in order to avoid udp_rput() which does
174  * nothing more than calling putnext().
175  *
176  * The above implies the following rule of thumb:
177  *
178  *   1. udp_t is obtained from conn_t, which is created by the /dev/ip
179  *	instance and is stored in q_ptr of both ip_wq and ip_rq.  There
180  *	is no direct reference to conn_t from either udp_wq or udp_rq.
181  *
182  *   2. Write-side entry points of udp can obtain the conn_t via the
183  *	Q_TO_CONN() macro, using the queue value obtain from UDP_WR().
184  *
185  *   3. While in /dev/ip context, putnext() to the module above udp can
186  *	be done by supplying the queue value obtained from UDP_RD().
187  *
188  */
189 
190 static queue_t *UDP_WR(queue_t *);
191 static queue_t *UDP_RD(queue_t *);
192 
193 udp_stat_t udp_statistics = {
194 	{ "udp_ip_send",		KSTAT_DATA_UINT64 },
195 	{ "udp_ip_ire_send",		KSTAT_DATA_UINT64 },
196 	{ "udp_ire_null",		KSTAT_DATA_UINT64 },
197 	{ "udp_drain",			KSTAT_DATA_UINT64 },
198 	{ "udp_sock_fallback",		KSTAT_DATA_UINT64 },
199 	{ "udp_rrw_busy",		KSTAT_DATA_UINT64 },
200 	{ "udp_rrw_msgcnt",		KSTAT_DATA_UINT64 },
201 	{ "udp_out_sw_cksum",		KSTAT_DATA_UINT64 },
202 	{ "udp_out_sw_cksum_bytes",	KSTAT_DATA_UINT64 },
203 	{ "udp_out_opt",		KSTAT_DATA_UINT64 },
204 	{ "udp_out_err_notconn",	KSTAT_DATA_UINT64 },
205 	{ "udp_out_err_output",		KSTAT_DATA_UINT64 },
206 	{ "udp_out_err_tudr",		KSTAT_DATA_UINT64 },
207 	{ "udp_in_pktinfo",		KSTAT_DATA_UINT64 },
208 	{ "udp_in_recvdstaddr",		KSTAT_DATA_UINT64 },
209 	{ "udp_in_recvopts",		KSTAT_DATA_UINT64 },
210 	{ "udp_in_recvif",		KSTAT_DATA_UINT64 },
211 	{ "udp_in_recvslla",		KSTAT_DATA_UINT64 },
212 	{ "udp_in_recvucred",		KSTAT_DATA_UINT64 },
213 	{ "udp_in_recvttl",		KSTAT_DATA_UINT64 },
214 	{ "udp_in_recvhopopts",		KSTAT_DATA_UINT64 },
215 	{ "udp_in_recvhoplimit",	KSTAT_DATA_UINT64 },
216 	{ "udp_in_recvdstopts",		KSTAT_DATA_UINT64 },
217 	{ "udp_in_recvrtdstopts",	KSTAT_DATA_UINT64 },
218 	{ "udp_in_recvrthdr",		KSTAT_DATA_UINT64 },
219 	{ "udp_in_recvpktinfo",		KSTAT_DATA_UINT64 },
220 	{ "udp_in_recvtclass",		KSTAT_DATA_UINT64 },
221 	{ "udp_in_timestamp",		KSTAT_DATA_UINT64 },
222 #ifdef DEBUG
223 	{ "udp_data_conn",		KSTAT_DATA_UINT64 },
224 	{ "udp_data_notconn",		KSTAT_DATA_UINT64 },
225 #endif
226 };
227 
228 static kstat_t *udp_ksp;
229 struct kmem_cache *udp_cache;
230 
231 /*
232  * Bind hash list size and hash function.  It has to be a power of 2 for
233  * hashing.
234  */
235 #define	UDP_BIND_FANOUT_SIZE	512
236 #define	UDP_BIND_HASH(lport) \
237 	((ntohs((uint16_t)lport)) & (udp_bind_fanout_size - 1))
238 
239 /* UDP bind fanout hash structure. */
240 typedef struct udp_fanout_s {
241 	udp_t *uf_udp;
242 	kmutex_t uf_lock;
243 #if defined(_LP64) || defined(_I32LPx)
244 	char	uf_pad[48];
245 #else
246 	char	uf_pad[56];
247 #endif
248 } udp_fanout_t;
249 
250 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
251 /* udp_fanout_t *udp_bind_fanout. */
252 static udp_fanout_t *udp_bind_fanout;
253 
254 /*
255  * This controls the rate some ndd info report functions can be used
256  * by non-privileged users.  It stores the last time such info is
257  * requested.  When those report functions are called again, this
258  * is checked with the current time and compare with the ndd param
259  * udp_ndd_get_info_interval.
260  */
261 static clock_t udp_last_ndd_get_info_time;
262 #define	NDD_TOO_QUICK_MSG \
263 	"ndd get info rate too high for non-privileged users, try again " \
264 	"later.\n"
265 #define	NDD_OUT_OF_BUF_MSG	"<< Out of buffer >>\n"
266 
267 /* Option processing attrs */
268 typedef struct udpattrs_s {
269 	ip6_pkt_t	*udpattr_ipp;
270 	mblk_t		*udpattr_mb;
271 	boolean_t	udpattr_credset;
272 } udpattrs_t;
273 
274 static void	udp_addr_req(queue_t *q, mblk_t *mp);
275 static void	udp_bind(queue_t *q, mblk_t *mp);
276 static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
277 static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
278 static int	udp_build_hdrs(queue_t *q, udp_t *udp);
279 static void	udp_capability_req(queue_t *q, mblk_t *mp);
280 static int	udp_close(queue_t *q);
281 static void	udp_connect(queue_t *q, mblk_t *mp);
282 static void	udp_disconnect(queue_t *q, mblk_t *mp);
283 static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
284 		    int sys_error);
285 static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
286 		    t_scalar_t tlierr, int unixerr);
287 static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
288 		    cred_t *cr);
289 static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
290 		    char *value, caddr_t cp, cred_t *cr);
291 static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
292 		    char *value, caddr_t cp, cred_t *cr);
293 static void	udp_icmp_error(queue_t *q, mblk_t *mp);
294 static void	udp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
295 static void	udp_info_req(queue_t *q, mblk_t *mp);
296 static mblk_t	*udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim,
297 		    t_scalar_t addr_length);
298 static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
299 		    cred_t *credp);
300 static  int	udp_unitdata_opt_process(queue_t *q, mblk_t *mp,
301 		    int *errorp, udpattrs_t *udpattrs);
302 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
303 static int	udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
304 static boolean_t udp_param_register(udpparam_t *udppa, int cnt);
305 static int	udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
306 		    cred_t *cr);
307 static void	udp_report_item(mblk_t *mp, udp_t *udp);
308 static void	udp_rput(queue_t *q, mblk_t *mp);
309 static void	udp_rput_other(queue_t *, mblk_t *);
310 static int	udp_rinfop(queue_t *q, infod_t *dp);
311 static int	udp_rrw(queue_t *q, struiod_t *dp);
312 static	void	udp_rput_bind_ack(queue_t *q, mblk_t *mp);
313 static int	udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
314 		    cred_t *cr);
315 static void	udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha);
316 static void	udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
317 		    t_scalar_t destlen, t_scalar_t err);
318 static void	udp_unbind(queue_t *q, mblk_t *mp);
319 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
320     boolean_t random);
321 static void	udp_wput(queue_t *q, mblk_t *mp);
322 static mblk_t	*udp_output_v4(conn_t *, mblk_t *mp, ipaddr_t v4dst,
323 		    uint16_t port, uint_t srcid, int *error);
324 static mblk_t	*udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
325 		    int *error);
326 static void	udp_wput_other(queue_t *q, mblk_t *mp);
327 static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
328 static void	udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr,
329 		    socklen_t addrlen);
330 static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);
331 
332 static void	udp_kstat_init(void);
333 static void	udp_kstat_fini(void);
334 static int	udp_kstat_update(kstat_t *kp, int rw);
335 static void	udp_input_wrapper(void *arg, mblk_t *mp, void *arg2);
336 static void	udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2);
337 static void	udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2);
338 static void	udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2);
339 
340 static void	udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp,
341 		    uint_t pkt_len);
342 static void	udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing);
343 static void	udp_enter(conn_t *, mblk_t *, sqproc_t, uint8_t);
344 static void	udp_exit(conn_t *);
345 static void	udp_become_writer(conn_t *, mblk_t *, sqproc_t, uint8_t);
346 #ifdef DEBUG
347 static void	udp_mode_assertions(udp_t *, int);
348 #endif /* DEBUG */
349 
350 major_t UDP6_MAJ;
351 #define	UDP6 "udp6"
352 
353 #define	UDP_RECV_HIWATER	(56 * 1024)
354 #define	UDP_RECV_LOWATER	128
355 #define	UDP_XMIT_HIWATER	(56 * 1024)
356 #define	UDP_XMIT_LOWATER	1024
357 
358 static struct module_info udp_info =  {
359 	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
360 };
361 
362 static struct qinit udp_rinit = {
363 	(pfi_t)udp_rput, NULL, udp_open, udp_close, NULL,
364 	&udp_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
365 };
366 
367 static struct qinit udp_winit = {
368 	(pfi_t)udp_wput, NULL, NULL, NULL, NULL,
369 	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
370 };
371 
372 static struct qinit winit = {
373 	(pfi_t)putnext, NULL, NULL, NULL, NULL,
374 	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
375 };
376 
377 /* Support for just SNMP if UDP is not pushed directly over device IP */
378 struct qinit udp_snmp_rinit = {
379 	(pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL,
380 	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
381 };
382 
383 struct qinit udp_snmp_winit = {
384 	(pfi_t)ip_snmpmod_wput, NULL, udp_open, ip_snmpmod_close, NULL,
385 	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
386 };
387 
388 struct streamtab udpinfo = {
389 	&udp_rinit, &winit
390 };
391 
392 static	sin_t	sin_null;	/* Zero address for quick clears */
393 static	sin6_t	sin6_null;	/* Zero address for quick clears */
394 
395 /* Hint not protected by any lock */
396 static in_port_t	udp_g_next_port_to_try;
397 
398 /*
399  * Extra privileged ports. In host byte order.
400  */
401 #define	UDP_NUM_EPRIV_PORTS	64
402 static int	udp_g_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
403 static in_port_t udp_g_epriv_ports[UDP_NUM_EPRIV_PORTS] = { 2049, 4045 };
404 
405 /* Only modified during _init and _fini thus no locking is needed. */
406 static IDP	udp_g_nd;	/* Points to table of UDP ND variables. */
407 
408 /* MIB-2 stuff for SNMP */
409 static mib2_udp_t	udp_mib;	/* SNMP fixed size info */
410 static kstat_t		*udp_mibkp;	/* kstat exporting udp_mib data */
411 
412 #define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)
413 
414 /* Default structure copied into T_INFO_ACK messages */
415 static struct T_info_ack udp_g_t_info_ack_ipv4 = {
416 	T_INFO_ACK,
417 	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
418 	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
419 	T_INVALID,	/* CDATA_size. udp does not support connect data. */
420 	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
421 	sizeof (sin_t),	/* ADDR_size. */
422 	0,		/* OPT_size - not initialized here */
423 	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
424 	T_CLTS,		/* SERV_type.  udp supports connection-less. */
425 	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
426 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
427 };
428 
429 #define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)
430 
431 static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
432 	T_INFO_ACK,
433 	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
434 	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
435 	T_INVALID,	/* CDATA_size. udp does not support connect data. */
436 	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
437 	sizeof (sin6_t), /* ADDR_size. */
438 	0,		/* OPT_size - not initialized here */
439 	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
440 	T_CLTS,		/* SERV_type.  udp supports connection-less. */
441 	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
442 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
443 };
444 
445 /* largest UDP port number */
446 #define	UDP_MAX_PORT	65535
447 
448 /*
449  * Table of ND variables supported by udp.  These are loaded into udp_g_nd
450  * in udp_open.
451  * All of these are alterable, within the min/max values given, at run time.
452  */
453 /* BEGIN CSTYLED */
454 udpparam_t udp_param_arr[] = {
455  /*min		max		value		name */
456  { 0L,		256,		32,		"udp_wroff_extra" },
457  { 1L,		255,		255,		"udp_ipv4_ttl" },
458  { 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
459  { 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
460  { 0,		1,		1,		"udp_do_checksum" },
461  { 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
462  { 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
463  { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER,	"udp_xmit_hiwat"},
464  { 0,		     (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"},
465  { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER,	"udp_recv_hiwat"},
466  { 65536,	(1<<30),	2*1024*1024,	"udp_max_buf"},
467  { 100,		60000,		1000,		"udp_ndd_get_info_interval"},
468 };
469 /* END CSTYLED */
470 
471 /*
472  * The smallest anonymous port in the privileged port range which UDP
473  * looks for free port.  Use in the option UDP_ANONPRIVBIND.
474  */
475 static in_port_t udp_min_anonpriv_port = 512;
476 
477 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
478 uint32_t udp_random_anon_port = 1;
479 
480 /*
481  * Hook functions to enable cluster networking.
482  * On non-clustered systems these vectors must always be NULL
483  */
484 
485 void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family,
486     uint8_t *laddrp, in_port_t lport) = NULL;
487 void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family,
488     uint8_t *laddrp, in_port_t lport) = NULL;
489 
490 typedef union T_primitives *t_primp_t;
491 
492 #define	UDP_ENQUEUE_MP(udp, mp, proc, tag) {			\
493 	ASSERT((mp)->b_prev == NULL && (mp)->b_queue == NULL);	\
494 	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
495 	(mp)->b_queue = (queue_t *)((uintptr_t)tag);		\
496 	(mp)->b_prev = (mblk_t *)proc;				\
497 	if ((udp)->udp_mphead == NULL)				\
498 		(udp)->udp_mphead = (mp);			\
499 	else							\
500 		(udp)->udp_mptail->b_next = (mp);		\
501 	(udp)->udp_mptail = (mp);				\
502 	(udp)->udp_mpcount++;					\
503 }
504 
505 #define	UDP_READERS_INCREF(udp) {				\
506 	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
507 	(udp)->udp_reader_count++;				\
508 }
509 
510 #define	UDP_READERS_DECREF(udp) {				\
511 	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
512 	(udp)->udp_reader_count--;				\
513 	if ((udp)->udp_reader_count == 0)			\
514 		cv_broadcast(&(udp)->udp_connp->conn_cv);	\
515 }
516 
517 #define	UDP_SQUEUE_DECREF(udp) {				\
518 	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
519 	(udp)->udp_squeue_count--;				\
520 	if ((udp)->udp_squeue_count == 0)			\
521 		cv_broadcast(&(udp)->udp_connp->conn_cv);	\
522 }
523 
524 /*
525  * Notes on UDP endpoint synchronization:
526  *
527  * UDP needs exclusive operation on a per endpoint basis, when executing
528  * functions that modify the endpoint state.  udp_rput_other() deals with
529  * packets with IP options, and processing these packets end up having
530  * to update the endpoint's option related state.  udp_wput_other() deals
531  * with control operations from the top, e.g. connect() that needs to
532  * update the endpoint state.  These could be synchronized using locks,
533  * but the current version uses squeues for this purpose.  squeues may
534  * give performance improvement for certain cases such as connected UDP
535  * sockets; thus the framework allows for using squeues.
536  *
537  * The perimeter routines are described as follows:
538  *
539  * udp_enter():
540  *	Enter the UDP endpoint perimeter.
541  *
542  * udp_become_writer():
543  *	Become exclusive on the UDP endpoint.  Specifies a function
544  *	that will be called exclusively either immediately or later
545  *	when the perimeter is available exclusively.
546  *
547  * udp_exit():
548  *	Exit the UDP perimeter.
549  *
550  * Entering UDP from the top or from the bottom must be done using
551  * udp_enter().  No lock must be held while attempting to enter the UDP
552  * perimeter.  When finished, udp_exit() must be called to get out of
553  * the perimeter.
554  *
555  * UDP operates in either MT_HOT mode or in SQUEUE mode.  In MT_HOT mode,
556  * multiple threads may enter a UDP endpoint concurrently.  This is used
557  * for sending and/or receiving normal data.  Control operations and other
558  * special cases call udp_become_writer() to become exclusive on a per
559  * endpoint basis and this results in transitioning to SQUEUE mode.  squeue
560  * by definition serializes access to the conn_t.  When there are no more
561  * pending messages on the squeue for the UDP connection, the endpoint
562  * reverts to MT_HOT mode.  During the interregnum when not all MT threads
563  * of an endpoint have finished, messages are queued in the UDP endpoint
564  * and the UDP is in UDP_MT_QUEUED mode or UDP_QUEUED_SQUEUE mode.
565  *
566  * These modes have the following analogs:
567  *
568  *	UDP_MT_HOT/udp_reader_count==0		none
569  *	UDP_MT_HOT/udp_reader_count>0		RW_READ_LOCK
570  *	UDP_MT_QUEUED				RW_WRITE_WANTED
571  *	UDP_SQUEUE or UDP_QUEUED_SQUEUE		RW_WRITE_LOCKED
572  *
573  * Stable modes:	UDP_MT_HOT, UDP_SQUEUE
574  * Transient modes:	UDP_MT_QUEUED, UDP_QUEUED_SQUEUE
575  *
576  * While in stable modes, UDP keeps track of the number of threads
577  * operating on the endpoint.  The udp_reader_count variable represents
578  * the number of threads entering the endpoint as readers while it is
579  * in UDP_MT_HOT mode.  Transitioning to UDP_SQUEUE happens when there
580  * is only a single reader, i.e. when this counter drops to 1.  Likewise,
581  * udp_squeue_count represents the number of threads operating on the
582  * endpoint's squeue while it is in UDP_SQUEUE mode.  The mode transition
583  * to UDP_MT_HOT happens after the last thread exits the endpoint, i.e.
584  * when this counter drops to 0.
585  *
586  * The default mode is set to UDP_MT_HOT and UDP alternates between
587  * UDP_MT_HOT and UDP_SQUEUE as shown in the state transition below.
588  *
589  * Mode transition:
590  * ----------------------------------------------------------------
591  * old mode		Event				New mode
592  * ----------------------------------------------------------------
593  * UDP_MT_HOT		Call to udp_become_writer()	UDP_SQUEUE
594  *			and udp_reader_count == 1
595  *
596  * UDP_MT_HOT		Call to udp_become_writer()	UDP_MT_QUEUED
597  *			and udp_reader_count > 1
598  *
599  * UDP_MT_QUEUED	udp_reader_count drops to zero	UDP_QUEUED_SQUEUE
600  *
601  * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_SQUEUE
602  *			internal UDP queue successfully
603  *			moved to squeue AND udp_squeue_count != 0
604  *
605  * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_MT_HOT
606  *			internal UDP queue successfully
607  *			moved to squeue AND udp_squeue_count
608  *			drops to zero
609  *
610  * UDP_SQUEUE		udp_squeue_count drops to zero	UDP_MT_HOT
611  * ----------------------------------------------------------------
612  */
613 
614 static queue_t *
615 UDP_WR(queue_t *q)
616 {
617 	ASSERT(q->q_ptr == NULL && _OTHERQ(q)->q_ptr == NULL);
618 	ASSERT(WR(q)->q_next != NULL && WR(q)->q_next->q_ptr != NULL);
619 	ASSERT(IPCL_IS_UDP(Q_TO_CONN(WR(q)->q_next)));
620 
621 	return (_WR(q)->q_next);
622 }
623 
624 static queue_t *
625 UDP_RD(queue_t *q)
626 {
627 	ASSERT(q->q_ptr != NULL && _OTHERQ(q)->q_ptr != NULL);
628 	ASSERT(IPCL_IS_UDP(Q_TO_CONN(q)));
629 	ASSERT(RD(q)->q_next != NULL && RD(q)->q_next->q_ptr == NULL);
630 
631 	return (_RD(q)->q_next);
632 }
633 
634 #ifdef DEBUG
635 #define	UDP_MODE_ASSERTIONS(udp, caller) udp_mode_assertions(udp, caller)
636 #else
637 #define	UDP_MODE_ASSERTIONS(udp, caller)
638 #endif
639 
640 /* Invariants */
641 #ifdef DEBUG
642 
643 uint32_t udp_count[4];
644 
645 /* Context of udp_mode_assertions */
646 #define	UDP_ENTER		1
647 #define	UDP_BECOME_WRITER	2
648 #define	UDP_EXIT		3
649 
650 static void
651 udp_mode_assertions(udp_t *udp, int caller)
652 {
653 	ASSERT(MUTEX_HELD(&udp->udp_connp->conn_lock));
654 
655 	switch (udp->udp_mode) {
656 	case UDP_MT_HOT:
657 		/*
658 		 * Messages have not yet been enqueued on the internal queue,
659 		 * otherwise we would have switched to UDP_MT_QUEUED. Likewise
660 		 * by definition, there can't be any messages enqueued on the
661 		 * squeue. The UDP could be quiescent, so udp_reader_count
662 		 * could be zero at entry.
663 		 */
664 		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0 &&
665 		    udp->udp_squeue_count == 0);
666 		ASSERT(caller == UDP_ENTER || udp->udp_reader_count != 0);
667 		udp_count[0]++;
668 		break;
669 
670 	case UDP_MT_QUEUED:
671 		/*
672 		 * The last MT thread to exit the udp perimeter empties the
673 		 * internal queue and then switches the UDP to
674 		 * UDP_QUEUED_SQUEUE mode. Since we are still in UDP_MT_QUEUED
675 		 * mode, it means there must be at least 1 MT thread still in
676 		 * the perimeter and at least 1 message on the internal queue.
677 		 */
678 		ASSERT(udp->udp_reader_count >= 1 && udp->udp_mphead != NULL &&
679 		    udp->udp_mpcount != 0 && udp->udp_squeue_count == 0);
680 		udp_count[1]++;
681 		break;
682 
683 	case UDP_QUEUED_SQUEUE:
684 		/*
685 		 * The switch has happened from MT to SQUEUE. So there can't
686 		 * any MT threads. Messages could still pile up on the internal
687 		 * queue until the transition is complete and we move to
688 		 * UDP_SQUEUE mode. We can't assert on nonzero udp_squeue_count
689 		 * since the squeue could drain any time.
690 		 */
691 		ASSERT(udp->udp_reader_count == 0);
692 		udp_count[2]++;
693 		break;
694 
695 	case UDP_SQUEUE:
696 		/*
697 		 * The transition is complete. Thre can't be any messages on
698 		 * the internal queue. The udp could be quiescent or the squeue
699 		 * could drain any time, so we can't assert on nonzero
700 		 * udp_squeue_count during entry. Nor can we assert that
701 		 * udp_reader_count is zero, since, a reader thread could have
702 		 * directly become writer in line by calling udp_become_writer
703 		 * without going through the queued states.
704 		 */
705 		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0);
706 		ASSERT(caller == UDP_ENTER || udp->udp_squeue_count != 0);
707 		udp_count[3]++;
708 		break;
709 	}
710 }
711 #endif
712 
713 #define	_UDP_ENTER(connp, mp, proc, tag) {				\
714 	udp_t *_udp = (connp)->conn_udp;				\
715 									\
716 	mutex_enter(&(connp)->conn_lock);				\
717 	if ((connp)->conn_state_flags & CONN_CLOSING) {			\
718 		mutex_exit(&(connp)->conn_lock);			\
719 		freemsg(mp);						\
720 	} else {							\
721 		UDP_MODE_ASSERTIONS(_udp, UDP_ENTER);			\
722 									\
723 		switch (_udp->udp_mode) {				\
724 		case UDP_MT_HOT:					\
725 			/* We can execute as reader right away. */	\
726 			UDP_READERS_INCREF(_udp);			\
727 			mutex_exit(&(connp)->conn_lock);		\
728 			(*(proc))(connp, mp, (connp)->conn_sqp);	\
729 			break;						\
730 									\
731 		case UDP_SQUEUE:					\
732 			/*						\
733 			 * We are in squeue mode, send the		\
734 			 * packet to the squeue				\
735 			 */						\
736 			_udp->udp_squeue_count++;			\
737 			CONN_INC_REF_LOCKED(connp);			\
738 			mutex_exit(&(connp)->conn_lock);		\
739 			squeue_enter((connp)->conn_sqp, mp, proc,	\
740 			    connp, tag);				\
741 			break;						\
742 									\
743 		case UDP_MT_QUEUED:					\
744 		case UDP_QUEUED_SQUEUE:					\
745 			/*						\
746 			 * Some messages may have been enqueued		\
747 			 * ahead of us.  Enqueue the new message	\
748 			 * at the tail of the internal queue to		\
749 			 * preserve message ordering.			\
750 			 */						\
751 			UDP_ENQUEUE_MP(_udp, mp, proc, tag);		\
752 			mutex_exit(&(connp)->conn_lock);		\
753 			break;						\
754 		}							\
755 	}								\
756 }
757 
758 static void
759 udp_enter(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
760 {
761 	_UDP_ENTER(connp, mp, proc, tag);
762 }
763 
764 static void
765 udp_become_writer(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
766 {
767 	udp_t	*udp;
768 
769 	udp = connp->conn_udp;
770 
771 	mutex_enter(&connp->conn_lock);
772 
773 	UDP_MODE_ASSERTIONS(udp, UDP_BECOME_WRITER);
774 
775 	switch (udp->udp_mode) {
776 	case UDP_MT_HOT:
777 		if (udp->udp_reader_count == 1) {
778 			/*
779 			 * We are the only MT thread. Switch to squeue mode
780 			 * immediately.
781 			 */
782 			udp->udp_mode = UDP_SQUEUE;
783 			udp->udp_squeue_count = 1;
784 			CONN_INC_REF_LOCKED(connp);
785 			mutex_exit(&connp->conn_lock);
786 			squeue_enter(connp->conn_sqp, mp, proc, connp, tag);
787 			return;
788 		}
789 		/* FALLTHRU */
790 
791 	case UDP_MT_QUEUED:
792 		/* Enqueue the packet internally in UDP */
793 		udp->udp_mode = UDP_MT_QUEUED;
794 		UDP_ENQUEUE_MP(udp, mp, proc, tag);
795 		mutex_exit(&connp->conn_lock);
796 		return;
797 
798 	case UDP_SQUEUE:
799 	case UDP_QUEUED_SQUEUE:
800 		/*
801 		 * We are already exclusive. i.e. we are already
802 		 * writer. Simply call the desired function.
803 		 */
804 		udp->udp_squeue_count++;
805 		mutex_exit(&connp->conn_lock);
806 		(*proc)(connp, mp, connp->conn_sqp);
807 		return;
808 	}
809 }
810 
811 /*
812  * Transition from MT mode to SQUEUE mode, when the last MT thread
813  * is exiting the UDP perimeter. Move all messages from the internal
814  * udp queue to the squeue. A better way would be to move all the
815  * messages in one shot, this needs more support from the squeue framework
816  */
817 static void
818 udp_switch_to_squeue(udp_t *udp)
819 {
820 	mblk_t *mp;
821 	mblk_t	*mp_next;
822 	sqproc_t proc;
823 	uint8_t	tag;
824 	conn_t	*connp = udp->udp_connp;
825 
826 	ASSERT(MUTEX_HELD(&connp->conn_lock));
827 	ASSERT(udp->udp_mode == UDP_MT_QUEUED);
828 	while (udp->udp_mphead != NULL) {
829 		mp = udp->udp_mphead;
830 		udp->udp_mphead = NULL;
831 		udp->udp_mptail = NULL;
832 		udp->udp_mpcount = 0;
833 		udp->udp_mode = UDP_QUEUED_SQUEUE;
834 		mutex_exit(&connp->conn_lock);
835 		/*
836 		 * It is best not to hold any locks across the calls
837 		 * to squeue functions. Since we drop the lock we
838 		 * need to go back and check the udp_mphead once again
839 		 * after the squeue_fill and hence the while loop at
840 		 * the top of this function
841 		 */
842 		for (; mp != NULL; mp = mp_next) {
843 			mp_next = mp->b_next;
844 			proc = (sqproc_t)mp->b_prev;
845 			tag = (uint8_t)((uintptr_t)mp->b_queue);
846 			mp->b_next = NULL;
847 			mp->b_prev = NULL;
848 			mp->b_queue = NULL;
849 			CONN_INC_REF(connp);
850 			udp->udp_squeue_count++;
851 			squeue_fill(connp->conn_sqp, mp, proc, connp,
852 			    tag);
853 		}
854 		mutex_enter(&connp->conn_lock);
855 	}
856 	/*
857 	 * udp_squeue_count of zero implies that the squeue has drained
858 	 * even before we arrived here (i.e. after the squeue_fill above)
859 	 */
860 	udp->udp_mode = (udp->udp_squeue_count != 0) ?
861 	    UDP_SQUEUE : UDP_MT_HOT;
862 }
863 
864 #define	_UDP_EXIT(connp) {						\
865 	udp_t *_udp = (connp)->conn_udp;				\
866 									\
867 	mutex_enter(&(connp)->conn_lock);				\
868 	UDP_MODE_ASSERTIONS(_udp, UDP_EXIT);				\
869 									\
870 	switch (_udp->udp_mode) {					\
871 	case UDP_MT_HOT:						\
872 		UDP_READERS_DECREF(_udp);				\
873 		mutex_exit(&(connp)->conn_lock);			\
874 		break;							\
875 									\
876 	case UDP_SQUEUE:						\
877 		UDP_SQUEUE_DECREF(_udp);				\
878 		if (_udp->udp_squeue_count == 0)			\
879 		    _udp->udp_mode = UDP_MT_HOT;			\
880 		mutex_exit(&(connp)->conn_lock);			\
881 		break;							\
882 									\
883 	case UDP_MT_QUEUED:						\
884 		/*							\
885 		 * If this is the last MT thread, we need to		\
886 		 * switch to squeue mode				\
887 		 */							\
888 		UDP_READERS_DECREF(_udp);				\
889 		if (_udp->udp_reader_count == 0)			\
890 			udp_switch_to_squeue(_udp);			\
891 		mutex_exit(&(connp)->conn_lock);			\
892 		break;							\
893 									\
894 	case UDP_QUEUED_SQUEUE:						\
895 		UDP_SQUEUE_DECREF(_udp);				\
896 		/*							\
897 		 * Even if the udp_squeue_count drops to zero, we	\
898 		 * don't want to change udp_mode to UDP_MT_HOT here.	\
899 		 * The thread in udp_switch_to_squeue will take care	\
900 		 * of the transition to UDP_MT_HOT, after emptying	\
901 		 * any more new messages that have been enqueued in	\
902 		 * udp_mphead.						\
903 		 */							\
904 		mutex_exit(&(connp)->conn_lock);			\
905 		break;							\
906 	}								\
907 }
908 
909 static void
910 udp_exit(conn_t *connp)
911 {
912 	_UDP_EXIT(connp);
913 }
914 
915 /*
916  * Return the next anonymous port in the privileged port range for
917  * bind checking.
918  *
919  * Trusted Extension (TX) notes: TX allows administrator to mark or
920  * reserve ports as Multilevel ports (MLP). MLP has special function
921  * on TX systems. Once a port is made MLP, it's not available as
922  * ordinary port. This creates "holes" in the port name space. It
923  * may be necessary to skip the "holes" find a suitable anon port.
924  */
925 static in_port_t
926 udp_get_next_priv_port(udp_t *udp)
927 {
928 	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
929 	in_port_t nextport;
930 	boolean_t restart = B_FALSE;
931 
932 retry:
933 	if (next_priv_port < udp_min_anonpriv_port ||
934 	    next_priv_port >= IPPORT_RESERVED) {
935 		next_priv_port = IPPORT_RESERVED - 1;
936 		if (restart)
937 			return (0);
938 		restart = B_TRUE;
939 	}
940 
941 	if (is_system_labeled() &&
942 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
943 	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
944 		next_priv_port = nextport;
945 		goto retry;
946 	}
947 
948 	return (next_priv_port--);
949 }
950 
951 /* UDP bind hash report triggered via the Named Dispatch mechanism. */
952 /* ARGSUSED */
953 static int
954 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
955 {
956 	udp_fanout_t	*udpf;
957 	int		i;
958 	zoneid_t	zoneid;
959 	conn_t		*connp;
960 	udp_t		*udp;
961 
962 	connp = Q_TO_CONN(q);
963 	udp = connp->conn_udp;
964 
965 	/* Refer to comments in udp_status_report(). */
966 	if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) {
967 		if (ddi_get_lbolt() - udp_last_ndd_get_info_time <
968 		    drv_usectohz(udp_ndd_get_info_interval * 1000)) {
969 			(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
970 			return (0);
971 		}
972 	}
973 	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
974 		/* The following may work even if we cannot get a large buf. */
975 		(void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG);
976 		return (0);
977 	}
978 
979 	(void) mi_mpprintf(mp,
980 	    "UDP     " MI_COL_HDRPAD_STR
981 	/*   12345678[89ABCDEF] */
982 	    " zone lport src addr        dest addr       port  state");
983 	/*    1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */
984 
985 	zoneid = connp->conn_zoneid;
986 
987 	for (i = 0; i < udp_bind_fanout_size; i++) {
988 		udpf = &udp_bind_fanout[i];
989 		mutex_enter(&udpf->uf_lock);
990 
991 		/* Print the hash index. */
992 		udp = udpf->uf_udp;
993 		if (zoneid != GLOBAL_ZONEID) {
994 			/* skip to first entry in this zone; might be none */
995 			while (udp != NULL &&
996 			    udp->udp_connp->conn_zoneid != zoneid)
997 				udp = udp->udp_bind_hash;
998 		}
999 		if (udp != NULL) {
1000 			uint_t print_len, buf_len;
1001 
1002 			buf_len = mp->b_cont->b_datap->db_lim -
1003 			    mp->b_cont->b_wptr;
1004 			print_len = snprintf((char *)mp->b_cont->b_wptr,
1005 			    buf_len, "%d\n", i);
1006 			if (print_len < buf_len) {
1007 				mp->b_cont->b_wptr += print_len;
1008 			} else {
1009 				mp->b_cont->b_wptr += buf_len;
1010 			}
1011 			for (; udp != NULL; udp = udp->udp_bind_hash) {
1012 				if (zoneid == GLOBAL_ZONEID ||
1013 				    zoneid == udp->udp_connp->conn_zoneid)
1014 					udp_report_item(mp->b_cont, udp);
1015 			}
1016 		}
1017 		mutex_exit(&udpf->uf_lock);
1018 	}
1019 	udp_last_ndd_get_info_time = ddi_get_lbolt();
1020 	return (0);
1021 }
1022 
1023 /*
1024  * Hash list removal routine for udp_t structures.
1025  */
1026 static void
1027 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
1028 {
1029 	udp_t	*udpnext;
1030 	kmutex_t *lockp;
1031 
1032 	if (udp->udp_ptpbhn == NULL)
1033 		return;
1034 
1035 	/*
1036 	 * Extract the lock pointer in case there are concurrent
1037 	 * hash_remove's for this instance.
1038 	 */
1039 	ASSERT(udp->udp_port != 0);
1040 	if (!caller_holds_lock) {
1041 		lockp = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)].uf_lock;
1042 		ASSERT(lockp != NULL);
1043 		mutex_enter(lockp);
1044 	}
1045 	if (udp->udp_ptpbhn != NULL) {
1046 		udpnext = udp->udp_bind_hash;
1047 		if (udpnext != NULL) {
1048 			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
1049 			udp->udp_bind_hash = NULL;
1050 		}
1051 		*udp->udp_ptpbhn = udpnext;
1052 		udp->udp_ptpbhn = NULL;
1053 	}
1054 	if (!caller_holds_lock) {
1055 		mutex_exit(lockp);
1056 	}
1057 }
1058 
1059 static void
1060 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
1061 {
1062 	udp_t	**udpp;
1063 	udp_t	*udpnext;
1064 
1065 	ASSERT(MUTEX_HELD(&uf->uf_lock));
1066 	if (udp->udp_ptpbhn != NULL) {
1067 		udp_bind_hash_remove(udp, B_TRUE);
1068 	}
1069 	udpp = &uf->uf_udp;
1070 	udpnext = udpp[0];
1071 	if (udpnext != NULL) {
1072 		/*
1073 		 * If the new udp bound to the INADDR_ANY address
1074 		 * and the first one in the list is not bound to
1075 		 * INADDR_ANY we skip all entries until we find the
1076 		 * first one bound to INADDR_ANY.
1077 		 * This makes sure that applications binding to a
1078 		 * specific address get preference over those binding to
1079 		 * INADDR_ANY.
1080 		 */
1081 		if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) &&
1082 		    !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) {
1083 			while ((udpnext = udpp[0]) != NULL &&
1084 			    !V6_OR_V4_INADDR_ANY(
1085 			    udpnext->udp_bound_v6src)) {
1086 				udpp = &(udpnext->udp_bind_hash);
1087 			}
1088 			if (udpnext != NULL)
1089 				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
1090 		} else {
1091 			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
1092 		}
1093 	}
1094 	udp->udp_bind_hash = udpnext;
1095 	udp->udp_ptpbhn = udpp;
1096 	udpp[0] = udp;
1097 }
1098 
1099 /*
1100  * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
1101  * passed to udp_wput.
1102  * It associates a port number and local address with the stream.
1103  * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
1104  * protocol type (IPPROTO_UDP) placed in the message following the address.
1105  * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
1106  * (Called as writer.)
1107  *
1108  * Note that UDP over IPv4 and IPv6 sockets can use the same port number
1109  * without setting SO_REUSEADDR. This is needed so that they
1110  * can be viewed as two independent transport protocols.
1111  * However, anonymouns ports are allocated from the same range to avoid
1112  * duplicating the udp_g_next_port_to_try.
1113  */
1114 static void
1115 udp_bind(queue_t *q, mblk_t *mp)
1116 {
1117 	sin_t		*sin;
1118 	sin6_t		*sin6;
1119 	mblk_t		*mp1;
1120 	in_port_t	port;		/* Host byte order */
1121 	in_port_t	requested_port;	/* Host byte order */
1122 	struct T_bind_req *tbr;
1123 	int		count;
1124 	in6_addr_t	v6src;
1125 	boolean_t	bind_to_req_port_only;
1126 	int		loopmax;
1127 	udp_fanout_t	*udpf;
1128 	in_port_t	lport;		/* Network byte order */
1129 	zoneid_t	zoneid;
1130 	conn_t		*connp;
1131 	udp_t		*udp;
1132 	boolean_t	is_inaddr_any;
1133 	mlp_type_t	addrtype, mlptype;
1134 
1135 	connp = Q_TO_CONN(q);
1136 	udp = connp->conn_udp;
1137 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
1138 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
1139 		    "udp_bind: bad req, len %u",
1140 		    (uint_t)(mp->b_wptr - mp->b_rptr));
1141 		udp_err_ack(q, mp, TPROTO, 0);
1142 		return;
1143 	}
1144 
1145 	if (udp->udp_state != TS_UNBND) {
1146 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
1147 		    "udp_bind: bad state, %u", udp->udp_state);
1148 		udp_err_ack(q, mp, TOUTSTATE, 0);
1149 		return;
1150 	}
1151 	/*
1152 	 * Reallocate the message to make sure we have enough room for an
1153 	 * address and the protocol type.
1154 	 */
1155 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
1156 	if (!mp1) {
1157 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
1158 		return;
1159 	}
1160 
1161 	mp = mp1;
1162 	tbr = (struct T_bind_req *)mp->b_rptr;
1163 	switch (tbr->ADDR_length) {
1164 	case 0:			/* Request for a generic port */
1165 		tbr->ADDR_offset = sizeof (struct T_bind_req);
1166 		if (udp->udp_family == AF_INET) {
1167 			tbr->ADDR_length = sizeof (sin_t);
1168 			sin = (sin_t *)&tbr[1];
1169 			*sin = sin_null;
1170 			sin->sin_family = AF_INET;
1171 			mp->b_wptr = (uchar_t *)&sin[1];
1172 		} else {
1173 			ASSERT(udp->udp_family == AF_INET6);
1174 			tbr->ADDR_length = sizeof (sin6_t);
1175 			sin6 = (sin6_t *)&tbr[1];
1176 			*sin6 = sin6_null;
1177 			sin6->sin6_family = AF_INET6;
1178 			mp->b_wptr = (uchar_t *)&sin6[1];
1179 		}
1180 		port = 0;
1181 		break;
1182 
1183 	case sizeof (sin_t):	/* Complete IPv4 address */
1184 		sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset,
1185 		    sizeof (sin_t));
1186 		if (sin == NULL || !OK_32PTR((char *)sin)) {
1187 			udp_err_ack(q, mp, TSYSERR, EINVAL);
1188 			return;
1189 		}
1190 		if (udp->udp_family != AF_INET ||
1191 		    sin->sin_family != AF_INET) {
1192 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
1193 			return;
1194 		}
1195 		port = ntohs(sin->sin_port);
1196 		break;
1197 
1198 	case sizeof (sin6_t):	/* complete IPv6 address */
1199 		sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset,
1200 		    sizeof (sin6_t));
1201 		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
1202 			udp_err_ack(q, mp, TSYSERR, EINVAL);
1203 			return;
1204 		}
1205 		if (udp->udp_family != AF_INET6 ||
1206 		    sin6->sin6_family != AF_INET6) {
1207 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
1208 			return;
1209 		}
1210 		port = ntohs(sin6->sin6_port);
1211 		break;
1212 
1213 	default:		/* Invalid request */
1214 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
1215 		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
1216 		udp_err_ack(q, mp, TBADADDR, 0);
1217 		return;
1218 	}
1219 
1220 	requested_port = port;
1221 
1222 	if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ)
1223 		bind_to_req_port_only = B_FALSE;
1224 	else			/* T_BIND_REQ and requested_port != 0 */
1225 		bind_to_req_port_only = B_TRUE;
1226 
1227 	if (requested_port == 0) {
1228 		/*
1229 		 * If the application passed in zero for the port number, it
1230 		 * doesn't care which port number we bind to. Get one in the
1231 		 * valid range.
1232 		 */
1233 		if (udp->udp_anon_priv_bind) {
1234 			port = udp_get_next_priv_port(udp);
1235 		} else {
1236 			port = udp_update_next_port(udp,
1237 			    udp_g_next_port_to_try, B_TRUE);
1238 		}
1239 	} else {
1240 		/*
1241 		 * If the port is in the well-known privileged range,
1242 		 * make sure the caller was privileged.
1243 		 */
1244 		int i;
1245 		boolean_t priv = B_FALSE;
1246 
1247 		if (port < udp_smallest_nonpriv_port) {
1248 			priv = B_TRUE;
1249 		} else {
1250 			for (i = 0; i < udp_g_num_epriv_ports; i++) {
1251 				if (port == udp_g_epriv_ports[i]) {
1252 					priv = B_TRUE;
1253 					break;
1254 				}
1255 			}
1256 		}
1257 
1258 		if (priv) {
1259 			cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);
1260 
1261 			if (secpolicy_net_privaddr(cr, port) != 0) {
1262 				udp_err_ack(q, mp, TACCES, 0);
1263 				return;
1264 			}
1265 		}
1266 	}
1267 
1268 	if (port == 0) {
1269 		udp_err_ack(q, mp, TNOADDR, 0);
1270 		return;
1271 	}
1272 
1273 	/*
1274 	 * Copy the source address into our udp structure. This address
1275 	 * may still be zero; if so, IP will fill in the correct address
1276 	 * each time an outbound packet is passed to it.
1277 	 */
1278 	if (udp->udp_family == AF_INET) {
1279 		ASSERT(sin != NULL);
1280 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
1281 		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
1282 		    udp->udp_ip_snd_options_len;
1283 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
1284 	} else {
1285 		ASSERT(sin6 != NULL);
1286 		v6src = sin6->sin6_addr;
1287 		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
1288 			udp->udp_ipversion = IPV4_VERSION;
1289 			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
1290 			    UDPH_SIZE + udp->udp_ip_snd_options_len;
1291 		} else {
1292 			udp->udp_ipversion = IPV6_VERSION;
1293 			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
1294 		}
1295 	}
1296 
1297 	/*
1298 	 * If udp_reuseaddr is not set, then we have to make sure that
1299 	 * the IP address and port number the application requested
1300 	 * (or we selected for the application) is not being used by
1301 	 * another stream.  If another stream is already using the
1302 	 * requested IP address and port, the behavior depends on
1303 	 * "bind_to_req_port_only". If set the bind fails; otherwise we
1304 	 * search for any an unused port to bind to the the stream.
1305 	 *
1306 	 * As per the BSD semantics, as modified by the Deering multicast
1307 	 * changes, if udp_reuseaddr is set, then we allow multiple binds
1308 	 * to the same port independent of the local IP address.
1309 	 *
1310 	 * This is slightly different than in SunOS 4.X which did not
1311 	 * support IP multicast. Note that the change implemented by the
1312 	 * Deering multicast code effects all binds - not only binding
1313 	 * to IP multicast addresses.
1314 	 *
1315 	 * Note that when binding to port zero we ignore SO_REUSEADDR in
1316 	 * order to guarantee a unique port.
1317 	 */
1318 
1319 	count = 0;
1320 	if (udp->udp_anon_priv_bind) {
1321 		/* loopmax = (IPPORT_RESERVED-1) - udp_min_anonpriv_port + 1 */
1322 		loopmax = IPPORT_RESERVED - udp_min_anonpriv_port;
1323 	} else {
1324 		loopmax = udp_largest_anon_port - udp_smallest_anon_port + 1;
1325 	}
1326 
1327 	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
1328 	zoneid = connp->conn_zoneid;
1329 
1330 	for (;;) {
1331 		udp_t		*udp1;
1332 		boolean_t	found_exclbind = B_FALSE;
1333 
1334 		/*
1335 		 * Walk through the list of udp streams bound to
1336 		 * requested port with the same IP address.
1337 		 */
1338 		lport = htons(port);
1339 		udpf = &udp_bind_fanout[UDP_BIND_HASH(lport)];
1340 		mutex_enter(&udpf->uf_lock);
1341 		for (udp1 = udpf->uf_udp; udp1 != NULL;
1342 		    udp1 = udp1->udp_bind_hash) {
1343 			if (lport != udp1->udp_port)
1344 				continue;
1345 
1346 			/*
1347 			 * On a labeled system, we must treat bindings to ports
1348 			 * on shared IP addresses by sockets with MAC exemption
1349 			 * privilege as being in all zones, as there's
1350 			 * otherwise no way to identify the right receiver.
1351 			 */
1352 			if (zoneid != udp1->udp_connp->conn_zoneid &&
1353 			    !udp->udp_mac_exempt && !udp1->udp_mac_exempt)
1354 				continue;
1355 
1356 			/*
1357 			 * If UDP_EXCLBIND is set for either the bound or
1358 			 * binding endpoint, the semantics of bind
1359 			 * is changed according to the following chart.
1360 			 *
1361 			 * spec = specified address (v4 or v6)
1362 			 * unspec = unspecified address (v4 or v6)
1363 			 * A = specified addresses are different for endpoints
1364 			 *
1365 			 * bound	bind to		allowed?
1366 			 * -------------------------------------
1367 			 * unspec	unspec		no
1368 			 * unspec	spec		no
1369 			 * spec		unspec		no
1370 			 * spec		spec		yes if A
1371 			 *
1372 			 * For labeled systems, SO_MAC_EXEMPT behaves the same
1373 			 * as UDP_EXCLBIND, except that zoneid is ignored.
1374 			 */
1375 			if (udp1->udp_exclbind || udp->udp_exclbind ||
1376 			    udp1->udp_mac_exempt || udp->udp_mac_exempt) {
1377 				if (V6_OR_V4_INADDR_ANY(
1378 				    udp1->udp_bound_v6src) ||
1379 				    is_inaddr_any ||
1380 				    IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
1381 				    &v6src)) {
1382 					found_exclbind = B_TRUE;
1383 					break;
1384 				}
1385 				continue;
1386 			}
1387 
1388 			/*
1389 			 * Check ipversion to allow IPv4 and IPv6 sockets to
1390 			 * have disjoint port number spaces.
1391 			 */
1392 			if (udp->udp_ipversion != udp1->udp_ipversion) {
1393 
1394 				/*
1395 				 * On the first time through the loop, if the
1396 				 * the user intentionally specified a
1397 				 * particular port number, then ignore any
1398 				 * bindings of the other protocol that may
1399 				 * conflict. This allows the user to bind IPv6
1400 				 * alone and get both v4 and v6, or bind both
1401 				 * both and get each seperately. On subsequent
1402 				 * times through the loop, we're checking a
1403 				 * port that we chose (not the user) and thus
1404 				 * we do not allow casual duplicate bindings.
1405 				 */
1406 				if (count == 0 && requested_port != 0)
1407 					continue;
1408 			}
1409 
1410 			/*
1411 			 * No difference depending on SO_REUSEADDR.
1412 			 *
1413 			 * If existing port is bound to a
1414 			 * non-wildcard IP address and
1415 			 * the requesting stream is bound to
1416 			 * a distinct different IP addresses
1417 			 * (non-wildcard, also), keep going.
1418 			 */
1419 			if (!is_inaddr_any &&
1420 			    !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
1421 			    !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
1422 			    &v6src)) {
1423 				continue;
1424 			}
1425 			break;
1426 		}
1427 
1428 		if (!found_exclbind &&
1429 		    (udp->udp_reuseaddr && requested_port != 0)) {
1430 			break;
1431 		}
1432 
1433 		if (udp1 == NULL) {
1434 			/*
1435 			 * No other stream has this IP address
1436 			 * and port number. We can use it.
1437 			 */
1438 			break;
1439 		}
1440 		mutex_exit(&udpf->uf_lock);
1441 		if (bind_to_req_port_only) {
1442 			/*
1443 			 * We get here only when requested port
1444 			 * is bound (and only first  of the for()
1445 			 * loop iteration).
1446 			 *
1447 			 * The semantics of this bind request
1448 			 * require it to fail so we return from
1449 			 * the routine (and exit the loop).
1450 			 *
1451 			 */
1452 			udp_err_ack(q, mp, TADDRBUSY, 0);
1453 			return;
1454 		}
1455 
1456 		if (udp->udp_anon_priv_bind) {
1457 			port = udp_get_next_priv_port(udp);
1458 		} else {
1459 			if ((count == 0) && (requested_port != 0)) {
1460 				/*
1461 				 * If the application wants us to find
1462 				 * a port, get one to start with. Set
1463 				 * requested_port to 0, so that we will
1464 				 * update udp_g_next_port_to_try below.
1465 				 */
1466 				port = udp_update_next_port(udp,
1467 				    udp_g_next_port_to_try, B_TRUE);
1468 				requested_port = 0;
1469 			} else {
1470 				port = udp_update_next_port(udp, port + 1,
1471 				    B_FALSE);
1472 			}
1473 		}
1474 
1475 		if (port == 0 || ++count >= loopmax) {
1476 			/*
1477 			 * We've tried every possible port number and
1478 			 * there are none available, so send an error
1479 			 * to the user.
1480 			 */
1481 			udp_err_ack(q, mp, TNOADDR, 0);
1482 			return;
1483 		}
1484 	}
1485 
1486 	/*
1487 	 * Copy the source address into our udp structure.  This address
1488 	 * may still be zero; if so, ip will fill in the correct address
1489 	 * each time an outbound packet is passed to it.
1490 	 * If we are binding to a broadcast or multicast address udp_rput
1491 	 * will clear the source address when it receives the T_BIND_ACK.
1492 	 */
1493 	udp->udp_v6src = udp->udp_bound_v6src = v6src;
1494 	udp->udp_port = lport;
1495 	/*
1496 	 * Now reset the the next anonymous port if the application requested
1497 	 * an anonymous port, or we handed out the next anonymous port.
1498 	 */
1499 	if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
1500 		udp_g_next_port_to_try = port + 1;
1501 	}
1502 
1503 	/* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */
1504 	if (udp->udp_family == AF_INET) {
1505 		sin->sin_port = udp->udp_port;
1506 	} else {
1507 		int error;
1508 
1509 		sin6->sin6_port = udp->udp_port;
1510 		/* Rebuild the header template */
1511 		error = udp_build_hdrs(q, udp);
1512 		if (error != 0) {
1513 			mutex_exit(&udpf->uf_lock);
1514 			udp_err_ack(q, mp, TSYSERR, error);
1515 			return;
1516 		}
1517 	}
1518 	udp->udp_state = TS_IDLE;
1519 	udp_bind_hash_insert(udpf, udp);
1520 	mutex_exit(&udpf->uf_lock);
1521 
1522 	if (cl_inet_bind) {
1523 		/*
1524 		 * Running in cluster mode - register bind information
1525 		 */
1526 		if (udp->udp_ipversion == IPV4_VERSION) {
1527 			(*cl_inet_bind)(IPPROTO_UDP, AF_INET,
1528 			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
1529 			    (in_port_t)udp->udp_port);
1530 		} else {
1531 			(*cl_inet_bind)(IPPROTO_UDP, AF_INET6,
1532 			    (uint8_t *)&(udp->udp_v6src),
1533 			    (in_port_t)udp->udp_port);
1534 		}
1535 
1536 	}
1537 
1538 	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
1539 	if (is_system_labeled() && (!connp->conn_anon_port ||
1540 	    connp->conn_anon_mlp)) {
1541 		uint16_t mlpport;
1542 		cred_t *cr = connp->conn_cred;
1543 		zone_t *zone;
1544 
1545 		connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
1546 		    mlptSingle;
1547 		addrtype = tsol_mlp_addr_type(zoneid, IPV6_VERSION, &v6src);
1548 		if (addrtype == mlptSingle) {
1549 			udp_err_ack(q, mp, TNOADDR, 0);
1550 			connp->conn_anon_port = B_FALSE;
1551 			connp->conn_mlp_type = mlptSingle;
1552 			return;
1553 		}
1554 		mlpport = connp->conn_anon_port ? PMAPPORT : port;
1555 		zone = crgetzone(cr);
1556 		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
1557 		    addrtype);
1558 		if (mlptype != mlptSingle &&
1559 		    (connp->conn_mlp_type == mlptSingle ||
1560 		    secpolicy_net_bindmlp(cr) != 0)) {
1561 			if (udp->udp_debug) {
1562 				(void) strlog(UDP_MOD_ID, 0, 1,
1563 				    SL_ERROR|SL_TRACE,
1564 				    "udp_bind: no priv for multilevel port %d",
1565 				    mlpport);
1566 			}
1567 			udp_err_ack(q, mp, TACCES, 0);
1568 			connp->conn_anon_port = B_FALSE;
1569 			connp->conn_mlp_type = mlptSingle;
1570 			return;
1571 		}
1572 
1573 		/*
1574 		 * If we're specifically binding a shared IP address and the
1575 		 * port is MLP on shared addresses, then check to see if this
1576 		 * zone actually owns the MLP.  Reject if not.
1577 		 */
1578 		if (mlptype == mlptShared && addrtype == mlptShared) {
1579 			zoneid_t mlpzone;
1580 
1581 			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
1582 			    htons(mlpport));
1583 			if (connp->conn_zoneid != mlpzone) {
1584 				if (udp->udp_debug) {
1585 					(void) strlog(UDP_MOD_ID, 0, 1,
1586 					    SL_ERROR|SL_TRACE,
1587 					    "udp_bind: attempt to bind port "
1588 					    "%d on shared addr in zone %d "
1589 					    "(should be %d)",
1590 					    mlpport, connp->conn_zoneid,
1591 					    mlpzone);
1592 				}
1593 				udp_err_ack(q, mp, TACCES, 0);
1594 				connp->conn_anon_port = B_FALSE;
1595 				connp->conn_mlp_type = mlptSingle;
1596 				return;
1597 			}
1598 		}
1599 		if (connp->conn_anon_port) {
1600 			int error;
1601 
1602 			error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
1603 			    port, B_TRUE);
1604 			if (error != 0) {
1605 				if (udp->udp_debug) {
1606 					(void) strlog(UDP_MOD_ID, 0, 1,
1607 					    SL_ERROR|SL_TRACE,
1608 					    "udp_bind: cannot establish anon "
1609 					    "MLP for port %d", port);
1610 				}
1611 				udp_err_ack(q, mp, TACCES, 0);
1612 				connp->conn_anon_port = B_FALSE;
1613 				connp->conn_mlp_type = mlptSingle;
1614 				return;
1615 			}
1616 		}
1617 		connp->conn_mlp_type = mlptype;
1618 	}
1619 
1620 	/* Pass the protocol number in the message following the address. */
1621 	*mp->b_wptr++ = IPPROTO_UDP;
1622 	if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
1623 		/*
1624 		 * Append a request for an IRE if udp_v6src not
1625 		 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address).
1626 		 */
1627 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1628 		if (!mp->b_cont) {
1629 			udp_err_ack(q, mp, TSYSERR, ENOMEM);
1630 			return;
1631 		}
1632 		mp->b_cont->b_wptr += sizeof (ire_t);
1633 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1634 	}
1635 	if (udp->udp_family == AF_INET6)
1636 		mp = ip_bind_v6(q, mp, connp, NULL);
1637 	else
1638 		mp = ip_bind_v4(q, mp, connp);
1639 
1640 	if (mp != NULL)
1641 		udp_rput_other(_RD(q), mp);
1642 	else
1643 		CONN_INC_REF(connp);
1644 }
1645 
1646 
1647 void
1648 udp_resume_bind(conn_t *connp, mblk_t *mp)
1649 {
1650 	udp_enter(connp, mp, udp_resume_bind_cb, SQTAG_BIND_RETRY);
1651 }
1652 
1653 /*
1654  * This is called from ip_wput_nondata to resume a deferred UDP bind.
1655  */
1656 /* ARGSUSED */
1657 static void
1658 udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2)
1659 {
1660 	conn_t *connp = arg;
1661 
1662 	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
1663 
1664 	udp_rput_other(connp->conn_rq, mp);
1665 
1666 	CONN_OPER_PENDING_DONE(connp);
1667 	udp_exit(connp);
1668 }
1669 
1670 /*
1671  * This routine handles each T_CONN_REQ message passed to udp.  It
1672  * associates a default destination address with the stream.
1673  *
1674  * This routine sends down a T_BIND_REQ to IP with the following mblks:
1675  *	T_BIND_REQ	- specifying local and remote address/port
1676  *	IRE_DB_REQ_TYPE	- to get an IRE back containing ire_type and src
1677  *	T_OK_ACK	- for the T_CONN_REQ
1678  *	T_CONN_CON	- to keep the TPI user happy
1679  *
1680  * The connect completes in udp_rput.
1681  * When a T_BIND_ACK is received information is extracted from the IRE
1682  * and the two appended messages are sent to the TPI user.
1683  * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert
1684  * it to an error ack for the appropriate primitive.
1685  */
1686 static void
1687 udp_connect(queue_t *q, mblk_t *mp)
1688 {
1689 	sin6_t	*sin6;
1690 	sin_t	*sin;
1691 	struct T_conn_req	*tcr;
1692 	in6_addr_t v6dst;
1693 	ipaddr_t v4dst;
1694 	uint16_t dstport;
1695 	uint32_t flowinfo;
1696 	mblk_t	*mp1, *mp2;
1697 	udp_fanout_t	*udpf;
1698 	udp_t	*udp, *udp1;
1699 
1700 	udp = Q_TO_UDP(q);
1701 
1702 	tcr = (struct T_conn_req *)mp->b_rptr;
1703 
1704 	/* A bit of sanity checking */
1705 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
1706 		udp_err_ack(q, mp, TPROTO, 0);
1707 		return;
1708 	}
1709 	/*
1710 	 * This UDP must have bound to a port already before doing
1711 	 * a connect.
1712 	 */
1713 	if (udp->udp_state == TS_UNBND) {
1714 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
1715 		    "udp_connect: bad state, %u", udp->udp_state);
1716 		udp_err_ack(q, mp, TOUTSTATE, 0);
1717 		return;
1718 	}
1719 	ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);
1720 
1721 	udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)];
1722 
1723 	if (udp->udp_state == TS_DATA_XFER) {
1724 		/* Already connected - clear out state */
1725 		mutex_enter(&udpf->uf_lock);
1726 		udp->udp_v6src = udp->udp_bound_v6src;
1727 		udp->udp_state = TS_IDLE;
1728 		mutex_exit(&udpf->uf_lock);
1729 	}
1730 
1731 	if (tcr->OPT_length != 0) {
1732 		udp_err_ack(q, mp, TBADOPT, 0);
1733 		return;
1734 	}
1735 
1736 	/*
1737 	 * Determine packet type based on type of address passed in
1738 	 * the request should contain an IPv4 or IPv6 address.
1739 	 * Make sure that address family matches the type of
1740 	 * family of the the address passed down
1741 	 */
1742 	switch (tcr->DEST_length) {
1743 	default:
1744 		udp_err_ack(q, mp, TBADADDR, 0);
1745 		return;
1746 
1747 	case sizeof (sin_t):
1748 		sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
1749 		    sizeof (sin_t));
1750 		if (sin == NULL || !OK_32PTR((char *)sin)) {
1751 			udp_err_ack(q, mp, TSYSERR, EINVAL);
1752 			return;
1753 		}
1754 		if (udp->udp_family != AF_INET ||
1755 		    sin->sin_family != AF_INET) {
1756 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
1757 			return;
1758 		}
1759 		v4dst = sin->sin_addr.s_addr;
1760 		dstport = sin->sin_port;
1761 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
1762 		ASSERT(udp->udp_ipversion == IPV4_VERSION);
1763 		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
1764 		    udp->udp_ip_snd_options_len;
1765 		break;
1766 
1767 	case sizeof (sin6_t):
1768 		sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset,
1769 		    sizeof (sin6_t));
1770 		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
1771 			udp_err_ack(q, mp, TSYSERR, EINVAL);
1772 			return;
1773 		}
1774 		if (udp->udp_family != AF_INET6 ||
1775 		    sin6->sin6_family != AF_INET6) {
1776 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
1777 			return;
1778 		}
1779 		v6dst = sin6->sin6_addr;
1780 		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
1781 			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
1782 			udp->udp_ipversion = IPV4_VERSION;
1783 			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
1784 			    UDPH_SIZE + udp->udp_ip_snd_options_len;
1785 			flowinfo = 0;
1786 		} else {
1787 			udp->udp_ipversion = IPV6_VERSION;
1788 			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
1789 			flowinfo = sin6->sin6_flowinfo;
1790 		}
1791 		dstport = sin6->sin6_port;
1792 		break;
1793 	}
1794 	if (dstport == 0) {
1795 		udp_err_ack(q, mp, TBADADDR, 0);
1796 		return;
1797 	}
1798 
1799 	/*
1800 	 * Create a default IP header with no IP options.
1801 	 */
1802 	udp->udp_dstport = dstport;
1803 	if (udp->udp_ipversion == IPV4_VERSION) {
1804 		/*
1805 		 * Interpret a zero destination to mean loopback.
1806 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
1807 		 * generate the T_CONN_CON.
1808 		 */
1809 		if (v4dst == INADDR_ANY) {
1810 			v4dst = htonl(INADDR_LOOPBACK);
1811 			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
1812 			if (udp->udp_family == AF_INET) {
1813 				sin->sin_addr.s_addr = v4dst;
1814 			} else {
1815 				sin6->sin6_addr = v6dst;
1816 			}
1817 		}
1818 		udp->udp_v6dst = v6dst;
1819 		udp->udp_flowinfo = 0;
1820 
1821 		/*
1822 		 * If the destination address is multicast and
1823 		 * an outgoing multicast interface has been set,
1824 		 * use the address of that interface as our
1825 		 * source address if no source address has been set.
1826 		 */
1827 		if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY &&
1828 		    CLASSD(v4dst) &&
1829 		    udp->udp_multicast_if_addr != INADDR_ANY) {
1830 			IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr,
1831 			    &udp->udp_v6src);
1832 		}
1833 	} else {
1834 		ASSERT(udp->udp_ipversion == IPV6_VERSION);
1835 		/*
1836 		 * Interpret a zero destination to mean loopback.
1837 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
1838 		 * generate the T_CONN_CON.
1839 		 */
1840 		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
1841 			v6dst = ipv6_loopback;
1842 			sin6->sin6_addr = v6dst;
1843 		}
1844 		udp->udp_v6dst = v6dst;
1845 		udp->udp_flowinfo = flowinfo;
1846 		/*
1847 		 * If the destination address is multicast and
1848 		 * an outgoing multicast interface has been set,
1849 		 * then the ip bind logic will pick the correct source
1850 		 * address (i.e. matching the outgoing multicast interface).
1851 		 */
1852 	}
1853 
1854 	/*
1855 	 * Verify that the src/port/dst/port is unique for all
1856 	 * connections in TS_DATA_XFER
1857 	 */
1858 	mutex_enter(&udpf->uf_lock);
1859 	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
1860 		if (udp1->udp_state != TS_DATA_XFER)
1861 			continue;
1862 		if (udp->udp_port != udp1->udp_port ||
1863 		    udp->udp_ipversion != udp1->udp_ipversion ||
1864 		    dstport != udp1->udp_dstport ||
1865 		    !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) ||
1866 		    !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst))
1867 			continue;
1868 		mutex_exit(&udpf->uf_lock);
1869 		udp_err_ack(q, mp, TBADADDR, 0);
1870 		return;
1871 	}
1872 	udp->udp_state = TS_DATA_XFER;
1873 	mutex_exit(&udpf->uf_lock);
1874 
1875 	/*
1876 	 * Send down bind to IP to verify that there is a route
1877 	 * and to determine the source address.
1878 	 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput.
1879 	 */
1880 	if (udp->udp_family == AF_INET)
1881 		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t));
1882 	else
1883 		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t));
1884 	if (mp1 == NULL) {
1885 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
1886 bind_failed:
1887 		mutex_enter(&udpf->uf_lock);
1888 		udp->udp_state = TS_IDLE;
1889 		mutex_exit(&udpf->uf_lock);
1890 		return;
1891 	}
1892 
1893 	/*
1894 	 * We also have to send a connection confirmation to
1895 	 * keep TLI happy. Prepare it for udp_rput.
1896 	 */
1897 	if (udp->udp_family == AF_INET)
1898 		mp2 = mi_tpi_conn_con(NULL, (char *)sin,
1899 		    sizeof (*sin), NULL, 0);
1900 	else
1901 		mp2 = mi_tpi_conn_con(NULL, (char *)sin6,
1902 		    sizeof (*sin6), NULL, 0);
1903 	if (mp2 == NULL) {
1904 		freemsg(mp1);
1905 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
1906 		goto bind_failed;
1907 	}
1908 
1909 	mp = mi_tpi_ok_ack_alloc(mp);
1910 	if (mp == NULL) {
1911 		/* Unable to reuse the T_CONN_REQ for the ack. */
1912 		freemsg(mp2);
1913 		udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
1914 		goto bind_failed;
1915 	}
1916 
1917 	/* Hang onto the T_OK_ACK and T_CONN_CON for later. */
1918 	linkb(mp1, mp);
1919 	linkb(mp1, mp2);
1920 
1921 	mblk_setcred(mp1, udp->udp_connp->conn_cred);
1922 	if (udp->udp_family == AF_INET)
1923 		mp1 = ip_bind_v4(q, mp1, udp->udp_connp);
1924 	else
1925 		mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL);
1926 
1927 	if (mp1 != NULL)
1928 		udp_rput_other(_RD(q), mp1);
1929 	else
1930 		CONN_INC_REF(udp->udp_connp);
1931 }
1932 
1933 static int
1934 udp_close(queue_t *q)
1935 {
1936 	conn_t	*connp = Q_TO_CONN(UDP_WR(q));
1937 	udp_t	*udp;
1938 	queue_t	*ip_rq = RD(UDP_WR(q));
1939 
1940 	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
1941 	udp = connp->conn_udp;
1942 
1943 	ip_quiesce_conn(connp);
1944 	/*
1945 	 * Disable read-side synchronous stream
1946 	 * interface and drain any queued data.
1947 	 */
1948 	udp_rcv_drain(q, udp, B_TRUE);
1949 	ASSERT(!udp->udp_direct_sockfs);
1950 
1951 	qprocsoff(q);
1952 
1953 	/* restore IP module's high and low water marks to default values */
1954 	ip_rq->q_hiwat = ip_rq->q_qinfo->qi_minfo->mi_hiwat;
1955 	WR(ip_rq)->q_hiwat = WR(ip_rq)->q_qinfo->qi_minfo->mi_hiwat;
1956 	WR(ip_rq)->q_lowat = WR(ip_rq)->q_qinfo->qi_minfo->mi_lowat;
1957 
1958 	ASSERT(udp->udp_rcv_cnt == 0);
1959 	ASSERT(udp->udp_rcv_msgcnt == 0);
1960 	ASSERT(udp->udp_rcv_list_head == NULL);
1961 	ASSERT(udp->udp_rcv_list_tail == NULL);
1962 
1963 	udp_close_free(connp);
1964 
1965 	/*
1966 	 * Restore connp as an IP endpoint.
1967 	 * Locking required to prevent a race with udp_snmp_get()/
1968 	 * ipcl_get_next_conn(), which selects conn_t which are
1969 	 * IPCL_UDP and not CONN_CONDEMNED.
1970 	 */
1971 	mutex_enter(&connp->conn_lock);
1972 	connp->conn_flags &= ~IPCL_UDP;
1973 	connp->conn_state_flags &=
1974 	    ~(CONN_CLOSING | CONN_CONDEMNED | CONN_QUIESCED);
1975 	connp->conn_ulp_labeled = B_FALSE;
1976 	mutex_exit(&connp->conn_lock);
1977 
1978 	return (0);
1979 }
1980 
1981 /*
1982  * Called in the close path from IP (ip_quiesce_conn) to quiesce the conn
1983  */
1984 void
1985 udp_quiesce_conn(conn_t *connp)
1986 {
1987 	udp_t	*udp = connp->conn_udp;
1988 
1989 	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
1990 		/*
1991 		 * Running in cluster mode - register unbind information
1992 		 */
1993 		if (udp->udp_ipversion == IPV4_VERSION) {
1994 			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
1995 			    (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))),
1996 			    (in_port_t)udp->udp_port);
1997 		} else {
1998 			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET6,
1999 			    (uint8_t *)(&(udp->udp_v6src)),
2000 			    (in_port_t)udp->udp_port);
2001 		}
2002 	}
2003 
2004 	udp_bind_hash_remove(udp, B_FALSE);
2005 
2006 	mutex_enter(&connp->conn_lock);
2007 	while (udp->udp_reader_count != 0 || udp->udp_squeue_count != 0 ||
2008 	    udp->udp_mode != UDP_MT_HOT) {
2009 		cv_wait(&connp->conn_cv, &connp->conn_lock);
2010 	}
2011 	mutex_exit(&connp->conn_lock);
2012 }
2013 
2014 void
2015 udp_close_free(conn_t *connp)
2016 {
2017 	udp_t *udp = connp->conn_udp;
2018 
2019 	/* If there are any options associated with the stream, free them. */
2020 	if (udp->udp_ip_snd_options) {
2021 		mi_free((char *)udp->udp_ip_snd_options);
2022 		udp->udp_ip_snd_options = NULL;
2023 	}
2024 
2025 	if (udp->udp_ip_rcv_options) {
2026 		mi_free((char *)udp->udp_ip_rcv_options);
2027 		udp->udp_ip_rcv_options = NULL;
2028 	}
2029 
2030 	/* Free memory associated with sticky options */
2031 	if (udp->udp_sticky_hdrs_len != 0) {
2032 		kmem_free(udp->udp_sticky_hdrs,
2033 		    udp->udp_sticky_hdrs_len);
2034 		udp->udp_sticky_hdrs = NULL;
2035 		udp->udp_sticky_hdrs_len = 0;
2036 	}
2037 
2038 	ip6_pkt_free(&udp->udp_sticky_ipp);
2039 
2040 	udp->udp_connp = NULL;
2041 	connp->conn_udp = NULL;
2042 	kmem_cache_free(udp_cache, udp);
2043 }
2044 
2045 /*
2046  * This routine handles each T_DISCON_REQ message passed to udp
2047  * as an indicating that UDP is no longer connected. This results
2048  * in sending a T_BIND_REQ to IP to restore the binding to just
2049  * the local address/port.
2050  *
2051  * This routine sends down a T_BIND_REQ to IP with the following mblks:
2052  *	T_BIND_REQ	- specifying just the local address/port
2053  *	T_OK_ACK	- for the T_DISCON_REQ
2054  *
2055  * The disconnect completes in udp_rput.
2056  * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user.
2057  * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert
2058  * it to an error ack for the appropriate primitive.
2059  */
2060 static void
2061 udp_disconnect(queue_t *q, mblk_t *mp)
2062 {
2063 	udp_t	*udp = Q_TO_UDP(q);
2064 	mblk_t	*mp1;
2065 	udp_fanout_t *udpf;
2066 
2067 	if (udp->udp_state != TS_DATA_XFER) {
2068 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
2069 		    "udp_disconnect: bad state, %u", udp->udp_state);
2070 		udp_err_ack(q, mp, TOUTSTATE, 0);
2071 		return;
2072 	}
2073 	udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)];
2074 	mutex_enter(&udpf->uf_lock);
2075 	udp->udp_v6src = udp->udp_bound_v6src;
2076 	udp->udp_state = TS_IDLE;
2077 	mutex_exit(&udpf->uf_lock);
2078 
2079 	/*
2080 	 * Send down bind to IP to remove the full binding and revert
2081 	 * to the local address binding.
2082 	 */
2083 	if (udp->udp_family == AF_INET)
2084 		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t));
2085 	else
2086 		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t));
2087 	if (mp1 == NULL) {
2088 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
2089 		return;
2090 	}
2091 	mp = mi_tpi_ok_ack_alloc(mp);
2092 	if (mp == NULL) {
2093 		/* Unable to reuse the T_DISCON_REQ for the ack. */
2094 		udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM);
2095 		return;
2096 	}
2097 
2098 	if (udp->udp_family == AF_INET6) {
2099 		int error;
2100 
2101 		/* Rebuild the header template */
2102 		error = udp_build_hdrs(q, udp);
2103 		if (error != 0) {
2104 			udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error);
2105 			freemsg(mp1);
2106 			return;
2107 		}
2108 	}
2109 	mutex_enter(&udpf->uf_lock);
2110 	udp->udp_discon_pending = 1;
2111 	mutex_exit(&udpf->uf_lock);
2112 
2113 	/* Append the T_OK_ACK to the T_BIND_REQ for udp_rput */
2114 	linkb(mp1, mp);
2115 
2116 	if (udp->udp_family == AF_INET6)
2117 		mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL);
2118 	else
2119 		mp1 = ip_bind_v4(q, mp1, udp->udp_connp);
2120 
2121 	if (mp1 != NULL)
2122 		udp_rput_other(_RD(q), mp1);
2123 	else
2124 		CONN_INC_REF(udp->udp_connp);
2125 }
2126 
2127 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
2128 static void
2129 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
2130 {
2131 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
2132 		putnext(UDP_RD(q), mp);
2133 }
2134 
2135 /* Shorthand to generate and send TPI error acks to our client */
2136 static void
2137 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error,
2138     int sys_error)
2139 {
2140 	struct T_error_ack	*teackp;
2141 
2142 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2143 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
2144 		teackp = (struct T_error_ack *)mp->b_rptr;
2145 		teackp->ERROR_prim = primitive;
2146 		teackp->TLI_error = t_error;
2147 		teackp->UNIX_error = sys_error;
2148 		putnext(UDP_RD(q), mp);
2149 	}
2150 }
2151 
2152 /*ARGSUSED*/
2153 static int
2154 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
2155 {
2156 	int i;
2157 
2158 	for (i = 0; i < udp_g_num_epriv_ports; i++) {
2159 		if (udp_g_epriv_ports[i] != 0)
2160 			(void) mi_mpprintf(mp, "%d ", udp_g_epriv_ports[i]);
2161 	}
2162 	return (0);
2163 }
2164 
2165 /* ARGSUSED */
2166 static int
2167 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
2168     cred_t *cr)
2169 {
2170 	long	new_value;
2171 	int	i;
2172 
2173 	/*
2174 	 * Fail the request if the new value does not lie within the
2175 	 * port number limits.
2176 	 */
2177 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
2178 	    new_value <= 0 || new_value >= 65536) {
2179 		return (EINVAL);
2180 	}
2181 
2182 	/* Check if the value is already in the list */
2183 	for (i = 0; i < udp_g_num_epriv_ports; i++) {
2184 		if (new_value == udp_g_epriv_ports[i]) {
2185 			return (EEXIST);
2186 		}
2187 	}
2188 	/* Find an empty slot */
2189 	for (i = 0; i < udp_g_num_epriv_ports; i++) {
2190 		if (udp_g_epriv_ports[i] == 0)
2191 			break;
2192 	}
2193 	if (i == udp_g_num_epriv_ports) {
2194 		return (EOVERFLOW);
2195 	}
2196 
2197 	/* Set the new value */
2198 	udp_g_epriv_ports[i] = (in_port_t)new_value;
2199 	return (0);
2200 }
2201 
2202 /* ARGSUSED */
2203 static int
2204 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
2205     cred_t *cr)
2206 {
2207 	long	new_value;
2208 	int	i;
2209 
2210 	/*
2211 	 * Fail the request if the new value does not lie within the
2212 	 * port number limits.
2213 	 */
2214 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
2215 	    new_value <= 0 || new_value >= 65536) {
2216 		return (EINVAL);
2217 	}
2218 
2219 	/* Check that the value is already in the list */
2220 	for (i = 0; i < udp_g_num_epriv_ports; i++) {
2221 		if (udp_g_epriv_ports[i] == new_value)
2222 			break;
2223 	}
2224 	if (i == udp_g_num_epriv_ports) {
2225 		return (ESRCH);
2226 	}
2227 
2228 	/* Clear the value */
2229 	udp_g_epriv_ports[i] = 0;
2230 	return (0);
2231 }
2232 
2233 /* At minimum we need 4 bytes of UDP header */
2234 #define	ICMP_MIN_UDP_HDR	4
2235 
2236 /*
2237  * udp_icmp_error is called by udp_rput to process ICMP msgs. passed up by IP.
2238  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
2239  * Assumes that IP has pulled up everything up to and including the ICMP header.
2240  * An M_CTL could potentially come here from some other module (i.e. if UDP
2241  * is pushed on some module other than IP). Thus, if we find that the M_CTL
2242  * does not have enough ICMP information , following STREAMS conventions,
2243  * we send it upstream assuming it is an M_CTL we don't understand.
2244  */
2245 static void
2246 udp_icmp_error(queue_t *q, mblk_t *mp)
2247 {
2248 	icmph_t *icmph;
2249 	ipha_t	*ipha;
2250 	int	iph_hdr_length;
2251 	udpha_t	*udpha;
2252 	sin_t	sin;
2253 	sin6_t	sin6;
2254 	mblk_t	*mp1;
2255 	int	error = 0;
2256 	size_t	mp_size = MBLKL(mp);
2257 	udp_t	*udp = Q_TO_UDP(q);
2258 
2259 	/*
2260 	 * Assume IP provides aligned packets - otherwise toss
2261 	 */
2262 	if (!OK_32PTR(mp->b_rptr)) {
2263 		freemsg(mp);
2264 		return;
2265 	}
2266 
2267 	/*
2268 	 * Verify that we have a complete IP header and the application has
2269 	 * asked for errors. If not, send it upstream.
2270 	 */
2271 	if (!udp->udp_dgram_errind || mp_size < sizeof (ipha_t)) {
2272 noticmpv4:
2273 		putnext(UDP_RD(q), mp);
2274 		return;
2275 	}
2276 
2277 	ipha = (ipha_t *)mp->b_rptr;
2278 	/*
2279 	 * Verify IP version. Anything other than IPv4 or IPv6 packet is sent
2280 	 * upstream. ICMPv6  is handled in udp_icmp_error_ipv6.
2281 	 */
2282 	switch (IPH_HDR_VERSION(ipha)) {
2283 	case IPV6_VERSION:
2284 		udp_icmp_error_ipv6(q, mp);
2285 		return;
2286 	case IPV4_VERSION:
2287 		break;
2288 	default:
2289 		goto noticmpv4;
2290 	}
2291 
2292 	/* Skip past the outer IP and ICMP headers */
2293 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
2294 	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
2295 	/*
2296 	 * If we don't have the correct outer IP header length or if the ULP
2297 	 * is not IPPROTO_ICMP or if we don't have a complete inner IP header
2298 	 * send the packet upstream.
2299 	 */
2300 	if (iph_hdr_length < sizeof (ipha_t) ||
2301 	    ipha->ipha_protocol != IPPROTO_ICMP ||
2302 	    (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) {
2303 		goto noticmpv4;
2304 	}
2305 	ipha = (ipha_t *)&icmph[1];
2306 
2307 	/* Skip past the inner IP and find the ULP header */
2308 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
2309 	udpha = (udpha_t *)((char *)ipha + iph_hdr_length);
2310 	/*
2311 	 * If we don't have the correct inner IP header length or if the ULP
2312 	 * is not IPPROTO_UDP or if we don't have at least ICMP_MIN_UDP_HDR
2313 	 * bytes of UDP header, send it upstream.
2314 	 */
2315 	if (iph_hdr_length < sizeof (ipha_t) ||
2316 	    ipha->ipha_protocol != IPPROTO_UDP ||
2317 	    (uchar_t *)udpha + ICMP_MIN_UDP_HDR > mp->b_wptr) {
2318 		goto noticmpv4;
2319 	}
2320 
2321 	switch (icmph->icmph_type) {
2322 	case ICMP_DEST_UNREACHABLE:
2323 		switch (icmph->icmph_code) {
2324 		case ICMP_FRAGMENTATION_NEEDED:
2325 			/*
2326 			 * IP has already adjusted the path MTU.
2327 			 * XXX Somehow pass MTU indication to application?
2328 			 */
2329 			break;
2330 		case ICMP_PORT_UNREACHABLE:
2331 		case ICMP_PROTOCOL_UNREACHABLE:
2332 			error = ECONNREFUSED;
2333 			break;
2334 		default:
2335 			/* Transient errors */
2336 			break;
2337 		}
2338 		break;
2339 	default:
2340 		/* Transient errors */
2341 		break;
2342 	}
2343 	if (error == 0) {
2344 		freemsg(mp);
2345 		return;
2346 	}
2347 
2348 	switch (udp->udp_family) {
2349 	case AF_INET:
2350 		sin = sin_null;
2351 		sin.sin_family = AF_INET;
2352 		sin.sin_addr.s_addr = ipha->ipha_dst;
2353 		sin.sin_port = udpha->uha_dst_port;
2354 		mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0,
2355 		    error);
2356 		break;
2357 	case AF_INET6:
2358 		sin6 = sin6_null;
2359 		sin6.sin6_family = AF_INET6;
2360 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
2361 		sin6.sin6_port = udpha->uha_dst_port;
2362 
2363 		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
2364 		    NULL, 0, error);
2365 		break;
2366 	}
2367 	if (mp1)
2368 		putnext(UDP_RD(q), mp1);
2369 	freemsg(mp);
2370 }
2371 
2372 /*
2373  * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
2374  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
2375  * Assumes that IP has pulled up all the extension headers as well as the
2376  * ICMPv6 header.
2377  * An M_CTL could potentially come here from some other module (i.e. if UDP
2378  * is pushed on some module other than IP). Thus, if we find that the M_CTL
2379  * does not have enough ICMP information , following STREAMS conventions,
2380  * we send it upstream assuming it is an M_CTL we don't understand. The reason
2381  * it might get here is if the non-ICMP M_CTL accidently has 6 in the version
2382  * field (when cast to ipha_t in udp_icmp_error).
2383  */
2384 static void
2385 udp_icmp_error_ipv6(queue_t *q, mblk_t *mp)
2386 {
2387 	icmp6_t		*icmp6;
2388 	ip6_t		*ip6h, *outer_ip6h;
2389 	uint16_t	hdr_length;
2390 	uint8_t		*nexthdrp;
2391 	udpha_t		*udpha;
2392 	sin6_t		sin6;
2393 	mblk_t		*mp1;
2394 	int		error = 0;
2395 	size_t		mp_size = MBLKL(mp);
2396 	udp_t		*udp = Q_TO_UDP(q);
2397 
2398 	/*
2399 	 * Verify that we have a complete IP header. If not, send it upstream.
2400 	 */
2401 	if (mp_size < sizeof (ip6_t)) {
2402 noticmpv6:
2403 		putnext(UDP_RD(q), mp);
2404 		return;
2405 	}
2406 
2407 	outer_ip6h = (ip6_t *)mp->b_rptr;
2408 	/*
2409 	 * Verify this is an ICMPV6 packet, else send it upstream
2410 	 */
2411 	if (outer_ip6h->ip6_nxt == IPPROTO_ICMPV6) {
2412 		hdr_length = IPV6_HDR_LEN;
2413 	} else if (!ip_hdr_length_nexthdr_v6(mp, outer_ip6h, &hdr_length,
2414 	    &nexthdrp) ||
2415 	    *nexthdrp != IPPROTO_ICMPV6) {
2416 		goto noticmpv6;
2417 	}
2418 	icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length];
2419 	ip6h = (ip6_t *)&icmp6[1];
2420 	/*
2421 	 * Verify we have a complete ICMP and inner IP header.
2422 	 */
2423 	if ((uchar_t *)&ip6h[1] > mp->b_wptr)
2424 		goto noticmpv6;
2425 
2426 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
2427 		goto noticmpv6;
2428 	udpha = (udpha_t *)((char *)ip6h + hdr_length);
2429 	/*
2430 	 * Validate inner header. If the ULP is not IPPROTO_UDP or if we don't
2431 	 * have at least ICMP_MIN_UDP_HDR bytes of  UDP header send the
2432 	 * packet upstream.
2433 	 */
2434 	if ((*nexthdrp != IPPROTO_UDP) ||
2435 	    ((uchar_t *)udpha + ICMP_MIN_UDP_HDR) > mp->b_wptr) {
2436 		goto noticmpv6;
2437 	}
2438 
2439 	switch (icmp6->icmp6_type) {
2440 	case ICMP6_DST_UNREACH:
2441 		switch (icmp6->icmp6_code) {
2442 		case ICMP6_DST_UNREACH_NOPORT:
2443 			error = ECONNREFUSED;
2444 			break;
2445 		case ICMP6_DST_UNREACH_ADMIN:
2446 		case ICMP6_DST_UNREACH_NOROUTE:
2447 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
2448 		case ICMP6_DST_UNREACH_ADDR:
2449 			/* Transient errors */
2450 			break;
2451 		default:
2452 			break;
2453 		}
2454 		break;
2455 	case ICMP6_PACKET_TOO_BIG: {
2456 		struct T_unitdata_ind	*tudi;
2457 		struct T_opthdr		*toh;
2458 		size_t			udi_size;
2459 		mblk_t			*newmp;
2460 		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
2461 		    sizeof (struct ip6_mtuinfo);
2462 		sin6_t			*sin6;
2463 		struct ip6_mtuinfo	*mtuinfo;
2464 
2465 		/*
2466 		 * If the application has requested to receive path mtu
2467 		 * information, send up an empty message containing an
2468 		 * IPV6_PATHMTU ancillary data item.
2469 		 */
2470 		if (!udp->udp_ipv6_recvpathmtu)
2471 			break;
2472 
2473 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
2474 		    opt_length;
2475 		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
2476 			BUMP_MIB(&udp_mib, udpInErrors);
2477 			break;
2478 		}
2479 
2480 		/*
2481 		 * newmp->b_cont is left to NULL on purpose.  This is an
2482 		 * empty message containing only ancillary data.
2483 		 */
2484 		newmp->b_datap->db_type = M_PROTO;
2485 		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
2486 		newmp->b_wptr = (uchar_t *)tudi + udi_size;
2487 		tudi->PRIM_type = T_UNITDATA_IND;
2488 		tudi->SRC_length = sizeof (sin6_t);
2489 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
2490 		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
2491 		tudi->OPT_length = opt_length;
2492 
2493 		sin6 = (sin6_t *)&tudi[1];
2494 		bzero(sin6, sizeof (sin6_t));
2495 		sin6->sin6_family = AF_INET6;
2496 		sin6->sin6_addr = udp->udp_v6dst;
2497 
2498 		toh = (struct T_opthdr *)&sin6[1];
2499 		toh->level = IPPROTO_IPV6;
2500 		toh->name = IPV6_PATHMTU;
2501 		toh->len = opt_length;
2502 		toh->status = 0;
2503 
2504 		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
2505 		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
2506 		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
2507 		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
2508 		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
2509 		/*
2510 		 * We've consumed everything we need from the original
2511 		 * message.  Free it, then send our empty message.
2512 		 */
2513 		freemsg(mp);
2514 		putnext(UDP_RD(q), newmp);
2515 		return;
2516 	}
2517 	case ICMP6_TIME_EXCEEDED:
2518 		/* Transient errors */
2519 		break;
2520 	case ICMP6_PARAM_PROB:
2521 		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
2522 		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
2523 		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
2524 		    (uchar_t *)nexthdrp) {
2525 			error = ECONNREFUSED;
2526 			break;
2527 		}
2528 		break;
2529 	}
2530 	if (error == 0) {
2531 		freemsg(mp);
2532 		return;
2533 	}
2534 
2535 	sin6 = sin6_null;
2536 	sin6.sin6_family = AF_INET6;
2537 	sin6.sin6_addr = ip6h->ip6_dst;
2538 	sin6.sin6_port = udpha->uha_dst_port;
2539 	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
2540 
2541 	mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0,
2542 	    error);
2543 	if (mp1)
2544 		putnext(UDP_RD(q), mp1);
2545 	freemsg(mp);
2546 }
2547 
2548 /*
2549  * This routine responds to T_ADDR_REQ messages.  It is called by udp_wput.
2550  * The local address is filled in if endpoint is bound. The remote address
2551  * is filled in if remote address has been precified ("connected endpoint")
2552  * (The concept of connected CLTS sockets is alien to published TPI
2553  *  but we support it anyway).
2554  */
2555 static void
2556 udp_addr_req(queue_t *q, mblk_t *mp)
2557 {
2558 	sin_t	*sin;
2559 	sin6_t	*sin6;
2560 	mblk_t	*ackmp;
2561 	struct T_addr_ack *taa;
2562 	udp_t	*udp = Q_TO_UDP(q);
2563 
2564 	/* Make it large enough for worst case */
2565 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
2566 	    2 * sizeof (sin6_t), 1);
2567 	if (ackmp == NULL) {
2568 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
2569 		return;
2570 	}
2571 	taa = (struct T_addr_ack *)ackmp->b_rptr;
2572 
2573 	bzero(taa, sizeof (struct T_addr_ack));
2574 	ackmp->b_wptr = (uchar_t *)&taa[1];
2575 
2576 	taa->PRIM_type = T_ADDR_ACK;
2577 	ackmp->b_datap->db_type = M_PCPROTO;
2578 	/*
2579 	 * Note: Following code assumes 32 bit alignment of basic
2580 	 * data structures like sin_t and struct T_addr_ack.
2581 	 */
2582 	if (udp->udp_state != TS_UNBND) {
2583 		/*
2584 		 * Fill in local address first
2585 		 */
2586 		taa->LOCADDR_offset = sizeof (*taa);
2587 		if (udp->udp_family == AF_INET) {
2588 			taa->LOCADDR_length = sizeof (sin_t);
2589 			sin = (sin_t *)&taa[1];
2590 			/* Fill zeroes and then initialize non-zero fields */
2591 			*sin = sin_null;
2592 			sin->sin_family = AF_INET;
2593 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
2594 			    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
2595 				IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src,
2596 				    sin->sin_addr.s_addr);
2597 			} else {
2598 				/*
2599 				 * INADDR_ANY
2600 				 * udp_v6src is not set, we might be bound to
2601 				 * broadcast/multicast. Use udp_bound_v6src as
2602 				 * local address instead (that could
2603 				 * also still be INADDR_ANY)
2604 				 */
2605 				IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src,
2606 				    sin->sin_addr.s_addr);
2607 			}
2608 			sin->sin_port = udp->udp_port;
2609 			ackmp->b_wptr = (uchar_t *)&sin[1];
2610 			if (udp->udp_state == TS_DATA_XFER) {
2611 				/*
2612 				 * connected, fill remote address too
2613 				 */
2614 				taa->REMADDR_length = sizeof (sin_t);
2615 				/* assumed 32-bit alignment */
2616 				taa->REMADDR_offset = taa->LOCADDR_offset +
2617 				    taa->LOCADDR_length;
2618 
2619 				sin = (sin_t *)(ackmp->b_rptr +
2620 				    taa->REMADDR_offset);
2621 				/* initialize */
2622 				*sin = sin_null;
2623 				sin->sin_family = AF_INET;
2624 				sin->sin_addr.s_addr =
2625 				    V4_PART_OF_V6(udp->udp_v6dst);
2626 				sin->sin_port = udp->udp_dstport;
2627 				ackmp->b_wptr = (uchar_t *)&sin[1];
2628 			}
2629 		} else {
2630 			taa->LOCADDR_length = sizeof (sin6_t);
2631 			sin6 = (sin6_t *)&taa[1];
2632 			/* Fill zeroes and then initialize non-zero fields */
2633 			*sin6 = sin6_null;
2634 			sin6->sin6_family = AF_INET6;
2635 			if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
2636 				sin6->sin6_addr = udp->udp_v6src;
2637 			} else {
2638 				/*
2639 				 * UNSPECIFIED
2640 				 * udp_v6src is not set, we might be bound to
2641 				 * broadcast/multicast. Use udp_bound_v6src as
2642 				 * local address instead (that could
2643 				 * also still be UNSPECIFIED)
2644 				 */
2645 				sin6->sin6_addr =
2646 				    udp->udp_bound_v6src;
2647 			}
2648 			sin6->sin6_port = udp->udp_port;
2649 			ackmp->b_wptr = (uchar_t *)&sin6[1];
2650 			if (udp->udp_state == TS_DATA_XFER) {
2651 				/*
2652 				 * connected, fill remote address too
2653 				 */
2654 				taa->REMADDR_length = sizeof (sin6_t);
2655 				/* assumed 32-bit alignment */
2656 				taa->REMADDR_offset = taa->LOCADDR_offset +
2657 				    taa->LOCADDR_length;
2658 
2659 				sin6 = (sin6_t *)(ackmp->b_rptr +
2660 				    taa->REMADDR_offset);
2661 				/* initialize */
2662 				*sin6 = sin6_null;
2663 				sin6->sin6_family = AF_INET6;
2664 				sin6->sin6_addr = udp->udp_v6dst;
2665 				sin6->sin6_port =  udp->udp_dstport;
2666 				ackmp->b_wptr = (uchar_t *)&sin6[1];
2667 			}
2668 			ackmp->b_wptr = (uchar_t *)&sin6[1];
2669 		}
2670 	}
2671 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
2672 	putnext(UDP_RD(q), ackmp);
2673 }
2674 
2675 static void
2676 udp_copy_info(struct T_info_ack *tap, udp_t *udp)
2677 {
2678 	if (udp->udp_family == AF_INET) {
2679 		*tap = udp_g_t_info_ack_ipv4;
2680 	} else {
2681 		*tap = udp_g_t_info_ack_ipv6;
2682 	}
2683 	tap->CURRENT_state = udp->udp_state;
2684 	tap->OPT_size = udp_max_optsize;
2685 }
2686 
2687 /*
2688  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
2689  * udp_wput.  Much of the T_CAPABILITY_ACK information is copied from
2690  * udp_g_t_info_ack.  The current state of the stream is copied from
2691  * udp_state.
2692  */
2693 static void
2694 udp_capability_req(queue_t *q, mblk_t *mp)
2695 {
2696 	t_uscalar_t		cap_bits1;
2697 	struct T_capability_ack	*tcap;
2698 	udp_t	*udp = Q_TO_UDP(q);
2699 
2700 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
2701 
2702 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
2703 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
2704 	if (!mp)
2705 		return;
2706 
2707 	tcap = (struct T_capability_ack *)mp->b_rptr;
2708 	tcap->CAP_bits1 = 0;
2709 
2710 	if (cap_bits1 & TC1_INFO) {
2711 		udp_copy_info(&tcap->INFO_ack, udp);
2712 		tcap->CAP_bits1 |= TC1_INFO;
2713 	}
2714 
2715 	putnext(UDP_RD(q), mp);
2716 }
2717 
2718 /*
2719  * This routine responds to T_INFO_REQ messages.  It is called by udp_wput.
2720  * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
2721  * The current state of the stream is copied from udp_state.
2722  */
2723 static void
2724 udp_info_req(queue_t *q, mblk_t *mp)
2725 {
2726 	udp_t *udp = Q_TO_UDP(q);
2727 
2728 	/* Create a T_INFO_ACK message. */
2729 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
2730 	    T_INFO_ACK);
2731 	if (!mp)
2732 		return;
2733 	udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
2734 	putnext(UDP_RD(q), mp);
2735 }
2736 
2737 /*
2738  * IP recognizes seven kinds of bind requests:
2739  *
2740  * - A zero-length address binds only to the protocol number.
2741  *
2742  * - A 4-byte address is treated as a request to
2743  * validate that the address is a valid local IPv4
2744  * address, appropriate for an application to bind to.
2745  * IP does the verification, but does not make any note
2746  * of the address at this time.
2747  *
2748  * - A 16-byte address contains is treated as a request
2749  * to validate a local IPv6 address, as the 4-byte
2750  * address case above.
2751  *
2752  * - A 16-byte sockaddr_in to validate the local IPv4 address and also
2753  * use it for the inbound fanout of packets.
2754  *
2755  * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also
2756  * use it for the inbound fanout of packets.
2757  *
2758  * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout
2759  * information consisting of local and remote addresses
2760  * and ports.  In this case, the addresses are both
2761  * validated as appropriate for this operation, and, if
2762  * so, the information is retained for use in the
2763  * inbound fanout.
2764  *
2765  * - A 36-byte address address (ipa6_conn_t) containing complete IPv6
2766  * fanout information, like the 12-byte case above.
2767  *
2768  * IP will also fill in the IRE request mblk with information
2769  * regarding our peer.  In all cases, we notify IP of our protocol
2770  * type by appending a single protocol byte to the bind request.
2771  */
2772 static mblk_t *
2773 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length)
2774 {
2775 	char	*cp;
2776 	mblk_t	*mp;
2777 	struct T_bind_req *tbr;
2778 	ipa_conn_t	*ac;
2779 	ipa6_conn_t	*ac6;
2780 	sin_t		*sin;
2781 	sin6_t		*sin6;
2782 
2783 	ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ);
2784 
2785 	mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI);
2786 	if (!mp)
2787 		return (mp);
2788 	mp->b_datap->db_type = M_PROTO;
2789 	tbr = (struct T_bind_req *)mp->b_rptr;
2790 	tbr->PRIM_type = bind_prim;
2791 	tbr->ADDR_offset = sizeof (*tbr);
2792 	tbr->CONIND_number = 0;
2793 	tbr->ADDR_length = addr_length;
2794 	cp = (char *)&tbr[1];
2795 	switch (addr_length) {
2796 	case sizeof (ipa_conn_t):
2797 		ASSERT(udp->udp_family == AF_INET);
2798 		/* Append a request for an IRE */
2799 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
2800 		if (!mp->b_cont) {
2801 			freemsg(mp);
2802 			return (NULL);
2803 		}
2804 		mp->b_cont->b_wptr += sizeof (ire_t);
2805 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
2806 
2807 		/* cp known to be 32 bit aligned */
2808 		ac = (ipa_conn_t *)cp;
2809 		ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src);
2810 		ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst);
2811 		ac->ac_fport = udp->udp_dstport;
2812 		ac->ac_lport = udp->udp_port;
2813 		break;
2814 
2815 	case sizeof (ipa6_conn_t):
2816 		ASSERT(udp->udp_family == AF_INET6);
2817 		/* Append a request for an IRE */
2818 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
2819 		if (!mp->b_cont) {
2820 			freemsg(mp);
2821 			return (NULL);
2822 		}
2823 		mp->b_cont->b_wptr += sizeof (ire_t);
2824 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
2825 
2826 		/* cp known to be 32 bit aligned */
2827 		ac6 = (ipa6_conn_t *)cp;
2828 		ac6->ac6_laddr = udp->udp_v6src;
2829 		ac6->ac6_faddr = udp->udp_v6dst;
2830 		ac6->ac6_fport = udp->udp_dstport;
2831 		ac6->ac6_lport = udp->udp_port;
2832 		break;
2833 
2834 	case sizeof (sin_t):
2835 		ASSERT(udp->udp_family == AF_INET);
2836 		/* Append a request for an IRE */
2837 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
2838 		if (!mp->b_cont) {
2839 			freemsg(mp);
2840 			return (NULL);
2841 		}
2842 		mp->b_cont->b_wptr += sizeof (ire_t);
2843 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
2844 
2845 		sin = (sin_t *)cp;
2846 		*sin = sin_null;
2847 		sin->sin_family = AF_INET;
2848 		sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src);
2849 		sin->sin_port = udp->udp_port;
2850 		break;
2851 
2852 	case sizeof (sin6_t):
2853 		ASSERT(udp->udp_family == AF_INET6);
2854 		/* Append a request for an IRE */
2855 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
2856 		if (!mp->b_cont) {
2857 			freemsg(mp);
2858 			return (NULL);
2859 		}
2860 		mp->b_cont->b_wptr += sizeof (ire_t);
2861 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
2862 
2863 		sin6 = (sin6_t *)cp;
2864 		*sin6 = sin6_null;
2865 		sin6->sin6_family = AF_INET6;
2866 		sin6->sin6_addr = udp->udp_bound_v6src;
2867 		sin6->sin6_port = udp->udp_port;
2868 		break;
2869 	}
2870 	/* Add protocol number to end */
2871 	cp[addr_length] = (char)IPPROTO_UDP;
2872 	mp->b_wptr = (uchar_t *)&cp[addr_length + 1];
2873 	return (mp);
2874 }
2875 
2876 /*
2877  * This is the open routine for udp.  It allocates a udp_t structure for
2878  * the stream and, on the first open of the module, creates an ND table.
2879  */
2880 /* ARGSUSED */
2881 static int
2882 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
2883 {
2884 	int	err;
2885 	udp_t	*udp;
2886 	conn_t *connp;
2887 	zoneid_t zoneid = getzoneid();
2888 	queue_t	*ip_wq;
2889 	char	*name;
2890 
2891 	TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q);
2892 
2893 	/* If the stream is already open, return immediately. */
2894 	if (q->q_ptr != NULL)
2895 		return (0);
2896 
2897 	/* If this is not a push of udp as a module, fail. */
2898 	if (sflag != MODOPEN)
2899 		return (EINVAL);
2900 
2901 	q->q_hiwat = udp_recv_hiwat;
2902 	WR(q)->q_hiwat = udp_xmit_hiwat;
2903 	WR(q)->q_lowat = udp_xmit_lowat;
2904 
2905 	/* Insert ourselves in the stream since we're about to walk q_next */
2906 	qprocson(q);
2907 
2908 	udp = kmem_cache_alloc(udp_cache, KM_SLEEP);
2909 	bzero(udp, sizeof (*udp));
2910 
2911 	/*
2912 	 * UDP is supported only as a module and it has to be pushed directly
2913 	 * above the device instance of IP. If UDP is pushed anywhere else
2914 	 * on a stream, it will support just T_SVR4_OPTMGMT_REQ for the
2915 	 * sake of MIB browsers and fail everything else.
2916 	 */
2917 	ip_wq = WR(q)->q_next;
2918 	if (ip_wq->q_next != NULL ||
2919 	    (name = ip_wq->q_qinfo->qi_minfo->mi_idname) == NULL ||
2920 	    strcmp(name, IP_MOD_NAME) != 0 ||
2921 	    ip_wq->q_qinfo->qi_minfo->mi_idnum != IP_MOD_ID) {
2922 		/* Support just SNMP for MIB browsers */
2923 		connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP);
2924 		connp->conn_rq = q;
2925 		connp->conn_wq = WR(q);
2926 		connp->conn_flags |= IPCL_UDPMOD;
2927 		connp->conn_cred = credp;
2928 		connp->conn_zoneid = zoneid;
2929 		connp->conn_udp = udp;
2930 		udp->udp_connp = connp;
2931 		q->q_ptr = WR(q)->q_ptr = connp;
2932 		crhold(credp);
2933 		q->q_qinfo = &udp_snmp_rinit;
2934 		WR(q)->q_qinfo = &udp_snmp_winit;
2935 		return (0);
2936 	}
2937 
2938 	/*
2939 	 * Initialize the udp_t structure for this stream.
2940 	 */
2941 	q = RD(ip_wq);
2942 	connp = Q_TO_CONN(q);
2943 	mutex_enter(&connp->conn_lock);
2944 	connp->conn_proto = IPPROTO_UDP;
2945 	connp->conn_flags |= IPCL_UDP;
2946 	connp->conn_sqp = IP_SQUEUE_GET(lbolt);
2947 	connp->conn_udp = udp;
2948 
2949 	/* Set the initial state of the stream and the privilege status. */
2950 	udp->udp_connp = connp;
2951 	udp->udp_state = TS_UNBND;
2952 	udp->udp_mode = UDP_MT_HOT;
2953 	if (getmajor(*devp) == (major_t)UDP6_MAJ) {
2954 		udp->udp_family = AF_INET6;
2955 		udp->udp_ipversion = IPV6_VERSION;
2956 		udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
2957 		udp->udp_ttl = udp_ipv6_hoplimit;
2958 		connp->conn_af_isv6 = B_TRUE;
2959 		connp->conn_flags |= IPCL_ISV6;
2960 	} else {
2961 		udp->udp_family = AF_INET;
2962 		udp->udp_ipversion = IPV4_VERSION;
2963 		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE;
2964 		udp->udp_ttl = udp_ipv4_ttl;
2965 		connp->conn_af_isv6 = B_FALSE;
2966 		connp->conn_flags &= ~IPCL_ISV6;
2967 	}
2968 
2969 	udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2970 	connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
2971 	connp->conn_zoneid = zoneid;
2972 
2973 	/*
2974 	 * If the caller has the process-wide flag set, then default to MAC
2975 	 * exempt mode.  This allows read-down to unlabeled hosts.
2976 	 */
2977 	if (getpflags(NET_MAC_AWARE, credp) != 0)
2978 		udp->udp_mac_exempt = B_TRUE;
2979 
2980 	if (connp->conn_flags & IPCL_SOCKET) {
2981 		udp->udp_issocket = B_TRUE;
2982 		udp->udp_direct_sockfs = B_TRUE;
2983 	}
2984 
2985 	connp->conn_ulp_labeled = is_system_labeled();
2986 
2987 	mutex_exit(&connp->conn_lock);
2988 
2989 	/*
2990 	 * The transmit hiwat/lowat is only looked at on IP's queue.
2991 	 * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF
2992 	 * getsockopts.
2993 	 */
2994 	q->q_hiwat = udp_recv_hiwat;
2995 	WR(q)->q_hiwat = udp_xmit_hiwat;
2996 	WR(q)->q_lowat = udp_xmit_lowat;
2997 
2998 	if (udp->udp_family == AF_INET6) {
2999 		/* Build initial header template for transmit */
3000 		if ((err = udp_build_hdrs(q, udp)) != 0) {
3001 error:
3002 			qprocsoff(UDP_RD(q));
3003 			udp->udp_connp = NULL;
3004 			connp->conn_udp = NULL;
3005 			kmem_cache_free(udp_cache, udp);
3006 			return (err);
3007 		}
3008 	}
3009 
3010 	/* Set the Stream head write offset and high watermark. */
3011 	(void) mi_set_sth_wroff(UDP_RD(q),
3012 	    udp->udp_max_hdr_len + udp_wroff_extra);
3013 	(void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat));
3014 
3015 	WR(UDP_RD(q))->q_qinfo = &udp_winit;
3016 
3017 	return (0);
3018 }
3019 
3020 /*
3021  * Which UDP options OK to set through T_UNITDATA_REQ...
3022  */
3023 /* ARGSUSED */
3024 static boolean_t
3025 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
3026 {
3027 	return (B_TRUE);
3028 }
3029 
3030 /*
3031  * This routine gets default values of certain options whose default
3032  * values are maintained by protcol specific code
3033  */
3034 /* ARGSUSED */
3035 int
3036 udp_opt_default(queue_t	*q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
3037 {
3038 	int *i1 = (int *)ptr;
3039 
3040 	switch (level) {
3041 	case IPPROTO_IP:
3042 		switch (name) {
3043 		case IP_MULTICAST_TTL:
3044 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
3045 			return (sizeof (uchar_t));
3046 		case IP_MULTICAST_LOOP:
3047 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
3048 			return (sizeof (uchar_t));
3049 		}
3050 		break;
3051 	case IPPROTO_IPV6:
3052 		switch (name) {
3053 		case IPV6_MULTICAST_HOPS:
3054 			*i1 = IP_DEFAULT_MULTICAST_TTL;
3055 			return (sizeof (int));
3056 		case IPV6_MULTICAST_LOOP:
3057 			*i1 = IP_DEFAULT_MULTICAST_LOOP;
3058 			return (sizeof (int));
3059 		case IPV6_UNICAST_HOPS:
3060 			*i1 = udp_ipv6_hoplimit;
3061 			return (sizeof (int));
3062 		}
3063 		break;
3064 	}
3065 	return (-1);
3066 }
3067 
3068 /*
3069  * This routine retrieves the current status of socket options
3070  * and expects the caller to pass in the queue pointer of the
3071  * upper instance.  It returns the size of the option retrieved.
3072  */
3073 int
3074 udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
3075 {
3076 	int	*i1 = (int *)ptr;
3077 	conn_t	*connp;
3078 	udp_t	*udp;
3079 	ip6_pkt_t *ipp;
3080 	int	len;
3081 
3082 	q = UDP_WR(q);
3083 	connp = Q_TO_CONN(q);
3084 	udp = connp->conn_udp;
3085 	ipp = &udp->udp_sticky_ipp;
3086 
3087 	switch (level) {
3088 	case SOL_SOCKET:
3089 		switch (name) {
3090 		case SO_DEBUG:
3091 			*i1 = udp->udp_debug;
3092 			break;	/* goto sizeof (int) option return */
3093 		case SO_REUSEADDR:
3094 			*i1 = udp->udp_reuseaddr;
3095 			break;	/* goto sizeof (int) option return */
3096 		case SO_TYPE:
3097 			*i1 = SOCK_DGRAM;
3098 			break;	/* goto sizeof (int) option return */
3099 
3100 		/*
3101 		 * The following three items are available here,
3102 		 * but are only meaningful to IP.
3103 		 */
3104 		case SO_DONTROUTE:
3105 			*i1 = udp->udp_dontroute;
3106 			break;	/* goto sizeof (int) option return */
3107 		case SO_USELOOPBACK:
3108 			*i1 = udp->udp_useloopback;
3109 			break;	/* goto sizeof (int) option return */
3110 		case SO_BROADCAST:
3111 			*i1 = udp->udp_broadcast;
3112 			break;	/* goto sizeof (int) option return */
3113 
3114 		case SO_SNDBUF:
3115 			*i1 = q->q_hiwat;
3116 			break;	/* goto sizeof (int) option return */
3117 		case SO_RCVBUF:
3118 			*i1 = RD(q)->q_hiwat;
3119 			break;	/* goto sizeof (int) option return */
3120 		case SO_DGRAM_ERRIND:
3121 			*i1 = udp->udp_dgram_errind;
3122 			break;	/* goto sizeof (int) option return */
3123 		case SO_RECVUCRED:
3124 			*i1 = udp->udp_recvucred;
3125 			break;	/* goto sizeof (int) option return */
3126 		case SO_TIMESTAMP:
3127 			*i1 = udp->udp_timestamp;
3128 			break;	/* goto sizeof (int) option return */
3129 		case SO_ANON_MLP:
3130 			*i1 = udp->udp_anon_mlp;
3131 			break;	/* goto sizeof (int) option return */
3132 		case SO_MAC_EXEMPT:
3133 			*i1 = udp->udp_mac_exempt;
3134 			break;	/* goto sizeof (int) option return */
3135 		case SO_ALLZONES:
3136 			*i1 = connp->conn_allzones;
3137 			break;	/* goto sizeof (int) option return */
3138 		case SO_EXCLBIND:
3139 			*i1 = udp->udp_exclbind ? SO_EXCLBIND : 0;
3140 			break;
3141 		default:
3142 			return (-1);
3143 		}
3144 		break;
3145 	case IPPROTO_IP:
3146 		if (udp->udp_family != AF_INET)
3147 			return (-1);
3148 		switch (name) {
3149 		case IP_OPTIONS:
3150 		case T_IP_OPTIONS:
3151 			len = udp->udp_ip_rcv_options_len - udp->udp_label_len;
3152 			if (len > 0) {
3153 				bcopy(udp->udp_ip_rcv_options +
3154 				    udp->udp_label_len, ptr, len);
3155 			}
3156 			return (len);
3157 		case IP_TOS:
3158 		case T_IP_TOS:
3159 			*i1 = (int)udp->udp_type_of_service;
3160 			break;	/* goto sizeof (int) option return */
3161 		case IP_TTL:
3162 			*i1 = (int)udp->udp_ttl;
3163 			break;	/* goto sizeof (int) option return */
3164 		case IP_NEXTHOP:
3165 			/* Handled at IP level */
3166 			return (-EINVAL);
3167 		case IP_MULTICAST_IF:
3168 			/* 0 address if not set */
3169 			*(ipaddr_t *)ptr = udp->udp_multicast_if_addr;
3170 			return (sizeof (ipaddr_t));
3171 		case IP_MULTICAST_TTL:
3172 			*(uchar_t *)ptr = udp->udp_multicast_ttl;
3173 			return (sizeof (uchar_t));
3174 		case IP_MULTICAST_LOOP:
3175 			*ptr = connp->conn_multicast_loop;
3176 			return (sizeof (uint8_t));
3177 		case IP_RECVOPTS:
3178 			*i1 = udp->udp_recvopts;
3179 			break;	/* goto sizeof (int) option return */
3180 		case IP_RECVDSTADDR:
3181 			*i1 = udp->udp_recvdstaddr;
3182 			break;	/* goto sizeof (int) option return */
3183 		case IP_RECVIF:
3184 			*i1 = udp->udp_recvif;
3185 			break;	/* goto sizeof (int) option return */
3186 		case IP_RECVSLLA:
3187 			*i1 = udp->udp_recvslla;
3188 			break;	/* goto sizeof (int) option return */
3189 		case IP_RECVTTL:
3190 			*i1 = udp->udp_recvttl;
3191 			break;	/* goto sizeof (int) option return */
3192 		case IP_ADD_MEMBERSHIP:
3193 		case IP_DROP_MEMBERSHIP:
3194 		case IP_BLOCK_SOURCE:
3195 		case IP_UNBLOCK_SOURCE:
3196 		case IP_ADD_SOURCE_MEMBERSHIP:
3197 		case IP_DROP_SOURCE_MEMBERSHIP:
3198 		case MCAST_JOIN_GROUP:
3199 		case MCAST_LEAVE_GROUP:
3200 		case MCAST_BLOCK_SOURCE:
3201 		case MCAST_UNBLOCK_SOURCE:
3202 		case MCAST_JOIN_SOURCE_GROUP:
3203 		case MCAST_LEAVE_SOURCE_GROUP:
3204 		case IP_DONTFAILOVER_IF:
3205 			/* cannot "get" the value for these */
3206 			return (-1);
3207 		case IP_BOUND_IF:
3208 			/* Zero if not set */
3209 			*i1 = udp->udp_bound_if;
3210 			break;	/* goto sizeof (int) option return */
3211 		case IP_UNSPEC_SRC:
3212 			*i1 = udp->udp_unspec_source;
3213 			break;	/* goto sizeof (int) option return */
3214 		case IP_XMIT_IF:
3215 			*i1 = udp->udp_xmit_if;
3216 			break; /* goto sizeof (int) option return */
3217 		default:
3218 			return (-1);
3219 		}
3220 		break;
3221 	case IPPROTO_IPV6:
3222 		if (udp->udp_family != AF_INET6)
3223 			return (-1);
3224 		switch (name) {
3225 		case IPV6_UNICAST_HOPS:
3226 			*i1 = (unsigned int)udp->udp_ttl;
3227 			break;	/* goto sizeof (int) option return */
3228 		case IPV6_MULTICAST_IF:
3229 			/* 0 index if not set */
3230 			*i1 = udp->udp_multicast_if_index;
3231 			break;	/* goto sizeof (int) option return */
3232 		case IPV6_MULTICAST_HOPS:
3233 			*i1 = udp->udp_multicast_ttl;
3234 			break;	/* goto sizeof (int) option return */
3235 		case IPV6_MULTICAST_LOOP:
3236 			*i1 = connp->conn_multicast_loop;
3237 			break;	/* goto sizeof (int) option return */
3238 		case IPV6_JOIN_GROUP:
3239 		case IPV6_LEAVE_GROUP:
3240 		case MCAST_JOIN_GROUP:
3241 		case MCAST_LEAVE_GROUP:
3242 		case MCAST_BLOCK_SOURCE:
3243 		case MCAST_UNBLOCK_SOURCE:
3244 		case MCAST_JOIN_SOURCE_GROUP:
3245 		case MCAST_LEAVE_SOURCE_GROUP:
3246 			/* cannot "get" the value for these */
3247 			return (-1);
3248 		case IPV6_BOUND_IF:
3249 			/* Zero if not set */
3250 			*i1 = udp->udp_bound_if;
3251 			break;	/* goto sizeof (int) option return */
3252 		case IPV6_UNSPEC_SRC:
3253 			*i1 = udp->udp_unspec_source;
3254 			break;	/* goto sizeof (int) option return */
3255 		case IPV6_RECVPKTINFO:
3256 			*i1 = udp->udp_ipv6_recvpktinfo;
3257 			break;	/* goto sizeof (int) option return */
3258 		case IPV6_RECVTCLASS:
3259 			*i1 = udp->udp_ipv6_recvtclass;
3260 			break;	/* goto sizeof (int) option return */
3261 		case IPV6_RECVPATHMTU:
3262 			*i1 = udp->udp_ipv6_recvpathmtu;
3263 			break;	/* goto sizeof (int) option return */
3264 		case IPV6_RECVHOPLIMIT:
3265 			*i1 = udp->udp_ipv6_recvhoplimit;
3266 			break;	/* goto sizeof (int) option return */
3267 		case IPV6_RECVHOPOPTS:
3268 			*i1 = udp->udp_ipv6_recvhopopts;
3269 			break;	/* goto sizeof (int) option return */
3270 		case IPV6_RECVDSTOPTS:
3271 			*i1 = udp->udp_ipv6_recvdstopts;
3272 			break;	/* goto sizeof (int) option return */
3273 		case _OLD_IPV6_RECVDSTOPTS:
3274 			*i1 = udp->udp_old_ipv6_recvdstopts;
3275 			break;	/* goto sizeof (int) option return */
3276 		case IPV6_RECVRTHDRDSTOPTS:
3277 			*i1 = udp->udp_ipv6_recvrthdrdstopts;
3278 			break;	/* goto sizeof (int) option return */
3279 		case IPV6_RECVRTHDR:
3280 			*i1 = udp->udp_ipv6_recvrthdr;
3281 			break;	/* goto sizeof (int) option return */
3282 		case IPV6_PKTINFO: {
3283 			/* XXX assumes that caller has room for max size! */
3284 			struct in6_pktinfo *pkti;
3285 
3286 			pkti = (struct in6_pktinfo *)ptr;
3287 			if (ipp->ipp_fields & IPPF_IFINDEX)
3288 				pkti->ipi6_ifindex = ipp->ipp_ifindex;
3289 			else
3290 				pkti->ipi6_ifindex = 0;
3291 			if (ipp->ipp_fields & IPPF_ADDR)
3292 				pkti->ipi6_addr = ipp->ipp_addr;
3293 			else
3294 				pkti->ipi6_addr = ipv6_all_zeros;
3295 			return (sizeof (struct in6_pktinfo));
3296 		}
3297 		case IPV6_TCLASS:
3298 			if (ipp->ipp_fields & IPPF_TCLASS)
3299 				*i1 = ipp->ipp_tclass;
3300 			else
3301 				*i1 = IPV6_FLOW_TCLASS(
3302 				    IPV6_DEFAULT_VERS_AND_FLOW);
3303 			break;	/* goto sizeof (int) option return */
3304 		case IPV6_NEXTHOP: {
3305 			sin6_t *sin6 = (sin6_t *)ptr;
3306 
3307 			if (!(ipp->ipp_fields & IPPF_NEXTHOP))
3308 				return (0);
3309 			*sin6 = sin6_null;
3310 			sin6->sin6_family = AF_INET6;
3311 			sin6->sin6_addr = ipp->ipp_nexthop;
3312 			return (sizeof (sin6_t));
3313 		}
3314 		case IPV6_HOPOPTS:
3315 			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
3316 				return (0);
3317 			if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6)
3318 				return (0);
3319 			/*
3320 			 * The cipso/label option is added by kernel.
3321 			 * User is not usually aware of this option.
3322 			 * We copy out the hbh opt after the label option.
3323 			 */
3324 			bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6,
3325 			    ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6);
3326 			if (udp->udp_label_len_v6 > 0) {
3327 				ptr[0] = ((char *)ipp->ipp_hopopts)[0];
3328 				ptr[1] = (ipp->ipp_hopoptslen -
3329 				    udp->udp_label_len_v6 + 7) / 8 - 1;
3330 			}
3331 			return (ipp->ipp_hopoptslen - udp->udp_label_len_v6);
3332 		case IPV6_RTHDRDSTOPTS:
3333 			if (!(ipp->ipp_fields & IPPF_RTDSTOPTS))
3334 				return (0);
3335 			bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
3336 			return (ipp->ipp_rtdstoptslen);
3337 		case IPV6_RTHDR:
3338 			if (!(ipp->ipp_fields & IPPF_RTHDR))
3339 				return (0);
3340 			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
3341 			return (ipp->ipp_rthdrlen);
3342 		case IPV6_DSTOPTS:
3343 			if (!(ipp->ipp_fields & IPPF_DSTOPTS))
3344 				return (0);
3345 			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
3346 			return (ipp->ipp_dstoptslen);
3347 		case IPV6_PATHMTU:
3348 			return (ip_fill_mtuinfo(&udp->udp_v6dst,
3349 				udp->udp_dstport, (struct ip6_mtuinfo *)ptr));
3350 		default:
3351 			return (-1);
3352 		}
3353 		break;
3354 	case IPPROTO_UDP:
3355 		switch (name) {
3356 		case UDP_ANONPRIVBIND:
3357 			*i1 = udp->udp_anon_priv_bind;
3358 			break;
3359 		case UDP_EXCLBIND:
3360 			*i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0;
3361 			break;
3362 		case UDP_RCVHDR:
3363 			*i1 = udp->udp_rcvhdr ? 1 : 0;
3364 			break;
3365 		default:
3366 			return (-1);
3367 		}
3368 		break;
3369 	default:
3370 		return (-1);
3371 	}
3372 	return (sizeof (int));
3373 }
3374 
3375 /*
3376  * This routine sets socket options; it expects the caller
3377  * to pass in the queue pointer of the upper instance.
3378  */
3379 /* ARGSUSED */
3380 int
3381 udp_opt_set(queue_t *q, uint_t optset_context, int level,
3382     int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
3383     uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
3384 {
3385 	udpattrs_t *attrs = thisdg_attrs;
3386 	int	*i1 = (int *)invalp;
3387 	boolean_t onoff = (*i1 == 0) ? 0 : 1;
3388 	boolean_t checkonly;
3389 	int	error;
3390 	conn_t	*connp;
3391 	udp_t	*udp;
3392 	uint_t	newlen;
3393 
3394 	q = UDP_WR(q);
3395 	connp = Q_TO_CONN(q);
3396 	udp = connp->conn_udp;
3397 
3398 	switch (optset_context) {
3399 	case SETFN_OPTCOM_CHECKONLY:
3400 		checkonly = B_TRUE;
3401 		/*
3402 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
3403 		 * inlen != 0 implies value supplied and
3404 		 * 	we have to "pretend" to set it.
3405 		 * inlen == 0 implies that there is no
3406 		 * 	value part in T_CHECK request and just validation
3407 		 * done elsewhere should be enough, we just return here.
3408 		 */
3409 		if (inlen == 0) {
3410 			*outlenp = 0;
3411 			return (0);
3412 		}
3413 		break;
3414 	case SETFN_OPTCOM_NEGOTIATE:
3415 		checkonly = B_FALSE;
3416 		break;
3417 	case SETFN_UD_NEGOTIATE:
3418 	case SETFN_CONN_NEGOTIATE:
3419 		checkonly = B_FALSE;
3420 		/*
3421 		 * Negotiating local and "association-related" options
3422 		 * through T_UNITDATA_REQ.
3423 		 *
3424 		 * Following routine can filter out ones we do not
3425 		 * want to be "set" this way.
3426 		 */
3427 		if (!udp_opt_allow_udr_set(level, name)) {
3428 			*outlenp = 0;
3429 			return (EINVAL);
3430 		}
3431 		break;
3432 	default:
3433 		/*
3434 		 * We should never get here
3435 		 */
3436 		*outlenp = 0;
3437 		return (EINVAL);
3438 	}
3439 
3440 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
3441 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
3442 
3443 	/*
3444 	 * For fixed length options, no sanity check
3445 	 * of passed in length is done. It is assumed *_optcom_req()
3446 	 * routines do the right thing.
3447 	 */
3448 
3449 	switch (level) {
3450 	case SOL_SOCKET:
3451 		switch (name) {
3452 		case SO_REUSEADDR:
3453 			if (!checkonly)
3454 				udp->udp_reuseaddr = onoff;
3455 			break;
3456 		case SO_DEBUG:
3457 			if (!checkonly)
3458 				udp->udp_debug = onoff;
3459 			break;
3460 		/*
3461 		 * The following three items are available here,
3462 		 * but are only meaningful to IP.
3463 		 */
3464 		case SO_DONTROUTE:
3465 			if (!checkonly)
3466 				udp->udp_dontroute = onoff;
3467 			break;
3468 		case SO_USELOOPBACK:
3469 			if (!checkonly)
3470 				udp->udp_useloopback = onoff;
3471 			break;
3472 		case SO_BROADCAST:
3473 			if (!checkonly)
3474 				udp->udp_broadcast = onoff;
3475 			break;
3476 
3477 		case SO_SNDBUF:
3478 			if (*i1 > udp_max_buf) {
3479 				*outlenp = 0;
3480 				return (ENOBUFS);
3481 			}
3482 			if (!checkonly) {
3483 				q->q_hiwat = *i1;
3484 				WR(UDP_RD(q))->q_hiwat = *i1;
3485 			}
3486 			break;
3487 		case SO_RCVBUF:
3488 			if (*i1 > udp_max_buf) {
3489 				*outlenp = 0;
3490 				return (ENOBUFS);
3491 			}
3492 			if (!checkonly) {
3493 				RD(q)->q_hiwat = *i1;
3494 				UDP_RD(q)->q_hiwat = *i1;
3495 				(void) mi_set_sth_hiwat(UDP_RD(q),
3496 				    udp_set_rcv_hiwat(udp, *i1));
3497 			}
3498 			break;
3499 		case SO_DGRAM_ERRIND:
3500 			if (!checkonly)
3501 				udp->udp_dgram_errind = onoff;
3502 			break;
3503 		case SO_RECVUCRED:
3504 			if (!checkonly)
3505 				udp->udp_recvucred = onoff;
3506 			break;
3507 		case SO_ALLZONES:
3508 			/*
3509 			 * "soft" error (negative)
3510 			 * option not handled at this level
3511 			 * Do not modify *outlenp.
3512 			 */
3513 			return (-EINVAL);
3514 		case SO_TIMESTAMP:
3515 			if (!checkonly)
3516 				udp->udp_timestamp = onoff;
3517 			break;
3518 		case SO_ANON_MLP:
3519 			if (!checkonly)
3520 				udp->udp_anon_mlp = onoff;
3521 			break;
3522 		case SO_MAC_EXEMPT:
3523 			if (secpolicy_net_mac_aware(cr) != 0 ||
3524 			    udp->udp_state != TS_UNBND)
3525 				return (EACCES);
3526 			if (!checkonly)
3527 				udp->udp_mac_exempt = onoff;
3528 			break;
3529 		case SCM_UCRED: {
3530 			struct ucred_s *ucr;
3531 			cred_t *cr, *newcr;
3532 			ts_label_t *tsl;
3533 
3534 			/*
3535 			 * Only sockets that have proper privileges and are
3536 			 * bound to MLPs will have any other value here, so
3537 			 * this implicitly tests for privilege to set label.
3538 			 */
3539 			if (connp->conn_mlp_type == mlptSingle)
3540 				break;
3541 			ucr = (struct ucred_s *)invalp;
3542 			if (inlen != ucredsize ||
3543 			    ucr->uc_labeloff < sizeof (*ucr) ||
3544 			    ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
3545 				return (EINVAL);
3546 			if (!checkonly) {
3547 				mblk_t *mb;
3548 
3549 				if (attrs == NULL ||
3550 				    (mb = attrs->udpattr_mb) == NULL)
3551 					return (EINVAL);
3552 				if ((cr = DB_CRED(mb)) == NULL)
3553 					cr = udp->udp_connp->conn_cred;
3554 				ASSERT(cr != NULL);
3555 				if ((tsl = crgetlabel(cr)) == NULL)
3556 					return (EINVAL);
3557 				newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
3558 				    tsl->tsl_doi, KM_NOSLEEP);
3559 				if (newcr == NULL)
3560 					return (ENOSR);
3561 				mblk_setcred(mb, newcr);
3562 				attrs->udpattr_credset = B_TRUE;
3563 				crfree(newcr);
3564 			}
3565 			break;
3566 		}
3567 		case SO_EXCLBIND:
3568 			if (!checkonly)
3569 				udp->udp_exclbind = onoff;
3570 			break;
3571 		default:
3572 			*outlenp = 0;
3573 			return (EINVAL);
3574 		}
3575 		break;
3576 	case IPPROTO_IP:
3577 		if (udp->udp_family != AF_INET) {
3578 			*outlenp = 0;
3579 			return (ENOPROTOOPT);
3580 		}
3581 		switch (name) {
3582 		case IP_OPTIONS:
3583 		case T_IP_OPTIONS:
3584 			/* Save options for use by IP. */
3585 			newlen = inlen + udp->udp_label_len;
3586 			if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
3587 				*outlenp = 0;
3588 				return (EINVAL);
3589 			}
3590 			if (checkonly)
3591 				break;
3592 
3593 			if (!tsol_option_set(&udp->udp_ip_snd_options,
3594 			    &udp->udp_ip_snd_options_len,
3595 			    udp->udp_label_len, invalp, inlen)) {
3596 				*outlenp = 0;
3597 				return (ENOMEM);
3598 			}
3599 
3600 			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
3601 			    UDPH_SIZE + udp->udp_ip_snd_options_len;
3602 			(void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len +
3603 			    udp_wroff_extra);
3604 			break;
3605 
3606 		case IP_TTL:
3607 			if (!checkonly) {
3608 				udp->udp_ttl = (uchar_t)*i1;
3609 			}
3610 			break;
3611 		case IP_TOS:
3612 		case T_IP_TOS:
3613 			if (!checkonly) {
3614 				udp->udp_type_of_service = (uchar_t)*i1;
3615 			}
3616 			break;
3617 		case IP_MULTICAST_IF: {
3618 			/*
3619 			 * TODO should check OPTMGMT reply and undo this if
3620 			 * there is an error.
3621 			 */
3622 			struct in_addr *inap = (struct in_addr *)invalp;
3623 			if (!checkonly) {
3624 				udp->udp_multicast_if_addr =
3625 				    inap->s_addr;
3626 			}
3627 			break;
3628 		}
3629 		case IP_MULTICAST_TTL:
3630 			if (!checkonly)
3631 				udp->udp_multicast_ttl = *invalp;
3632 			break;
3633 		case IP_MULTICAST_LOOP:
3634 			if (!checkonly)
3635 				connp->conn_multicast_loop = *invalp;
3636 			break;
3637 		case IP_RECVOPTS:
3638 			if (!checkonly)
3639 				udp->udp_recvopts = onoff;
3640 			break;
3641 		case IP_RECVDSTADDR:
3642 			if (!checkonly)
3643 				udp->udp_recvdstaddr = onoff;
3644 			break;
3645 		case IP_RECVIF:
3646 			if (!checkonly)
3647 				udp->udp_recvif = onoff;
3648 			break;
3649 		case IP_RECVSLLA:
3650 			if (!checkonly)
3651 				udp->udp_recvslla = onoff;
3652 			break;
3653 		case IP_RECVTTL:
3654 			if (!checkonly)
3655 				udp->udp_recvttl = onoff;
3656 			break;
3657 		case IP_ADD_MEMBERSHIP:
3658 		case IP_DROP_MEMBERSHIP:
3659 		case IP_BLOCK_SOURCE:
3660 		case IP_UNBLOCK_SOURCE:
3661 		case IP_ADD_SOURCE_MEMBERSHIP:
3662 		case IP_DROP_SOURCE_MEMBERSHIP:
3663 		case MCAST_JOIN_GROUP:
3664 		case MCAST_LEAVE_GROUP:
3665 		case MCAST_BLOCK_SOURCE:
3666 		case MCAST_UNBLOCK_SOURCE:
3667 		case MCAST_JOIN_SOURCE_GROUP:
3668 		case MCAST_LEAVE_SOURCE_GROUP:
3669 		case IP_SEC_OPT:
3670 		case IP_NEXTHOP:
3671 			/*
3672 			 * "soft" error (negative)
3673 			 * option not handled at this level
3674 			 * Do not modify *outlenp.
3675 			 */
3676 			return (-EINVAL);
3677 		case IP_BOUND_IF:
3678 			if (!checkonly)
3679 				udp->udp_bound_if = *i1;
3680 			break;
3681 		case IP_UNSPEC_SRC:
3682 			if (!checkonly)
3683 				udp->udp_unspec_source = onoff;
3684 			break;
3685 		case IP_XMIT_IF:
3686 			if (!checkonly)
3687 				udp->udp_xmit_if = *i1;
3688 			break;
3689 		default:
3690 			*outlenp = 0;
3691 			return (EINVAL);
3692 		}
3693 		break;
3694 	case IPPROTO_IPV6: {
3695 		ip6_pkt_t		*ipp;
3696 		boolean_t		sticky;
3697 
3698 		if (udp->udp_family != AF_INET6) {
3699 			*outlenp = 0;
3700 			return (ENOPROTOOPT);
3701 		}
3702 		/*
3703 		 * Deal with both sticky options and ancillary data
3704 		 */
3705 		sticky = B_FALSE;
3706 		if (attrs == NULL || (ipp = attrs->udpattr_ipp) == NULL) {
3707 			/* sticky options, or none */
3708 			ipp = &udp->udp_sticky_ipp;
3709 			sticky = B_TRUE;
3710 		}
3711 
3712 		switch (name) {
3713 		case IPV6_MULTICAST_IF:
3714 			if (!checkonly)
3715 				udp->udp_multicast_if_index = *i1;
3716 			break;
3717 		case IPV6_UNICAST_HOPS:
3718 			/* -1 means use default */
3719 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
3720 				*outlenp = 0;
3721 				return (EINVAL);
3722 			}
3723 			if (!checkonly) {
3724 				if (*i1 == -1) {
3725 					udp->udp_ttl = ipp->ipp_unicast_hops =
3726 					    udp_ipv6_hoplimit;
3727 					ipp->ipp_fields &= ~IPPF_UNICAST_HOPS;
3728 					/* Pass modified value to IP. */
3729 					*i1 = udp->udp_ttl;
3730 				} else {
3731 					udp->udp_ttl = ipp->ipp_unicast_hops =
3732 					    (uint8_t)*i1;
3733 					ipp->ipp_fields |= IPPF_UNICAST_HOPS;
3734 				}
3735 				/* Rebuild the header template */
3736 				error = udp_build_hdrs(q, udp);
3737 				if (error != 0) {
3738 					*outlenp = 0;
3739 					return (error);
3740 				}
3741 			}
3742 			break;
3743 		case IPV6_MULTICAST_HOPS:
3744 			/* -1 means use default */
3745 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
3746 				*outlenp = 0;
3747 				return (EINVAL);
3748 			}
3749 			if (!checkonly) {
3750 				if (*i1 == -1) {
3751 					udp->udp_multicast_ttl =
3752 					    ipp->ipp_multicast_hops =
3753 					    IP_DEFAULT_MULTICAST_TTL;
3754 					ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS;
3755 					/* Pass modified value to IP. */
3756 					*i1 = udp->udp_multicast_ttl;
3757 				} else {
3758 					udp->udp_multicast_ttl =
3759 					    ipp->ipp_multicast_hops =
3760 					    (uint8_t)*i1;
3761 					ipp->ipp_fields |= IPPF_MULTICAST_HOPS;
3762 				}
3763 			}
3764 			break;
3765 		case IPV6_MULTICAST_LOOP:
3766 			if (*i1 != 0 && *i1 != 1) {
3767 				*outlenp = 0;
3768 				return (EINVAL);
3769 			}
3770 			if (!checkonly)
3771 				connp->conn_multicast_loop = *i1;
3772 			break;
3773 		case IPV6_JOIN_GROUP:
3774 		case IPV6_LEAVE_GROUP:
3775 		case MCAST_JOIN_GROUP:
3776 		case MCAST_LEAVE_GROUP:
3777 		case MCAST_BLOCK_SOURCE:
3778 		case MCAST_UNBLOCK_SOURCE:
3779 		case MCAST_JOIN_SOURCE_GROUP:
3780 		case MCAST_LEAVE_SOURCE_GROUP:
3781 			/*
3782 			 * "soft" error (negative)
3783 			 * option not handled at this level
3784 			 * Note: Do not modify *outlenp
3785 			 */
3786 			return (-EINVAL);
3787 		case IPV6_BOUND_IF:
3788 			if (!checkonly)
3789 				udp->udp_bound_if = *i1;
3790 			break;
3791 		case IPV6_UNSPEC_SRC:
3792 			if (!checkonly)
3793 				udp->udp_unspec_source = onoff;
3794 			break;
3795 		/*
3796 		 * Set boolean switches for ancillary data delivery
3797 		 */
3798 		case IPV6_RECVPKTINFO:
3799 			if (!checkonly)
3800 				udp->udp_ipv6_recvpktinfo = onoff;
3801 			break;
3802 		case IPV6_RECVTCLASS:
3803 			if (!checkonly) {
3804 				udp->udp_ipv6_recvtclass = onoff;
3805 			}
3806 			break;
3807 		case IPV6_RECVPATHMTU:
3808 			if (!checkonly) {
3809 				udp->udp_ipv6_recvpathmtu = onoff;
3810 			}
3811 			break;
3812 		case IPV6_RECVHOPLIMIT:
3813 			if (!checkonly)
3814 				udp->udp_ipv6_recvhoplimit = onoff;
3815 			break;
3816 		case IPV6_RECVHOPOPTS:
3817 			if (!checkonly)
3818 				udp->udp_ipv6_recvhopopts = onoff;
3819 			break;
3820 		case IPV6_RECVDSTOPTS:
3821 			if (!checkonly)
3822 				udp->udp_ipv6_recvdstopts = onoff;
3823 			break;
3824 		case _OLD_IPV6_RECVDSTOPTS:
3825 			if (!checkonly)
3826 				udp->udp_old_ipv6_recvdstopts = onoff;
3827 			break;
3828 		case IPV6_RECVRTHDRDSTOPTS:
3829 			if (!checkonly)
3830 				udp->udp_ipv6_recvrthdrdstopts = onoff;
3831 			break;
3832 		case IPV6_RECVRTHDR:
3833 			if (!checkonly)
3834 				udp->udp_ipv6_recvrthdr = onoff;
3835 			break;
3836 		/*
3837 		 * Set sticky options or ancillary data.
3838 		 * If sticky options, (re)build any extension headers
3839 		 * that might be needed as a result.
3840 		 */
3841 		case IPV6_PKTINFO:
3842 			/*
3843 			 * The source address and ifindex are verified
3844 			 * in ip_opt_set(). For ancillary data the
3845 			 * source address is checked in ip_wput_v6.
3846 			 */
3847 			if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
3848 				return (EINVAL);
3849 			if (checkonly)
3850 				break;
3851 
3852 			if (inlen == 0) {
3853 				ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR);
3854 				ipp->ipp_sticky_ignored |=
3855 				    (IPPF_IFINDEX|IPPF_ADDR);
3856 			} else {
3857 				struct in6_pktinfo *pkti;
3858 
3859 				pkti = (struct in6_pktinfo *)invalp;
3860 				ipp->ipp_ifindex = pkti->ipi6_ifindex;
3861 				ipp->ipp_addr = pkti->ipi6_addr;
3862 				if (ipp->ipp_ifindex != 0)
3863 					ipp->ipp_fields |= IPPF_IFINDEX;
3864 				else
3865 					ipp->ipp_fields &= ~IPPF_IFINDEX;
3866 				if (!IN6_IS_ADDR_UNSPECIFIED(
3867 				    &ipp->ipp_addr))
3868 					ipp->ipp_fields |= IPPF_ADDR;
3869 				else
3870 					ipp->ipp_fields &= ~IPPF_ADDR;
3871 			}
3872 			if (sticky) {
3873 				error = udp_build_hdrs(q, udp);
3874 				if (error != 0)
3875 					return (error);
3876 			}
3877 			break;
3878 		case IPV6_HOPLIMIT:
3879 			if (sticky)
3880 				return (EINVAL);
3881 			if (inlen != 0 && inlen != sizeof (int))
3882 				return (EINVAL);
3883 			if (checkonly)
3884 				break;
3885 
3886 			if (inlen == 0) {
3887 				ipp->ipp_fields &= ~IPPF_HOPLIMIT;
3888 				ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT;
3889 			} else {
3890 				if (*i1 > 255 || *i1 < -1)
3891 					return (EINVAL);
3892 				if (*i1 == -1)
3893 					ipp->ipp_hoplimit = udp_ipv6_hoplimit;
3894 				else
3895 					ipp->ipp_hoplimit = *i1;
3896 				ipp->ipp_fields |= IPPF_HOPLIMIT;
3897 			}
3898 			break;
3899 		case IPV6_TCLASS:
3900 			if (inlen != 0 && inlen != sizeof (int))
3901 				return (EINVAL);
3902 			if (checkonly)
3903 				break;
3904 
3905 			if (inlen == 0) {
3906 				ipp->ipp_fields &= ~IPPF_TCLASS;
3907 				ipp->ipp_sticky_ignored |= IPPF_TCLASS;
3908 			} else {
3909 				if (*i1 > 255 || *i1 < -1)
3910 					return (EINVAL);
3911 				if (*i1 == -1)
3912 					ipp->ipp_tclass = 0;
3913 				else
3914 					ipp->ipp_tclass = *i1;
3915 				ipp->ipp_fields |= IPPF_TCLASS;
3916 			}
3917 			if (sticky) {
3918 				error = udp_build_hdrs(q, udp);
3919 				if (error != 0)
3920 					return (error);
3921 			}
3922 			break;
3923 		case IPV6_NEXTHOP:
3924 			/*
3925 			 * IP will verify that the nexthop is reachable
3926 			 * and fail for sticky options.
3927 			 */
3928 			if (inlen != 0 && inlen != sizeof (sin6_t))
3929 				return (EINVAL);
3930 			if (checkonly)
3931 				break;
3932 
3933 			if (inlen == 0) {
3934 				ipp->ipp_fields &= ~IPPF_NEXTHOP;
3935 				ipp->ipp_sticky_ignored |= IPPF_NEXTHOP;
3936 			} else {
3937 				sin6_t *sin6 = (sin6_t *)invalp;
3938 
3939 				if (sin6->sin6_family != AF_INET6)
3940 					return (EAFNOSUPPORT);
3941 				if (IN6_IS_ADDR_V4MAPPED(
3942 				    &sin6->sin6_addr))
3943 					return (EADDRNOTAVAIL);
3944 				ipp->ipp_nexthop = sin6->sin6_addr;
3945 				if (!IN6_IS_ADDR_UNSPECIFIED(
3946 				    &ipp->ipp_nexthop))
3947 					ipp->ipp_fields |= IPPF_NEXTHOP;
3948 				else
3949 					ipp->ipp_fields &= ~IPPF_NEXTHOP;
3950 			}
3951 			if (sticky) {
3952 				error = udp_build_hdrs(q, udp);
3953 				if (error != 0)
3954 					return (error);
3955 			}
3956 			break;
3957 		case IPV6_HOPOPTS: {
3958 			ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
3959 			/*
3960 			 * Sanity checks - minimum size, size a multiple of
3961 			 * eight bytes, and matching size passed in.
3962 			 */
3963 			if (inlen != 0 &&
3964 			    inlen != (8 * (hopts->ip6h_len + 1)))
3965 				return (EINVAL);
3966 
3967 			if (checkonly)
3968 				break;
3969 
3970 			error = optcom_pkt_set(invalp, inlen, sticky,
3971 			    (uchar_t **)&ipp->ipp_hopopts,
3972 			    &ipp->ipp_hopoptslen,
3973 			    sticky ? udp->udp_label_len_v6 : 0);
3974 			if (error != 0)
3975 				return (error);
3976 			if (ipp->ipp_hopoptslen == 0) {
3977 				ipp->ipp_fields &= ~IPPF_HOPOPTS;
3978 				ipp->ipp_sticky_ignored |= IPPF_HOPOPTS;
3979 			} else {
3980 				ipp->ipp_fields |= IPPF_HOPOPTS;
3981 			}
3982 			if (sticky) {
3983 				error = udp_build_hdrs(q, udp);
3984 				if (error != 0)
3985 					return (error);
3986 			}
3987 			break;
3988 		}
3989 		case IPV6_RTHDRDSTOPTS: {
3990 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
3991 
3992 			/*
3993 			 * Sanity checks - minimum size, size a multiple of
3994 			 * eight bytes, and matching size passed in.
3995 			 */
3996 			if (inlen != 0 &&
3997 			    inlen != (8 * (dopts->ip6d_len + 1)))
3998 				return (EINVAL);
3999 
4000 			if (checkonly)
4001 				break;
4002 
4003 			if (inlen == 0) {
4004 				if (sticky &&
4005 				    (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) {
4006 					kmem_free(ipp->ipp_rtdstopts,
4007 					    ipp->ipp_rtdstoptslen);
4008 					ipp->ipp_rtdstopts = NULL;
4009 					ipp->ipp_rtdstoptslen = 0;
4010 				}
4011 
4012 				ipp->ipp_fields &= ~IPPF_RTDSTOPTS;
4013 				ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS;
4014 			} else {
4015 				error = optcom_pkt_set(invalp, inlen, sticky,
4016 				    (uchar_t **)&ipp->ipp_rtdstopts,
4017 				    &ipp->ipp_rtdstoptslen, 0);
4018 				if (error != 0)
4019 					return (error);
4020 				ipp->ipp_fields |= IPPF_RTDSTOPTS;
4021 			}
4022 			if (sticky) {
4023 				error = udp_build_hdrs(q, udp);
4024 				if (error != 0)
4025 					return (error);
4026 			}
4027 			break;
4028 		}
4029 		case IPV6_DSTOPTS: {
4030 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
4031 
4032 			/*
4033 			 * Sanity checks - minimum size, size a multiple of
4034 			 * eight bytes, and matching size passed in.
4035 			 */
4036 			if (inlen != 0 &&
4037 			    inlen != (8 * (dopts->ip6d_len + 1)))
4038 				return (EINVAL);
4039 
4040 			if (checkonly)
4041 				break;
4042 
4043 			if (inlen == 0) {
4044 				if (sticky &&
4045 				    (ipp->ipp_fields & IPPF_DSTOPTS) != 0) {
4046 					kmem_free(ipp->ipp_dstopts,
4047 					    ipp->ipp_dstoptslen);
4048 					ipp->ipp_dstopts = NULL;
4049 					ipp->ipp_dstoptslen = 0;
4050 				}
4051 				ipp->ipp_fields &= ~IPPF_DSTOPTS;
4052 				ipp->ipp_sticky_ignored |= IPPF_DSTOPTS;
4053 			} else {
4054 				error = optcom_pkt_set(invalp, inlen, sticky,
4055 				    (uchar_t **)&ipp->ipp_dstopts,
4056 				    &ipp->ipp_dstoptslen, 0);
4057 				if (error != 0)
4058 					return (error);
4059 				ipp->ipp_fields |= IPPF_DSTOPTS;
4060 			}
4061 			if (sticky) {
4062 				error = udp_build_hdrs(q, udp);
4063 				if (error != 0)
4064 					return (error);
4065 			}
4066 			break;
4067 		}
4068 		case IPV6_RTHDR: {
4069 			ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp;
4070 
4071 			/*
4072 			 * Sanity checks - minimum size, size a multiple of
4073 			 * eight bytes, and matching size passed in.
4074 			 */
4075 			if (inlen != 0 &&
4076 			    inlen != (8 * (rt->ip6r_len + 1)))
4077 				return (EINVAL);
4078 
4079 			if (checkonly)
4080 				break;
4081 
4082 			if (inlen == 0) {
4083 				if (sticky &&
4084 				    (ipp->ipp_fields & IPPF_RTHDR) != 0) {
4085 					kmem_free(ipp->ipp_rthdr,
4086 					    ipp->ipp_rthdrlen);
4087 					ipp->ipp_rthdr = NULL;
4088 					ipp->ipp_rthdrlen = 0;
4089 				}
4090 				ipp->ipp_fields &= ~IPPF_RTHDR;
4091 				ipp->ipp_sticky_ignored |= IPPF_RTHDR;
4092 			} else {
4093 				error = optcom_pkt_set(invalp, inlen, sticky,
4094 				    (uchar_t **)&ipp->ipp_rthdr,
4095 				    &ipp->ipp_rthdrlen, 0);
4096 				if (error != 0)
4097 					return (error);
4098 				ipp->ipp_fields |= IPPF_RTHDR;
4099 			}
4100 			if (sticky) {
4101 				error = udp_build_hdrs(q, udp);
4102 				if (error != 0)
4103 					return (error);
4104 			}
4105 			break;
4106 		}
4107 
4108 		case IPV6_DONTFRAG:
4109 			if (checkonly)
4110 				break;
4111 
4112 			if (onoff) {
4113 				ipp->ipp_fields |= IPPF_DONTFRAG;
4114 			} else {
4115 				ipp->ipp_fields &= ~IPPF_DONTFRAG;
4116 			}
4117 			break;
4118 
4119 		case IPV6_USE_MIN_MTU:
4120 			if (inlen != sizeof (int))
4121 				return (EINVAL);
4122 
4123 			if (*i1 < -1 || *i1 > 1)
4124 				return (EINVAL);
4125 
4126 			if (checkonly)
4127 				break;
4128 
4129 			ipp->ipp_fields |= IPPF_USE_MIN_MTU;
4130 			ipp->ipp_use_min_mtu = *i1;
4131 			break;
4132 
4133 		case IPV6_BOUND_PIF:
4134 		case IPV6_SEC_OPT:
4135 		case IPV6_DONTFAILOVER_IF:
4136 		case IPV6_SRC_PREFERENCES:
4137 		case IPV6_V6ONLY:
4138 			/* Handled at the IP level */
4139 			return (-EINVAL);
4140 		default:
4141 			*outlenp = 0;
4142 			return (EINVAL);
4143 		}
4144 		break;
4145 		}		/* end IPPROTO_IPV6 */
4146 	case IPPROTO_UDP:
4147 		switch (name) {
4148 		case UDP_ANONPRIVBIND:
4149 			if ((error = secpolicy_net_privaddr(cr, 0)) != 0) {
4150 				*outlenp = 0;
4151 				return (error);
4152 			}
4153 			if (!checkonly) {
4154 				udp->udp_anon_priv_bind = onoff;
4155 			}
4156 			break;
4157 		case UDP_EXCLBIND:
4158 			if (!checkonly)
4159 				udp->udp_exclbind = onoff;
4160 			break;
4161 		case UDP_RCVHDR:
4162 			if (!checkonly)
4163 				udp->udp_rcvhdr = onoff;
4164 			break;
4165 		default:
4166 			*outlenp = 0;
4167 			return (EINVAL);
4168 		}
4169 		break;
4170 	default:
4171 		*outlenp = 0;
4172 		return (EINVAL);
4173 	}
4174 	/*
4175 	 * Common case of OK return with outval same as inval.
4176 	 */
4177 	if (invalp != outvalp) {
4178 		/* don't trust bcopy for identical src/dst */
4179 		(void) bcopy(invalp, outvalp, inlen);
4180 	}
4181 	*outlenp = inlen;
4182 	return (0);
4183 }
4184 
4185 /*
4186  * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl.
4187  * The headers include ip6i_t (if needed), ip6_t, any sticky extension
4188  * headers, and the udp header.
4189  * Returns failure if can't allocate memory.
4190  */
4191 static int
4192 udp_build_hdrs(queue_t *q, udp_t *udp)
4193 {
4194 	uchar_t	*hdrs;
4195 	uint_t	hdrs_len;
4196 	ip6_t	*ip6h;
4197 	ip6i_t	*ip6i;
4198 	udpha_t	*udpha;
4199 	ip6_pkt_t *ipp = &udp->udp_sticky_ipp;
4200 
4201 	hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE;
4202 	ASSERT(hdrs_len != 0);
4203 	if (hdrs_len != udp->udp_sticky_hdrs_len) {
4204 		/* Need to reallocate */
4205 		hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
4206 		if (hdrs == NULL)
4207 			return (ENOMEM);
4208 
4209 		if (udp->udp_sticky_hdrs_len != 0) {
4210 			kmem_free(udp->udp_sticky_hdrs,
4211 			    udp->udp_sticky_hdrs_len);
4212 		}
4213 		udp->udp_sticky_hdrs = hdrs;
4214 		udp->udp_sticky_hdrs_len = hdrs_len;
4215 	}
4216 	ip_build_hdrs_v6(udp->udp_sticky_hdrs,
4217 	    udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP);
4218 
4219 	/* Set header fields not in ipp */
4220 	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
4221 		ip6i = (ip6i_t *)udp->udp_sticky_hdrs;
4222 		ip6h = (ip6_t *)&ip6i[1];
4223 	} else {
4224 		ip6h = (ip6_t *)udp->udp_sticky_hdrs;
4225 	}
4226 
4227 	if (!(ipp->ipp_fields & IPPF_ADDR))
4228 		ip6h->ip6_src = udp->udp_v6src;
4229 
4230 	udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE);
4231 	udpha->uha_src_port = udp->udp_port;
4232 
4233 	/* Try to get everything in a single mblk */
4234 	if (hdrs_len > udp->udp_max_hdr_len) {
4235 		udp->udp_max_hdr_len = hdrs_len;
4236 		(void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len +
4237 		    udp_wroff_extra);
4238 	}
4239 	return (0);
4240 }
4241 
4242 /*
4243  * This routine retrieves the value of an ND variable in a udpparam_t
4244  * structure.  It is called through nd_getset when a user reads the
4245  * variable.
4246  */
4247 /* ARGSUSED */
4248 static int
4249 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
4250 {
4251 	udpparam_t *udppa = (udpparam_t *)cp;
4252 
4253 	(void) mi_mpprintf(mp, "%d", udppa->udp_param_value);
4254 	return (0);
4255 }
4256 
4257 /*
4258  * Walk through the param array specified registering each element with the
4259  * named dispatch (ND) handler.
4260  */
4261 static boolean_t
4262 udp_param_register(udpparam_t *udppa, int cnt)
4263 {
4264 	for (; cnt-- > 0; udppa++) {
4265 		if (udppa->udp_param_name && udppa->udp_param_name[0]) {
4266 			if (!nd_load(&udp_g_nd, udppa->udp_param_name,
4267 			    udp_param_get, udp_param_set,
4268 			    (caddr_t)udppa)) {
4269 				nd_free(&udp_g_nd);
4270 				return (B_FALSE);
4271 			}
4272 		}
4273 	}
4274 	if (!nd_load(&udp_g_nd, "udp_extra_priv_ports",
4275 	    udp_extra_priv_ports_get, NULL, NULL)) {
4276 		nd_free(&udp_g_nd);
4277 		return (B_FALSE);
4278 	}
4279 	if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_add",
4280 	    NULL, udp_extra_priv_ports_add, NULL)) {
4281 		nd_free(&udp_g_nd);
4282 		return (B_FALSE);
4283 	}
4284 	if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_del",
4285 	    NULL, udp_extra_priv_ports_del, NULL)) {
4286 		nd_free(&udp_g_nd);
4287 		return (B_FALSE);
4288 	}
4289 	if (!nd_load(&udp_g_nd, "udp_status", udp_status_report, NULL,
4290 	    NULL)) {
4291 		nd_free(&udp_g_nd);
4292 		return (B_FALSE);
4293 	}
4294 	if (!nd_load(&udp_g_nd, "udp_bind_hash", udp_bind_hash_report, NULL,
4295 	    NULL)) {
4296 		nd_free(&udp_g_nd);
4297 		return (B_FALSE);
4298 	}
4299 	return (B_TRUE);
4300 }
4301 
4302 /* This routine sets an ND variable in a udpparam_t structure. */
4303 /* ARGSUSED */
4304 static int
4305 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
4306 {
4307 	long		new_value;
4308 	udpparam_t	*udppa = (udpparam_t *)cp;
4309 
4310 	/*
4311 	 * Fail the request if the new value does not lie within the
4312 	 * required bounds.
4313 	 */
4314 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
4315 	    new_value < udppa->udp_param_min ||
4316 	    new_value > udppa->udp_param_max) {
4317 		return (EINVAL);
4318 	}
4319 
4320 	/* Set the new value */
4321 	udppa->udp_param_value = new_value;
4322 	return (0);
4323 }
4324 
4325 /*
4326  * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with
4327  * T_opthdr) and return the number of bytes copied.  'dbuf' may be NULL to
4328  * just count the length needed for allocation.  If 'dbuf' is non-NULL,
4329  * then it's assumed to be allocated to be large enough.
4330  *
4331  * Returns zero if trimming of the security option causes all options to go
4332  * away.
4333  */
4334 static size_t
4335 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf)
4336 {
4337 	struct T_opthdr *toh;
4338 	size_t hol = ipp->ipp_hopoptslen;
4339 	ip6_hbh_t *dstopt = NULL;
4340 	const ip6_hbh_t *srcopt = ipp->ipp_hopopts;
4341 	size_t tlen, olen, plen;
4342 	boolean_t deleting;
4343 	const struct ip6_opt *sopt, *lastpad;
4344 	struct ip6_opt *dopt;
4345 
4346 	if ((toh = (struct T_opthdr *)dbuf) != NULL) {
4347 		toh->level = IPPROTO_IPV6;
4348 		toh->name = IPV6_HOPOPTS;
4349 		toh->status = 0;
4350 		dstopt = (ip6_hbh_t *)(toh + 1);
4351 	}
4352 
4353 	/*
4354 	 * If labeling is enabled, then skip the label option
4355 	 * but get other options if there are any.
4356 	 */
4357 	if (is_system_labeled()) {
4358 		dopt = NULL;
4359 		if (dstopt != NULL) {
4360 			/* will fill in ip6h_len later */
4361 			dstopt->ip6h_nxt = srcopt->ip6h_nxt;
4362 			dopt = (struct ip6_opt *)(dstopt + 1);
4363 		}
4364 		sopt = (const struct ip6_opt *)(srcopt + 1);
4365 		hol -= sizeof (*srcopt);
4366 		tlen = sizeof (*dstopt);
4367 		lastpad = NULL;
4368 		deleting = B_FALSE;
4369 		/*
4370 		 * This loop finds the first (lastpad pointer) of any number of
4371 		 * pads that preceeds the security option, then treats the
4372 		 * security option as though it were a pad, and then finds the
4373 		 * next non-pad option (or end of list).
4374 		 *
4375 		 * It then treats the entire block as one big pad.  To preserve
4376 		 * alignment of any options that follow, or just the end of the
4377 		 * list, it computes a minimal new padding size that keeps the
4378 		 * same alignment for the next option.
4379 		 *
4380 		 * If it encounters just a sequence of pads with no security
4381 		 * option, those are copied as-is rather than collapsed.
4382 		 *
4383 		 * Note that to handle the end of list case, the code makes one
4384 		 * loop with 'hol' set to zero.
4385 		 */
4386 		for (;;) {
4387 			if (hol > 0) {
4388 				if (sopt->ip6o_type == IP6OPT_PAD1) {
4389 					if (lastpad == NULL)
4390 						lastpad = sopt;
4391 					sopt = (const struct ip6_opt *)
4392 					    &sopt->ip6o_len;
4393 					hol--;
4394 					continue;
4395 				}
4396 				olen = sopt->ip6o_len + sizeof (*sopt);
4397 				if (olen > hol)
4398 					olen = hol;
4399 				if (sopt->ip6o_type == IP6OPT_PADN ||
4400 				    sopt->ip6o_type == ip6opt_ls) {
4401 					if (sopt->ip6o_type == ip6opt_ls)
4402 						deleting = B_TRUE;
4403 					if (lastpad == NULL)
4404 						lastpad = sopt;
4405 					sopt = (const struct ip6_opt *)
4406 					    ((const char *)sopt + olen);
4407 					hol -= olen;
4408 					continue;
4409 				}
4410 			} else {
4411 				/* if nothing was copied at all, then delete */
4412 				if (tlen == sizeof (*dstopt))
4413 					return (0);
4414 				/* last pass; pick up any trailing padding */
4415 				olen = 0;
4416 			}
4417 			if (deleting) {
4418 				/*
4419 				 * compute aligning effect of deleted material
4420 				 * to reproduce with pad.
4421 				 */
4422 				plen = ((const char *)sopt -
4423 				    (const char *)lastpad) & 7;
4424 				tlen += plen;
4425 				if (dopt != NULL) {
4426 					if (plen == 1) {
4427 						dopt->ip6o_type = IP6OPT_PAD1;
4428 					} else if (plen > 1) {
4429 						plen -= sizeof (*dopt);
4430 						dopt->ip6o_type = IP6OPT_PADN;
4431 						dopt->ip6o_len = plen;
4432 						if (plen > 0)
4433 							bzero(dopt + 1, plen);
4434 					}
4435 					dopt = (struct ip6_opt *)
4436 					    ((char *)dopt + plen);
4437 				}
4438 				deleting = B_FALSE;
4439 				lastpad = NULL;
4440 			}
4441 			/* if there's uncopied padding, then copy that now */
4442 			if (lastpad != NULL) {
4443 				olen += (const char *)sopt -
4444 				    (const char *)lastpad;
4445 				sopt = lastpad;
4446 				lastpad = NULL;
4447 			}
4448 			if (dopt != NULL && olen > 0) {
4449 				bcopy(sopt, dopt, olen);
4450 				dopt = (struct ip6_opt *)((char *)dopt + olen);
4451 			}
4452 			if (hol == 0)
4453 				break;
4454 			tlen += olen;
4455 			sopt = (const struct ip6_opt *)
4456 			    ((const char *)sopt + olen);
4457 			hol -= olen;
4458 		}
4459 		/* go back and patch up the length value, rounded upward */
4460 		if (dstopt != NULL)
4461 			dstopt->ip6h_len = (tlen - 1) >> 3;
4462 	} else {
4463 		tlen = hol;
4464 		if (dstopt != NULL)
4465 			bcopy(srcopt, dstopt, hol);
4466 	}
4467 
4468 	tlen += sizeof (*toh);
4469 	if (toh != NULL)
4470 		toh->len = tlen;
4471 
4472 	return (tlen);
4473 }
4474 
4475 static void
4476 udp_input(conn_t *connp, mblk_t *mp)
4477 {
4478 	struct T_unitdata_ind	*tudi;
4479 	uchar_t			*rptr;		/* Pointer to IP header */
4480 	int			hdr_length;	/* Length of IP+UDP headers */
4481 	int			udi_size;	/* Size of T_unitdata_ind */
4482 	int			mp_len;
4483 	udp_t			*udp;
4484 	udpha_t			*udpha;
4485 	int			ipversion;
4486 	ip6_pkt_t		ipp;
4487 	ip6_t			*ip6h;
4488 	ip6i_t			*ip6i;
4489 	mblk_t			*mp1;
4490 	mblk_t			*options_mp = NULL;
4491 	in_pktinfo_t		*pinfo = NULL;
4492 	cred_t			*cr = NULL;
4493 	queue_t			*q = connp->conn_rq;
4494 	pid_t			cpid;
4495 	cred_t			*rcr = connp->conn_cred;
4496 
4497 	TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START,
4498 	    "udp_rput_start: q %p mp %p", q, mp);
4499 
4500 	udp = connp->conn_udp;
4501 	rptr = mp->b_rptr;
4502 	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL);
4503 	ASSERT(OK_32PTR(rptr));
4504 
4505 	/*
4506 	 * IP should have prepended the options data in an M_CTL
4507 	 * Check M_CTL "type" to make sure are not here bcos of
4508 	 * a valid ICMP message
4509 	 */
4510 	if (DB_TYPE(mp) == M_CTL) {
4511 		if (MBLKL(mp) == sizeof (in_pktinfo_t) &&
4512 		    ((in_pktinfo_t *)mp->b_rptr)->in_pkt_ulp_type ==
4513 		    IN_PKTINFO) {
4514 			/*
4515 			 * IP_RECVIF or IP_RECVSLLA information has been
4516 			 * appended to the packet by IP. We need to
4517 			 * extract the mblk and adjust the rptr
4518 			 */
4519 			pinfo = (in_pktinfo_t *)mp->b_rptr;
4520 			options_mp = mp;
4521 			mp = mp->b_cont;
4522 			rptr = mp->b_rptr;
4523 			UDP_STAT(udp_in_pktinfo);
4524 		} else {
4525 			/*
4526 			 * ICMP messages.
4527 			 */
4528 			udp_icmp_error(q, mp);
4529 			TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
4530 				"udp_rput_end: q %p (%S)", q, "m_ctl");
4531 			return;
4532 		}
4533 	}
4534 
4535 	mp_len = msgdsize(mp);
4536 	/*
4537 	 * This is the inbound data path.
4538 	 * First, we check to make sure the IP version number is correct,
4539 	 * and then pull the IP and UDP headers into the first mblk.
4540 	 * Assume IP provides aligned packets - otherwise toss.
4541 	 * Also, check if we have a complete IP header.
4542 	 */
4543 
4544 	/* Initialize regardless if ipversion is IPv4 or IPv6 */
4545 	ipp.ipp_fields = 0;
4546 
4547 	ipversion = IPH_HDR_VERSION(rptr);
4548 	switch (ipversion) {
4549 	case IPV4_VERSION:
4550 		ASSERT(MBLKL(mp) >= sizeof (ipha_t));
4551 		ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
4552 		hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE;
4553 		if ((hdr_length > IP_SIMPLE_HDR_LENGTH + UDPH_SIZE) ||
4554 		    (udp->udp_ip_rcv_options_len)) {
4555 			/*
4556 			 * Handle IPv4 packets with options outside of the
4557 			 * main data path. Not needed for AF_INET6 sockets
4558 			 * since they don't support a getsockopt of IP_OPTIONS.
4559 			 */
4560 			if (udp->udp_family == AF_INET6)
4561 				break;
4562 			/*
4563 			 * UDP length check performed for IPv4 packets with
4564 			 * options to check whether UDP length specified in
4565 			 * the header is the same as the physical length of
4566 			 * the packet.
4567 			 */
4568 			udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE));
4569 			if (mp_len != (ntohs(udpha->uha_length) +
4570 			    hdr_length - UDPH_SIZE)) {
4571 				goto tossit;
4572 			}
4573 			/*
4574 			 * Handle the case where the packet has IP options
4575 			 * and the IP_RECVSLLA & IP_RECVIF are set
4576 			 */
4577 			if (pinfo != NULL)
4578 				mp = options_mp;
4579 			udp_become_writer(connp, mp, udp_rput_other_wrapper,
4580 			    SQTAG_UDP_INPUT);
4581 			TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
4582 				"udp_rput_end: q %p (%S)", q, "end");
4583 			return;
4584 		}
4585 
4586 		/* Handle IPV6_RECVHOPLIMIT. */
4587 		if ((udp->udp_family == AF_INET6) && (pinfo != NULL) &&
4588 		    udp->udp_ipv6_recvpktinfo) {
4589 			if (pinfo->in_pkt_flags & IPF_RECVIF) {
4590 				ipp.ipp_fields |= IPPF_IFINDEX;
4591 				ipp.ipp_ifindex = pinfo->in_pkt_ifindex;
4592 			}
4593 		}
4594 		break;
4595 	case IPV6_VERSION:
4596 		/*
4597 		 * IPv6 packets can only be received by applications
4598 		 * that are prepared to receive IPv6 addresses.
4599 		 * The IP fanout must ensure this.
4600 		 */
4601 		ASSERT(udp->udp_family == AF_INET6);
4602 
4603 		ip6h = (ip6_t *)rptr;
4604 		ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
4605 
4606 		if (ip6h->ip6_nxt != IPPROTO_UDP) {
4607 			uint8_t nexthdrp;
4608 			/* Look for ifindex information */
4609 			if (ip6h->ip6_nxt == IPPROTO_RAW) {
4610 				ip6i = (ip6i_t *)ip6h;
4611 				if ((uchar_t *)&ip6i[1] > mp->b_wptr)
4612 					goto tossit;
4613 
4614 				if (ip6i->ip6i_flags & IP6I_IFINDEX) {
4615 					ASSERT(ip6i->ip6i_ifindex != 0);
4616 					ipp.ipp_fields |= IPPF_IFINDEX;
4617 					ipp.ipp_ifindex = ip6i->ip6i_ifindex;
4618 				}
4619 				rptr = (uchar_t *)&ip6i[1];
4620 				mp->b_rptr = rptr;
4621 				if (rptr == mp->b_wptr) {
4622 					mp1 = mp->b_cont;
4623 					freeb(mp);
4624 					mp = mp1;
4625 					rptr = mp->b_rptr;
4626 				}
4627 				if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE))
4628 					goto tossit;
4629 				ip6h = (ip6_t *)rptr;
4630 				mp_len = msgdsize(mp);
4631 			}
4632 			/*
4633 			 * Find any potentially interesting extension headers
4634 			 * as well as the length of the IPv6 + extension
4635 			 * headers.
4636 			 */
4637 			hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) +
4638 			    UDPH_SIZE;
4639 			ASSERT(nexthdrp == IPPROTO_UDP);
4640 		} else {
4641 			hdr_length = IPV6_HDR_LEN + UDPH_SIZE;
4642 			ip6i = NULL;
4643 		}
4644 		break;
4645 	default:
4646 		ASSERT(0);
4647 	}
4648 
4649 	/*
4650 	 * IP inspected the UDP header thus all of it must be in the mblk.
4651 	 * UDP length check is performed for IPv6 packets and IPv4 packets
4652 	 * without options to check if the size of the packet as specified
4653 	 * by the header is the same as the physical size of the packet.
4654 	 */
4655 	udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE));
4656 	if ((MBLKL(mp) < hdr_length) ||
4657 	    (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) {
4658 		goto tossit;
4659 	}
4660 
4661 	/* Walk past the headers. */
4662 	if (!udp->udp_rcvhdr) {
4663 		mp->b_rptr = rptr + hdr_length;
4664 		mp_len -= hdr_length;
4665 	}
4666 
4667 	/*
4668 	 * This is the inbound data path.  Packets are passed upstream as
4669 	 * T_UNITDATA_IND messages with full IP headers still attached.
4670 	 */
4671 	if (udp->udp_family == AF_INET) {
4672 		sin_t *sin;
4673 
4674 		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
4675 
4676 		/*
4677 		 * Normally only send up the address.
4678 		 * If IP_RECVDSTADDR is set we include the destination IP
4679 		 * address as an option. With IP_RECVOPTS we include all
4680 		 * the IP options. Only ip_rput_other() handles packets
4681 		 * that contain IP options.
4682 		 */
4683 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
4684 		if (udp->udp_recvdstaddr) {
4685 			udi_size += sizeof (struct T_opthdr) +
4686 			    sizeof (struct in_addr);
4687 			UDP_STAT(udp_in_recvdstaddr);
4688 		}
4689 
4690 		/*
4691 		 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate
4692 		 * space accordingly
4693 		 */
4694 		if (udp->udp_recvif && (pinfo != NULL) &&
4695 		    (pinfo->in_pkt_flags & IPF_RECVIF)) {
4696 			udi_size += sizeof (struct T_opthdr) + sizeof (uint_t);
4697 			UDP_STAT(udp_in_recvif);
4698 		}
4699 
4700 		if (udp->udp_recvslla && (pinfo != NULL) &&
4701 		    (pinfo->in_pkt_flags & IPF_RECVSLLA)) {
4702 			udi_size += sizeof (struct T_opthdr) +
4703 			    sizeof (struct sockaddr_dl);
4704 			UDP_STAT(udp_in_recvslla);
4705 		}
4706 
4707 		if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) {
4708 			udi_size += sizeof (struct T_opthdr) + ucredsize;
4709 			cpid = DB_CPID(mp);
4710 			UDP_STAT(udp_in_recvucred);
4711 		}
4712 
4713 		/*
4714 		 * If SO_TIMESTAMP is set allocate the appropriate sized
4715 		 * buffer. Since gethrestime() expects a pointer aligned
4716 		 * argument, we allocate space necessary for extra
4717 		 * alignment (even though it might not be used).
4718 		 */
4719 		if (udp->udp_timestamp) {
4720 			udi_size += sizeof (struct T_opthdr) +
4721 			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
4722 			UDP_STAT(udp_in_timestamp);
4723 		}
4724 
4725 		/*
4726 		 * If IP_RECVTTL is set allocate the appropriate sized buffer
4727 		 */
4728 		if (udp->udp_recvttl) {
4729 			udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
4730 			UDP_STAT(udp_in_recvttl);
4731 		}
4732 		ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH);
4733 
4734 		/* Allocate a message block for the T_UNITDATA_IND structure. */
4735 		mp1 = allocb(udi_size, BPRI_MED);
4736 		if (mp1 == NULL) {
4737 			freemsg(mp);
4738 			if (options_mp != NULL)
4739 				freeb(options_mp);
4740 			TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
4741 				"udp_rput_end: q %p (%S)", q, "allocbfail");
4742 			BUMP_MIB(&udp_mib, udpInErrors);
4743 			return;
4744 		}
4745 		mp1->b_cont = mp;
4746 		mp = mp1;
4747 		mp->b_datap->db_type = M_PROTO;
4748 		tudi = (struct T_unitdata_ind *)mp->b_rptr;
4749 		mp->b_wptr = (uchar_t *)tudi + udi_size;
4750 		tudi->PRIM_type = T_UNITDATA_IND;
4751 		tudi->SRC_length = sizeof (sin_t);
4752 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
4753 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
4754 		    sizeof (sin_t);
4755 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
4756 		tudi->OPT_length = udi_size;
4757 		sin = (sin_t *)&tudi[1];
4758 		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
4759 		sin->sin_port =	udpha->uha_src_port;
4760 		sin->sin_family = udp->udp_family;
4761 		*(uint32_t *)&sin->sin_zero[0] = 0;
4762 		*(uint32_t *)&sin->sin_zero[4] = 0;
4763 
4764 		/*
4765 		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
4766 		 * IP_RECVTTL has been set.
4767 		 */
4768 		if (udi_size != 0) {
4769 			/*
4770 			 * Copy in destination address before options to avoid
4771 			 * any padding issues.
4772 			 */
4773 			char *dstopt;
4774 
4775 			dstopt = (char *)&sin[1];
4776 			if (udp->udp_recvdstaddr) {
4777 				struct T_opthdr *toh;
4778 				ipaddr_t *dstptr;
4779 
4780 				toh = (struct T_opthdr *)dstopt;
4781 				toh->level = IPPROTO_IP;
4782 				toh->name = IP_RECVDSTADDR;
4783 				toh->len = sizeof (struct T_opthdr) +
4784 				    sizeof (ipaddr_t);
4785 				toh->status = 0;
4786 				dstopt += sizeof (struct T_opthdr);
4787 				dstptr = (ipaddr_t *)dstopt;
4788 				*dstptr = ((ipha_t *)rptr)->ipha_dst;
4789 				dstopt = (char *)toh + toh->len;
4790 				udi_size -= toh->len;
4791 			}
4792 
4793 			if (udp->udp_recvslla && (pinfo != NULL) &&
4794 			    (pinfo->in_pkt_flags & IPF_RECVSLLA)) {
4795 
4796 				struct T_opthdr *toh;
4797 				struct sockaddr_dl	*dstptr;
4798 
4799 				toh = (struct T_opthdr *)dstopt;
4800 				toh->level = IPPROTO_IP;
4801 				toh->name = IP_RECVSLLA;
4802 				toh->len = sizeof (struct T_opthdr) +
4803 					sizeof (struct sockaddr_dl);
4804 				toh->status = 0;
4805 				dstopt += sizeof (struct T_opthdr);
4806 				dstptr = (struct sockaddr_dl *)dstopt;
4807 				bcopy(&pinfo->in_pkt_slla, dstptr,
4808 				    sizeof (struct sockaddr_dl));
4809 				dstopt = (char *)toh + toh->len;
4810 				udi_size -= toh->len;
4811 			}
4812 
4813 			if (udp->udp_recvif && (pinfo != NULL) &&
4814 			    (pinfo->in_pkt_flags & IPF_RECVIF)) {
4815 
4816 				struct T_opthdr *toh;
4817 				uint_t		*dstptr;
4818 
4819 				toh = (struct T_opthdr *)dstopt;
4820 				toh->level = IPPROTO_IP;
4821 				toh->name = IP_RECVIF;
4822 				toh->len = sizeof (struct T_opthdr) +
4823 					sizeof (uint_t);
4824 				toh->status = 0;
4825 				dstopt += sizeof (struct T_opthdr);
4826 				dstptr = (uint_t *)dstopt;
4827 				*dstptr = pinfo->in_pkt_ifindex;
4828 				dstopt = (char *)toh + toh->len;
4829 				udi_size -= toh->len;
4830 			}
4831 
4832 			if (cr != NULL) {
4833 				struct T_opthdr *toh;
4834 
4835 				toh = (struct T_opthdr *)dstopt;
4836 				toh->level = SOL_SOCKET;
4837 				toh->name = SCM_UCRED;
4838 				toh->len = sizeof (struct T_opthdr) + ucredsize;
4839 				toh->status = 0;
4840 				(void) cred2ucred(cr, cpid, &toh[1], rcr);
4841 				dstopt = (char *)toh + toh->len;
4842 				udi_size -= toh->len;
4843 			}
4844 
4845 			if (udp->udp_timestamp) {
4846 				struct	T_opthdr *toh;
4847 
4848 				toh = (struct T_opthdr *)dstopt;
4849 				toh->level = SOL_SOCKET;
4850 				toh->name = SCM_TIMESTAMP;
4851 				toh->len = sizeof (struct T_opthdr) +
4852 				    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
4853 				toh->status = 0;
4854 				dstopt += sizeof (struct T_opthdr);
4855 				/* Align for gethrestime() */
4856 				dstopt = (char *)P2ROUNDUP((intptr_t)dstopt,
4857 				    sizeof (intptr_t));
4858 				gethrestime((timestruc_t *)dstopt);
4859 				dstopt = (char *)toh + toh->len;
4860 				udi_size -= toh->len;
4861 			}
4862 
4863 			/*
4864 			 * CAUTION:
4865 			 * Due to aligment issues
4866 			 * Processing of IP_RECVTTL option
4867 			 * should always be the last. Adding
4868 			 * any option processing after this will
4869 			 * cause alignment panic.
4870 			 */
4871 			if (udp->udp_recvttl) {
4872 				struct	T_opthdr *toh;
4873 				uint8_t	*dstptr;
4874 
4875 				toh = (struct T_opthdr *)dstopt;
4876 				toh->level = IPPROTO_IP;
4877 				toh->name = IP_RECVTTL;
4878 				toh->len = sizeof (struct T_opthdr) +
4879 				    sizeof (uint8_t);
4880 				toh->status = 0;
4881 				dstopt += sizeof (struct T_opthdr);
4882 				dstptr = (uint8_t *)dstopt;
4883 				*dstptr = ((ipha_t *)rptr)->ipha_ttl;
4884 				dstopt = (char *)toh + toh->len;
4885 				udi_size -= toh->len;
4886 			}
4887 
4888 			/* Consumed all of allocated space */
4889 			ASSERT(udi_size == 0);
4890 		}
4891 	} else {
4892 		sin6_t *sin6;
4893 
4894 		/*
4895 		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
4896 		 *
4897 		 * Normally we only send up the address. If receiving of any
4898 		 * optional receive side information is enabled, we also send
4899 		 * that up as options.
4900 		 * [ Only udp_rput_other() handles packets that contain IP
4901 		 * options so code to account for does not appear immediately
4902 		 * below but elsewhere ]
4903 		 */
4904 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
4905 
4906 		if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS|
4907 		    IPPF_RTHDR|IPPF_IFINDEX)) {
4908 			if (udp->udp_ipv6_recvhopopts &&
4909 			    (ipp.ipp_fields & IPPF_HOPOPTS)) {
4910 				size_t hlen;
4911 
4912 				UDP_STAT(udp_in_recvhopopts);
4913 				hlen = copy_hop_opts(&ipp, NULL);
4914 				if (hlen == 0)
4915 					ipp.ipp_fields &= ~IPPF_HOPOPTS;
4916 				udi_size += hlen;
4917 			}
4918 			if ((udp->udp_ipv6_recvdstopts ||
4919 				udp->udp_old_ipv6_recvdstopts) &&
4920 			    (ipp.ipp_fields & IPPF_DSTOPTS)) {
4921 				udi_size += sizeof (struct T_opthdr) +
4922 				    ipp.ipp_dstoptslen;
4923 				UDP_STAT(udp_in_recvdstopts);
4924 			}
4925 			if (((udp->udp_ipv6_recvdstopts &&
4926 			    udp->udp_ipv6_recvrthdr &&
4927 			    (ipp.ipp_fields & IPPF_RTHDR)) ||
4928 			    udp->udp_ipv6_recvrthdrdstopts) &&
4929 			    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
4930 				udi_size += sizeof (struct T_opthdr) +
4931 				    ipp.ipp_rtdstoptslen;
4932 				UDP_STAT(udp_in_recvrtdstopts);
4933 			}
4934 			if (udp->udp_ipv6_recvrthdr &&
4935 			    (ipp.ipp_fields & IPPF_RTHDR)) {
4936 				udi_size += sizeof (struct T_opthdr) +
4937 				    ipp.ipp_rthdrlen;
4938 				UDP_STAT(udp_in_recvrthdr);
4939 			}
4940 			if (udp->udp_ipv6_recvpktinfo &&
4941 			    (ipp.ipp_fields & IPPF_IFINDEX)) {
4942 				udi_size += sizeof (struct T_opthdr) +
4943 				    sizeof (struct in6_pktinfo);
4944 				UDP_STAT(udp_in_recvpktinfo);
4945 			}
4946 
4947 		}
4948 		if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) {
4949 			udi_size += sizeof (struct T_opthdr) + ucredsize;
4950 			cpid = DB_CPID(mp);
4951 			UDP_STAT(udp_in_recvucred);
4952 		}
4953 
4954 		if (udp->udp_ipv6_recvhoplimit) {
4955 			udi_size += sizeof (struct T_opthdr) + sizeof (int);
4956 			UDP_STAT(udp_in_recvhoplimit);
4957 		}
4958 
4959 		if (udp->udp_ipv6_recvtclass) {
4960 			udi_size += sizeof (struct T_opthdr) + sizeof (int);
4961 			UDP_STAT(udp_in_recvtclass);
4962 		}
4963 
4964 		mp1 = allocb(udi_size, BPRI_MED);
4965 		if (mp1 == NULL) {
4966 			freemsg(mp);
4967 			if (options_mp != NULL)
4968 				freeb(options_mp);
4969 			TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
4970 				"udp_rput_end: q %p (%S)", q, "allocbfail");
4971 			BUMP_MIB(&udp_mib, udpInErrors);
4972 			return;
4973 		}
4974 		mp1->b_cont = mp;
4975 		mp = mp1;
4976 		mp->b_datap->db_type = M_PROTO;
4977 		tudi = (struct T_unitdata_ind *)mp->b_rptr;
4978 		mp->b_wptr = (uchar_t *)tudi + udi_size;
4979 		tudi->PRIM_type = T_UNITDATA_IND;
4980 		tudi->SRC_length = sizeof (sin6_t);
4981 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
4982 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
4983 		    sizeof (sin6_t);
4984 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
4985 		tudi->OPT_length = udi_size;
4986 		sin6 = (sin6_t *)&tudi[1];
4987 		if (ipversion == IPV4_VERSION) {
4988 			in6_addr_t v6dst;
4989 
4990 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
4991 			    &sin6->sin6_addr);
4992 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
4993 			    &v6dst);
4994 			sin6->sin6_flowinfo = 0;
4995 			sin6->sin6_scope_id = 0;
4996 			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
4997 			    connp->conn_zoneid);
4998 		} else {
4999 			sin6->sin6_addr = ip6h->ip6_src;
5000 			/* No sin6_flowinfo per API */
5001 			sin6->sin6_flowinfo = 0;
5002 			/* For link-scope source pass up scope id */
5003 			if ((ipp.ipp_fields & IPPF_IFINDEX) &&
5004 			    IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
5005 				sin6->sin6_scope_id = ipp.ipp_ifindex;
5006 			else
5007 				sin6->sin6_scope_id = 0;
5008 			sin6->__sin6_src_id = ip_srcid_find_addr(
5009 			    &ip6h->ip6_dst, connp->conn_zoneid);
5010 		}
5011 		sin6->sin6_port = udpha->uha_src_port;
5012 		sin6->sin6_family = udp->udp_family;
5013 
5014 		if (udi_size != 0) {
5015 			uchar_t *dstopt;
5016 
5017 			dstopt = (uchar_t *)&sin6[1];
5018 			if (udp->udp_ipv6_recvpktinfo &&
5019 			    (ipp.ipp_fields & IPPF_IFINDEX)) {
5020 				struct T_opthdr *toh;
5021 				struct in6_pktinfo *pkti;
5022 
5023 				toh = (struct T_opthdr *)dstopt;
5024 				toh->level = IPPROTO_IPV6;
5025 				toh->name = IPV6_PKTINFO;
5026 				toh->len = sizeof (struct T_opthdr) +
5027 				    sizeof (*pkti);
5028 				toh->status = 0;
5029 				dstopt += sizeof (struct T_opthdr);
5030 				pkti = (struct in6_pktinfo *)dstopt;
5031 				if (ipversion == IPV6_VERSION)
5032 					pkti->ipi6_addr = ip6h->ip6_dst;
5033 				else
5034 					IN6_IPADDR_TO_V4MAPPED(
5035 						((ipha_t *)rptr)->ipha_dst,
5036 						    &pkti->ipi6_addr);
5037 				pkti->ipi6_ifindex = ipp.ipp_ifindex;
5038 				dstopt += sizeof (*pkti);
5039 				udi_size -= toh->len;
5040 			}
5041 			if (udp->udp_ipv6_recvhoplimit) {
5042 				struct T_opthdr *toh;
5043 
5044 				toh = (struct T_opthdr *)dstopt;
5045 				toh->level = IPPROTO_IPV6;
5046 				toh->name = IPV6_HOPLIMIT;
5047 				toh->len = sizeof (struct T_opthdr) +
5048 				    sizeof (uint_t);
5049 				toh->status = 0;
5050 				dstopt += sizeof (struct T_opthdr);
5051 				if (ipversion == IPV6_VERSION)
5052 					*(uint_t *)dstopt = ip6h->ip6_hops;
5053 				else
5054 					*(uint_t *)dstopt =
5055 					    ((ipha_t *)rptr)->ipha_ttl;
5056 				dstopt += sizeof (uint_t);
5057 				udi_size -= toh->len;
5058 			}
5059 			if (udp->udp_ipv6_recvtclass) {
5060 				struct T_opthdr *toh;
5061 
5062 				toh = (struct T_opthdr *)dstopt;
5063 				toh->level = IPPROTO_IPV6;
5064 				toh->name = IPV6_TCLASS;
5065 				toh->len = sizeof (struct T_opthdr) +
5066 				    sizeof (uint_t);
5067 				toh->status = 0;
5068 				dstopt += sizeof (struct T_opthdr);
5069 				if (ipversion == IPV6_VERSION) {
5070 					*(uint_t *)dstopt =
5071 					IPV6_FLOW_TCLASS(ip6h->ip6_flow);
5072 				} else {
5073 					ipha_t *ipha = (ipha_t *)rptr;
5074 					*(uint_t *)dstopt =
5075 					    ipha->ipha_type_of_service;
5076 				}
5077 				dstopt += sizeof (uint_t);
5078 				udi_size -= toh->len;
5079 			}
5080 			if (udp->udp_ipv6_recvhopopts &&
5081 			    (ipp.ipp_fields & IPPF_HOPOPTS)) {
5082 				size_t hlen;
5083 
5084 				hlen = copy_hop_opts(&ipp, dstopt);
5085 				dstopt += hlen;
5086 				udi_size -= hlen;
5087 			}
5088 			if (udp->udp_ipv6_recvdstopts &&
5089 			    udp->udp_ipv6_recvrthdr &&
5090 			    (ipp.ipp_fields & IPPF_RTHDR) &&
5091 			    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
5092 				struct T_opthdr *toh;
5093 
5094 				toh = (struct T_opthdr *)dstopt;
5095 				toh->level = IPPROTO_IPV6;
5096 				toh->name = IPV6_DSTOPTS;
5097 				toh->len = sizeof (struct T_opthdr) +
5098 				    ipp.ipp_rtdstoptslen;
5099 				toh->status = 0;
5100 				dstopt += sizeof (struct T_opthdr);
5101 				bcopy(ipp.ipp_rtdstopts, dstopt,
5102 				    ipp.ipp_rtdstoptslen);
5103 				dstopt += ipp.ipp_rtdstoptslen;
5104 				udi_size -= toh->len;
5105 			}
5106 			if (udp->udp_ipv6_recvrthdr &&
5107 			    (ipp.ipp_fields & IPPF_RTHDR)) {
5108 				struct T_opthdr *toh;
5109 
5110 				toh = (struct T_opthdr *)dstopt;
5111 				toh->level = IPPROTO_IPV6;
5112 				toh->name = IPV6_RTHDR;
5113 				toh->len = sizeof (struct T_opthdr) +
5114 				    ipp.ipp_rthdrlen;
5115 				toh->status = 0;
5116 				dstopt += sizeof (struct T_opthdr);
5117 				bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen);
5118 				dstopt += ipp.ipp_rthdrlen;
5119 				udi_size -= toh->len;
5120 			}
5121 			if (udp->udp_ipv6_recvdstopts &&
5122 			    (ipp.ipp_fields & IPPF_DSTOPTS)) {
5123 				struct T_opthdr *toh;
5124 
5125 				toh = (struct T_opthdr *)dstopt;
5126 				toh->level = IPPROTO_IPV6;
5127 				toh->name = IPV6_DSTOPTS;
5128 				toh->len = sizeof (struct T_opthdr) +
5129 				    ipp.ipp_dstoptslen;
5130 				toh->status = 0;
5131 				dstopt += sizeof (struct T_opthdr);
5132 				bcopy(ipp.ipp_dstopts, dstopt,
5133 				    ipp.ipp_dstoptslen);
5134 				dstopt += ipp.ipp_dstoptslen;
5135 				udi_size -= toh->len;
5136 			}
5137 
5138 			if (cr != NULL) {
5139 				struct T_opthdr *toh;
5140 
5141 				toh = (struct T_opthdr *)dstopt;
5142 				toh->level = SOL_SOCKET;
5143 				toh->name = SCM_UCRED;
5144 				toh->len = sizeof (struct T_opthdr) + ucredsize;
5145 				toh->status = 0;
5146 				(void) cred2ucred(cr, cpid, &toh[1], rcr);
5147 				dstopt += toh->len;
5148 				udi_size -= toh->len;
5149 			}
5150 			/* Consumed all of allocated space */
5151 			ASSERT(udi_size == 0);
5152 		}
5153 #undef	sin6
5154 		/* No IP_RECVDSTADDR for IPv6. */
5155 	}
5156 
5157 	BUMP_MIB(&udp_mib, udpInDatagrams);
5158 	TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
5159 		"udp_rput_end: q %p (%S)", q, "end");
5160 	if (options_mp != NULL)
5161 		freeb(options_mp);
5162 
5163 	if (udp->udp_direct_sockfs) {
5164 		/*
5165 		 * There is nothing above us except for the stream head;
5166 		 * use the read-side synchronous stream interface in
5167 		 * order to reduce the time spent in interrupt thread.
5168 		 */
5169 		ASSERT(udp->udp_issocket);
5170 		udp_rcv_enqueue(UDP_RD(q), udp, mp, mp_len);
5171 	} else {
5172 		/*
5173 		 * Use regular STREAMS interface to pass data upstream
5174 		 * if this is not a socket endpoint, or if we have
5175 		 * switched over to the slow mode due to sockmod being
5176 		 * popped or a module being pushed on top of us.
5177 		 */
5178 		putnext(UDP_RD(q), mp);
5179 	}
5180 	return;
5181 
5182 tossit:
5183 	freemsg(mp);
5184 	if (options_mp != NULL)
5185 		freeb(options_mp);
5186 	BUMP_MIB(&udp_mib, udpInErrors);
5187 }
5188 
5189 void
5190 udp_conn_recv(conn_t *connp, mblk_t *mp)
5191 {
5192 	_UDP_ENTER(connp, mp, udp_input_wrapper, SQTAG_UDP_FANOUT);
5193 }
5194 
5195 /* ARGSUSED */
5196 static void
5197 udp_input_wrapper(void *arg, mblk_t *mp, void *arg2)
5198 {
5199 	udp_input((conn_t *)arg, mp);
5200 	_UDP_EXIT((conn_t *)arg);
5201 }
5202 
5203 /*
5204  * Process non-M_DATA messages as well as M_DATA messages that requires
5205  * modifications to udp_ip_rcv_options i.e. IPv4 packets with IP options.
5206  */
5207 static void
5208 udp_rput_other(queue_t *q, mblk_t *mp)
5209 {
5210 	struct T_unitdata_ind	*tudi;
5211 	mblk_t			*mp1;
5212 	uchar_t			*rptr;
5213 	uchar_t			*new_rptr;
5214 	int			hdr_length;
5215 	int			udi_size;	/* Size of T_unitdata_ind */
5216 	int			opt_len;	/* Length of IP options */
5217 	sin_t			*sin;
5218 	struct T_error_ack	*tea;
5219 	mblk_t			*options_mp = NULL;
5220 	in_pktinfo_t		*pinfo;
5221 	boolean_t		recv_on = B_FALSE;
5222 	cred_t			*cr = NULL;
5223 	udp_t			*udp = Q_TO_UDP(q);
5224 	pid_t			cpid;
5225 	cred_t			*rcr = udp->udp_connp->conn_cred;
5226 
5227 	TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START,
5228 	    "udp_rput_other: q %p mp %p", q, mp);
5229 
5230 	ASSERT(OK_32PTR(mp->b_rptr));
5231 	rptr = mp->b_rptr;
5232 
5233 	switch (mp->b_datap->db_type) {
5234 	case M_CTL:
5235 		/*
5236 		 * We are here only if IP_RECVSLLA and/or IP_RECVIF are set
5237 		 */
5238 		recv_on = B_TRUE;
5239 		options_mp = mp;
5240 		pinfo = (in_pktinfo_t *)options_mp->b_rptr;
5241 
5242 		/*
5243 		 * The actual data is in mp->b_cont
5244 		 */
5245 		mp = mp->b_cont;
5246 		ASSERT(OK_32PTR(mp->b_rptr));
5247 		rptr = mp->b_rptr;
5248 		break;
5249 	case M_DATA:
5250 		/*
5251 		 * M_DATA messages contain IPv4 datagrams.  They are handled
5252 		 * after this switch.
5253 		 */
5254 		break;
5255 	case M_PROTO:
5256 	case M_PCPROTO:
5257 		/* M_PROTO messages contain some type of TPI message. */
5258 		ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX);
5259 		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
5260 			freemsg(mp);
5261 			TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
5262 			    "udp_rput_other_end: q %p (%S)", q, "protoshort");
5263 			return;
5264 		}
5265 		tea = (struct T_error_ack *)rptr;
5266 
5267 		switch (tea->PRIM_type) {
5268 		case T_ERROR_ACK:
5269 			switch (tea->ERROR_prim) {
5270 			case O_T_BIND_REQ:
5271 			case T_BIND_REQ: {
5272 				/*
5273 				 * If our O_T_BIND_REQ/T_BIND_REQ fails,
5274 				 * clear out the associated port and source
5275 				 * address before passing the message
5276 				 * upstream. If this was caused by a T_CONN_REQ
5277 				 * revert back to bound state.
5278 				 */
5279 				udp_fanout_t	*udpf;
5280 
5281 				udpf = &udp_bind_fanout[
5282 				    UDP_BIND_HASH(udp->udp_port)];
5283 				mutex_enter(&udpf->uf_lock);
5284 				if (udp->udp_state == TS_DATA_XFER) {
5285 					/* Connect failed */
5286 					tea->ERROR_prim = T_CONN_REQ;
5287 					/* Revert back to the bound source */
5288 					udp->udp_v6src = udp->udp_bound_v6src;
5289 					udp->udp_state = TS_IDLE;
5290 					mutex_exit(&udpf->uf_lock);
5291 					if (udp->udp_family == AF_INET6)
5292 						(void) udp_build_hdrs(q, udp);
5293 					break;
5294 				}
5295 
5296 				if (udp->udp_discon_pending) {
5297 					tea->ERROR_prim = T_DISCON_REQ;
5298 					udp->udp_discon_pending = 0;
5299 				}
5300 				V6_SET_ZERO(udp->udp_v6src);
5301 				V6_SET_ZERO(udp->udp_bound_v6src);
5302 				udp->udp_state = TS_UNBND;
5303 				udp_bind_hash_remove(udp, B_TRUE);
5304 				udp->udp_port = 0;
5305 				mutex_exit(&udpf->uf_lock);
5306 				if (udp->udp_family == AF_INET6)
5307 					(void) udp_build_hdrs(q, udp);
5308 				break;
5309 			}
5310 			default:
5311 				break;
5312 			}
5313 			break;
5314 		case T_BIND_ACK:
5315 			udp_rput_bind_ack(q, mp);
5316 			return;
5317 
5318 		case T_OPTMGMT_ACK:
5319 		case T_OK_ACK:
5320 			break;
5321 		default:
5322 			freemsg(mp);
5323 			return;
5324 		}
5325 		putnext(UDP_RD(q), mp);
5326 		return;
5327 	}
5328 
5329 	/*
5330 	 * This is the inbound data path.
5331 	 * First, we make sure the data contains both IP and UDP headers.
5332 	 *
5333 	 * This handle IPv4 packets for only AF_INET sockets.
5334 	 * AF_INET6 sockets can never access udp_ip_rcv_options thus there
5335 	 * is no need saving the options.
5336 	 */
5337 	ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
5338 	hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE;
5339 	if (mp->b_wptr - rptr < hdr_length) {
5340 		if (!pullupmsg(mp, hdr_length)) {
5341 			freemsg(mp);
5342 			if (options_mp != NULL)
5343 				freeb(options_mp);
5344 			BUMP_MIB(&udp_mib, udpInErrors);
5345 			TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
5346 			    "udp_rput_other_end: q %p (%S)", q, "hdrshort");
5347 			BUMP_MIB(&udp_mib, udpInErrors);
5348 			return;
5349 		}
5350 		rptr = mp->b_rptr;
5351 	}
5352 	/* Walk past the headers. */
5353 	new_rptr = rptr + hdr_length;
5354 	if (!udp->udp_rcvhdr)
5355 		mp->b_rptr = new_rptr;
5356 
5357 	/* Save the options if any */
5358 	opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE);
5359 	if (opt_len > 0) {
5360 		if (opt_len > udp->udp_ip_rcv_options_len) {
5361 			if (udp->udp_ip_rcv_options_len)
5362 				mi_free((char *)udp->udp_ip_rcv_options);
5363 			udp->udp_ip_rcv_options_len = 0;
5364 			udp->udp_ip_rcv_options =
5365 			    (uchar_t *)mi_alloc(opt_len, BPRI_HI);
5366 			if (udp->udp_ip_rcv_options)
5367 				udp->udp_ip_rcv_options_len = opt_len;
5368 		}
5369 		if (udp->udp_ip_rcv_options_len) {
5370 			bcopy(rptr + IP_SIMPLE_HDR_LENGTH,
5371 			    udp->udp_ip_rcv_options, opt_len);
5372 			/* Adjust length if we are resusing the space */
5373 			udp->udp_ip_rcv_options_len = opt_len;
5374 		}
5375 	} else if (udp->udp_ip_rcv_options_len) {
5376 		mi_free((char *)udp->udp_ip_rcv_options);
5377 		udp->udp_ip_rcv_options = NULL;
5378 		udp->udp_ip_rcv_options_len = 0;
5379 	}
5380 
5381 	/*
5382 	 * Normally only send up the address.
5383 	 * If IP_RECVDSTADDR is set we include the destination IP
5384 	 * address as an option. With IP_RECVOPTS we include all
5385 	 * the IP options.
5386 	 */
5387 	udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
5388 	if (udp->udp_recvdstaddr) {
5389 		udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr);
5390 		UDP_STAT(udp_in_recvdstaddr);
5391 	}
5392 	if (udp->udp_recvopts && opt_len > 0) {
5393 		udi_size += sizeof (struct T_opthdr) + opt_len;
5394 		UDP_STAT(udp_in_recvopts);
5395 	}
5396 
5397 	/*
5398 	 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate
5399 	 * space accordingly
5400 	 */
5401 	if (udp->udp_recvif && recv_on &&
5402 	    (pinfo->in_pkt_flags & IPF_RECVIF)) {
5403 		udi_size += sizeof (struct T_opthdr) + sizeof (uint_t);
5404 		UDP_STAT(udp_in_recvif);
5405 	}
5406 
5407 	if (udp->udp_recvslla && recv_on &&
5408 	    (pinfo->in_pkt_flags & IPF_RECVSLLA)) {
5409 		udi_size += sizeof (struct T_opthdr) +
5410 		    sizeof (struct sockaddr_dl);
5411 		UDP_STAT(udp_in_recvslla);
5412 	}
5413 
5414 	if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) {
5415 		udi_size += sizeof (struct T_opthdr) + ucredsize;
5416 		cpid = DB_CPID(mp);
5417 		UDP_STAT(udp_in_recvucred);
5418 	}
5419 	/*
5420 	 * If IP_RECVTTL is set allocate the appropriate sized buffer
5421 	 */
5422 	if (udp->udp_recvttl) {
5423 		udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
5424 		UDP_STAT(udp_in_recvttl);
5425 	}
5426 
5427 	/* Allocate a message block for the T_UNITDATA_IND structure. */
5428 	mp1 = allocb(udi_size, BPRI_MED);
5429 	if (mp1 == NULL) {
5430 		freemsg(mp);
5431 		if (options_mp != NULL)
5432 			freeb(options_mp);
5433 		TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
5434 			"udp_rput_other_end: q %p (%S)", q, "allocbfail");
5435 		BUMP_MIB(&udp_mib, udpInErrors);
5436 		return;
5437 	}
5438 	mp1->b_cont = mp;
5439 	mp = mp1;
5440 	mp->b_datap->db_type = M_PROTO;
5441 	tudi = (struct T_unitdata_ind *)mp->b_rptr;
5442 	mp->b_wptr = (uchar_t *)tudi + udi_size;
5443 	tudi->PRIM_type = T_UNITDATA_IND;
5444 	tudi->SRC_length = sizeof (sin_t);
5445 	tudi->SRC_offset = sizeof (struct T_unitdata_ind);
5446 	tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
5447 	udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
5448 	tudi->OPT_length = udi_size;
5449 
5450 	sin = (sin_t *)&tudi[1];
5451 	sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
5452 	sin->sin_port =	((in_port_t *)
5453 	    new_rptr)[-(UDPH_SIZE/sizeof (in_port_t))];
5454 	sin->sin_family = AF_INET;
5455 	*(uint32_t *)&sin->sin_zero[0] = 0;
5456 	*(uint32_t *)&sin->sin_zero[4] = 0;
5457 
5458 	/*
5459 	 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
5460 	 * IP_RECVTTL has been set.
5461 	 */
5462 	if (udi_size != 0) {
5463 		/*
5464 		 * Copy in destination address before options to avoid any
5465 		 * padding issues.
5466 		 */
5467 		char *dstopt;
5468 
5469 		dstopt = (char *)&sin[1];
5470 		if (udp->udp_recvdstaddr) {
5471 			struct T_opthdr *toh;
5472 			ipaddr_t *dstptr;
5473 
5474 			toh = (struct T_opthdr *)dstopt;
5475 			toh->level = IPPROTO_IP;
5476 			toh->name = IP_RECVDSTADDR;
5477 			toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
5478 			toh->status = 0;
5479 			dstopt += sizeof (struct T_opthdr);
5480 			dstptr = (ipaddr_t *)dstopt;
5481 			*dstptr = (((ipaddr_t *)rptr)[4]);
5482 			dstopt += sizeof (ipaddr_t);
5483 			udi_size -= toh->len;
5484 		}
5485 		if (udp->udp_recvopts && udi_size != 0) {
5486 			struct T_opthdr *toh;
5487 
5488 			toh = (struct T_opthdr *)dstopt;
5489 			toh->level = IPPROTO_IP;
5490 			toh->name = IP_RECVOPTS;
5491 			toh->len = sizeof (struct T_opthdr) + opt_len;
5492 			toh->status = 0;
5493 			dstopt += sizeof (struct T_opthdr);
5494 			bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, opt_len);
5495 			dstopt += opt_len;
5496 			udi_size -= toh->len;
5497 		}
5498 
5499 		if (udp->udp_recvslla && recv_on &&
5500 		    (pinfo->in_pkt_flags & IPF_RECVSLLA)) {
5501 
5502 			struct T_opthdr *toh;
5503 			struct sockaddr_dl	*dstptr;
5504 
5505 			toh = (struct T_opthdr *)dstopt;
5506 			toh->level = IPPROTO_IP;
5507 			toh->name = IP_RECVSLLA;
5508 			toh->len = sizeof (struct T_opthdr) +
5509 			    sizeof (struct sockaddr_dl);
5510 			toh->status = 0;
5511 			dstopt += sizeof (struct T_opthdr);
5512 			dstptr = (struct sockaddr_dl *)dstopt;
5513 			bcopy(&pinfo->in_pkt_slla, dstptr,
5514 			    sizeof (struct sockaddr_dl));
5515 			dstopt += sizeof (struct sockaddr_dl);
5516 			udi_size -= toh->len;
5517 		}
5518 
5519 		if (udp->udp_recvif && recv_on &&
5520 		    (pinfo->in_pkt_flags & IPF_RECVIF)) {
5521 
5522 			struct T_opthdr *toh;
5523 			uint_t		*dstptr;
5524 
5525 			toh = (struct T_opthdr *)dstopt;
5526 			toh->level = IPPROTO_IP;
5527 			toh->name = IP_RECVIF;
5528 			toh->len = sizeof (struct T_opthdr) +
5529 			    sizeof (uint_t);
5530 			toh->status = 0;
5531 			dstopt += sizeof (struct T_opthdr);
5532 			dstptr = (uint_t *)dstopt;
5533 			*dstptr = pinfo->in_pkt_ifindex;
5534 			dstopt += sizeof (uint_t);
5535 			udi_size -= toh->len;
5536 		}
5537 
5538 		if (cr != NULL) {
5539 			struct T_opthdr *toh;
5540 
5541 			toh = (struct T_opthdr *)dstopt;
5542 			toh->level = SOL_SOCKET;
5543 			toh->name = SCM_UCRED;
5544 			toh->len = sizeof (struct T_opthdr) + ucredsize;
5545 			toh->status = 0;
5546 			(void) cred2ucred(cr, cpid, &toh[1], rcr);
5547 			dstopt += toh->len;
5548 			udi_size -= toh->len;
5549 		}
5550 
5551 		if (udp->udp_recvttl) {
5552 			struct	T_opthdr *toh;
5553 			uint8_t	*dstptr;
5554 
5555 			toh = (struct T_opthdr *)dstopt;
5556 			toh->level = IPPROTO_IP;
5557 			toh->name = IP_RECVTTL;
5558 			toh->len = sizeof (struct T_opthdr) +
5559 			    sizeof (uint8_t);
5560 			toh->status = 0;
5561 			dstopt += sizeof (struct T_opthdr);
5562 			dstptr = (uint8_t *)dstopt;
5563 			*dstptr = ((ipha_t *)rptr)->ipha_ttl;
5564 			dstopt += sizeof (uint8_t);
5565 			udi_size -= toh->len;
5566 		}
5567 
5568 		ASSERT(udi_size == 0);	/* "Consumed" all of allocated space */
5569 	}
5570 	BUMP_MIB(&udp_mib, udpInDatagrams);
5571 	TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
5572 	    "udp_rput_other_end: q %p (%S)", q, "end");
5573 	if (options_mp != NULL)
5574 		freeb(options_mp);
5575 
5576 	if (udp->udp_direct_sockfs) {
5577 		/*
5578 		 * There is nothing above us except for the stream head;
5579 		 * use the read-side synchronous stream interface in
5580 		 * order to reduce the time spent in interrupt thread.
5581 		 */
5582 		ASSERT(udp->udp_issocket);
5583 		udp_rcv_enqueue(UDP_RD(q), udp, mp, msgdsize(mp));
5584 	} else {
5585 		/*
5586 		 * Use regular STREAMS interface to pass data upstream
5587 		 * if this is not a socket endpoint, or if we have
5588 		 * switched over to the slow mode due to sockmod being
5589 		 * popped or a module being pushed on top of us.
5590 		 */
5591 		putnext(UDP_RD(q), mp);
5592 	}
5593 }
5594 
5595 /* ARGSUSED */
5596 static void
5597 udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2)
5598 {
5599 	conn_t *connp = arg;
5600 
5601 	udp_rput_other(connp->conn_rq, mp);
5602 	udp_exit(connp);
5603 }
5604 
5605 /*
5606  * Process a T_BIND_ACK
5607  */
5608 static void
5609 udp_rput_bind_ack(queue_t *q, mblk_t *mp)
5610 {
5611 	udp_t	*udp = Q_TO_UDP(q);
5612 	mblk_t	*mp1;
5613 	ire_t	*ire;
5614 	struct T_bind_ack *tba;
5615 	uchar_t *addrp;
5616 	ipa_conn_t	*ac;
5617 	ipa6_conn_t	*ac6;
5618 
5619 	if (udp->udp_discon_pending)
5620 		udp->udp_discon_pending = 0;
5621 
5622 	/*
5623 	 * If a broadcast/multicast address was bound set
5624 	 * the source address to 0.
5625 	 * This ensures no datagrams with broadcast address
5626 	 * as source address are emitted (which would violate
5627 	 * RFC1122 - Hosts requirements)
5628 	 *
5629 	 * Note that when connecting the returned IRE is
5630 	 * for the destination address and we only perform
5631 	 * the broadcast check for the source address (it
5632 	 * is OK to connect to a broadcast/multicast address.)
5633 	 */
5634 	mp1 = mp->b_cont;
5635 	if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) {
5636 		ire = (ire_t *)mp1->b_rptr;
5637 
5638 		/*
5639 		 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast
5640 		 * local address.
5641 		 */
5642 		if (ire->ire_type == IRE_BROADCAST &&
5643 		    udp->udp_state != TS_DATA_XFER) {
5644 			/* This was just a local bind to a broadcast addr */
5645 			V6_SET_ZERO(udp->udp_v6src);
5646 			if (udp->udp_family == AF_INET6)
5647 				(void) udp_build_hdrs(q, udp);
5648 		} else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
5649 			/*
5650 			 * Local address not yet set - pick it from the
5651 			 * T_bind_ack
5652 			 */
5653 			tba = (struct T_bind_ack *)mp->b_rptr;
5654 			addrp = &mp->b_rptr[tba->ADDR_offset];
5655 			switch (udp->udp_family) {
5656 			case AF_INET:
5657 				if (tba->ADDR_length == sizeof (ipa_conn_t)) {
5658 					ac = (ipa_conn_t *)addrp;
5659 				} else {
5660 					ASSERT(tba->ADDR_length ==
5661 					    sizeof (ipa_conn_x_t));
5662 					ac = &((ipa_conn_x_t *)addrp)->acx_conn;
5663 				}
5664 				IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr,
5665 				    &udp->udp_v6src);
5666 				break;
5667 			case AF_INET6:
5668 				if (tba->ADDR_length == sizeof (ipa6_conn_t)) {
5669 					ac6 = (ipa6_conn_t *)addrp;
5670 				} else {
5671 					ASSERT(tba->ADDR_length ==
5672 					    sizeof (ipa6_conn_x_t));
5673 					ac6 = &((ipa6_conn_x_t *)
5674 					    addrp)->ac6x_conn;
5675 				}
5676 				udp->udp_v6src = ac6->ac6_laddr;
5677 				(void) udp_build_hdrs(q, udp);
5678 				break;
5679 			}
5680 		}
5681 		mp1 = mp1->b_cont;
5682 	}
5683 	/*
5684 	 * Look for one or more appended ACK message added by
5685 	 * udp_connect or udp_disconnect.
5686 	 * If none found just send up the T_BIND_ACK.
5687 	 * udp_connect has appended a T_OK_ACK and a T_CONN_CON.
5688 	 * udp_disconnect has appended a T_OK_ACK.
5689 	 */
5690 	if (mp1 != NULL) {
5691 		if (mp->b_cont == mp1)
5692 			mp->b_cont = NULL;
5693 		else {
5694 			ASSERT(mp->b_cont->b_cont == mp1);
5695 			mp->b_cont->b_cont = NULL;
5696 		}
5697 		freemsg(mp);
5698 		mp = mp1;
5699 		while (mp != NULL) {
5700 			mp1 = mp->b_cont;
5701 			mp->b_cont = NULL;
5702 			putnext(UDP_RD(q), mp);
5703 			mp = mp1;
5704 		}
5705 		return;
5706 	}
5707 	freemsg(mp->b_cont);
5708 	mp->b_cont = NULL;
5709 	putnext(UDP_RD(q), mp);
5710 }
5711 
5712 /*
5713  * return SNMP stuff in buffer in mpdata
5714  */
5715 int
5716 udp_snmp_get(queue_t *q, mblk_t *mpctl)
5717 {
5718 	mblk_t			*mpdata;
5719 	mblk_t			*mp_conn_ctl;
5720 	mblk_t			*mp_attr_ctl;
5721 	mblk_t			*mp6_conn_ctl;
5722 	mblk_t			*mp6_attr_ctl;
5723 	mblk_t			*mp_conn_tail;
5724 	mblk_t			*mp_attr_tail;
5725 	mblk_t			*mp6_conn_tail;
5726 	mblk_t			*mp6_attr_tail;
5727 	struct opthdr		*optp;
5728 	mib2_udpEntry_t		ude;
5729 	mib2_udp6Entry_t	ude6;
5730 	mib2_transportMLPEntry_t mlp;
5731 	int			state;
5732 	zoneid_t		zoneid;
5733 	int			i;
5734 	connf_t			*connfp;
5735 	conn_t			*connp = Q_TO_CONN(q);
5736 	udp_t			*udp = connp->conn_udp;
5737 	int			v4_conn_idx;
5738 	int			v6_conn_idx;
5739 	boolean_t		needattr;
5740 
5741 	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
5742 	if (mpctl == NULL ||
5743 	    (mpdata = mpctl->b_cont) == NULL ||
5744 	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
5745 	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
5746 	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
5747 	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
5748 		freemsg(mp_conn_ctl);
5749 		freemsg(mp_attr_ctl);
5750 		freemsg(mp6_conn_ctl);
5751 		return (0);
5752 	}
5753 
5754 	zoneid = connp->conn_zoneid;
5755 
5756 	/* fixed length structure for IPv4 and IPv6 counters */
5757 	SET_MIB(udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
5758 	SET_MIB(udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
5759 	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
5760 	optp->level = MIB2_UDP;
5761 	optp->name = 0;
5762 	(void) snmp_append_data(mpdata, (char *)&udp_mib, sizeof (udp_mib));
5763 	optp->len = msgdsize(mpdata);
5764 	qreply(q, mpctl);
5765 
5766 	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
5767 	v4_conn_idx = v6_conn_idx = 0;
5768 
5769 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
5770 		connfp = &ipcl_globalhash_fanout[i];
5771 		connp = NULL;
5772 
5773 		while ((connp = ipcl_get_next_conn(connfp, connp,
5774 		    IPCL_UDP))) {
5775 			udp = connp->conn_udp;
5776 			if (zoneid != connp->conn_zoneid)
5777 				continue;
5778 
5779 			/*
5780 			 * Note that the port numbers are sent in
5781 			 * host byte order
5782 			 */
5783 
5784 			if (udp->udp_state == TS_UNBND)
5785 				state = MIB2_UDP_unbound;
5786 			else if (udp->udp_state == TS_IDLE)
5787 				state = MIB2_UDP_idle;
5788 			else if (udp->udp_state == TS_DATA_XFER)
5789 				state = MIB2_UDP_connected;
5790 			else
5791 				state = MIB2_UDP_unknown;
5792 
5793 			needattr = B_FALSE;
5794 			bzero(&mlp, sizeof (mlp));
5795 			if (connp->conn_mlp_type != mlptSingle) {
5796 				if (connp->conn_mlp_type == mlptShared ||
5797 				    connp->conn_mlp_type == mlptBoth)
5798 					mlp.tme_flags |= MIB2_TMEF_SHARED;
5799 				if (connp->conn_mlp_type == mlptPrivate ||
5800 				    connp->conn_mlp_type == mlptBoth)
5801 					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
5802 				needattr = B_TRUE;
5803 			}
5804 
5805 			/*
5806 			 * Create an IPv4 table entry for IPv4 entries and also
5807 			 * any IPv6 entries which are bound to in6addr_any
5808 			 * (i.e. anything a IPv4 peer could connect/send to).
5809 			 */
5810 			if (udp->udp_ipversion == IPV4_VERSION ||
5811 			    (udp->udp_state <= TS_IDLE &&
5812 			    IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) {
5813 				ude.udpEntryInfo.ue_state = state;
5814 				/*
5815 				 * If in6addr_any this will set it to
5816 				 * INADDR_ANY
5817 				 */
5818 				ude.udpLocalAddress =
5819 				    V4_PART_OF_V6(udp->udp_v6src);
5820 				ude.udpLocalPort = ntohs(udp->udp_port);
5821 				if (udp->udp_state == TS_DATA_XFER) {
5822 					/*
5823 					 * Can potentially get here for
5824 					 * v6 socket if another process
5825 					 * (say, ping) has just done a
5826 					 * sendto(), changing the state
5827 					 * from the TS_IDLE above to
5828 					 * TS_DATA_XFER by the time we hit
5829 					 * this part of the code.
5830 					 */
5831 					ude.udpEntryInfo.ue_RemoteAddress =
5832 					    V4_PART_OF_V6(udp->udp_v6dst);
5833 					ude.udpEntryInfo.ue_RemotePort =
5834 					    ntohs(udp->udp_dstport);
5835 				} else {
5836 					ude.udpEntryInfo.ue_RemoteAddress = 0;
5837 					ude.udpEntryInfo.ue_RemotePort = 0;
5838 				}
5839 				(void) snmp_append_data2(mp_conn_ctl->b_cont,
5840 				    &mp_conn_tail, (char *)&ude, sizeof (ude));
5841 				mlp.tme_connidx = v4_conn_idx++;
5842 				if (needattr)
5843 					(void) snmp_append_data2(
5844 					    mp_attr_ctl->b_cont, &mp_attr_tail,
5845 					    (char *)&mlp, sizeof (mlp));
5846 			}
5847 			if (udp->udp_ipversion == IPV6_VERSION) {
5848 				ude6.udp6EntryInfo.ue_state  = state;
5849 				ude6.udp6LocalAddress = udp->udp_v6src;
5850 				ude6.udp6LocalPort = ntohs(udp->udp_port);
5851 				ude6.udp6IfIndex = udp->udp_bound_if;
5852 				if (udp->udp_state == TS_DATA_XFER) {
5853 					ude6.udp6EntryInfo.ue_RemoteAddress =
5854 					    udp->udp_v6dst;
5855 					ude6.udp6EntryInfo.ue_RemotePort =
5856 					    ntohs(udp->udp_dstport);
5857 				} else {
5858 					ude6.udp6EntryInfo.ue_RemoteAddress =
5859 					    sin6_null.sin6_addr;
5860 					ude6.udp6EntryInfo.ue_RemotePort = 0;
5861 				}
5862 				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
5863 				    &mp6_conn_tail, (char *)&ude6,
5864 				    sizeof (ude6));
5865 				mlp.tme_connidx = v6_conn_idx++;
5866 				if (needattr)
5867 					(void) snmp_append_data2(
5868 					    mp6_attr_ctl->b_cont,
5869 					    &mp6_attr_tail, (char *)&mlp,
5870 					    sizeof (mlp));
5871 			}
5872 		}
5873 	}
5874 
5875 	/* IPv4 UDP endpoints */
5876 	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
5877 	    sizeof (struct T_optmgmt_ack)];
5878 	optp->level = MIB2_UDP;
5879 	optp->name = MIB2_UDP_ENTRY;
5880 	optp->len = msgdsize(mp_conn_ctl->b_cont);
5881 	qreply(q, mp_conn_ctl);
5882 
5883 	/* table of MLP attributes... */
5884 	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
5885 	    sizeof (struct T_optmgmt_ack)];
5886 	optp->level = MIB2_UDP;
5887 	optp->name = EXPER_XPORT_MLP;
5888 	optp->len = msgdsize(mp_attr_ctl->b_cont);
5889 	if (optp->len == 0)
5890 		freemsg(mp_attr_ctl);
5891 	else
5892 		qreply(q, mp_attr_ctl);
5893 
5894 	/* IPv6 UDP endpoints */
5895 	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
5896 	    sizeof (struct T_optmgmt_ack)];
5897 	optp->level = MIB2_UDP6;
5898 	optp->name = MIB2_UDP6_ENTRY;
5899 	optp->len = msgdsize(mp6_conn_ctl->b_cont);
5900 	qreply(q, mp6_conn_ctl);
5901 
5902 	/* table of MLP attributes... */
5903 	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
5904 	    sizeof (struct T_optmgmt_ack)];
5905 	optp->level = MIB2_UDP6;
5906 	optp->name = EXPER_XPORT_MLP;
5907 	optp->len = msgdsize(mp6_attr_ctl->b_cont);
5908 	if (optp->len == 0)
5909 		freemsg(mp6_attr_ctl);
5910 	else
5911 		qreply(q, mp6_attr_ctl);
5912 
5913 	return (1);
5914 }
5915 
5916 /*
5917  * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
5918  * NOTE: Per MIB-II, UDP has no writable data.
5919  * TODO:  If this ever actually tries to set anything, it needs to be
5920  * to do the appropriate locking.
5921  */
5922 /* ARGSUSED */
5923 int
5924 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
5925     uchar_t *ptr, int len)
5926 {
5927 	switch (level) {
5928 	case MIB2_UDP:
5929 		return (0);
5930 	default:
5931 		return (1);
5932 	}
5933 }
5934 
5935 static void
5936 udp_report_item(mblk_t *mp, udp_t *udp)
5937 {
5938 	char *state;
5939 	char addrbuf1[INET6_ADDRSTRLEN];
5940 	char addrbuf2[INET6_ADDRSTRLEN];
5941 	uint_t print_len, buf_len;
5942 
5943 	buf_len = mp->b_datap->db_lim - mp->b_wptr;
5944 	ASSERT(buf_len >= 0);
5945 	if (buf_len == 0)
5946 		return;
5947 
5948 	if (udp->udp_state == TS_UNBND)
5949 		state = "UNBOUND";
5950 	else if (udp->udp_state == TS_IDLE)
5951 		state = "IDLE";
5952 	else if (udp->udp_state == TS_DATA_XFER)
5953 		state = "CONNECTED";
5954 	else
5955 		state = "UnkState";
5956 	print_len = snprintf((char *)mp->b_wptr, buf_len,
5957 	    MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n",
5958 	    (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port),
5959 	    inet_ntop(AF_INET6, &udp->udp_v6src,
5960 		addrbuf1, sizeof (addrbuf1)),
5961 	    inet_ntop(AF_INET6, &udp->udp_v6dst,
5962 		addrbuf2, sizeof (addrbuf2)),
5963 	    ntohs(udp->udp_dstport), state);
5964 	if (print_len < buf_len) {
5965 		mp->b_wptr += print_len;
5966 	} else {
5967 		mp->b_wptr += buf_len;
5968 	}
5969 }
5970 
5971 /* Report for ndd "udp_status" */
5972 /* ARGSUSED */
5973 static int
5974 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
5975 {
5976 	zoneid_t zoneid;
5977 	connf_t	*connfp;
5978 	conn_t	*connp = Q_TO_CONN(q);
5979 	udp_t	*udp = connp->conn_udp;
5980 	int	i;
5981 
5982 	/*
5983 	 * Because of the ndd constraint, at most we can have 64K buffer
5984 	 * to put in all UDP info.  So to be more efficient, just
5985 	 * allocate a 64K buffer here, assuming we need that large buffer.
5986 	 * This may be a problem as any user can read udp_status.  Therefore
5987 	 * we limit the rate of doing this using udp_ndd_get_info_interval.
5988 	 * This should be OK as normal users should not do this too often.
5989 	 */
5990 	if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) {
5991 		if (ddi_get_lbolt() - udp_last_ndd_get_info_time <
5992 		    drv_usectohz(udp_ndd_get_info_interval * 1000)) {
5993 			(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
5994 			return (0);
5995 		}
5996 	}
5997 	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
5998 		/* The following may work even if we cannot get a large buf. */
5999 		(void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG);
6000 		return (0);
6001 	}
6002 	(void) mi_mpprintf(mp,
6003 	    "UDP     " MI_COL_HDRPAD_STR
6004 	/*   12345678[89ABCDEF] */
6005 	    " zone lport src addr        dest addr       port  state");
6006 	/*    1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */
6007 
6008 	zoneid = connp->conn_zoneid;
6009 
6010 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
6011 		connfp = &ipcl_globalhash_fanout[i];
6012 		connp = NULL;
6013 
6014 		while ((connp = ipcl_get_next_conn(connfp, connp,
6015 		    IPCL_UDP))) {
6016 			udp = connp->conn_udp;
6017 			if (zoneid != GLOBAL_ZONEID &&
6018 			    zoneid != connp->conn_zoneid)
6019 				continue;
6020 
6021 			udp_report_item(mp->b_cont, udp);
6022 		}
6023 	}
6024 	udp_last_ndd_get_info_time = ddi_get_lbolt();
6025 	return (0);
6026 }
6027 
6028 /*
6029  * This routine creates a T_UDERROR_IND message and passes it upstream.
6030  * The address and options are copied from the T_UNITDATA_REQ message
6031  * passed in mp.  This message is freed.
6032  */
6033 static void
6034 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen,
6035     t_scalar_t err)
6036 {
6037 	struct T_unitdata_req *tudr;
6038 	mblk_t	*mp1;
6039 	uchar_t	*optaddr;
6040 	t_scalar_t optlen;
6041 
6042 	if (DB_TYPE(mp) == M_DATA) {
6043 		ASSERT(destaddr != NULL && destlen != 0);
6044 		optaddr = NULL;
6045 		optlen = 0;
6046 	} else {
6047 		if ((mp->b_wptr < mp->b_rptr) ||
6048 		    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
6049 			goto done;
6050 		}
6051 		tudr = (struct T_unitdata_req *)mp->b_rptr;
6052 		destaddr = mp->b_rptr + tudr->DEST_offset;
6053 		if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
6054 		    destaddr + tudr->DEST_length < mp->b_rptr ||
6055 		    destaddr + tudr->DEST_length > mp->b_wptr) {
6056 			goto done;
6057 		}
6058 		optaddr = mp->b_rptr + tudr->OPT_offset;
6059 		if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
6060 		    optaddr + tudr->OPT_length < mp->b_rptr ||
6061 		    optaddr + tudr->OPT_length > mp->b_wptr) {
6062 			goto done;
6063 		}
6064 		destlen = tudr->DEST_length;
6065 		optlen = tudr->OPT_length;
6066 	}
6067 
6068 	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
6069 	    (char *)optaddr, optlen, err);
6070 	if (mp1 != NULL)
6071 		putnext(UDP_RD(q), mp1);
6072 
6073 done:
6074 	freemsg(mp);
6075 }
6076 
6077 /*
6078  * This routine removes a port number association from a stream.  It
6079  * is called by udp_wput to handle T_UNBIND_REQ messages.
6080  */
6081 static void
6082 udp_unbind(queue_t *q, mblk_t *mp)
6083 {
6084 	udp_t *udp = Q_TO_UDP(q);
6085 
6086 	/* If a bind has not been done, we can't unbind. */
6087 	if (udp->udp_state == TS_UNBND) {
6088 		udp_err_ack(q, mp, TOUTSTATE, 0);
6089 		return;
6090 	}
6091 	if (cl_inet_unbind != NULL) {
6092 		/*
6093 		 * Running in cluster mode - register unbind information
6094 		 */
6095 		if (udp->udp_ipversion == IPV4_VERSION) {
6096 			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
6097 			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
6098 			    (in_port_t)udp->udp_port);
6099 		} else {
6100 			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET6,
6101 			    (uint8_t *)&(udp->udp_v6src),
6102 			    (in_port_t)udp->udp_port);
6103 		}
6104 	}
6105 
6106 	udp_bind_hash_remove(udp, B_FALSE);
6107 	V6_SET_ZERO(udp->udp_v6src);
6108 	V6_SET_ZERO(udp->udp_bound_v6src);
6109 	udp->udp_port = 0;
6110 	udp->udp_state = TS_UNBND;
6111 
6112 	if (udp->udp_family == AF_INET6) {
6113 		int error;
6114 
6115 		/* Rebuild the header template */
6116 		error = udp_build_hdrs(q, udp);
6117 		if (error != 0) {
6118 			udp_err_ack(q, mp, TSYSERR, error);
6119 			return;
6120 		}
6121 	}
6122 	/*
6123 	 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK
6124 	 * and therefore ip_unbind must never return NULL.
6125 	 */
6126 	mp = ip_unbind(q, mp);
6127 	ASSERT(mp != NULL);
6128 	putnext(UDP_RD(q), mp);
6129 }
6130 
6131 /*
6132  * Don't let port fall into the privileged range.
6133  * Since the extra privileged ports can be arbitrary we also
6134  * ensure that we exclude those from consideration.
6135  * udp_g_epriv_ports is not sorted thus we loop over it until
6136  * there are no changes.
6137  */
6138 static in_port_t
6139 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
6140 {
6141 	int i;
6142 	in_port_t nextport;
6143 	boolean_t restart = B_FALSE;
6144 
6145 	if (random && udp_random_anon_port != 0) {
6146 		(void) random_get_pseudo_bytes((uint8_t *)&port,
6147 		    sizeof (in_port_t));
6148 		/*
6149 		 * Unless changed by a sys admin, the smallest anon port
6150 		 * is 32768 and the largest anon port is 65535.  It is
6151 		 * very likely (50%) for the random port to be smaller
6152 		 * than the smallest anon port.  When that happens,
6153 		 * add port % (anon port range) to the smallest anon
6154 		 * port to get the random port.  It should fall into the
6155 		 * valid anon port range.
6156 		 */
6157 		if (port < udp_smallest_anon_port) {
6158 			port = udp_smallest_anon_port +
6159 			    port % (udp_largest_anon_port -
6160 			    udp_smallest_anon_port);
6161 		}
6162 	}
6163 
6164 retry:
6165 	if (port < udp_smallest_anon_port)
6166 		port = udp_smallest_anon_port;
6167 
6168 	if (port > udp_largest_anon_port) {
6169 		port = udp_smallest_anon_port;
6170 		if (restart)
6171 			return (0);
6172 		restart = B_TRUE;
6173 	}
6174 
6175 	if (port < udp_smallest_nonpriv_port)
6176 		port = udp_smallest_nonpriv_port;
6177 
6178 	for (i = 0; i < udp_g_num_epriv_ports; i++) {
6179 		if (port == udp_g_epriv_ports[i]) {
6180 			port++;
6181 			/*
6182 			 * Make sure that the port is in the
6183 			 * valid range.
6184 			 */
6185 			goto retry;
6186 		}
6187 	}
6188 
6189 	if (is_system_labeled() &&
6190 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
6191 	    port, IPPROTO_UDP, B_TRUE)) != 0) {
6192 		port = nextport;
6193 		goto retry;
6194 	}
6195 
6196 	return (port);
6197 }
6198 
6199 static int
6200 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst)
6201 {
6202 	int err;
6203 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
6204 	udp_t *udp = Q_TO_UDP(wq);
6205 
6206 	err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst,
6207 	    opt_storage, udp->udp_mac_exempt);
6208 	if (err == 0) {
6209 		err = tsol_update_options(&udp->udp_ip_snd_options,
6210 		    &udp->udp_ip_snd_options_len, &udp->udp_label_len,
6211 		    opt_storage);
6212 	}
6213 	if (err != 0) {
6214 		DTRACE_PROBE4(
6215 		    tx__ip__log__info__updatelabel__udp,
6216 		    char *, "queue(1) failed to update options(2) on mp(3)",
6217 		    queue_t *, wq, char *, opt_storage, mblk_t *, mp);
6218 	} else {
6219 		IN6_IPADDR_TO_V4MAPPED(dst, &udp->udp_v6lastdst);
6220 	}
6221 	return (err);
6222 }
6223 
6224 static mblk_t *
6225 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
6226     uint_t srcid, int *error)
6227 {
6228 	udp_t	*udp = connp->conn_udp;
6229 	queue_t	*q = connp->conn_wq;
6230 	mblk_t	*mp1 = mp;
6231 	mblk_t	*mp2;
6232 	ipha_t	*ipha;
6233 	int	ip_hdr_length;
6234 	uint32_t ip_len;
6235 	udpha_t	*udpha;
6236 	udpattrs_t	attrs;
6237 	uchar_t	ip_snd_opt[IP_MAX_OPT_LENGTH];
6238 	uint32_t	ip_snd_opt_len = 0;
6239 
6240 	*error = 0;
6241 
6242 	if (v4dst == INADDR_ANY)
6243 		v4dst = htonl(INADDR_LOOPBACK);
6244 
6245 	/*
6246 	 * If options passed in, feed it for verification and handling
6247 	 */
6248 	attrs.udpattr_credset = B_FALSE;
6249 	if (DB_TYPE(mp) != M_DATA) {
6250 		mp1 = mp->b_cont;
6251 		if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) {
6252 			attrs.udpattr_ipp = NULL;
6253 			attrs.udpattr_mb = mp;
6254 			if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0)
6255 				goto done;
6256 			/*
6257 			 * Note: success in processing options.
6258 			 * mp option buffer represented by
6259 			 * OPT_length/offset now potentially modified
6260 			 * and contain option setting results
6261 			 */
6262 			ASSERT(*error == 0);
6263 		}
6264 	}
6265 
6266 	/* mp1 points to the M_DATA mblk carrying the packet */
6267 	ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);
6268 
6269 	/*
6270 	 * Check if our saved options are valid; update if not
6271 	 * TSOL Note: Since we are not in WRITER mode, UDP packets
6272 	 * to different destination may require different labels.
6273 	 * We use conn_lock to ensure that lastdst, ip_snd_options,
6274 	 * and ip_snd_options_len are consistent for the current
6275 	 * destination and are updated atomically.
6276 	 */
6277 	mutex_enter(&connp->conn_lock);
6278 	if (is_system_labeled()) {
6279 		/* Using UDP MLP requires SCM_UCRED from user */
6280 		if (connp->conn_mlp_type != mlptSingle &&
6281 		    !attrs.udpattr_credset) {
6282 			mutex_exit(&connp->conn_lock);
6283 			DTRACE_PROBE4(
6284 			    tx__ip__log__info__output__udp,
6285 			    char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
6286 			    mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q);
6287 			*error = ECONNREFUSED;
6288 			goto done;
6289 		}
6290 		if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
6291 		    V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst) &&
6292 		    (*error = udp_update_label(q, mp, v4dst)) != 0) {
6293 			mutex_exit(&connp->conn_lock);
6294 			goto done;
6295 		}
6296 	}
6297 	if (udp->udp_ip_snd_options_len > 0) {
6298 		ip_snd_opt_len = udp->udp_ip_snd_options_len;
6299 		bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len);
6300 	}
6301 	mutex_exit(&connp->conn_lock);
6302 
6303 	/* Add an IP header */
6304 	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len;
6305 	ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length];
6306 	if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) ||
6307 	    !OK_32PTR(ipha)) {
6308 		mp2 = allocb(ip_hdr_length + udp_wroff_extra, BPRI_LO);
6309 		if (mp2 == NULL) {
6310 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
6311 			    "udp_wput_end: q %p (%S)", q, "allocbfail2");
6312 			*error = ENOMEM;
6313 			goto done;
6314 		}
6315 		mp2->b_wptr = DB_LIM(mp2);
6316 		mp2->b_cont = mp1;
6317 		mp1 = mp2;
6318 		if (DB_TYPE(mp) != M_DATA)
6319 			mp->b_cont = mp1;
6320 		else
6321 			mp = mp1;
6322 
6323 		ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length);
6324 	}
6325 	ip_hdr_length -= UDPH_SIZE;
6326 #ifdef	_BIG_ENDIAN
6327 	/* Set version, header length, and tos */
6328 	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
6329 	    ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) |
6330 		udp->udp_type_of_service);
6331 	/* Set ttl and protocol */
6332 	*(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP;
6333 #else
6334 	/* Set version, header length, and tos */
6335 	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
6336 		((udp->udp_type_of_service << 8) |
6337 		    ((IP_VERSION << 4) | (ip_hdr_length>>2)));
6338 	/* Set ttl and protocol */
6339 	*(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl;
6340 #endif
6341 	/*
6342 	 * Copy our address into the packet.  If this is zero,
6343 	 * first look at __sin6_src_id for a hint. If we leave the source
6344 	 * as INADDR_ANY then ip will fill in the real source address.
6345 	 */
6346 	IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src);
6347 	if (srcid != 0 && ipha->ipha_src == INADDR_ANY) {
6348 		in6_addr_t v6src;
6349 
6350 		ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid);
6351 		IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src);
6352 	}
6353 
6354 	ipha->ipha_fragment_offset_and_flags = 0;
6355 	ipha->ipha_ident = 0;
6356 
6357 	mp1->b_rptr = (uchar_t *)ipha;
6358 
6359 	ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <=
6360 	    (uintptr_t)UINT_MAX);
6361 
6362 	/* Determine length of packet */
6363 	ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha);
6364 	if ((mp2 = mp1->b_cont) != NULL) {
6365 		do {
6366 			ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX);
6367 			ip_len += (uint32_t)MBLKL(mp2);
6368 		} while ((mp2 = mp2->b_cont) != NULL);
6369 	}
6370 	/*
6371 	 * If the size of the packet is greater than the maximum allowed by
6372 	 * ip, return an error. Passing this down could cause panics because
6373 	 * the size will have wrapped and be inconsistent with the msg size.
6374 	 */
6375 	if (ip_len > IP_MAXPACKET) {
6376 		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
6377 		    "udp_wput_end: q %p (%S)", q, "IP length exceeded");
6378 		*error = EMSGSIZE;
6379 		goto done;
6380 	}
6381 	ipha->ipha_length = htons((uint16_t)ip_len);
6382 	ip_len -= ip_hdr_length;
6383 	ip_len = htons((uint16_t)ip_len);
6384 	udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length);
6385 
6386 	/*
6387 	 * Copy in the destination address
6388 	 */
6389 	ipha->ipha_dst = v4dst;
6390 
6391 	/*
6392 	 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic.
6393 	 */
6394 	if (CLASSD(v4dst))
6395 		ipha->ipha_ttl = udp->udp_multicast_ttl;
6396 
6397 	udpha->uha_dst_port = port;
6398 	udpha->uha_src_port = udp->udp_port;
6399 
6400 	if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) {
6401 		uint32_t	cksum;
6402 
6403 		bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len);
6404 		/*
6405 		 * Massage source route putting first source route in ipha_dst.
6406 		 * Ignore the destination in T_unitdata_req.
6407 		 * Create a checksum adjustment for a source route, if any.
6408 		 */
6409 		cksum = ip_massage_options(ipha);
6410 		cksum = (cksum & 0xFFFF) + (cksum >> 16);
6411 		cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) +
6412 		    (ipha->ipha_dst & 0xFFFF);
6413 		if ((int)cksum < 0)
6414 			cksum--;
6415 		cksum = (cksum & 0xFFFF) + (cksum >> 16);
6416 		/*
6417 		 * IP does the checksum if uha_checksum is non-zero,
6418 		 * We make it easy for IP to include our pseudo header
6419 		 * by putting our length in uha_checksum.
6420 		 */
6421 		cksum += ip_len;
6422 		cksum = (cksum & 0xFFFF) + (cksum >> 16);
6423 		/* There might be a carry. */
6424 		cksum = (cksum & 0xFFFF) + (cksum >> 16);
6425 #ifdef _LITTLE_ENDIAN
6426 		if (udp_do_checksum)
6427 			ip_len = (cksum << 16) | ip_len;
6428 #else
6429 		if (udp_do_checksum)
6430 			ip_len = (ip_len << 16) | cksum;
6431 		else
6432 			ip_len <<= 16;
6433 #endif
6434 	} else {
6435 		/*
6436 		 * IP does the checksum if uha_checksum is non-zero,
6437 		 * We make it easy for IP to include our pseudo header
6438 		 * by putting our length in uha_checksum.
6439 		 */
6440 		if (udp_do_checksum)
6441 			ip_len |= (ip_len << 16);
6442 #ifndef _LITTLE_ENDIAN
6443 		else
6444 			ip_len <<= 16;
6445 #endif
6446 	}
6447 	/* Set UDP length and checksum */
6448 	*((uint32_t *)&udpha->uha_length) = ip_len;
6449 	if (DB_CRED(mp) != NULL)
6450 		mblk_setcred(mp1, DB_CRED(mp));
6451 
6452 	if (DB_TYPE(mp) != M_DATA) {
6453 		ASSERT(mp != mp1);
6454 		freeb(mp);
6455 	}
6456 
6457 	/* mp has been consumed and we'll return success */
6458 	ASSERT(*error == 0);
6459 	mp = NULL;
6460 
6461 	/* We're done.  Pass the packet to ip. */
6462 	BUMP_MIB(&udp_mib, udpOutDatagrams);
6463 	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
6464 		"udp_wput_end: q %p (%S)", q, "end");
6465 
6466 	if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 ||
6467 	    CONN_OUTBOUND_POLICY_PRESENT(connp) ||
6468 	    connp->conn_dontroute || connp->conn_xmit_if_ill != NULL ||
6469 	    connp->conn_nofailover_ill != NULL ||
6470 	    connp->conn_outgoing_ill != NULL ||
6471 	    ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION ||
6472 	    IPP_ENABLED(IPP_LOCAL_OUT) || ip_g_mrouter != NULL) {
6473 		UDP_STAT(udp_ip_send);
6474 		ip_output(connp, mp1, connp->conn_wq, IP_WPUT);
6475 	} else {
6476 		udp_send_data(udp, connp->conn_wq, mp1, ipha);
6477 	}
6478 
6479 done:
6480 	if (*error != 0) {
6481 		ASSERT(mp != NULL);
6482 		BUMP_MIB(&udp_mib, udpOutErrors);
6483 	}
6484 	return (mp);
6485 }
6486 
6487 static void
6488 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha)
6489 {
6490 	conn_t	*connp = udp->udp_connp;
6491 	ipaddr_t src, dst;
6492 	ill_t	*ill;
6493 	ire_t	*ire;
6494 	ipif_t	*ipif = NULL;
6495 	mblk_t	*ire_fp_mp;
6496 	uint_t	ire_fp_mp_len;
6497 	uint16_t *up;
6498 	uint32_t cksum, hcksum_txflags;
6499 	queue_t	*dev_q;
6500 	boolean_t retry_caching;
6501 
6502 	dst = ipha->ipha_dst;
6503 	src = ipha->ipha_src;
6504 	ASSERT(ipha->ipha_ident == 0);
6505 
6506 	if (CLASSD(dst)) {
6507 		int err;
6508 
6509 		ipif = conn_get_held_ipif(connp,
6510 		    &connp->conn_multicast_ipif, &err);
6511 
6512 		if (ipif == NULL || ipif->ipif_isv6 ||
6513 		    (ipif->ipif_ill->ill_phyint->phyint_flags &
6514 		    PHYI_LOOPBACK)) {
6515 			if (ipif != NULL)
6516 				ipif_refrele(ipif);
6517 			UDP_STAT(udp_ip_send);
6518 			ip_output(connp, mp, q, IP_WPUT);
6519 			return;
6520 		}
6521 	}
6522 
6523 	retry_caching = B_FALSE;
6524 	mutex_enter(&connp->conn_lock);
6525 	ire = connp->conn_ire_cache;
6526 	ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT));
6527 
6528 	if (ire == NULL || ire->ire_addr != dst ||
6529 	    (ire->ire_marks & IRE_MARK_CONDEMNED)) {
6530 		retry_caching = B_TRUE;
6531 	} else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) {
6532 		ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr;
6533 
6534 		ASSERT(ipif != NULL);
6535 		if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL ||
6536 		    stq_ill->ill_group != ipif->ipif_ill->ill_group))
6537 			retry_caching = B_TRUE;
6538 	}
6539 
6540 	if (!retry_caching) {
6541 		ASSERT(ire != NULL);
6542 		IRE_REFHOLD(ire);
6543 		mutex_exit(&connp->conn_lock);
6544 	} else {
6545 		boolean_t cached = B_FALSE;
6546 
6547 		connp->conn_ire_cache = NULL;
6548 		mutex_exit(&connp->conn_lock);
6549 
6550 		/* Release the old ire */
6551 		if (ire != NULL) {
6552 			IRE_REFRELE_NOTR(ire);
6553 			ire = NULL;
6554 		}
6555 
6556 		if (CLASSD(dst)) {
6557 			ASSERT(ipif != NULL);
6558 			ire = ire_ctable_lookup(dst, 0, 0, ipif,
6559 			    connp->conn_zoneid, MBLK_GETLABEL(mp),
6560 			    MATCH_IRE_ILL_GROUP);
6561 		} else {
6562 			ASSERT(ipif == NULL);
6563 			ire = ire_cache_lookup(dst, connp->conn_zoneid,
6564 			    MBLK_GETLABEL(mp));
6565 		}
6566 
6567 		if (ire == NULL) {
6568 			if (ipif != NULL)
6569 				ipif_refrele(ipif);
6570 			UDP_STAT(udp_ire_null);
6571 			ip_output(connp, mp, q, IP_WPUT);
6572 			return;
6573 		}
6574 		IRE_REFHOLD_NOTR(ire);
6575 
6576 		mutex_enter(&connp->conn_lock);
6577 		if (!(connp->conn_state_flags & CONN_CLOSING) &&
6578 		    connp->conn_ire_cache == NULL) {
6579 			rw_enter(&ire->ire_bucket->irb_lock, RW_READER);
6580 			if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) {
6581 				connp->conn_ire_cache = ire;
6582 				cached = B_TRUE;
6583 			}
6584 			rw_exit(&ire->ire_bucket->irb_lock);
6585 		}
6586 		mutex_exit(&connp->conn_lock);
6587 
6588 		/*
6589 		 * We can continue to use the ire but since it was not
6590 		 * cached, we should drop the extra reference.
6591 		 */
6592 		if (!cached)
6593 			IRE_REFRELE_NOTR(ire);
6594 	}
6595 	ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION);
6596 	ASSERT(!CLASSD(dst) || ipif != NULL);
6597 
6598 	/*
6599 	 * Check if we can take the fast-path.
6600 	 * Note that "incomplete" ire's (where the link-layer for next hop
6601 	 * is not resolved, or where the fast-path header in nce_fp_mp is not
6602 	 * available yet) are sent down the legacy (slow) path
6603 	 */
6604 	if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) ||
6605 	    (ire->ire_flags & RTF_MULTIRT) || ire->ire_stq == NULL ||
6606 	    ire->ire_max_frag < ntohs(ipha->ipha_length) ||
6607 	    (ire->ire_nce != NULL &&
6608 	    (ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL) ||
6609 	    (connp->conn_nexthop_set) ||
6610 	    (ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp)) {
6611 		if (ipif != NULL)
6612 			ipif_refrele(ipif);
6613 		UDP_STAT(udp_ip_ire_send);
6614 		IRE_REFRELE(ire);
6615 		ip_output(connp, mp, q, IP_WPUT);
6616 		return;
6617 	}
6618 
6619 	BUMP_MIB(&ip_mib, ipOutRequests);
6620 
6621 	ill = ire_to_ill(ire);
6622 	ASSERT(ill != NULL);
6623 
6624 	dev_q = ire->ire_stq->q_next;
6625 	ASSERT(dev_q != NULL);
6626 	/*
6627 	 * If the service thread is already running, or if the driver
6628 	 * queue is currently flow-controlled, queue this packet.
6629 	 */
6630 	if ((q->q_first != NULL || connp->conn_draining) ||
6631 	    ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) {
6632 		if (ip_output_queue) {
6633 			(void) putq(q, mp);
6634 		} else {
6635 			BUMP_MIB(&ip_mib, ipOutDiscards);
6636 			freemsg(mp);
6637 		}
6638 		if (ipif != NULL)
6639 			ipif_refrele(ipif);
6640 		IRE_REFRELE(ire);
6641 		return;
6642 	}
6643 
6644 	ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1);
6645 #ifndef _BIG_ENDIAN
6646 	ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8);
6647 #endif
6648 
6649 	if (src == INADDR_ANY && !connp->conn_unspec_src) {
6650 		if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC))
6651 			src = ipha->ipha_src = ipif->ipif_src_addr;
6652 		else
6653 			src = ipha->ipha_src = ire->ire_src_addr;
6654 	}
6655 
6656 	if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
6657 		ASSERT(ill->ill_hcksum_capab != NULL);
6658 		hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags;
6659 	} else {
6660 		hcksum_txflags = 0;
6661 	}
6662 
6663 	/* pseudo-header checksum (do it in parts for IP header checksum) */
6664 	cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF);
6665 
6666 	ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION);
6667 	up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
6668 	if (*up != 0) {
6669 		IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags,
6670 		    mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH,
6671 		    ntohs(ipha->ipha_length), cksum);
6672 
6673 		/* Software checksum? */
6674 		if (DB_CKSUMFLAGS(mp) == 0) {
6675 			UDP_STAT(udp_out_sw_cksum);
6676 			UDP_STAT_UPDATE(udp_out_sw_cksum_bytes,
6677 			    ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH);
6678 		}
6679 	}
6680 
6681 	ipha->ipha_fragment_offset_and_flags |=
6682 	    (uint32_t)htons(ire->ire_frag_flag);
6683 
6684 	/* Calculate IP header checksum if hardware isn't capable */
6685 	if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) {
6686 		IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0],
6687 		    ((uint16_t *)ipha)[4]);
6688 	}
6689 
6690 	if (CLASSD(dst)) {
6691 		ilm_t *ilm;
6692 
6693 		ILM_WALKER_HOLD(ill);
6694 		ilm = ilm_lookup_ill(ill, dst, ALL_ZONES);
6695 		ILM_WALKER_RELE(ill);
6696 		if (ilm != NULL) {
6697 			ip_multicast_loopback(q, ill, mp,
6698 			    connp->conn_multicast_loop ? 0 :
6699 			    IP_FF_NO_MCAST_LOOP, connp->conn_zoneid);
6700 		}
6701 
6702 		/* If multicast TTL is 0 then we are done */
6703 		if (ipha->ipha_ttl == 0) {
6704 			if (ipif != NULL)
6705 				ipif_refrele(ipif);
6706 			freemsg(mp);
6707 			IRE_REFRELE(ire);
6708 			return;
6709 		}
6710 	}
6711 
6712 	ASSERT(DB_TYPE(ire_fp_mp) == M_DATA);
6713 	mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len;
6714 	bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len);
6715 
6716 	UPDATE_OB_PKT_COUNT(ire);
6717 	ire->ire_last_used_time = lbolt;
6718 
6719 	if (ILL_DLS_CAPABLE(ill)) {
6720 		/*
6721 		 * Send the packet directly to DLD, where it may be queued
6722 		 * depending on the availability of transmit resources at
6723 		 * the media layer.
6724 		 */
6725 		IP_DLS_ILL_TX(ill, mp);
6726 	} else {
6727 		putnext(ire->ire_stq, mp);
6728 	}
6729 
6730 	if (ipif != NULL)
6731 		ipif_refrele(ipif);
6732 	IRE_REFRELE(ire);
6733 }
6734 
6735 static boolean_t
6736 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst)
6737 {
6738 	udp_t *udp = Q_TO_UDP(wq);
6739 	int err;
6740 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
6741 
6742 	err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred),
6743 	    dst, opt_storage, udp->udp_mac_exempt);
6744 	if (err == 0) {
6745 		err = tsol_update_sticky(&udp->udp_sticky_ipp,
6746 		    &udp->udp_label_len_v6, opt_storage);
6747 	}
6748 	if (err != 0) {
6749 		DTRACE_PROBE4(
6750 		    tx__ip__log__drop__updatelabel__udp6,
6751 		    char *, "queue(1) failed to update options(2) on mp(3)",
6752 		    queue_t *, wq, char *, opt_storage, mblk_t *, mp);
6753 	} else {
6754 		udp->udp_v6lastdst = *dst;
6755 	}
6756 	return (err);
6757 }
6758 
6759 /*
6760  * This routine handles all messages passed downstream.  It either
6761  * consumes the message or passes it downstream; it never queues a
6762  * a message.
6763  */
6764 static void
6765 udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen)
6766 {
6767 	sin6_t		*sin6;
6768 	sin_t		*sin;
6769 	ipaddr_t	v4dst;
6770 	uint16_t	port;
6771 	uint_t		srcid;
6772 	queue_t		*q = connp->conn_wq;
6773 	udp_t		*udp = connp->conn_udp;
6774 	int		error = 0;
6775 	struct sockaddr_storage ss;
6776 
6777 	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START,
6778 	    "udp_wput_start: connp %p mp %p", connp, mp);
6779 
6780 	/*
6781 	 * We directly handle several cases here: T_UNITDATA_REQ message
6782 	 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both
6783 	 * connected and non-connected socket.  The latter carries the
6784 	 * address structure along when this routine gets called.
6785 	 */
6786 	switch (DB_TYPE(mp)) {
6787 	case M_DATA:
6788 		if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) {
6789 			if (!udp->udp_direct_sockfs ||
6790 			    addr == NULL || addrlen == 0) {
6791 				/* Not connected; address is required */
6792 				BUMP_MIB(&udp_mib, udpOutErrors);
6793 				UDP_STAT(udp_out_err_notconn);
6794 				freemsg(mp);
6795 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
6796 				    "udp_wput_end: connp %p (%S)", connp,
6797 				    "not-connected; address required");
6798 				return;
6799 			}
6800 			ASSERT(udp->udp_issocket);
6801 			UDP_DBGSTAT(udp_data_notconn);
6802 			/* Not connected; do some more checks below */
6803 			break;
6804 		}
6805 		/* M_DATA for connected socket */
6806 		UDP_DBGSTAT(udp_data_conn);
6807 		IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst);
6808 
6809 		/* Initialize addr and addrlen as if they're passed in */
6810 		if (udp->udp_family == AF_INET) {
6811 			sin = (sin_t *)&ss;
6812 			sin->sin_family = AF_INET;
6813 			sin->sin_port = udp->udp_dstport;
6814 			sin->sin_addr.s_addr = v4dst;
6815 			addr = (struct sockaddr *)sin;
6816 			addrlen = sizeof (*sin);
6817 		} else {
6818 			sin6 = (sin6_t *)&ss;
6819 			sin6->sin6_family = AF_INET6;
6820 			sin6->sin6_port = udp->udp_dstport;
6821 			sin6->sin6_flowinfo = udp->udp_flowinfo;
6822 			sin6->sin6_addr = udp->udp_v6dst;
6823 			sin6->sin6_scope_id = 0;
6824 			sin6->__sin6_src_id = 0;
6825 			addr = (struct sockaddr *)sin6;
6826 			addrlen = sizeof (*sin6);
6827 		}
6828 
6829 		if (udp->udp_family == AF_INET ||
6830 		    IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst)) {
6831 			/*
6832 			 * Handle both AF_INET and AF_INET6; the latter
6833 			 * for IPV4 mapped destination addresses.  Note
6834 			 * here that both addr and addrlen point to the
6835 			 * corresponding struct depending on the address
6836 			 * family of the socket.
6837 			 */
6838 			mp = udp_output_v4(connp, mp, v4dst,
6839 			    udp->udp_dstport, 0, &error);
6840 		} else {
6841 			mp = udp_output_v6(connp, mp, sin6, &error);
6842 		}
6843 		if (error != 0) {
6844 			ASSERT(addr != NULL && addrlen != 0);
6845 			goto ud_error;
6846 		}
6847 		return;
6848 	case M_PROTO:
6849 	case M_PCPROTO: {
6850 		struct T_unitdata_req *tudr;
6851 
6852 		ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX);
6853 		tudr = (struct T_unitdata_req *)mp->b_rptr;
6854 
6855 		/* Handle valid T_UNITDATA_REQ here */
6856 		if (MBLKL(mp) >= sizeof (*tudr) &&
6857 		    ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) {
6858 			if (mp->b_cont == NULL) {
6859 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
6860 				    "udp_wput_end: q %p (%S)", q, "badaddr");
6861 				error = EPROTO;
6862 				goto ud_error;
6863 			}
6864 
6865 			if (!MBLKIN(mp, 0, tudr->DEST_offset +
6866 			    tudr->DEST_length)) {
6867 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
6868 				    "udp_wput_end: q %p (%S)", q, "badaddr");
6869 				error = EADDRNOTAVAIL;
6870 				goto ud_error;
6871 			}
6872 			/*
6873 			 * If a port has not been bound to the stream, fail.
6874 			 * This is not a problem when sockfs is directly
6875 			 * above us, because it will ensure that the socket
6876 			 * is first bound before allowing data to be sent.
6877 			 */
6878 			if (udp->udp_state == TS_UNBND) {
6879 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
6880 				    "udp_wput_end: q %p (%S)", q, "outstate");
6881 				error = EPROTO;
6882 				goto ud_error;
6883 			}
6884 			addr = (struct sockaddr *)
6885 			    &mp->b_rptr[tudr->DEST_offset];
6886 			addrlen = tudr->DEST_length;
6887 			if (tudr->OPT_length != 0)
6888 				UDP_STAT(udp_out_opt);
6889 			break;
6890 		}
6891 		/* FALLTHRU */
6892 	}
6893 	default:
6894 		udp_become_writer(connp, mp, udp_wput_other_wrapper,
6895 		    SQTAG_UDP_OUTPUT);
6896 		return;
6897 	}
6898 	ASSERT(addr != NULL);
6899 
6900 	switch (udp->udp_family) {
6901 	case AF_INET6:
6902 		sin6 = (sin6_t *)addr;
6903 		if (!OK_32PTR((char *)sin6) || addrlen != sizeof (sin6_t) ||
6904 		    sin6->sin6_family != AF_INET6) {
6905 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
6906 			    "udp_wput_end: q %p (%S)", q, "badaddr");
6907 			error = EADDRNOTAVAIL;
6908 			goto ud_error;
6909 		}
6910 
6911 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
6912 			/*
6913 			 * Destination is a non-IPv4-compatible IPv6 address.
6914 			 * Send out an IPv6 format packet.
6915 			 */
6916 			mp = udp_output_v6(connp, mp, sin6, &error);
6917 			if (error != 0)
6918 				goto ud_error;
6919 
6920 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
6921 			    "udp_wput_end: q %p (%S)", q, "udp_output_v6");
6922 			return;
6923 		}
6924 		/*
6925 		 * If the local address is not zero or a mapped address
6926 		 * return an error.  It would be possible to send an IPv4
6927 		 * packet but the response would never make it back to the
6928 		 * application since it is bound to a non-mapped address.
6929 		 */
6930 		if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) &&
6931 		    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
6932 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
6933 			    "udp_wput_end: q %p (%S)", q, "badaddr");
6934 			error = EADDRNOTAVAIL;
6935 			goto ud_error;
6936 		}
6937 		/* Send IPv4 packet without modifying udp_ipversion */
6938 		/* Extract port and ipaddr */
6939 		port = sin6->sin6_port;
6940 		IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst);
6941 		srcid = sin6->__sin6_src_id;
6942 		break;
6943 
6944 	case AF_INET:
6945 		sin = (sin_t *)addr;
6946 		if (!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t) ||
6947 		    sin->sin_family != AF_INET) {
6948 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
6949 			    "udp_wput_end: q %p (%S)", q, "badaddr");
6950 			error = EADDRNOTAVAIL;
6951 			goto ud_error;
6952 		}
6953 		/* Extract port and ipaddr */
6954 		port = sin->sin_port;
6955 		v4dst = sin->sin_addr.s_addr;
6956 		srcid = 0;
6957 		break;
6958 	}
6959 
6960 	mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error);
6961 	if (error != 0) {
6962 ud_error:
6963 		UDP_STAT(udp_out_err_output);
6964 		ASSERT(mp != NULL);
6965 		/* mp is freed by the following routine */
6966 		udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen,
6967 		    (t_scalar_t)error);
6968 	}
6969 }
6970 
6971 /* ARGSUSED */
6972 static void
6973 udp_output_wrapper(void *arg, mblk_t *mp, void *arg2)
6974 {
6975 	udp_output((conn_t *)arg, mp, NULL, 0);
6976 	_UDP_EXIT((conn_t *)arg);
6977 }
6978 
6979 static void
6980 udp_wput(queue_t *q, mblk_t *mp)
6981 {
6982 	_UDP_ENTER(Q_TO_CONN(UDP_WR(q)), mp, udp_output_wrapper,
6983 	    SQTAG_UDP_WPUT);
6984 }
6985 
6986 /*
6987  * Allocate and prepare a T_UNITDATA_REQ message.
6988  */
6989 static mblk_t *
6990 udp_tudr_alloc(struct sockaddr *addr, socklen_t addrlen)
6991 {
6992 	struct T_unitdata_req *tudr;
6993 	mblk_t *mp;
6994 
6995 	mp = allocb(sizeof (*tudr) + addrlen, BPRI_MED);
6996 	if (mp != NULL) {
6997 		mp->b_wptr += sizeof (*tudr) + addrlen;
6998 		DB_TYPE(mp) = M_PROTO;
6999 
7000 		tudr = (struct T_unitdata_req *)mp->b_rptr;
7001 		tudr->PRIM_type = T_UNITDATA_REQ;
7002 		tudr->DEST_length = addrlen;
7003 		tudr->DEST_offset = (t_scalar_t)sizeof (*tudr);
7004 		tudr->OPT_length = 0;
7005 		tudr->OPT_offset = 0;
7006 		bcopy(addr, tudr+1, addrlen);
7007 	}
7008 	return (mp);
7009 }
7010 
7011 /*
7012  * Entry point for sockfs when udp is in "direct sockfs" mode.  This mode
7013  * is valid when we are directly beneath the stream head, and thus sockfs
7014  * is able to bypass STREAMS and directly call us, passing along the sockaddr
7015  * structure without the cumbersome T_UNITDATA_REQ interface.  Note that
7016  * this is done for both connected and non-connected endpoint.
7017  */
7018 void
7019 udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen)
7020 {
7021 	conn_t	*connp;
7022 	udp_t	*udp;
7023 
7024 	q = UDP_WR(q);
7025 	connp = Q_TO_CONN(q);
7026 	udp = connp->conn_udp;
7027 
7028 	/* udpsockfs should only send down M_DATA for this entry point */
7029 	ASSERT(DB_TYPE(mp) == M_DATA);
7030 
7031 	mutex_enter(&connp->conn_lock);
7032 	UDP_MODE_ASSERTIONS(udp, UDP_ENTER);
7033 
7034 	if (udp->udp_mode != UDP_MT_HOT) {
7035 		/*
7036 		 * We can't enter this conn right away because another
7037 		 * thread is currently executing as writer; therefore we
7038 		 * need to deposit the message into the squeue to be
7039 		 * drained later.  If a socket address is present, we
7040 		 * need to create a T_UNITDATA_REQ message as placeholder.
7041 		 */
7042 		if (addr != NULL && addrlen != 0) {
7043 			mblk_t *tudr_mp = udp_tudr_alloc(addr, addrlen);
7044 
7045 			if (tudr_mp == NULL) {
7046 				mutex_exit(&connp->conn_lock);
7047 				BUMP_MIB(&udp_mib, udpOutErrors);
7048 				UDP_STAT(udp_out_err_tudr);
7049 				freemsg(mp);
7050 				return;
7051 			}
7052 			/* Tag the packet with T_UNITDATA_REQ */
7053 			tudr_mp->b_cont = mp;
7054 			mp = tudr_mp;
7055 		}
7056 		mutex_exit(&connp->conn_lock);
7057 		udp_enter(connp, mp, udp_output_wrapper, SQTAG_UDP_WPUT);
7058 		return;
7059 	}
7060 
7061 	/* We can execute as reader right away. */
7062 	UDP_READERS_INCREF(udp);
7063 	mutex_exit(&connp->conn_lock);
7064 
7065 	udp_output(connp, mp, addr, addrlen);
7066 
7067 	udp_exit(connp);
7068 }
7069 
7070 /*
7071  * udp_output_v6():
7072  * Assumes that udp_wput did some sanity checking on the destination
7073  * address.
7074  */
7075 static mblk_t *
7076 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error)
7077 {
7078 	ip6_t		*ip6h;
7079 	ip6i_t		*ip6i;	/* mp1->b_rptr even if no ip6i_t */
7080 	mblk_t		*mp1 = mp;
7081 	mblk_t		*mp2;
7082 	int		udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
7083 	size_t		ip_len;
7084 	udpha_t		*udph;
7085 	udp_t		*udp = connp->conn_udp;
7086 	queue_t		*q = connp->conn_wq;
7087 	ip6_pkt_t	ipp_s;	/* For ancillary data options */
7088 	ip6_pkt_t	*ipp = &ipp_s;
7089 	ip6_pkt_t	*tipp;	/* temporary ipp */
7090 	uint32_t	csum = 0;
7091 	uint_t		ignore = 0;
7092 	uint_t		option_exists = 0, is_sticky = 0;
7093 	uint8_t		*cp;
7094 	uint8_t		*nxthdr_ptr;
7095 	in6_addr_t	ip6_dst;
7096 	udpattrs_t	attrs;
7097 	boolean_t	opt_present;
7098 	ip6_hbh_t	*hopoptsptr = NULL;
7099 	uint_t		hopoptslen = 0;
7100 	boolean_t	is_ancillary = B_FALSE;
7101 
7102 	*error = 0;
7103 
7104 	/*
7105 	 * If the local address is a mapped address return
7106 	 * an error.
7107 	 * It would be possible to send an IPv6 packet but the
7108 	 * response would never make it back to the application
7109 	 * since it is bound to a mapped address.
7110 	 */
7111 	if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) {
7112 		*error = EADDRNOTAVAIL;
7113 		goto done;
7114 	}
7115 
7116 	ipp->ipp_fields = 0;
7117 	ipp->ipp_sticky_ignored = 0;
7118 
7119 	/*
7120 	 * If TPI options passed in, feed it for verification and handling
7121 	 */
7122 	attrs.udpattr_credset = B_FALSE;
7123 	opt_present = B_FALSE;
7124 	if (DB_TYPE(mp) != M_DATA) {
7125 		mp1 = mp->b_cont;
7126 		if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) {
7127 			attrs.udpattr_ipp = ipp;
7128 			attrs.udpattr_mb = mp;
7129 			if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0)
7130 				goto done;
7131 			ASSERT(*error == 0);
7132 			opt_present = B_TRUE;
7133 		}
7134 	}
7135 	ignore = ipp->ipp_sticky_ignored;
7136 
7137 	/* mp1 points to the M_DATA mblk carrying the packet */
7138 	ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);
7139 
7140 	if (sin6->sin6_scope_id != 0 &&
7141 	    IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
7142 		/*
7143 		 * IPPF_SCOPE_ID is special.  It's neither a sticky
7144 		 * option nor ancillary data.  It needs to be
7145 		 * explicitly set in options_exists.
7146 		 */
7147 		option_exists |= IPPF_SCOPE_ID;
7148 	}
7149 
7150 	/*
7151 	 * Compute the destination address
7152 	 */
7153 	ip6_dst = sin6->sin6_addr;
7154 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
7155 		ip6_dst = ipv6_loopback;
7156 
7157 	/*
7158 	 * If we're not going to the same destination as last time, then
7159 	 * recompute the label required.  This is done in a separate routine to
7160 	 * avoid blowing up our stack here.
7161 	 *
7162 	 * TSOL Note: Since we are not in WRITER mode, UDP packets
7163 	 * to different destination may require different labels.
7164 	 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts,
7165 	 * and sticky ipp_hopoptslen are consistent for the current
7166 	 * destination and are updated atomically.
7167 	 */
7168 	mutex_enter(&connp->conn_lock);
7169 	if (is_system_labeled()) {
7170 		/* Using UDP MLP requires SCM_UCRED from user */
7171 		if (connp->conn_mlp_type != mlptSingle &&
7172 		    !attrs.udpattr_credset) {
7173 			DTRACE_PROBE4(
7174 			    tx__ip__log__info__output__udp6,
7175 			    char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
7176 			    mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q);
7177 			*error = ECONNREFUSED;
7178 			mutex_exit(&connp->conn_lock);
7179 			goto done;
7180 		}
7181 		if ((opt_present ||
7182 		    !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst)) &&
7183 		    (*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) {
7184 			mutex_exit(&connp->conn_lock);
7185 			goto done;
7186 		}
7187 	}
7188 
7189 	/*
7190 	 * If there's a security label here, then we ignore any options the
7191 	 * user may try to set.  We keep the peer's label as a hidden sticky
7192 	 * option. We make a private copy of this label before releasing the
7193 	 * lock so that label is kept consistent with the destination addr.
7194 	 */
7195 	if (udp->udp_label_len_v6 > 0) {
7196 		ignore &= ~IPPF_HOPOPTS;
7197 		ipp->ipp_fields &= ~IPPF_HOPOPTS;
7198 	}
7199 
7200 	if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) {
7201 		/* No sticky options nor ancillary data. */
7202 		mutex_exit(&connp->conn_lock);
7203 		goto no_options;
7204 	}
7205 
7206 	/*
7207 	 * Go through the options figuring out where each is going to
7208 	 * come from and build two masks.  The first mask indicates if
7209 	 * the option exists at all.  The second mask indicates if the
7210 	 * option is sticky or ancillary.
7211 	 */
7212 	if (!(ignore & IPPF_HOPOPTS)) {
7213 		if (ipp->ipp_fields & IPPF_HOPOPTS) {
7214 			option_exists |= IPPF_HOPOPTS;
7215 			udp_ip_hdr_len += ipp->ipp_hopoptslen;
7216 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) {
7217 			option_exists |= IPPF_HOPOPTS;
7218 			is_sticky |= IPPF_HOPOPTS;
7219 			ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0);
7220 			hopoptsptr = kmem_alloc(
7221 			    udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP);
7222 			if (hopoptsptr == NULL) {
7223 				*error = ENOMEM;
7224 				mutex_exit(&connp->conn_lock);
7225 				goto done;
7226 			}
7227 			hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen;
7228 			bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr,
7229 			    hopoptslen);
7230 			udp_ip_hdr_len += hopoptslen;
7231 		}
7232 	}
7233 	mutex_exit(&connp->conn_lock);
7234 
7235 	if (!(ignore & IPPF_RTHDR)) {
7236 		if (ipp->ipp_fields & IPPF_RTHDR) {
7237 			option_exists |= IPPF_RTHDR;
7238 			udp_ip_hdr_len += ipp->ipp_rthdrlen;
7239 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) {
7240 			option_exists |= IPPF_RTHDR;
7241 			is_sticky |= IPPF_RTHDR;
7242 			udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen;
7243 		}
7244 	}
7245 
7246 	if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) {
7247 		if (ipp->ipp_fields & IPPF_RTDSTOPTS) {
7248 			option_exists |= IPPF_RTDSTOPTS;
7249 			udp_ip_hdr_len += ipp->ipp_rtdstoptslen;
7250 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) {
7251 			option_exists |= IPPF_RTDSTOPTS;
7252 			is_sticky |= IPPF_RTDSTOPTS;
7253 			udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen;
7254 		}
7255 	}
7256 
7257 	if (!(ignore & IPPF_DSTOPTS)) {
7258 		if (ipp->ipp_fields & IPPF_DSTOPTS) {
7259 			option_exists |= IPPF_DSTOPTS;
7260 			udp_ip_hdr_len += ipp->ipp_dstoptslen;
7261 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) {
7262 			option_exists |= IPPF_DSTOPTS;
7263 			is_sticky |= IPPF_DSTOPTS;
7264 			udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen;
7265 		}
7266 	}
7267 
7268 	if (!(ignore & IPPF_IFINDEX)) {
7269 		if (ipp->ipp_fields & IPPF_IFINDEX) {
7270 			option_exists |= IPPF_IFINDEX;
7271 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) {
7272 			option_exists |= IPPF_IFINDEX;
7273 			is_sticky |= IPPF_IFINDEX;
7274 		}
7275 	}
7276 
7277 	if (!(ignore & IPPF_ADDR)) {
7278 		if (ipp->ipp_fields & IPPF_ADDR) {
7279 			option_exists |= IPPF_ADDR;
7280 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) {
7281 			option_exists |= IPPF_ADDR;
7282 			is_sticky |= IPPF_ADDR;
7283 		}
7284 	}
7285 
7286 	if (!(ignore & IPPF_DONTFRAG)) {
7287 		if (ipp->ipp_fields & IPPF_DONTFRAG) {
7288 			option_exists |= IPPF_DONTFRAG;
7289 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) {
7290 			option_exists |= IPPF_DONTFRAG;
7291 			is_sticky |= IPPF_DONTFRAG;
7292 		}
7293 	}
7294 
7295 	if (!(ignore & IPPF_USE_MIN_MTU)) {
7296 		if (ipp->ipp_fields & IPPF_USE_MIN_MTU) {
7297 			option_exists |= IPPF_USE_MIN_MTU;
7298 		} else if (udp->udp_sticky_ipp.ipp_fields &
7299 		    IPPF_USE_MIN_MTU) {
7300 			option_exists |= IPPF_USE_MIN_MTU;
7301 			is_sticky |= IPPF_USE_MIN_MTU;
7302 		}
7303 	}
7304 
7305 	if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT))
7306 		option_exists |= IPPF_HOPLIMIT;
7307 	/* IPV6_HOPLIMIT can never be sticky */
7308 	ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT));
7309 
7310 	if (!(ignore & IPPF_UNICAST_HOPS) &&
7311 	    (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) {
7312 		option_exists |= IPPF_UNICAST_HOPS;
7313 		is_sticky |= IPPF_UNICAST_HOPS;
7314 	}
7315 
7316 	if (!(ignore & IPPF_MULTICAST_HOPS) &&
7317 	    (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) {
7318 		option_exists |= IPPF_MULTICAST_HOPS;
7319 		is_sticky |= IPPF_MULTICAST_HOPS;
7320 	}
7321 
7322 	if (!(ignore & IPPF_TCLASS)) {
7323 		if (ipp->ipp_fields & IPPF_TCLASS) {
7324 			option_exists |= IPPF_TCLASS;
7325 		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) {
7326 			option_exists |= IPPF_TCLASS;
7327 			is_sticky |= IPPF_TCLASS;
7328 		}
7329 	}
7330 
7331 	if (!(ignore & IPPF_NEXTHOP) &&
7332 	    (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) {
7333 		option_exists |= IPPF_NEXTHOP;
7334 		is_sticky |= IPPF_NEXTHOP;
7335 	}
7336 
7337 no_options:
7338 
7339 	/*
7340 	 * If any options carried in the ip6i_t were specified, we
7341 	 * need to account for the ip6i_t in the data we'll be sending
7342 	 * down.
7343 	 */
7344 	if (option_exists & IPPF_HAS_IP6I)
7345 		udp_ip_hdr_len += sizeof (ip6i_t);
7346 
7347 	/* check/fix buffer config, setup pointers into it */
7348 	ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len];
7349 	if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) ||
7350 	    !OK_32PTR(ip6h)) {
7351 		/* Try to get everything in a single mblk next time */
7352 		if (udp_ip_hdr_len > udp->udp_max_hdr_len) {
7353 			udp->udp_max_hdr_len = udp_ip_hdr_len;
7354 			(void) mi_set_sth_wroff(UDP_RD(q),
7355 			    udp->udp_max_hdr_len + udp_wroff_extra);
7356 		}
7357 		mp2 = allocb(udp_ip_hdr_len + udp_wroff_extra, BPRI_LO);
7358 		if (mp2 == NULL) {
7359 			*error = ENOMEM;
7360 			goto done;
7361 		}
7362 		mp2->b_wptr = DB_LIM(mp2);
7363 		mp2->b_cont = mp1;
7364 		mp1 = mp2;
7365 		if (DB_TYPE(mp) != M_DATA)
7366 			mp->b_cont = mp1;
7367 		else
7368 			mp = mp1;
7369 
7370 		ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len);
7371 	}
7372 	mp1->b_rptr = (unsigned char *)ip6h;
7373 	ip6i = (ip6i_t *)ip6h;
7374 
7375 #define	ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp)
7376 	if (option_exists & IPPF_HAS_IP6I) {
7377 		ip6h = (ip6_t *)&ip6i[1];
7378 		ip6i->ip6i_flags = 0;
7379 		ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
7380 
7381 		/* sin6_scope_id takes precendence over IPPF_IFINDEX */
7382 		if (option_exists & IPPF_SCOPE_ID) {
7383 			ip6i->ip6i_flags |= IP6I_IFINDEX;
7384 			ip6i->ip6i_ifindex = sin6->sin6_scope_id;
7385 		} else if (option_exists & IPPF_IFINDEX) {
7386 			tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX);
7387 			ASSERT(tipp->ipp_ifindex != 0);
7388 			ip6i->ip6i_flags |= IP6I_IFINDEX;
7389 			ip6i->ip6i_ifindex = tipp->ipp_ifindex;
7390 		}
7391 
7392 		if (option_exists & IPPF_ADDR) {
7393 			/*
7394 			 * Enable per-packet source address verification if
7395 			 * IPV6_PKTINFO specified the source address.
7396 			 * ip6_src is set in the transport's _wput function.
7397 			 */
7398 			ip6i->ip6i_flags |= IP6I_VERIFY_SRC;
7399 		}
7400 
7401 		if (option_exists & IPPF_DONTFRAG) {
7402 			ip6i->ip6i_flags |= IP6I_DONTFRAG;
7403 		}
7404 
7405 		if (option_exists & IPPF_USE_MIN_MTU) {
7406 			ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU(
7407 			    ip6i->ip6i_flags, ipp->ipp_use_min_mtu);
7408 		}
7409 
7410 		if (option_exists & IPPF_NEXTHOP) {
7411 			tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP);
7412 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop));
7413 			ip6i->ip6i_flags |= IP6I_NEXTHOP;
7414 			ip6i->ip6i_nexthop = tipp->ipp_nexthop;
7415 		}
7416 
7417 		/*
7418 		 * tell IP this is an ip6i_t private header
7419 		 */
7420 		ip6i->ip6i_nxt = IPPROTO_RAW;
7421 	}
7422 
7423 	/* Initialize IPv6 header */
7424 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
7425 	bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src));
7426 
7427 	/* Set the hoplimit of the outgoing packet. */
7428 	if (option_exists & IPPF_HOPLIMIT) {
7429 		/* IPV6_HOPLIMIT ancillary data overrides all other settings. */
7430 		ip6h->ip6_hops = ipp->ipp_hoplimit;
7431 		ip6i->ip6i_flags |= IP6I_HOPLIMIT;
7432 	} else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
7433 		ip6h->ip6_hops = udp->udp_multicast_ttl;
7434 		if (option_exists & IPPF_MULTICAST_HOPS)
7435 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
7436 	} else {
7437 		ip6h->ip6_hops = udp->udp_ttl;
7438 		if (option_exists & IPPF_UNICAST_HOPS)
7439 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
7440 	}
7441 
7442 	if (option_exists & IPPF_ADDR) {
7443 		tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR);
7444 		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr));
7445 		ip6h->ip6_src = tipp->ipp_addr;
7446 	} else {
7447 		/*
7448 		 * The source address was not set using IPV6_PKTINFO.
7449 		 * First look at the bound source.
7450 		 * If unspecified fallback to __sin6_src_id.
7451 		 */
7452 		ip6h->ip6_src = udp->udp_v6src;
7453 		if (sin6->__sin6_src_id != 0 &&
7454 		    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
7455 			ip_srcid_find_id(sin6->__sin6_src_id,
7456 			    &ip6h->ip6_src, connp->conn_zoneid);
7457 		}
7458 	}
7459 
7460 	nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
7461 	cp = (uint8_t *)&ip6h[1];
7462 
7463 	/*
7464 	 * Here's where we have to start stringing together
7465 	 * any extension headers in the right order:
7466 	 * Hop-by-hop, destination, routing, and final destination opts.
7467 	 */
7468 	if (option_exists & IPPF_HOPOPTS) {
7469 		/* Hop-by-hop options */
7470 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
7471 		tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS);
7472 		if (hopoptslen == 0) {
7473 			hopoptsptr = tipp->ipp_hopopts;
7474 			hopoptslen = tipp->ipp_hopoptslen;
7475 			is_ancillary = B_TRUE;
7476 		}
7477 
7478 		*nxthdr_ptr = IPPROTO_HOPOPTS;
7479 		nxthdr_ptr = &hbh->ip6h_nxt;
7480 
7481 		bcopy(hopoptsptr, cp, hopoptslen);
7482 		cp += hopoptslen;
7483 
7484 		if (hopoptsptr != NULL && !is_ancillary) {
7485 			kmem_free(hopoptsptr, hopoptslen);
7486 			hopoptsptr = NULL;
7487 			hopoptslen = 0;
7488 		}
7489 	}
7490 	/*
7491 	 * En-route destination options
7492 	 * Only do them if there's a routing header as well
7493 	 */
7494 	if (option_exists & IPPF_RTDSTOPTS) {
7495 		ip6_dest_t *dst = (ip6_dest_t *)cp;
7496 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS);
7497 
7498 		*nxthdr_ptr = IPPROTO_DSTOPTS;
7499 		nxthdr_ptr = &dst->ip6d_nxt;
7500 
7501 		bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen);
7502 		cp += tipp->ipp_rtdstoptslen;
7503 	}
7504 	/*
7505 	 * Routing header next
7506 	 */
7507 	if (option_exists & IPPF_RTHDR) {
7508 		ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
7509 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR);
7510 
7511 		*nxthdr_ptr = IPPROTO_ROUTING;
7512 		nxthdr_ptr = &rt->ip6r_nxt;
7513 
7514 		bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen);
7515 		cp += tipp->ipp_rthdrlen;
7516 	}
7517 	/*
7518 	 * Do ultimate destination options
7519 	 */
7520 	if (option_exists & IPPF_DSTOPTS) {
7521 		ip6_dest_t *dest = (ip6_dest_t *)cp;
7522 		tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS);
7523 
7524 		*nxthdr_ptr = IPPROTO_DSTOPTS;
7525 		nxthdr_ptr = &dest->ip6d_nxt;
7526 
7527 		bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen);
7528 		cp += tipp->ipp_dstoptslen;
7529 	}
7530 	/*
7531 	 * Now set the last header pointer to the proto passed in
7532 	 */
7533 	ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE));
7534 	*nxthdr_ptr = IPPROTO_UDP;
7535 
7536 	/* Update UDP header */
7537 	udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE);
7538 	udph->uha_dst_port = sin6->sin6_port;
7539 	udph->uha_src_port = udp->udp_port;
7540 
7541 	/*
7542 	 * Copy in the destination address
7543 	 */
7544 	ip6h->ip6_dst = ip6_dst;
7545 
7546 	ip6h->ip6_vcf =
7547 	    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
7548 	    (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
7549 
7550 	if (option_exists & IPPF_TCLASS) {
7551 		tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS);
7552 		ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
7553 		    tipp->ipp_tclass);
7554 	}
7555 
7556 	if (option_exists & IPPF_RTHDR) {
7557 		ip6_rthdr_t	*rth;
7558 
7559 		/*
7560 		 * Perform any processing needed for source routing.
7561 		 * We know that all extension headers will be in the same mblk
7562 		 * as the IPv6 header.
7563 		 */
7564 		rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr);
7565 		if (rth != NULL && rth->ip6r_segleft != 0) {
7566 			if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) {
7567 				/*
7568 				 * Drop packet - only support Type 0 routing.
7569 				 * Notify the application as well.
7570 				 */
7571 				*error = EPROTO;
7572 				goto done;
7573 			}
7574 
7575 			/*
7576 			 * rth->ip6r_len is twice the number of
7577 			 * addresses in the header. Thus it must be even.
7578 			 */
7579 			if (rth->ip6r_len & 0x1) {
7580 				*error = EPROTO;
7581 				goto done;
7582 			}
7583 			/*
7584 			 * Shuffle the routing header and ip6_dst
7585 			 * addresses, and get the checksum difference
7586 			 * between the first hop (in ip6_dst) and
7587 			 * the destination (in the last routing hdr entry).
7588 			 */
7589 			csum = ip_massage_options_v6(ip6h, rth);
7590 			/*
7591 			 * Verify that the first hop isn't a mapped address.
7592 			 * Routers along the path need to do this verification
7593 			 * for subsequent hops.
7594 			 */
7595 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
7596 				*error = EADDRNOTAVAIL;
7597 				goto done;
7598 			}
7599 
7600 			cp += (rth->ip6r_len + 1)*8;
7601 		}
7602 	}
7603 
7604 	/* count up length of UDP packet */
7605 	ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN;
7606 	if ((mp2 = mp1->b_cont) != NULL) {
7607 		do {
7608 			ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX);
7609 			ip_len += (uint32_t)MBLKL(mp2);
7610 		} while ((mp2 = mp2->b_cont) != NULL);
7611 	}
7612 
7613 	/*
7614 	 * If the size of the packet is greater than the maximum allowed by
7615 	 * ip, return an error. Passing this down could cause panics because
7616 	 * the size will have wrapped and be inconsistent with the msg size.
7617 	 */
7618 	if (ip_len > IP_MAXPACKET) {
7619 		*error = EMSGSIZE;
7620 		goto done;
7621 	}
7622 
7623 	/* Store the UDP length. Subtract length of extension hdrs */
7624 	udph->uha_length = htons(ip_len + IPV6_HDR_LEN -
7625 	    (int)((uchar_t *)udph - (uchar_t *)ip6h));
7626 
7627 	/*
7628 	 * We make it easy for IP to include our pseudo header
7629 	 * by putting our length in uh_checksum, modified (if
7630 	 * we have a routing header) by the checksum difference
7631 	 * between the ultimate destination and first hop addresses.
7632 	 * Note: UDP over IPv6 must always checksum the packet.
7633 	 */
7634 	csum += udph->uha_length;
7635 	csum = (csum & 0xFFFF) + (csum >> 16);
7636 	udph->uha_checksum = (uint16_t)csum;
7637 
7638 #ifdef _LITTLE_ENDIAN
7639 	ip_len = htons(ip_len);
7640 #endif
7641 	ip6h->ip6_plen = ip_len;
7642 	if (DB_CRED(mp) != NULL)
7643 		mblk_setcred(mp1, DB_CRED(mp));
7644 
7645 	if (DB_TYPE(mp) != M_DATA) {
7646 		ASSERT(mp != mp1);
7647 		freeb(mp);
7648 	}
7649 
7650 	/* mp has been consumed and we'll return success */
7651 	ASSERT(*error == 0);
7652 	mp = NULL;
7653 
7654 	/* We're done. Pass the packet to IP */
7655 	BUMP_MIB(&udp_mib, udpOutDatagrams);
7656 	ip_output_v6(connp, mp1, q, IP_WPUT);
7657 
7658 done:
7659 	if (hopoptsptr != NULL && !is_ancillary) {
7660 		kmem_free(hopoptsptr, hopoptslen);
7661 		hopoptsptr = NULL;
7662 	}
7663 	if (*error != 0) {
7664 		ASSERT(mp != NULL);
7665 		BUMP_MIB(&udp_mib, udpOutErrors);
7666 	}
7667 	return (mp);
7668 }
7669 
7670 static void
7671 udp_wput_other(queue_t *q, mblk_t *mp)
7672 {
7673 	uchar_t	*rptr = mp->b_rptr;
7674 	struct datab *db;
7675 	struct iocblk *iocp;
7676 	cred_t	*cr;
7677 	conn_t	*connp = Q_TO_CONN(q);
7678 	udp_t	*udp = connp->conn_udp;
7679 
7680 	TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START,
7681 		"udp_wput_other_start: q %p", q);
7682 
7683 	db = mp->b_datap;
7684 
7685 	cr = DB_CREDDEF(mp, connp->conn_cred);
7686 
7687 	switch (db->db_type) {
7688 	case M_PROTO:
7689 	case M_PCPROTO:
7690 		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
7691 			freemsg(mp);
7692 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7693 				"udp_wput_other_end: q %p (%S)",
7694 				q, "protoshort");
7695 			return;
7696 		}
7697 		switch (((t_primp_t)rptr)->type) {
7698 		case T_ADDR_REQ:
7699 			udp_addr_req(q, mp);
7700 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7701 				"udp_wput_other_end: q %p (%S)", q, "addrreq");
7702 			return;
7703 		case O_T_BIND_REQ:
7704 		case T_BIND_REQ:
7705 			udp_bind(q, mp);
7706 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7707 				"udp_wput_other_end: q %p (%S)", q, "bindreq");
7708 			return;
7709 		case T_CONN_REQ:
7710 			udp_connect(q, mp);
7711 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7712 				"udp_wput_other_end: q %p (%S)", q, "connreq");
7713 			return;
7714 		case T_CAPABILITY_REQ:
7715 			udp_capability_req(q, mp);
7716 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7717 				"udp_wput_other_end: q %p (%S)", q, "capabreq");
7718 			return;
7719 		case T_INFO_REQ:
7720 			udp_info_req(q, mp);
7721 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7722 				"udp_wput_other_end: q %p (%S)", q, "inforeq");
7723 			return;
7724 		case T_UNITDATA_REQ:
7725 			/*
7726 			 * If a T_UNITDATA_REQ gets here, the address must
7727 			 * be bad.  Valid T_UNITDATA_REQs are handled
7728 			 * in udp_wput.
7729 			 */
7730 			udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL);
7731 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7732 				"udp_wput_other_end: q %p (%S)",
7733 				q, "unitdatareq");
7734 			return;
7735 		case T_UNBIND_REQ:
7736 			udp_unbind(q, mp);
7737 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7738 			    "udp_wput_other_end: q %p (%S)", q, "unbindreq");
7739 			return;
7740 		case T_SVR4_OPTMGMT_REQ:
7741 			if (!snmpcom_req(q, mp, udp_snmp_set, udp_snmp_get, cr))
7742 				/*
7743 				 * Use upper queue for option processing in
7744 				 * case the request is not handled at this
7745 				 * level and needs to be passed down to IP.
7746 				 */
7747 				(void) svr4_optcom_req(_WR(UDP_RD(q)),
7748 				    mp, cr, &udp_opt_obj);
7749 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7750 			    "udp_wput_other_end: q %p (%S)",
7751 			    q, "optmgmtreq");
7752 			return;
7753 
7754 		case T_OPTMGMT_REQ:
7755 			/*
7756 			 * Use upper queue for option processing in
7757 			 * case the request is not handled at this
7758 			 * level and needs to be passed down to IP.
7759 			 */
7760 			(void) tpi_optcom_req(_WR(UDP_RD(q)),
7761 			    mp, cr, &udp_opt_obj);
7762 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7763 				"udp_wput_other_end: q %p (%S)",
7764 				q, "optmgmtreq");
7765 			return;
7766 
7767 		case T_DISCON_REQ:
7768 			udp_disconnect(q, mp);
7769 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7770 				"udp_wput_other_end: q %p (%S)",
7771 				q, "disconreq");
7772 			return;
7773 
7774 		/* The following TPI message is not supported by udp. */
7775 		case O_T_CONN_RES:
7776 		case T_CONN_RES:
7777 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
7778 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7779 				"udp_wput_other_end: q %p (%S)",
7780 				q, "connres/disconreq");
7781 			return;
7782 
7783 		/* The following 3 TPI messages are illegal for udp. */
7784 		case T_DATA_REQ:
7785 		case T_EXDATA_REQ:
7786 		case T_ORDREL_REQ:
7787 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
7788 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7789 				"udp_wput_other_end: q %p (%S)",
7790 				q, "data/exdata/ordrel");
7791 			return;
7792 		default:
7793 			break;
7794 		}
7795 		break;
7796 	case M_FLUSH:
7797 		if (*rptr & FLUSHW)
7798 			flushq(q, FLUSHDATA);
7799 		break;
7800 	case M_IOCTL:
7801 		iocp = (struct iocblk *)mp->b_rptr;
7802 		switch (iocp->ioc_cmd) {
7803 		case TI_GETPEERNAME:
7804 			if (udp->udp_state != TS_DATA_XFER) {
7805 				/*
7806 				 * If a default destination address has not
7807 				 * been associated with the stream, then we
7808 				 * don't know the peer's name.
7809 				 */
7810 				iocp->ioc_error = ENOTCONN;
7811 				iocp->ioc_count = 0;
7812 				mp->b_datap->db_type = M_IOCACK;
7813 				putnext(UDP_RD(q), mp);
7814 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7815 					"udp_wput_other_end: q %p (%S)",
7816 					q, "getpeername");
7817 				return;
7818 			}
7819 			/* FALLTHRU */
7820 		case TI_GETMYNAME: {
7821 			/*
7822 			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
7823 			 * need to copyin the user's strbuf structure.
7824 			 * Processing will continue in the M_IOCDATA case
7825 			 * below.
7826 			 */
7827 			mi_copyin(q, mp, NULL,
7828 			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
7829 			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7830 				"udp_wput_other_end: q %p (%S)",
7831 				q, "getmyname");
7832 			return;
7833 			}
7834 		case ND_SET:
7835 			/* nd_getset performs the necessary checking */
7836 		case ND_GET:
7837 			if (nd_getset(q, udp_g_nd, mp)) {
7838 				putnext(UDP_RD(q), mp);
7839 				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7840 					"udp_wput_other_end: q %p (%S)",
7841 					q, "get");
7842 				return;
7843 			}
7844 			break;
7845 		case _SIOCSOCKFALLBACK:
7846 			/*
7847 			 * Either sockmod is about to be popped and the
7848 			 * socket would now be treated as a plain stream,
7849 			 * or a module is about to be pushed so we could
7850 			 * no longer use read-side synchronous stream.
7851 			 * Drain any queued data and disable direct sockfs
7852 			 * interface from now on.
7853 			 */
7854 			if (!udp->udp_issocket) {
7855 				DB_TYPE(mp) = M_IOCNAK;
7856 				iocp->ioc_error = EINVAL;
7857 			} else {
7858 				udp->udp_issocket = B_FALSE;
7859 				if (udp->udp_direct_sockfs) {
7860 					/*
7861 					 * Disable read-side synchronous
7862 					 * stream interface and drain any
7863 					 * queued data.
7864 					 */
7865 					udp_rcv_drain(UDP_RD(q), udp,
7866 					    B_FALSE);
7867 					ASSERT(!udp->udp_direct_sockfs);
7868 					UDP_STAT(udp_sock_fallback);
7869 				}
7870 				DB_TYPE(mp) = M_IOCACK;
7871 				iocp->ioc_error = 0;
7872 			}
7873 			iocp->ioc_count = 0;
7874 			iocp->ioc_rval = 0;
7875 			putnext(UDP_RD(q), mp);
7876 			return;
7877 		default:
7878 			break;
7879 		}
7880 		break;
7881 	case M_IOCDATA:
7882 		udp_wput_iocdata(q, mp);
7883 		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7884 			"udp_wput_other_end: q %p (%S)", q, "iocdata");
7885 		return;
7886 	default:
7887 		/* Unrecognized messages are passed through without change. */
7888 		break;
7889 	}
7890 	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
7891 		"udp_wput_other_end: q %p (%S)", q, "end");
7892 	ip_output(connp, mp, q, IP_WPUT);
7893 }
7894 
7895 /* ARGSUSED */
7896 static void
7897 udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2)
7898 {
7899 	udp_wput_other(((conn_t *)arg)->conn_wq, mp);
7900 	udp_exit((conn_t *)arg);
7901 }
7902 
7903 /*
7904  * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
7905  * messages.
7906  */
7907 static void
7908 udp_wput_iocdata(queue_t *q, mblk_t *mp)
7909 {
7910 	mblk_t	*mp1;
7911 	STRUCT_HANDLE(strbuf, sb);
7912 	uint16_t port;
7913 	in6_addr_t	v6addr;
7914 	ipaddr_t	v4addr;
7915 	uint32_t	flowinfo = 0;
7916 	int		addrlen;
7917 	udp_t		*udp = Q_TO_UDP(q);
7918 
7919 	/* Make sure it is one of ours. */
7920 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
7921 	case TI_GETMYNAME:
7922 	case TI_GETPEERNAME:
7923 		break;
7924 	default:
7925 		ip_output(udp->udp_connp, mp, q, IP_WPUT);
7926 		return;
7927 	}
7928 
7929 	q = WR(UDP_RD(q));
7930 	switch (mi_copy_state(q, mp, &mp1)) {
7931 	case -1:
7932 		return;
7933 	case MI_COPY_CASE(MI_COPY_IN, 1):
7934 		break;
7935 	case MI_COPY_CASE(MI_COPY_OUT, 1):
7936 		/*
7937 		 * The address has been copied out, so now
7938 		 * copyout the strbuf.
7939 		 */
7940 		mi_copyout(q, mp);
7941 		return;
7942 	case MI_COPY_CASE(MI_COPY_OUT, 2):
7943 		/*
7944 		 * The address and strbuf have been copied out.
7945 		 * We're done, so just acknowledge the original
7946 		 * M_IOCTL.
7947 		 */
7948 		mi_copy_done(q, mp, 0);
7949 		return;
7950 	default:
7951 		/*
7952 		 * Something strange has happened, so acknowledge
7953 		 * the original M_IOCTL with an EPROTO error.
7954 		 */
7955 		mi_copy_done(q, mp, EPROTO);
7956 		return;
7957 	}
7958 
7959 	/*
7960 	 * Now we have the strbuf structure for TI_GETMYNAME
7961 	 * and TI_GETPEERNAME.  Next we copyout the requested
7962 	 * address and then we'll copyout the strbuf.
7963 	 */
7964 	STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag,
7965 	    (void *)mp1->b_rptr);
7966 	if (udp->udp_family == AF_INET)
7967 		addrlen = sizeof (sin_t);
7968 	else
7969 		addrlen = sizeof (sin6_t);
7970 
7971 	if (STRUCT_FGET(sb, maxlen) < addrlen) {
7972 		mi_copy_done(q, mp, EINVAL);
7973 		return;
7974 	}
7975 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
7976 	case TI_GETMYNAME:
7977 		if (udp->udp_family == AF_INET) {
7978 			ASSERT(udp->udp_ipversion == IPV4_VERSION);
7979 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
7980 			    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
7981 				v4addr = V4_PART_OF_V6(udp->udp_v6src);
7982 			} else {
7983 				/*
7984 				 * INADDR_ANY
7985 				 * udp_v6src is not set, we might be bound to
7986 				 * broadcast/multicast. Use udp_bound_v6src as
7987 				 * local address instead (that could
7988 				 * also still be INADDR_ANY)
7989 				 */
7990 				v4addr = V4_PART_OF_V6(udp->udp_bound_v6src);
7991 			}
7992 		} else {
7993 			/* udp->udp_family == AF_INET6 */
7994 			if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
7995 				v6addr = udp->udp_v6src;
7996 			} else {
7997 				/*
7998 				 * UNSPECIFIED
7999 				 * udp_v6src is not set, we might be bound to
8000 				 * broadcast/multicast. Use udp_bound_v6src as
8001 				 * local address instead (that could
8002 				 * also still be UNSPECIFIED)
8003 				 */
8004 				v6addr = udp->udp_bound_v6src;
8005 			}
8006 		}
8007 		port = udp->udp_port;
8008 		break;
8009 	case TI_GETPEERNAME:
8010 		if (udp->udp_state != TS_DATA_XFER) {
8011 			mi_copy_done(q, mp, ENOTCONN);
8012 			return;
8013 		}
8014 		if (udp->udp_family == AF_INET) {
8015 			ASSERT(udp->udp_ipversion == IPV4_VERSION);
8016 			v4addr = V4_PART_OF_V6(udp->udp_v6dst);
8017 		} else {
8018 			/* udp->udp_family == AF_INET6) */
8019 			v6addr = udp->udp_v6dst;
8020 			flowinfo = udp->udp_flowinfo;
8021 		}
8022 		port = udp->udp_dstport;
8023 		break;
8024 	default:
8025 		mi_copy_done(q, mp, EPROTO);
8026 		return;
8027 	}
8028 	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
8029 	if (!mp1)
8030 		return;
8031 
8032 	if (udp->udp_family == AF_INET) {
8033 		sin_t *sin;
8034 
8035 		STRUCT_FSET(sb, len, (int)sizeof (sin_t));
8036 		sin = (sin_t *)mp1->b_rptr;
8037 		mp1->b_wptr = (uchar_t *)&sin[1];
8038 		*sin = sin_null;
8039 		sin->sin_family = AF_INET;
8040 		sin->sin_addr.s_addr = v4addr;
8041 		sin->sin_port = port;
8042 	} else {
8043 		/* udp->udp_family == AF_INET6 */
8044 		sin6_t *sin6;
8045 
8046 		STRUCT_FSET(sb, len, (int)sizeof (sin6_t));
8047 		sin6 = (sin6_t *)mp1->b_rptr;
8048 		mp1->b_wptr = (uchar_t *)&sin6[1];
8049 		*sin6 = sin6_null;
8050 		sin6->sin6_family = AF_INET6;
8051 		sin6->sin6_flowinfo = flowinfo;
8052 		sin6->sin6_addr = v6addr;
8053 		sin6->sin6_port = port;
8054 	}
8055 	/* Copy out the address */
8056 	mi_copyout(q, mp);
8057 }
8058 
8059 
8060 static int
8061 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
8062     udpattrs_t *udpattrs)
8063 {
8064 	struct T_unitdata_req *udreqp;
8065 	int is_absreq_failure;
8066 	cred_t *cr;
8067 	conn_t	*connp = Q_TO_CONN(q);
8068 
8069 	ASSERT(((t_primp_t)mp->b_rptr)->type);
8070 
8071 	cr = DB_CREDDEF(mp, connp->conn_cred);
8072 
8073 	udreqp = (struct T_unitdata_req *)mp->b_rptr;
8074 
8075 	/*
8076 	 * Use upper queue for option processing since the callback
8077 	 * routines expect to be called in UDP instance instead of IP.
8078 	 */
8079 	*errorp = tpi_optcom_buf(_WR(UDP_RD(q)), mp, &udreqp->OPT_length,
8080 	    udreqp->OPT_offset, cr, &udp_opt_obj,
8081 	    udpattrs, &is_absreq_failure);
8082 
8083 	if (*errorp != 0) {
8084 		/*
8085 		 * Note: No special action needed in this
8086 		 * module for "is_absreq_failure"
8087 		 */
8088 		return (-1);		/* failure */
8089 	}
8090 	ASSERT(is_absreq_failure == 0);
8091 	return (0);	/* success */
8092 }
8093 
8094 void
8095 udp_ddi_init(void)
8096 {
8097 	int i;
8098 
8099 	UDP6_MAJ = ddi_name_to_major(UDP6);
8100 
8101 	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
8102 	    udp_opt_obj.odb_opt_arr_cnt);
8103 
8104 	if (udp_bind_fanout_size & (udp_bind_fanout_size - 1)) {
8105 		/* Not a power of two. Round up to nearest power of two */
8106 		for (i = 0; i < 31; i++) {
8107 			if (udp_bind_fanout_size < (1 << i))
8108 				break;
8109 		}
8110 		udp_bind_fanout_size = 1 << i;
8111 	}
8112 	udp_bind_fanout = kmem_zalloc(udp_bind_fanout_size *
8113 	    sizeof (udp_fanout_t), KM_SLEEP);
8114 	for (i = 0; i < udp_bind_fanout_size; i++) {
8115 		mutex_init(&udp_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
8116 		    NULL);
8117 	}
8118 	(void) udp_param_register(udp_param_arr, A_CNT(udp_param_arr));
8119 
8120 	udp_kstat_init();
8121 
8122 	udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t),
8123 	    CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0);
8124 }
8125 
8126 void
8127 udp_ddi_destroy(void)
8128 {
8129 	int i;
8130 
8131 	nd_free(&udp_g_nd);
8132 
8133 	for (i = 0; i < udp_bind_fanout_size; i++) {
8134 		mutex_destroy(&udp_bind_fanout[i].uf_lock);
8135 	}
8136 
8137 	kmem_free(udp_bind_fanout, udp_bind_fanout_size *
8138 	    sizeof (udp_fanout_t));
8139 
8140 	udp_kstat_fini();
8141 
8142 	kmem_cache_destroy(udp_cache);
8143 }
8144 
8145 static void
8146 udp_kstat_init(void)
8147 {
8148 	udp_named_kstat_t template = {
8149 		{ "inDatagrams",	KSTAT_DATA_UINT32, 0 },
8150 		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
8151 		{ "outDatagrams",	KSTAT_DATA_UINT32, 0 },
8152 		{ "entrySize",		KSTAT_DATA_INT32, 0 },
8153 		{ "entry6Size",		KSTAT_DATA_INT32, 0 },
8154 		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
8155 	};
8156 
8157 	udp_mibkp = kstat_create(UDP_MOD_NAME, 0, UDP_MOD_NAME,
8158 	    "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(udp_named_kstat_t), 0);
8159 
8160 	if (udp_mibkp == NULL)
8161 		return;
8162 
8163 	template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t);
8164 	template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t);
8165 
8166 	bcopy(&template, udp_mibkp->ks_data, sizeof (template));
8167 
8168 	udp_mibkp->ks_update = udp_kstat_update;
8169 
8170 	kstat_install(udp_mibkp);
8171 
8172 	if ((udp_ksp = kstat_create(UDP_MOD_NAME, 0, "udpstat",
8173 	    "net", KSTAT_TYPE_NAMED,
8174 	    sizeof (udp_statistics) / sizeof (kstat_named_t),
8175 	    KSTAT_FLAG_VIRTUAL)) != NULL) {
8176 		udp_ksp->ks_data = &udp_statistics;
8177 		kstat_install(udp_ksp);
8178 	}
8179 }
8180 
8181 static void
8182 udp_kstat_fini(void)
8183 {
8184 	if (udp_ksp != NULL) {
8185 		kstat_delete(udp_ksp);
8186 		udp_ksp = NULL;
8187 	}
8188 	if (udp_mibkp != NULL) {
8189 		kstat_delete(udp_mibkp);
8190 		udp_mibkp = NULL;
8191 	}
8192 }
8193 
8194 static int
8195 udp_kstat_update(kstat_t *kp, int rw)
8196 {
8197 	udp_named_kstat_t *udpkp;
8198 
8199 	if ((kp == NULL) || (kp->ks_data == NULL))
8200 		return (EIO);
8201 
8202 	if (rw == KSTAT_WRITE)
8203 		return (EACCES);
8204 
8205 	udpkp = (udp_named_kstat_t *)kp->ks_data;
8206 
8207 	udpkp->inDatagrams.value.ui32 =	udp_mib.udpInDatagrams;
8208 	udpkp->inErrors.value.ui32 =	udp_mib.udpInErrors;
8209 	udpkp->outDatagrams.value.ui32 = udp_mib.udpOutDatagrams;
8210 	udpkp->outErrors.value.ui32 =	udp_mib.udpOutErrors;
8211 
8212 	return (0);
8213 }
8214 
8215 /* ARGSUSED */
8216 static void
8217 udp_rput(queue_t *q, mblk_t *mp)
8218 {
8219 	/*
8220 	 * We get here whenever we do qreply() from IP,
8221 	 * i.e as part of handlings ioctls, etc.
8222 	 */
8223 	putnext(q, mp);
8224 }
8225 
8226 /*
8227  * Read-side synchronous stream info entry point, called as a
8228  * result of handling certain STREAMS ioctl operations.
8229  */
8230 static int
8231 udp_rinfop(queue_t *q, infod_t *dp)
8232 {
8233 	mblk_t	*mp;
8234 	uint_t	cmd = dp->d_cmd;
8235 	int	res = 0;
8236 	int	error = 0;
8237 	udp_t	*udp = Q_TO_UDP(RD(UDP_WR(q)));
8238 	struct stdata *stp = STREAM(q);
8239 
8240 	mutex_enter(&udp->udp_drain_lock);
8241 	/* If shutdown on read has happened, return nothing */
8242 	mutex_enter(&stp->sd_lock);
8243 	if (stp->sd_flag & STREOF) {
8244 		mutex_exit(&stp->sd_lock);
8245 		goto done;
8246 	}
8247 	mutex_exit(&stp->sd_lock);
8248 
8249 	if ((mp = udp->udp_rcv_list_head) == NULL)
8250 		goto done;
8251 
8252 	ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL);
8253 
8254 	if (cmd & INFOD_COUNT) {
8255 		/*
8256 		 * Return the number of messages.
8257 		 */
8258 		dp->d_count += udp->udp_rcv_msgcnt;
8259 		res |= INFOD_COUNT;
8260 	}
8261 	if (cmd & INFOD_BYTES) {
8262 		/*
8263 		 * Return size of all data messages.
8264 		 */
8265 		dp->d_bytes += udp->udp_rcv_cnt;
8266 		res |= INFOD_BYTES;
8267 	}
8268 	if (cmd & INFOD_FIRSTBYTES) {
8269 		/*
8270 		 * Return size of first data message.
8271 		 */
8272 		dp->d_bytes = msgdsize(mp);
8273 		res |= INFOD_FIRSTBYTES;
8274 		dp->d_cmd &= ~INFOD_FIRSTBYTES;
8275 	}
8276 	if (cmd & INFOD_COPYOUT) {
8277 		mblk_t *mp1 = mp->b_cont;
8278 		int n;
8279 		/*
8280 		 * Return data contents of first message.
8281 		 */
8282 		ASSERT(DB_TYPE(mp1) == M_DATA);
8283 		while (mp1 != NULL && dp->d_uiop->uio_resid > 0) {
8284 			n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1));
8285 			if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n,
8286 			    UIO_READ, dp->d_uiop)) != 0) {
8287 				goto done;
8288 			}
8289 			mp1 = mp1->b_cont;
8290 		}
8291 		res |= INFOD_COPYOUT;
8292 		dp->d_cmd &= ~INFOD_COPYOUT;
8293 	}
8294 done:
8295 	mutex_exit(&udp->udp_drain_lock);
8296 
8297 	dp->d_res |= res;
8298 
8299 	return (error);
8300 }
8301 
8302 /*
8303  * Read-side synchronous stream entry point.  This is called as a result
8304  * of recv/read operation done at sockfs, and is guaranteed to execute
8305  * outside of the interrupt thread context.  It returns a single datagram
8306  * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer.
8307  */
8308 static int
8309 udp_rrw(queue_t *q, struiod_t *dp)
8310 {
8311 	mblk_t	*mp;
8312 	udp_t	*udp = Q_TO_UDP(_RD(UDP_WR(q)));
8313 
8314 	/* We should never get here when we're in SNMP mode */
8315 	ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD));
8316 
8317 	/*
8318 	 * Dequeue datagram from the head of the list and return
8319 	 * it to caller; also ensure that RSLEEP sd_wakeq flag is
8320 	 * set/cleared depending on whether or not there's data
8321 	 * remaining in the list.
8322 	 */
8323 	mutex_enter(&udp->udp_drain_lock);
8324 	if (!udp->udp_direct_sockfs) {
8325 		mutex_exit(&udp->udp_drain_lock);
8326 		UDP_STAT(udp_rrw_busy);
8327 		return (EBUSY);
8328 	}
8329 	if ((mp = udp->udp_rcv_list_head) != NULL) {
8330 		uint_t size = msgdsize(mp);
8331 
8332 		/* Last datagram in the list? */
8333 		if ((udp->udp_rcv_list_head = mp->b_next) == NULL)
8334 			udp->udp_rcv_list_tail = NULL;
8335 		mp->b_next = NULL;
8336 
8337 		udp->udp_rcv_cnt -= size;
8338 		udp->udp_rcv_msgcnt--;
8339 		UDP_STAT(udp_rrw_msgcnt);
8340 
8341 		/* No longer flow-controlling? */
8342 		if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat &&
8343 		    udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat)
8344 			udp->udp_drain_qfull = B_FALSE;
8345 	}
8346 	if (udp->udp_rcv_list_head == NULL) {
8347 		/*
8348 		 * Either we just dequeued the last datagram or
8349 		 * we get here from sockfs and have nothing to
8350 		 * return; in this case clear RSLEEP.
8351 		 */
8352 		ASSERT(udp->udp_rcv_cnt == 0);
8353 		ASSERT(udp->udp_rcv_msgcnt == 0);
8354 		ASSERT(udp->udp_rcv_list_tail == NULL);
8355 		STR_WAKEUP_CLEAR(STREAM(q));
8356 	} else {
8357 		/*
8358 		 * More data follows; we need udp_rrw() to be
8359 		 * called in future to pick up the rest.
8360 		 */
8361 		STR_WAKEUP_SET(STREAM(q));
8362 	}
8363 	mutex_exit(&udp->udp_drain_lock);
8364 	dp->d_mp = mp;
8365 	return (0);
8366 }
8367 
8368 /*
8369  * Enqueue a completely-built T_UNITDATA_IND message into the receive
8370  * list; this is typically executed within the interrupt thread context
8371  * and so we do things as quickly as possible.
8372  */
8373 static void
8374 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len)
8375 {
8376 	ASSERT(q == RD(q));
8377 	ASSERT(pkt_len == msgdsize(mp));
8378 	ASSERT(mp->b_next == NULL && mp->b_cont != NULL);
8379 	ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA);
8380 	ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind));
8381 
8382 	mutex_enter(&udp->udp_drain_lock);
8383 	/*
8384 	 * Wake up and signal the receiving app; it is okay to do this
8385 	 * before enqueueing the mp because we are holding the drain lock.
8386 	 * One of the advantages of synchronous stream is the ability for
8387 	 * us to find out when the application performs a read on the
8388 	 * socket by way of udp_rrw() entry point being called.  We need
8389 	 * to generate SIGPOLL/SIGIO for each received data in the case
8390 	 * of asynchronous socket just as in the strrput() case.  However,
8391 	 * we only wake the application up when necessary, i.e. during the
8392 	 * first enqueue.  When udp_rrw() is called, we send up a single
8393 	 * datagram upstream and call STR_WAKEUP_SET() again when there
8394 	 * are still data remaining in our receive queue.
8395 	 */
8396 	if (udp->udp_rcv_list_head == NULL) {
8397 		STR_WAKEUP_SET(STREAM(q));
8398 		udp->udp_rcv_list_head = mp;
8399 	} else {
8400 		udp->udp_rcv_list_tail->b_next = mp;
8401 	}
8402 	udp->udp_rcv_list_tail = mp;
8403 	udp->udp_rcv_cnt += pkt_len;
8404 	udp->udp_rcv_msgcnt++;
8405 
8406 	/* Need to flow-control? */
8407 	if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat ||
8408 	    udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat)
8409 		udp->udp_drain_qfull = B_TRUE;
8410 
8411 	/* Update poll events and send SIGPOLL/SIGIO if necessary */
8412 	STR_SENDSIG(STREAM(q));
8413 	mutex_exit(&udp->udp_drain_lock);
8414 }
8415 
8416 /*
8417  * Drain the contents of receive list to the module upstream; we do
8418  * this during close or when we fallback to the slow mode due to
8419  * sockmod being popped or a module being pushed on top of us.
8420  */
8421 static void
8422 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing)
8423 {
8424 	mblk_t *mp;
8425 
8426 	ASSERT(q == RD(q));
8427 
8428 	mutex_enter(&udp->udp_drain_lock);
8429 	/*
8430 	 * There is no race with a concurrent udp_input() sending
8431 	 * up packets using putnext() after we have cleared the
8432 	 * udp_direct_sockfs flag but before we have completed
8433 	 * sending up the packets in udp_rcv_list, since we are
8434 	 * either a writer or we have quiesced the conn.
8435 	 */
8436 	udp->udp_direct_sockfs = B_FALSE;
8437 	mutex_exit(&udp->udp_drain_lock);
8438 
8439 	if (udp->udp_rcv_list_head != NULL)
8440 		UDP_STAT(udp_drain);
8441 
8442 	/*
8443 	 * Send up everything via putnext(); note here that we
8444 	 * don't need the udp_drain_lock to protect us since
8445 	 * nothing can enter udp_rrw() and that we currently
8446 	 * have exclusive access to this udp.
8447 	 */
8448 	while ((mp = udp->udp_rcv_list_head) != NULL) {
8449 		udp->udp_rcv_list_head = mp->b_next;
8450 		mp->b_next = NULL;
8451 		udp->udp_rcv_cnt -= msgdsize(mp);
8452 		udp->udp_rcv_msgcnt--;
8453 		if (closing) {
8454 			freemsg(mp);
8455 		} else {
8456 			putnext(q, mp);
8457 		}
8458 	}
8459 	ASSERT(udp->udp_rcv_cnt == 0);
8460 	ASSERT(udp->udp_rcv_msgcnt == 0);
8461 	ASSERT(udp->udp_rcv_list_head == NULL);
8462 	udp->udp_rcv_list_tail = NULL;
8463 	udp->udp_drain_qfull = B_FALSE;
8464 }
8465 
8466 static size_t
8467 udp_set_rcv_hiwat(udp_t *udp, size_t size)
8468 {
8469 	/* We add a bit of extra buffering */
8470 	size += size >> 1;
8471 	if (size > udp_max_buf)
8472 		size = udp_max_buf;
8473 
8474 	udp->udp_rcv_hiwat = size;
8475 	return (size);
8476 }
8477 
8478 /*
8479  * Little helper for IPsec's NAT-T processing.
8480  */
8481 boolean_t
8482 udp_compute_checksum(void)
8483 {
8484 	return (udp_do_checksum);
8485 }
8486