xref: /titanic_41/usr/src/uts/common/inet/ip/icmp.c (revision c33df7ede245a3815b726e3eb38752e85ebb081f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 #include <sys/types.h>
31 #include <sys/stream.h>
32 #include <sys/stropts.h>
33 #include <sys/strlog.h>
34 #include <sys/strsun.h>
35 #define	_SUN_TPI_VERSION 2
36 #include <sys/tihdr.h>
37 #include <sys/timod.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/strsubr.h>
41 #include <sys/cmn_err.h>
42 #include <sys/debug.h>
43 #include <sys/kmem.h>
44 #include <sys/policy.h>
45 #include <sys/priv.h>
46 #include <sys/zone.h>
47 #include <sys/time.h>
48 
49 #include <sys/socket.h>
50 #include <sys/isa_defs.h>
51 #include <sys/suntpi.h>
52 #include <sys/xti_inet.h>
53 #include <sys/netstack.h>
54 
55 #include <net/route.h>
56 #include <net/if.h>
57 
58 #include <netinet/in.h>
59 #include <netinet/ip6.h>
60 #include <netinet/icmp6.h>
61 #include <inet/common.h>
62 #include <inet/ip.h>
63 #include <inet/ip6.h>
64 #include <inet/mi.h>
65 #include <inet/nd.h>
66 #include <inet/optcom.h>
67 #include <inet/snmpcom.h>
68 #include <inet/kstatcom.h>
69 #include <inet/rawip_impl.h>
70 
71 #include <netinet/ip_mroute.h>
72 #include <inet/tcp.h>
73 #include <net/pfkeyv2.h>
74 #include <inet/ipsec_info.h>
75 #include <inet/ipclassifier.h>
76 
77 #include <sys/tsol/label.h>
78 #include <sys/tsol/tnet.h>
79 
80 #include <inet/ip_ire.h>
81 #include <inet/ip_if.h>
82 
83 #include <inet/ip_impl.h>
84 
85 /*
86  * Synchronization notes:
87  *
88  * RAWIP is MT and uses the usual kernel synchronization primitives. There is
89  * one lock, icmp_rwlock. We also use conn_lock when updating things
90  * which affect the IP classifier lookup.
91  * The lock order is icmp_rwlock -> conn_lock.
92  *
93  * The icmp_rwlock:
94  * This protects most of the other fields in the icmp_t. The exact list of
95  * fields which are protected by each of the above locks is documented in
96  * the icmp_t structure definition.
97  *
98  * Plumbing notes:
99  * ICMP is always a device driver. For compatibility with mibopen() code
100  * it is possible to I_PUSH "icmp", but that results in pushing a passthrough
101  * dummy module.
102  */
103 
104 static void	icmp_addr_req(queue_t *q, mblk_t *mp);
105 static void	icmp_bind(queue_t *q, mblk_t *mp);
106 static void	icmp_bind_proto(queue_t *q);
107 static void	icmp_bind_result(conn_t *, mblk_t *);
108 static void	icmp_bind_ack(conn_t *, mblk_t *mp);
109 static void	icmp_bind_error(conn_t *, mblk_t *mp);
110 static int	icmp_build_hdrs(icmp_t *icmp);
111 static void	icmp_capability_req(queue_t *q, mblk_t *mp);
112 static int	icmp_close(queue_t *q);
113 static void	icmp_connect(queue_t *q, mblk_t *mp);
114 static void	icmp_disconnect(queue_t *q, mblk_t *mp);
115 static void	icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
116 		    int sys_error);
117 static void	icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
118 		    t_scalar_t t_error, int sys_error);
119 static void	icmp_icmp_error(queue_t *q, mblk_t *mp);
120 static void	icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
121 static void	icmp_info_req(queue_t *q, mblk_t *mp);
122 static void	icmp_input(void *, mblk_t *, void *);
123 static mblk_t	*icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim,
124 		    t_scalar_t addr_length, in_port_t);
125 static int	icmp_open(queue_t *q, dev_t *devp, int flag, int sflag,
126 		    cred_t *credp, boolean_t isv6);
127 static int	icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
128 		    cred_t *credp);
129 static int	icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
130 		    cred_t *credp);
131 static void	icmp_output(queue_t *q, mblk_t *mp);
132 static int	icmp_unitdata_opt_process(queue_t *q, mblk_t *mp,
133 		    int *errorp, void *thisdg_attrs);
134 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
135 int		icmp_opt_set(queue_t *q, uint_t optset_context,
136 		    int level, int name, uint_t inlen,
137 		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
138 		    void *thisdg_attrs, cred_t *cr, mblk_t *mblk);
139 int		icmp_opt_get(queue_t *q, int level, int name,
140 		    uchar_t *ptr);
141 static int	icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
142 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt);
143 static int	icmp_param_set(queue_t *q, mblk_t *mp, char *value,
144 		    caddr_t cp, cred_t *cr);
145 static int	icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
146 		    uchar_t *ptr, int len);
147 static int	icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
148 		    cred_t *cr);
149 static void	icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
150 static void	icmp_unbind(queue_t *q, mblk_t *mp);
151 static void	icmp_wput(queue_t *q, mblk_t *mp);
152 static void	icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6,
153 		    t_scalar_t tudr_optlen);
154 static void	icmp_wput_other(queue_t *q, mblk_t *mp);
155 static void	icmp_wput_iocdata(queue_t *q, mblk_t *mp);
156 static void	icmp_wput_restricted(queue_t *q, mblk_t *mp);
157 
158 static void	*rawip_stack_init(netstackid_t stackid, netstack_t *ns);
159 static void	rawip_stack_fini(netstackid_t stackid, void *arg);
160 
161 static void	*rawip_kstat_init(netstackid_t stackid);
162 static void	rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp);
163 static int	rawip_kstat_update(kstat_t *kp, int rw);
164 
165 
166 static struct module_info icmp_mod_info =  {
167 	5707, "icmp", 1, INFPSZ, 512, 128
168 };
169 
170 /*
171  * Entry points for ICMP as a device.
172  * We have separate open functions for the /dev/icmp and /dev/icmp6 devices.
173  */
174 static struct qinit icmprinitv4 = {
175 	NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info
176 };
177 
178 static struct qinit icmprinitv6 = {
179 	NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info
180 };
181 
182 static struct qinit icmpwinit = {
183 	(pfi_t)icmp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &icmp_mod_info
184 };
185 
186 /* For AF_INET aka /dev/icmp */
187 struct streamtab icmpinfov4 = {
188 	&icmprinitv4, &icmpwinit
189 };
190 
191 /* For AF_INET6 aka /dev/icmp6 */
192 struct streamtab icmpinfov6 = {
193 	&icmprinitv6, &icmpwinit
194 };
195 
196 static sin_t	sin_null;	/* Zero address for quick clears */
197 static sin6_t	sin6_null;	/* Zero address for quick clears */
198 
199 /* Default structure copied into T_INFO_ACK messages */
200 static struct T_info_ack icmp_g_t_info_ack = {
201 	T_INFO_ACK,
202 	IP_MAXPACKET,	 /* TSDU_size.  icmp allows maximum size messages. */
203 	T_INVALID,	/* ETSDU_size.  icmp does not support expedited data. */
204 	T_INVALID,	/* CDATA_size. icmp does not support connect data. */
205 	T_INVALID,	/* DDATA_size. icmp does not support disconnect data. */
206 	0,		/* ADDR_size - filled in later. */
207 	0,		/* OPT_size - not initialized here */
208 	IP_MAXPACKET,	/* TIDU_size.  icmp allows maximum size messages. */
209 	T_CLTS,		/* SERV_type.  icmp supports connection-less. */
210 	TS_UNBND,	/* CURRENT_state.  This is set from icmp_state. */
211 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
212 };
213 
214 /*
215  * Table of ND variables supported by icmp.  These are loaded into is_nd
216  * when the stack instance is created.
217  * All of these are alterable, within the min/max values given, at run time.
218  */
219 static icmpparam_t	icmp_param_arr[] = {
220 	/* min	max	value	name */
221 	{ 0,	128,	32,	"icmp_wroff_extra" },
222 	{ 1,	255,	255,	"icmp_ipv4_ttl" },
223 	{ 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS,	"icmp_ipv6_hoplimit"},
224 	{ 0,	1,	1,	"icmp_bsd_compat" },
225 	{ 4096,	65536,	8192,	"icmp_xmit_hiwat"},
226 	{ 0,	65536,	1024,	"icmp_xmit_lowat"},
227 	{ 4096,	65536,	8192,	"icmp_recv_hiwat"},
228 	{ 65536, 1024*1024*1024, 256*1024,	"icmp_max_buf"},
229 };
230 #define	is_wroff_extra			is_param_arr[0].icmp_param_value
231 #define	is_ipv4_ttl			is_param_arr[1].icmp_param_value
232 #define	is_ipv6_hoplimit		is_param_arr[2].icmp_param_value
233 #define	is_bsd_compat			is_param_arr[3].icmp_param_value
234 #define	is_xmit_hiwat			is_param_arr[4].icmp_param_value
235 #define	is_xmit_lowat			is_param_arr[5].icmp_param_value
236 #define	is_recv_hiwat			is_param_arr[6].icmp_param_value
237 #define	is_max_buf			is_param_arr[7].icmp_param_value
238 
239 /*
240  * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
241  * passed to icmp_wput.
242  * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP
243  * protocol type placed in the message following the address. A T_BIND_ACK
244  * message is returned by ip_bind_v4/v6.
245  */
246 static void
247 icmp_bind(queue_t *q, mblk_t *mp)
248 {
249 	sin_t	*sin;
250 	sin6_t	*sin6;
251 	mblk_t	*mp1;
252 	struct T_bind_req	*tbr;
253 	icmp_t	*icmp;
254 	conn_t	*connp = Q_TO_CONN(q);
255 
256 	icmp = connp->conn_icmp;
257 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
258 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
259 		    "icmp_bind: bad req, len %u",
260 		    (uint_t)(mp->b_wptr - mp->b_rptr));
261 		icmp_err_ack(q, mp, TPROTO, 0);
262 		return;
263 	}
264 	if (icmp->icmp_state != TS_UNBND) {
265 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
266 		    "icmp_bind: bad state, %d", icmp->icmp_state);
267 		icmp_err_ack(q, mp, TOUTSTATE, 0);
268 		return;
269 	}
270 	/*
271 	 * Reallocate the message to make sure we have enough room for an
272 	 * address and the protocol type.
273 	 */
274 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
275 	if (!mp1) {
276 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
277 		return;
278 	}
279 	mp = mp1;
280 	tbr = (struct T_bind_req *)mp->b_rptr;
281 	switch (tbr->ADDR_length) {
282 	case 0:			/* Generic request */
283 		tbr->ADDR_offset = sizeof (struct T_bind_req);
284 		if (icmp->icmp_family == AF_INET) {
285 			tbr->ADDR_length = sizeof (sin_t);
286 			sin = (sin_t *)&tbr[1];
287 			*sin = sin_null;
288 			sin->sin_family = AF_INET;
289 			mp->b_wptr = (uchar_t *)&sin[1];
290 		} else {
291 			ASSERT(icmp->icmp_family == AF_INET6);
292 			tbr->ADDR_length = sizeof (sin6_t);
293 			sin6 = (sin6_t *)&tbr[1];
294 			*sin6 = sin6_null;
295 			sin6->sin6_family = AF_INET6;
296 			mp->b_wptr = (uchar_t *)&sin6[1];
297 		}
298 		break;
299 	case sizeof (sin_t):	/* Complete IP address */
300 		sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset,
301 		    sizeof (sin_t));
302 		if (sin == NULL || !OK_32PTR((char *)sin)) {
303 			icmp_err_ack(q, mp, TSYSERR, EINVAL);
304 			return;
305 		}
306 		if (icmp->icmp_family != AF_INET ||
307 		    sin->sin_family != AF_INET) {
308 			icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
309 			return;
310 		}
311 		break;
312 	case sizeof (sin6_t):	/* Complete IP address */
313 		sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset,
314 		    sizeof (sin6_t));
315 		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
316 			icmp_err_ack(q, mp, TSYSERR, EINVAL);
317 			return;
318 		}
319 		if (icmp->icmp_family != AF_INET6 ||
320 		    sin6->sin6_family != AF_INET6) {
321 			icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
322 			return;
323 		}
324 		/* No support for mapped addresses on raw sockets */
325 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
326 			icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL);
327 			return;
328 		}
329 		break;
330 	default:
331 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
332 		    "icmp_bind: bad ADDR_length %d", tbr->ADDR_length);
333 		icmp_err_ack(q, mp, TBADADDR, 0);
334 		return;
335 	}
336 
337 	/*
338 	 * The state must be TS_UNBND. TPI mandates that users must send
339 	 * TPI primitives only 1 at a time and wait for the response before
340 	 * sending the next primitive.
341 	 */
342 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
343 	if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) {
344 		rw_exit(&icmp->icmp_rwlock);
345 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
346 		    "icmp_bind: bad state, %d", icmp->icmp_state);
347 		icmp_err_ack(q, mp, TOUTSTATE, 0);
348 		return;
349 	}
350 
351 	icmp->icmp_pending_op = tbr->PRIM_type;
352 
353 	/*
354 	 * Copy the source address into our icmp structure.  This address
355 	 * may still be zero; if so, ip will fill in the correct address
356 	 * each time an outbound packet is passed to it.
357 	 * If we are binding to a broadcast or multicast address then
358 	 * icmp_bind_ack will clear the source address when it receives
359 	 * the T_BIND_ACK.
360 	 */
361 	icmp->icmp_state = TS_IDLE;
362 
363 	if (icmp->icmp_family == AF_INET) {
364 		ASSERT(sin != NULL);
365 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
366 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr,
367 		    &icmp->icmp_v6src);
368 		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
369 		    icmp->icmp_ip_snd_options_len;
370 		icmp->icmp_bound_v6src = icmp->icmp_v6src;
371 	} else {
372 		int error;
373 
374 		ASSERT(sin6 != NULL);
375 		ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
376 		icmp->icmp_v6src = sin6->sin6_addr;
377 		icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len;
378 		icmp->icmp_bound_v6src = icmp->icmp_v6src;
379 
380 		/* Rebuild the header template */
381 		error = icmp_build_hdrs(icmp);
382 		if (error != 0) {
383 			icmp->icmp_pending_op = -1;
384 			rw_exit(&icmp->icmp_rwlock);
385 			icmp_err_ack(q, mp, TSYSERR, error);
386 			return;
387 		}
388 	}
389 	/*
390 	 * Place protocol type in the O_T_BIND_REQ/T_BIND_REQ following
391 	 * the address.
392 	 */
393 	*mp->b_wptr++ = icmp->icmp_proto;
394 	if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) {
395 		/*
396 		 * Append a request for an IRE if src not 0 (INADDR_ANY)
397 		 */
398 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
399 		if (!mp->b_cont) {
400 			icmp->icmp_pending_op = -1;
401 			rw_exit(&icmp->icmp_rwlock);
402 			icmp_err_ack(q, mp, TSYSERR, ENOMEM);
403 			return;
404 		}
405 		mp->b_cont->b_wptr += sizeof (ire_t);
406 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
407 	}
408 	rw_exit(&icmp->icmp_rwlock);
409 
410 	/* Pass the O_T_BIND_REQ/T_BIND_REQ to ip. */
411 	if (icmp->icmp_family == AF_INET6)
412 		mp = ip_bind_v6(q, mp, connp, NULL);
413 	else
414 		mp = ip_bind_v4(q, mp, connp);
415 
416 	/* The above return NULL if the bind needs to be deferred */
417 	if (mp != NULL)
418 		icmp_bind_result(connp, mp);
419 	else
420 		CONN_INC_REF(connp);
421 }
422 
423 /*
424  * Send message to IP to just bind to the protocol.
425  */
426 static void
427 icmp_bind_proto(queue_t *q)
428 {
429 	mblk_t	*mp;
430 	struct T_bind_req	*tbr;
431 	icmp_t	*icmp;
432 	conn_t	*connp = Q_TO_CONN(q);
433 
434 	icmp = connp->conn_icmp;
435 
436 	mp = allocb(sizeof (struct T_bind_req) + sizeof (sin6_t) + 1,
437 	    BPRI_MED);
438 	if (!mp) {
439 		return;
440 	}
441 	mp->b_datap->db_type = M_PROTO;
442 	tbr = (struct T_bind_req *)mp->b_rptr;
443 	tbr->PRIM_type = O_T_BIND_REQ; /* change to T_BIND_REQ ? */
444 	tbr->ADDR_offset = sizeof (struct T_bind_req);
445 
446 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
447 	if (icmp->icmp_ipversion == IPV4_VERSION) {
448 		sin_t	*sin;
449 
450 		tbr->ADDR_length = sizeof (sin_t);
451 		sin = (sin_t *)&tbr[1];
452 		*sin = sin_null;
453 		sin->sin_family = AF_INET;
454 		mp->b_wptr = (uchar_t *)&sin[1];
455 	} else {
456 		sin6_t	*sin6;
457 
458 		ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
459 		tbr->ADDR_length = sizeof (sin6_t);
460 		sin6 = (sin6_t *)&tbr[1];
461 		*sin6 = sin6_null;
462 		sin6->sin6_family = AF_INET6;
463 		mp->b_wptr = (uchar_t *)&sin6[1];
464 	}
465 
466 	/* Place protocol type in the O_T_BIND_REQ following the address. */
467 	*mp->b_wptr++ = icmp->icmp_proto;
468 	rw_exit(&icmp->icmp_rwlock);
469 
470 	/* Pass the O_T_BIND_REQ to ip. */
471 	if (icmp->icmp_family == AF_INET6)
472 		mp = ip_bind_v6(q, mp, connp, NULL);
473 	else
474 		mp = ip_bind_v4(q, mp, connp);
475 
476 	/* The above return NULL if the bind needs to be deferred */
477 	if (mp != NULL)
478 		icmp_bind_result(connp, mp);
479 	else
480 		CONN_INC_REF(connp);
481 }
482 
483 /*
484  * This is called from ip_wput_nondata to handle the results of a
485  * deferred RAWIP bind.  It is called once the bind has been completed.
486  */
487 void
488 rawip_resume_bind(conn_t *connp, mblk_t *mp)
489 {
490 	ASSERT(connp != NULL && IPCL_IS_RAWIP(connp));
491 
492 	icmp_bind_result(connp, mp);
493 
494 	CONN_OPER_PENDING_DONE(connp);
495 }
496 
497 /*
498  * This routine handles each T_CONN_REQ message passed to icmp.  It
499  * associates a default destination address with the stream.
500  *
501  * This routine sends down a T_BIND_REQ to IP with the following mblks:
502  *	T_BIND_REQ	- specifying local and remote address.
503  *	IRE_DB_REQ_TYPE	- to get an IRE back containing ire_type and src
504  *	T_OK_ACK	- for the T_CONN_REQ
505  *	T_CONN_CON	- to keep the TPI user happy
506  *
507  * The connect completes in icmp_bind_result.
508  * When a T_BIND_ACK is received information is extracted from the IRE
509  * and the two appended messages are sent to the TPI user.
510  * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
511  * convert it to an error ack for the appropriate primitive.
512  */
513 static void
514 icmp_connect(queue_t *q, mblk_t *mp)
515 {
516 	sin_t	*sin;
517 	sin6_t	*sin6;
518 	mblk_t	*mp1, *mp2;
519 	struct T_conn_req	*tcr;
520 	icmp_t	*icmp;
521 	ipaddr_t	v4dst;
522 	in6_addr_t	v6dst;
523 	uint32_t	flowinfo;
524 	conn_t	*connp = Q_TO_CONN(q);
525 
526 	icmp = connp->conn_icmp;
527 	tcr = (struct T_conn_req *)mp->b_rptr;
528 	/* Sanity checks */
529 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
530 		icmp_err_ack(q, mp, TPROTO, 0);
531 		return;
532 	}
533 
534 	if (tcr->OPT_length != 0) {
535 		icmp_err_ack(q, mp, TBADOPT, 0);
536 		return;
537 	}
538 
539 	switch (tcr->DEST_length) {
540 	default:
541 		icmp_err_ack(q, mp, TBADADDR, 0);
542 		return;
543 
544 	case sizeof (sin_t):
545 		sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
546 		    sizeof (sin_t));
547 		if (sin == NULL || !OK_32PTR((char *)sin)) {
548 			icmp_err_ack(q, mp, TSYSERR, EINVAL);
549 			return;
550 		}
551 		if (icmp->icmp_family != AF_INET ||
552 		    sin->sin_family != AF_INET) {
553 			icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
554 			return;
555 		}
556 		v4dst = sin->sin_addr.s_addr;
557 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
558 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
559 		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
560 		    icmp->icmp_ip_snd_options_len;
561 		break;
562 
563 	case sizeof (sin6_t):
564 		sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset,
565 		    sizeof (sin6_t));
566 		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
567 			icmp_err_ack(q, mp, TSYSERR, EINVAL);
568 			return;
569 		}
570 		if (icmp->icmp_family != AF_INET6 ||
571 		    sin6->sin6_family != AF_INET6) {
572 			icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
573 			return;
574 		}
575 		/* No support for mapped addresses on raw sockets */
576 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
577 			icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL);
578 			return;
579 		}
580 		v6dst = sin6->sin6_addr;
581 		ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
582 		icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len;
583 		flowinfo = sin6->sin6_flowinfo;
584 		break;
585 	}
586 	if (icmp->icmp_ipversion == IPV4_VERSION) {
587 		/*
588 		 * Interpret a zero destination to mean loopback.
589 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
590 		 * generate the T_CONN_CON.
591 		 */
592 		if (v4dst == INADDR_ANY) {
593 			v4dst = htonl(INADDR_LOOPBACK);
594 			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
595 			if (icmp->icmp_family == AF_INET) {
596 				sin->sin_addr.s_addr = v4dst;
597 			} else {
598 				sin6->sin6_addr = v6dst;
599 			}
600 		}
601 		icmp->icmp_v6dst = v6dst;
602 		icmp->icmp_flowinfo = 0;
603 
604 		/*
605 		 * If the destination address is multicast and
606 		 * an outgoing multicast interface has been set,
607 		 * use the address of that interface as our
608 		 * source address if no source address has been set.
609 		 */
610 		if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY &&
611 		    CLASSD(v4dst) &&
612 		    icmp->icmp_multicast_if_addr != INADDR_ANY) {
613 			IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr,
614 			    &icmp->icmp_v6src);
615 		}
616 	} else {
617 		ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
618 		/*
619 		 * Interpret a zero destination to mean loopback.
620 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
621 		 * generate the T_CONN_CON.
622 		 */
623 		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
624 			v6dst = ipv6_loopback;
625 			sin6->sin6_addr = v6dst;
626 		}
627 		icmp->icmp_v6dst = v6dst;
628 		icmp->icmp_flowinfo = flowinfo;
629 		/*
630 		 * If the destination address is multicast and
631 		 * an outgoing multicast interface has been set,
632 		 * then the ip bind logic will pick the correct source
633 		 * address (i.e. matching the outgoing multicast interface).
634 		 */
635 	}
636 
637 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
638 	if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) {
639 		rw_exit(&icmp->icmp_rwlock);
640 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
641 		    "icmp_connect: bad state, %d", icmp->icmp_state);
642 		icmp_err_ack(q, mp, TOUTSTATE, 0);
643 		return;
644 	}
645 	icmp->icmp_pending_op = T_CONN_REQ;
646 
647 	if (icmp->icmp_state == TS_DATA_XFER) {
648 		/* Already connected - clear out state */
649 		icmp->icmp_v6src = icmp->icmp_bound_v6src;
650 		icmp->icmp_state = TS_IDLE;
651 	}
652 
653 	/*
654 	 * Send down bind to IP to verify that there is a route
655 	 * and to determine the source address.
656 	 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput.
657 	 */
658 	if (icmp->icmp_family == AF_INET) {
659 		mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa_conn_t),
660 		    sin->sin_port);
661 	} else {
662 		ASSERT(icmp->icmp_family == AF_INET6);
663 		mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa6_conn_t),
664 		    sin6->sin6_port);
665 	}
666 	if (mp1 == NULL) {
667 		icmp->icmp_pending_op = -1;
668 		rw_exit(&icmp->icmp_rwlock);
669 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
670 		return;
671 	}
672 
673 	/*
674 	 * We also have to send a connection confirmation to
675 	 * keep TLI happy. Prepare it for icmp_bind_result.
676 	 */
677 	if (icmp->icmp_family == AF_INET) {
678 		mp2 = mi_tpi_conn_con(NULL, (char *)sin, sizeof (*sin), NULL,
679 		    0);
680 	} else {
681 		ASSERT(icmp->icmp_family == AF_INET6);
682 		mp2 = mi_tpi_conn_con(NULL, (char *)sin6, sizeof (*sin6), NULL,
683 		    0);
684 	}
685 	if (mp2 == NULL) {
686 		freemsg(mp1);
687 		icmp->icmp_pending_op = -1;
688 		rw_exit(&icmp->icmp_rwlock);
689 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
690 		return;
691 	}
692 
693 	mp = mi_tpi_ok_ack_alloc(mp);
694 	if (mp == NULL) {
695 		/* Unable to reuse the T_CONN_REQ for the ack. */
696 		freemsg(mp2);
697 		icmp->icmp_pending_op = -1;
698 		rw_exit(&icmp->icmp_rwlock);
699 		icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
700 		return;
701 	}
702 
703 	icmp->icmp_state = TS_DATA_XFER;
704 	rw_exit(&icmp->icmp_rwlock);
705 
706 	/* Hang onto the T_OK_ACK and T_CONN_CON for later. */
707 	linkb(mp1, mp);
708 	linkb(mp1, mp2);
709 
710 	mblk_setcred(mp1, connp->conn_cred);
711 	if (icmp->icmp_family == AF_INET)
712 		mp1 = ip_bind_v4(q, mp1, connp);
713 	else
714 		mp1 = ip_bind_v6(q, mp1, connp, NULL);
715 
716 	/* The above return NULL if the bind needs to be deferred */
717 	if (mp1 != NULL)
718 		icmp_bind_result(connp, mp1);
719 	else
720 		CONN_INC_REF(connp);
721 }
722 
723 static void
724 icmp_close_free(conn_t *connp)
725 {
726 	icmp_t *icmp = connp->conn_icmp;
727 
728 	/* If there are any options associated with the stream, free them. */
729 	if (icmp->icmp_ip_snd_options)
730 		mi_free((char *)icmp->icmp_ip_snd_options);
731 
732 	if (icmp->icmp_filter != NULL)
733 		kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t));
734 
735 	/* Free memory associated with sticky options */
736 	if (icmp->icmp_sticky_hdrs_len != 0) {
737 		kmem_free(icmp->icmp_sticky_hdrs,
738 		    icmp->icmp_sticky_hdrs_len);
739 		icmp->icmp_sticky_hdrs = NULL;
740 		icmp->icmp_sticky_hdrs_len = 0;
741 	}
742 	ip6_pkt_free(&icmp->icmp_sticky_ipp);
743 }
744 
745 static int
746 icmp_close(queue_t *q)
747 {
748 	conn_t	*connp = (conn_t *)q->q_ptr;
749 
750 	ASSERT(connp != NULL && IPCL_IS_RAWIP(connp));
751 
752 	ip_quiesce_conn(connp);
753 
754 	qprocsoff(connp->conn_rq);
755 
756 	icmp_close_free(connp);
757 
758 	/*
759 	 * Now we are truly single threaded on this stream, and can
760 	 * delete the things hanging off the connp, and finally the connp.
761 	 * We removed this connp from the fanout list, it cannot be
762 	 * accessed thru the fanouts, and we already waited for the
763 	 * conn_ref to drop to 0. We are already in close, so
764 	 * there cannot be any other thread from the top. qprocsoff
765 	 * has completed, and service has completed or won't run in
766 	 * future.
767 	 */
768 	ASSERT(connp->conn_ref == 1);
769 
770 	inet_minor_free(ip_minor_arena, connp->conn_dev);
771 
772 	connp->conn_ref--;
773 	ipcl_conn_destroy(connp);
774 
775 	q->q_ptr = WR(q)->q_ptr = NULL;
776 	return (0);
777 }
778 
779 /*
780  * This routine handles each T_DISCON_REQ message passed to icmp
781  * as an indicating that ICMP is no longer connected. This results
782  * in sending a T_BIND_REQ to IP to restore the binding to just
783  * the local address.
784  *
785  * This routine sends down a T_BIND_REQ to IP with the following mblks:
786  *	T_BIND_REQ	- specifying just the local address.
787  *	T_OK_ACK	- for the T_DISCON_REQ
788  *
789  * The disconnect completes in icmp_bind_result.
790  * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user.
791  * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
792  * convert it to an error ack for the appropriate primitive.
793  */
794 static void
795 icmp_disconnect(queue_t *q, mblk_t *mp)
796 {
797 	icmp_t	*icmp;
798 	mblk_t	*mp1;
799 	conn_t	*connp = Q_TO_CONN(q);
800 
801 	icmp = connp->conn_icmp;
802 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
803 	if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) {
804 		rw_exit(&icmp->icmp_rwlock);
805 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
806 		    "icmp_disconnect: bad state, %d", icmp->icmp_state);
807 		icmp_err_ack(q, mp, TOUTSTATE, 0);
808 		return;
809 	}
810 	icmp->icmp_pending_op = T_DISCON_REQ;
811 	icmp->icmp_v6src = icmp->icmp_bound_v6src;
812 	icmp->icmp_state = TS_IDLE;
813 
814 	/*
815 	 * Send down bind to IP to remove the full binding and revert
816 	 * to the local address binding.
817 	 */
818 	if (icmp->icmp_family == AF_INET) {
819 		mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin_t), 0);
820 	} else {
821 		ASSERT(icmp->icmp_family == AF_INET6);
822 		mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin6_t), 0);
823 	}
824 	if (mp1 == NULL) {
825 		icmp->icmp_pending_op = -1;
826 		rw_exit(&icmp->icmp_rwlock);
827 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
828 		return;
829 	}
830 	mp = mi_tpi_ok_ack_alloc(mp);
831 	if (mp == NULL) {
832 		/* Unable to reuse the T_DISCON_REQ for the ack. */
833 		icmp->icmp_pending_op = -1;
834 		rw_exit(&icmp->icmp_rwlock);
835 		icmp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM);
836 		return;
837 	}
838 
839 	if (icmp->icmp_family == AF_INET6) {
840 		int error;
841 
842 		/* Rebuild the header template */
843 		error = icmp_build_hdrs(icmp);
844 		if (error != 0) {
845 			icmp->icmp_pending_op = -1;
846 			rw_exit(&icmp->icmp_rwlock);
847 			icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error);
848 			freemsg(mp1);
849 			return;
850 		}
851 	}
852 
853 	rw_exit(&icmp->icmp_rwlock);
854 	/* Append the T_OK_ACK to the T_BIND_REQ for icmp_bind_result */
855 	linkb(mp1, mp);
856 
857 	if (icmp->icmp_family == AF_INET6)
858 		mp1 = ip_bind_v6(q, mp1, connp, NULL);
859 	else
860 		mp1 = ip_bind_v4(q, mp1, connp);
861 
862 	/* The above return NULL if the bind needs to be deferred */
863 	if (mp1 != NULL)
864 		icmp_bind_result(connp, mp1);
865 	else
866 		CONN_INC_REF(connp);
867 }
868 
869 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
870 static void
871 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
872 {
873 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
874 		qreply(q, mp);
875 }
876 
877 /* Shorthand to generate and send TPI error acks to our client */
878 static void
879 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
880     t_scalar_t t_error, int sys_error)
881 {
882 	struct T_error_ack	*teackp;
883 
884 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
885 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
886 		teackp = (struct T_error_ack *)mp->b_rptr;
887 		teackp->ERROR_prim = primitive;
888 		teackp->TLI_error = t_error;
889 		teackp->UNIX_error = sys_error;
890 		qreply(q, mp);
891 	}
892 }
893 
894 /*
895  * icmp_icmp_error is called by icmp_input to process ICMP
896  * messages passed up by IP.
897  * Generates the appropriate T_UDERROR_IND for permanent
898  * (non-transient) errors.
899  * Assumes that IP has pulled up everything up to and including
900  * the ICMP header.
901  */
902 static void
903 icmp_icmp_error(queue_t *q, mblk_t *mp)
904 {
905 	icmph_t *icmph;
906 	ipha_t	*ipha;
907 	int	iph_hdr_length;
908 	sin_t	sin;
909 	sin6_t	sin6;
910 	mblk_t	*mp1;
911 	int	error = 0;
912 	icmp_t	*icmp = Q_TO_ICMP(q);
913 
914 	ipha = (ipha_t *)mp->b_rptr;
915 
916 	ASSERT(OK_32PTR(mp->b_rptr));
917 
918 	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
919 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
920 		icmp_icmp_error_ipv6(q, mp);
921 		return;
922 	}
923 	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
924 
925 	/* Skip past the outer IP and ICMP headers */
926 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
927 	icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]);
928 	ipha = (ipha_t *)&icmph[1];
929 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
930 
931 	switch (icmph->icmph_type) {
932 	case ICMP_DEST_UNREACHABLE:
933 		switch (icmph->icmph_code) {
934 		case ICMP_FRAGMENTATION_NEEDED:
935 			/*
936 			 * IP has already adjusted the path MTU.
937 			 */
938 			break;
939 		case ICMP_PORT_UNREACHABLE:
940 		case ICMP_PROTOCOL_UNREACHABLE:
941 			error = ECONNREFUSED;
942 			break;
943 		default:
944 			/* Transient errors */
945 			break;
946 		}
947 		break;
948 	default:
949 		/* Transient errors */
950 		break;
951 	}
952 	if (error == 0) {
953 		freemsg(mp);
954 		return;
955 	}
956 
957 	/*
958 	 * Deliver T_UDERROR_IND when the application has asked for it.
959 	 * The socket layer enables this automatically when connected.
960 	 */
961 	if (!icmp->icmp_dgram_errind) {
962 		freemsg(mp);
963 		return;
964 	}
965 
966 	switch (icmp->icmp_family) {
967 	case AF_INET:
968 		sin = sin_null;
969 		sin.sin_family = AF_INET;
970 		sin.sin_addr.s_addr = ipha->ipha_dst;
971 		mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0,
972 		    error);
973 		break;
974 	case AF_INET6:
975 		sin6 = sin6_null;
976 		sin6.sin6_family = AF_INET6;
977 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
978 
979 		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
980 		    NULL, 0, error);
981 		break;
982 	}
983 	if (mp1)
984 		putnext(q, mp1);
985 	freemsg(mp);
986 }
987 
988 /*
989  * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6
990  * for IPv6 packets.
991  * Send permanent (non-transient) errors upstream.
992  * Assumes that IP has pulled up all the extension headers as well
993  * as the ICMPv6 header.
994  */
995 static void
996 icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp)
997 {
998 	icmp6_t		*icmp6;
999 	ip6_t		*ip6h, *outer_ip6h;
1000 	uint16_t	iph_hdr_length;
1001 	uint8_t		*nexthdrp;
1002 	sin6_t		sin6;
1003 	mblk_t		*mp1;
1004 	int		error = 0;
1005 	icmp_t		*icmp = Q_TO_ICMP(q);
1006 
1007 	outer_ip6h = (ip6_t *)mp->b_rptr;
1008 	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1009 		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1010 	else
1011 		iph_hdr_length = IPV6_HDR_LEN;
1012 
1013 	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
1014 	ip6h = (ip6_t *)&icmp6[1];
1015 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1016 		freemsg(mp);
1017 		return;
1018 	}
1019 
1020 	switch (icmp6->icmp6_type) {
1021 	case ICMP6_DST_UNREACH:
1022 		switch (icmp6->icmp6_code) {
1023 		case ICMP6_DST_UNREACH_NOPORT:
1024 			error = ECONNREFUSED;
1025 			break;
1026 		case ICMP6_DST_UNREACH_ADMIN:
1027 		case ICMP6_DST_UNREACH_NOROUTE:
1028 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
1029 		case ICMP6_DST_UNREACH_ADDR:
1030 			/* Transient errors */
1031 			break;
1032 		default:
1033 			break;
1034 		}
1035 		break;
1036 	case ICMP6_PACKET_TOO_BIG: {
1037 		struct T_unitdata_ind	*tudi;
1038 		struct T_opthdr		*toh;
1039 		size_t			udi_size;
1040 		mblk_t			*newmp;
1041 		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
1042 		    sizeof (struct ip6_mtuinfo);
1043 		sin6_t			*sin6;
1044 		struct ip6_mtuinfo	*mtuinfo;
1045 
1046 		/*
1047 		 * If the application has requested to receive path mtu
1048 		 * information, send up an empty message containing an
1049 		 * IPV6_PATHMTU ancillary data item.
1050 		 */
1051 		if (!icmp->icmp_ipv6_recvpathmtu)
1052 			break;
1053 
1054 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1055 		    opt_length;
1056 		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
1057 			BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors);
1058 			break;
1059 		}
1060 
1061 		/*
1062 		 * newmp->b_cont is left to NULL on purpose.  This is an
1063 		 * empty message containing only ancillary data.
1064 		 */
1065 		newmp->b_datap->db_type = M_PROTO;
1066 		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1067 		newmp->b_wptr = (uchar_t *)tudi + udi_size;
1068 		tudi->PRIM_type = T_UNITDATA_IND;
1069 		tudi->SRC_length = sizeof (sin6_t);
1070 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1071 		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1072 		tudi->OPT_length = opt_length;
1073 
1074 		sin6 = (sin6_t *)&tudi[1];
1075 		bzero(sin6, sizeof (sin6_t));
1076 		sin6->sin6_family = AF_INET6;
1077 		sin6->sin6_addr = icmp->icmp_v6dst;
1078 
1079 		toh = (struct T_opthdr *)&sin6[1];
1080 		toh->level = IPPROTO_IPV6;
1081 		toh->name = IPV6_PATHMTU;
1082 		toh->len = opt_length;
1083 		toh->status = 0;
1084 
1085 		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1086 		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1087 		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1088 		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1089 		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1090 		/*
1091 		 * We've consumed everything we need from the original
1092 		 * message.  Free it, then send our empty message.
1093 		 */
1094 		freemsg(mp);
1095 		putnext(q, newmp);
1096 		return;
1097 	}
1098 	case ICMP6_TIME_EXCEEDED:
1099 		/* Transient errors */
1100 		break;
1101 	case ICMP6_PARAM_PROB:
1102 		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1103 		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1104 		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1105 		    (uchar_t *)nexthdrp) {
1106 			error = ECONNREFUSED;
1107 			break;
1108 		}
1109 		break;
1110 	}
1111 	if (error == 0) {
1112 		freemsg(mp);
1113 		return;
1114 	}
1115 
1116 	/*
1117 	 * Deliver T_UDERROR_IND when the application has asked for it.
1118 	 * The socket layer enables this automatically when connected.
1119 	 */
1120 	if (!icmp->icmp_dgram_errind) {
1121 		freemsg(mp);
1122 		return;
1123 	}
1124 
1125 	sin6 = sin6_null;
1126 	sin6.sin6_family = AF_INET6;
1127 	sin6.sin6_addr = ip6h->ip6_dst;
1128 	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
1129 
1130 	mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0,
1131 	    error);
1132 	if (mp1)
1133 		putnext(q, mp1);
1134 	freemsg(mp);
1135 }
1136 
1137 /*
1138  * This routine responds to T_ADDR_REQ messages.  It is called by icmp_wput.
1139  * The local address is filled in if endpoint is bound. The remote address
1140  * is filled in if remote address has been precified ("connected endpoint")
1141  * (The concept of connected CLTS sockets is alien to published TPI
1142  *  but we support it anyway).
1143  */
1144 static void
1145 icmp_addr_req(queue_t *q, mblk_t *mp)
1146 {
1147 	icmp_t	*icmp = Q_TO_ICMP(q);
1148 	mblk_t	*ackmp;
1149 	struct T_addr_ack *taa;
1150 
1151 	/* Make it large enough for worst case */
1152 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1153 	    2 * sizeof (sin6_t), 1);
1154 	if (ackmp == NULL) {
1155 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
1156 		return;
1157 	}
1158 	taa = (struct T_addr_ack *)ackmp->b_rptr;
1159 
1160 	bzero(taa, sizeof (struct T_addr_ack));
1161 	ackmp->b_wptr = (uchar_t *)&taa[1];
1162 
1163 	taa->PRIM_type = T_ADDR_ACK;
1164 	ackmp->b_datap->db_type = M_PCPROTO;
1165 	rw_enter(&icmp->icmp_rwlock, RW_READER);
1166 	/*
1167 	 * Note: Following code assumes 32 bit alignment of basic
1168 	 * data structures like sin_t and struct T_addr_ack.
1169 	 */
1170 	if (icmp->icmp_state != TS_UNBND) {
1171 		/*
1172 		 * Fill in local address
1173 		 */
1174 		taa->LOCADDR_offset = sizeof (*taa);
1175 		if (icmp->icmp_family == AF_INET) {
1176 			sin_t	*sin;
1177 
1178 			taa->LOCADDR_length = sizeof (sin_t);
1179 			sin = (sin_t *)&taa[1];
1180 			/* Fill zeroes and then intialize non-zero fields */
1181 			*sin = sin_null;
1182 			sin->sin_family = AF_INET;
1183 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) &&
1184 			    !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
1185 				IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src,
1186 				    sin->sin_addr.s_addr);
1187 			} else {
1188 				/*
1189 				 * INADDR_ANY
1190 				 * icmp_v6src is not set, we might be bound to
1191 				 * broadcast/multicast. Use icmp_bound_v6src as
1192 				 * local address instead (that could
1193 				 * also still be INADDR_ANY)
1194 				 */
1195 				IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src,
1196 				    sin->sin_addr.s_addr);
1197 			}
1198 			ackmp->b_wptr = (uchar_t *)&sin[1];
1199 		} else {
1200 			sin6_t	*sin6;
1201 
1202 			ASSERT(icmp->icmp_family == AF_INET6);
1203 			taa->LOCADDR_length = sizeof (sin6_t);
1204 			sin6 = (sin6_t *)&taa[1];
1205 			/* Fill zeroes and then intialize non-zero fields */
1206 			*sin6 = sin6_null;
1207 			sin6->sin6_family = AF_INET6;
1208 			if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
1209 				sin6->sin6_addr = icmp->icmp_v6src;
1210 			} else {
1211 				/*
1212 				 * UNSPECIFIED
1213 				 * icmp_v6src is not set, we might be bound to
1214 				 * broadcast/multicast. Use icmp_bound_v6src as
1215 				 * local address instead (that could
1216 				 * also still be UNSPECIFIED)
1217 				 */
1218 				sin6->sin6_addr = icmp->icmp_bound_v6src;
1219 			}
1220 			ackmp->b_wptr = (uchar_t *)&sin6[1];
1221 		}
1222 	}
1223 	rw_exit(&icmp->icmp_rwlock);
1224 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1225 	qreply(q, ackmp);
1226 }
1227 
1228 static void
1229 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp)
1230 {
1231 	*tap = icmp_g_t_info_ack;
1232 
1233 	if (icmp->icmp_family == AF_INET6)
1234 		tap->ADDR_size = sizeof (sin6_t);
1235 	else
1236 		tap->ADDR_size = sizeof (sin_t);
1237 	tap->CURRENT_state = icmp->icmp_state;
1238 	tap->OPT_size = icmp_max_optsize;
1239 }
1240 
1241 /*
1242  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
1243  * icmp_wput.  Much of the T_CAPABILITY_ACK information is copied from
1244  * icmp_g_t_info_ack.  The current state of the stream is copied from
1245  * icmp_state.
1246  */
1247 static void
1248 icmp_capability_req(queue_t *q, mblk_t *mp)
1249 {
1250 	icmp_t			*icmp = Q_TO_ICMP(q);
1251 	t_uscalar_t		cap_bits1;
1252 	struct T_capability_ack	*tcap;
1253 
1254 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1255 
1256 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1257 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
1258 	if (!mp)
1259 		return;
1260 
1261 	tcap = (struct T_capability_ack *)mp->b_rptr;
1262 	tcap->CAP_bits1 = 0;
1263 
1264 	if (cap_bits1 & TC1_INFO) {
1265 		icmp_copy_info(&tcap->INFO_ack, icmp);
1266 		tcap->CAP_bits1 |= TC1_INFO;
1267 	}
1268 
1269 	qreply(q, mp);
1270 }
1271 
1272 /*
1273  * This routine responds to T_INFO_REQ messages.  It is called by icmp_wput.
1274  * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack.
1275  * The current state of the stream is copied from icmp_state.
1276  */
1277 static void
1278 icmp_info_req(queue_t *q, mblk_t *mp)
1279 {
1280 	icmp_t	*icmp = Q_TO_ICMP(q);
1281 
1282 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1283 	    T_INFO_ACK);
1284 	if (!mp)
1285 		return;
1286 	icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp);
1287 	qreply(q, mp);
1288 }
1289 
1290 /*
1291  * IP recognizes seven kinds of bind requests:
1292  *
1293  * - A zero-length address binds only to the protocol number.
1294  *
1295  * - A 4-byte address is treated as a request to
1296  * validate that the address is a valid local IPv4
1297  * address, appropriate for an application to bind to.
1298  * IP does the verification, but does not make any note
1299  * of the address at this time.
1300  *
1301  * - A 16-byte address contains is treated as a request
1302  * to validate a local IPv6 address, as the 4-byte
1303  * address case above.
1304  *
1305  * - A 16-byte sockaddr_in to validate the local IPv4 address and also
1306  * use it for the inbound fanout of packets.
1307  *
1308  * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also
1309  * use it for the inbound fanout of packets.
1310  *
1311  * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout
1312  * information consisting of local and remote addresses
1313  * and ports (unused for raw sockets).  In this case, the addresses are both
1314  * validated as appropriate for this operation, and, if
1315  * so, the information is retained for use in the
1316  * inbound fanout.
1317  *
1318  * - A 36-byte address address (ipa6_conn_t) containing complete IPv6
1319  * fanout information, like the 12-byte case above.
1320  *
1321  * IP will also fill in the IRE request mblk with information
1322  * regarding our peer.  In all cases, we notify IP of our protocol
1323  * type by appending a single protocol byte to the bind request.
1324  */
1325 static mblk_t *
1326 icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length,
1327     in_port_t fport)
1328 {
1329 	char	*cp;
1330 	mblk_t	*mp;
1331 	struct T_bind_req *tbr;
1332 	ipa_conn_t	*ac;
1333 	ipa6_conn_t	*ac6;
1334 	sin_t		*sin;
1335 	sin6_t		*sin6;
1336 
1337 	ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ);
1338 	ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock));
1339 	mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI);
1340 	if (mp == NULL)
1341 		return (NULL);
1342 	mp->b_datap->db_type = M_PROTO;
1343 	tbr = (struct T_bind_req *)mp->b_rptr;
1344 	tbr->PRIM_type = bind_prim;
1345 	tbr->ADDR_offset = sizeof (*tbr);
1346 	tbr->CONIND_number = 0;
1347 	tbr->ADDR_length = addr_length;
1348 	cp = (char *)&tbr[1];
1349 	switch (addr_length) {
1350 	case sizeof (ipa_conn_t):
1351 		ASSERT(icmp->icmp_family == AF_INET);
1352 		/* Append a request for an IRE */
1353 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1354 		if (mp->b_cont == NULL) {
1355 			freemsg(mp);
1356 			return (NULL);
1357 		}
1358 		mp->b_cont->b_wptr += sizeof (ire_t);
1359 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1360 
1361 		/* cp known to be 32 bit aligned */
1362 		ac = (ipa_conn_t *)cp;
1363 		ac->ac_laddr = V4_PART_OF_V6(icmp->icmp_v6src);
1364 		ac->ac_faddr = V4_PART_OF_V6(icmp->icmp_v6dst);
1365 		ac->ac_fport = fport;
1366 		ac->ac_lport = 0;
1367 		break;
1368 
1369 	case sizeof (ipa6_conn_t):
1370 		ASSERT(icmp->icmp_family == AF_INET6);
1371 		/* Append a request for an IRE */
1372 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1373 		if (mp->b_cont == NULL) {
1374 			freemsg(mp);
1375 			return (NULL);
1376 		}
1377 		mp->b_cont->b_wptr += sizeof (ire_t);
1378 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1379 
1380 		/* cp known to be 32 bit aligned */
1381 		ac6 = (ipa6_conn_t *)cp;
1382 		ac6->ac6_laddr = icmp->icmp_v6src;
1383 		ac6->ac6_faddr = icmp->icmp_v6dst;
1384 		ac6->ac6_fport = fport;
1385 		ac6->ac6_lport = 0;
1386 		break;
1387 
1388 	case sizeof (sin_t):
1389 		ASSERT(icmp->icmp_family == AF_INET);
1390 		/* Append a request for an IRE */
1391 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1392 		if (!mp->b_cont) {
1393 			freemsg(mp);
1394 			return (NULL);
1395 		}
1396 		mp->b_cont->b_wptr += sizeof (ire_t);
1397 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1398 
1399 		sin = (sin_t *)cp;
1400 		*sin = sin_null;
1401 		sin->sin_family = AF_INET;
1402 		sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_bound_v6src);
1403 		break;
1404 
1405 	case sizeof (sin6_t):
1406 		ASSERT(icmp->icmp_family == AF_INET6);
1407 		/* Append a request for an IRE */
1408 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1409 		if (!mp->b_cont) {
1410 			freemsg(mp);
1411 			return (NULL);
1412 		}
1413 		mp->b_cont->b_wptr += sizeof (ire_t);
1414 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1415 
1416 		sin6 = (sin6_t *)cp;
1417 		*sin6 = sin6_null;
1418 		sin6->sin6_family = AF_INET6;
1419 		sin6->sin6_addr = icmp->icmp_bound_v6src;
1420 		break;
1421 	}
1422 	/* Add protocol number to end */
1423 	cp[addr_length] = icmp->icmp_proto;
1424 	mp->b_wptr = (uchar_t *)&cp[addr_length + 1];
1425 	return (mp);
1426 }
1427 
1428 /* For /dev/icmp aka AF_INET open */
1429 static int
1430 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1431 {
1432 	return (icmp_open(q, devp, flag, sflag, credp, B_FALSE));
1433 }
1434 
1435 /* For /dev/icmp6 aka AF_INET6 open */
1436 static int
1437 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1438 {
1439 	return (icmp_open(q, devp, flag, sflag, credp, B_TRUE));
1440 }
1441 
1442 /*
1443  * This is the open routine for icmp.  It allocates a icmp_t structure for
1444  * the stream and, on the first open of the module, creates an ND table.
1445  */
1446 /*ARGSUSED2*/
1447 static int
1448 icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1449     boolean_t isv6)
1450 {
1451 	int	err;
1452 	icmp_t	*icmp;
1453 	conn_t *connp;
1454 	dev_t	conn_dev;
1455 	zoneid_t zoneid;
1456 	netstack_t *ns;
1457 	icmp_stack_t *is;
1458 
1459 	/* If the stream is already open, return immediately. */
1460 	if (q->q_ptr != NULL)
1461 		return (0);
1462 
1463 	if (sflag == MODOPEN)
1464 		return (EINVAL);
1465 
1466 	ns = netstack_find_by_cred(credp);
1467 	ASSERT(ns != NULL);
1468 	is = ns->netstack_icmp;
1469 	ASSERT(is != NULL);
1470 
1471 	/*
1472 	 * For exclusive stacks we set the zoneid to zero
1473 	 * to make ICMP operate as if in the global zone.
1474 	 */
1475 	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
1476 		zoneid = GLOBAL_ZONEID;
1477 	else
1478 		zoneid = crgetzoneid(credp);
1479 
1480 	if ((conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) {
1481 		netstack_rele(ns);
1482 		return (EBUSY);
1483 	}
1484 	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1485 
1486 	connp = ipcl_conn_create(IPCL_RAWIPCONN, KM_SLEEP, ns);
1487 	connp->conn_dev = conn_dev;
1488 	icmp = connp->conn_icmp;
1489 
1490 	/*
1491 	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
1492 	 * done by netstack_find_by_cred()
1493 	 */
1494 	netstack_rele(ns);
1495 
1496 	/*
1497 	 * Initialize the icmp_t structure for this stream.
1498 	 */
1499 	q->q_ptr = connp;
1500 	WR(q)->q_ptr = connp;
1501 	connp->conn_rq = q;
1502 	connp->conn_wq = WR(q);
1503 
1504 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
1505 	ASSERT(connp->conn_ulp == IPPROTO_ICMP);
1506 	ASSERT(connp->conn_icmp == icmp);
1507 	ASSERT(icmp->icmp_connp == connp);
1508 
1509 	/* Set the initial state of the stream and the privilege status. */
1510 	icmp->icmp_state = TS_UNBND;
1511 	if (isv6) {
1512 		icmp->icmp_ipversion = IPV6_VERSION;
1513 		icmp->icmp_family = AF_INET6;
1514 		connp->conn_ulp = IPPROTO_ICMPV6;
1515 		/* May be changed by a SO_PROTOTYPE socket option. */
1516 		icmp->icmp_proto = IPPROTO_ICMPV6;
1517 		icmp->icmp_checksum_off = 2;	/* Offset for icmp6_cksum */
1518 		icmp->icmp_max_hdr_len = IPV6_HDR_LEN;
1519 		icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit;
1520 		connp->conn_af_isv6 = B_TRUE;
1521 		connp->conn_flags |= IPCL_ISV6;
1522 	} else {
1523 		icmp->icmp_ipversion = IPV4_VERSION;
1524 		icmp->icmp_family = AF_INET;
1525 		/* May be changed by a SO_PROTOTYPE socket option. */
1526 		icmp->icmp_proto = IPPROTO_ICMP;
1527 		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH;
1528 		icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl;
1529 		connp->conn_af_isv6 = B_FALSE;
1530 		connp->conn_flags &= ~IPCL_ISV6;
1531 	}
1532 	icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1533 	icmp->icmp_pending_op = -1;
1534 	connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1535 	connp->conn_zoneid = zoneid;
1536 
1537 	/*
1538 	 * If the caller has the process-wide flag set, then default to MAC
1539 	 * exempt mode.  This allows read-down to unlabeled hosts.
1540 	 */
1541 	if (getpflags(NET_MAC_AWARE, credp) != 0)
1542 		icmp->icmp_mac_exempt = B_TRUE;
1543 
1544 	connp->conn_ulp_labeled = is_system_labeled();
1545 
1546 	icmp->icmp_is = is;
1547 
1548 	q->q_hiwat = is->is_recv_hiwat;
1549 	WR(q)->q_hiwat = is->is_xmit_hiwat;
1550 	WR(q)->q_lowat = is->is_xmit_lowat;
1551 
1552 	connp->conn_recv = icmp_input;
1553 	crhold(credp);
1554 	connp->conn_cred = credp;
1555 
1556 	mutex_enter(&connp->conn_lock);
1557 	connp->conn_state_flags &= ~CONN_INCIPIENT;
1558 	mutex_exit(&connp->conn_lock);
1559 
1560 	qprocson(q);
1561 
1562 	if (icmp->icmp_family == AF_INET6) {
1563 		/* Build initial header template for transmit */
1564 		if ((err = icmp_build_hdrs(icmp)) != 0) {
1565 			rw_exit(&icmp->icmp_rwlock);
1566 			qprocsoff(q);
1567 			ipcl_conn_destroy(connp);
1568 			return (err);
1569 		}
1570 	}
1571 	rw_exit(&icmp->icmp_rwlock);
1572 
1573 	/* Set the Stream head write offset. */
1574 	(void) mi_set_sth_wroff(q,
1575 	    icmp->icmp_max_hdr_len + is->is_wroff_extra);
1576 	(void) mi_set_sth_hiwat(q, q->q_hiwat);
1577 
1578 	return (0);
1579 }
1580 
1581 /*
1582  * Which ICMP options OK to set through T_UNITDATA_REQ...
1583  */
1584 /* ARGSUSED */
1585 static boolean_t
1586 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1587 {
1588 	return (B_TRUE);
1589 }
1590 
1591 /*
1592  * This routine gets default values of certain options whose default
1593  * values are maintained by protcol specific code
1594  */
1595 /* ARGSUSED */
1596 int
1597 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
1598 {
1599 	icmp_t *icmp = Q_TO_ICMP(q);
1600 	icmp_stack_t *is = icmp->icmp_is;
1601 	int *i1 = (int *)ptr;
1602 
1603 	switch (level) {
1604 	case IPPROTO_IP:
1605 		switch (name) {
1606 		case IP_MULTICAST_TTL:
1607 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1608 			return (sizeof (uchar_t));
1609 		case IP_MULTICAST_LOOP:
1610 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1611 			return (sizeof (uchar_t));
1612 		}
1613 		break;
1614 	case IPPROTO_IPV6:
1615 		switch (name) {
1616 		case IPV6_MULTICAST_HOPS:
1617 			*i1 = IP_DEFAULT_MULTICAST_TTL;
1618 			return (sizeof (int));
1619 		case IPV6_MULTICAST_LOOP:
1620 			*i1 = IP_DEFAULT_MULTICAST_LOOP;
1621 			return (sizeof (int));
1622 		case IPV6_UNICAST_HOPS:
1623 			*i1 = is->is_ipv6_hoplimit;
1624 			return (sizeof (int));
1625 		}
1626 		break;
1627 	case IPPROTO_ICMPV6:
1628 		switch (name) {
1629 		case ICMP6_FILTER:
1630 			/* Make it look like "pass all" */
1631 			ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
1632 			return (sizeof (icmp6_filter_t));
1633 		}
1634 		break;
1635 	}
1636 	return (-1);
1637 }
1638 
1639 /*
1640  * This routine retrieves the current status of socket options.
1641  * It returns the size of the option retrieved.
1642  */
1643 int
1644 icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr)
1645 {
1646 	conn_t	*connp = Q_TO_CONN(q);
1647 	icmp_t	*icmp = connp->conn_icmp;
1648 	icmp_stack_t *is = icmp->icmp_is;
1649 	int	*i1 = (int *)ptr;
1650 	ip6_pkt_t	*ipp = &icmp->icmp_sticky_ipp;
1651 
1652 	switch (level) {
1653 	case SOL_SOCKET:
1654 		switch (name) {
1655 		case SO_DEBUG:
1656 			*i1 = icmp->icmp_debug;
1657 			break;
1658 		case SO_TYPE:
1659 			*i1 = SOCK_RAW;
1660 			break;
1661 		case SO_PROTOTYPE:
1662 			*i1 = icmp->icmp_proto;
1663 			break;
1664 		case SO_REUSEADDR:
1665 			*i1 = icmp->icmp_reuseaddr;
1666 			break;
1667 
1668 		/*
1669 		 * The following three items are available here,
1670 		 * but are only meaningful to IP.
1671 		 */
1672 		case SO_DONTROUTE:
1673 			*i1 = icmp->icmp_dontroute;
1674 			break;
1675 		case SO_USELOOPBACK:
1676 			*i1 = icmp->icmp_useloopback;
1677 			break;
1678 		case SO_BROADCAST:
1679 			*i1 = icmp->icmp_broadcast;
1680 			break;
1681 
1682 		case SO_SNDBUF:
1683 			ASSERT(q->q_hiwat <= INT_MAX);
1684 			*i1 = (int)q->q_hiwat;
1685 			break;
1686 		case SO_RCVBUF:
1687 			ASSERT(RD(q)->q_hiwat <= INT_MAX);
1688 			*i1 = (int)RD(q)->q_hiwat;
1689 			break;
1690 		case SO_DGRAM_ERRIND:
1691 			*i1 = icmp->icmp_dgram_errind;
1692 			break;
1693 		case SO_TIMESTAMP:
1694 			*i1 = icmp->icmp_timestamp;
1695 			break;
1696 		case SO_MAC_EXEMPT:
1697 			*i1 = icmp->icmp_mac_exempt;
1698 			break;
1699 		case SO_DOMAIN:
1700 			*i1 = icmp->icmp_family;
1701 			break;
1702 
1703 		/*
1704 		 * Following four not meaningful for icmp
1705 		 * Action is same as "default" to which we fallthrough
1706 		 * so we keep them in comments.
1707 		 * case SO_LINGER:
1708 		 * case SO_KEEPALIVE:
1709 		 * case SO_OOBINLINE:
1710 		 * case SO_ALLZONES:
1711 		 */
1712 		default:
1713 			return (-1);
1714 		}
1715 		break;
1716 	case IPPROTO_IP:
1717 		/*
1718 		 * Only allow IPv4 option processing on IPv4 sockets.
1719 		 */
1720 		if (icmp->icmp_family != AF_INET)
1721 			return (-1);
1722 
1723 		switch (name) {
1724 		case IP_OPTIONS:
1725 		case T_IP_OPTIONS:
1726 			/* Options are passed up with each packet */
1727 			return (0);
1728 		case IP_HDRINCL:
1729 			*i1 = (int)icmp->icmp_hdrincl;
1730 			break;
1731 		case IP_TOS:
1732 		case T_IP_TOS:
1733 			*i1 = (int)icmp->icmp_type_of_service;
1734 			break;
1735 		case IP_TTL:
1736 			*i1 = (int)icmp->icmp_ttl;
1737 			break;
1738 		case IP_MULTICAST_IF:
1739 			/* 0 address if not set */
1740 			*(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr;
1741 			return (sizeof (ipaddr_t));
1742 		case IP_MULTICAST_TTL:
1743 			*(uchar_t *)ptr = icmp->icmp_multicast_ttl;
1744 			return (sizeof (uchar_t));
1745 		case IP_MULTICAST_LOOP:
1746 			*ptr = connp->conn_multicast_loop;
1747 			return (sizeof (uint8_t));
1748 		case IP_BOUND_IF:
1749 			/* Zero if not set */
1750 			*i1 = icmp->icmp_bound_if;
1751 			break;	/* goto sizeof (int) option return */
1752 		case IP_UNSPEC_SRC:
1753 			*ptr = icmp->icmp_unspec_source;
1754 			break;	/* goto sizeof (int) option return */
1755 		case IP_XMIT_IF:
1756 			*i1 = icmp->icmp_xmit_if;
1757 			break;	/* goto sizeof (int) option return */
1758 		case IP_RECVIF:
1759 			*ptr = icmp->icmp_recvif;
1760 			break;	/* goto sizeof (int) option return */
1761 		case IP_RECVPKTINFO:
1762 			/*
1763 			 * This also handles IP_PKTINFO.
1764 			 * IP_PKTINFO and IP_RECVPKTINFO have the same value.
1765 			 * Differentiation is based on the size of the argument
1766 			 * passed in.
1767 			 * This option is handled in IP which will return an
1768 			 * error for IP_PKTINFO as it's not supported as a
1769 			 * sticky option.
1770 			 */
1771 			return (-EINVAL);
1772 		/*
1773 		 * Cannot "get" the value of following options
1774 		 * at this level. Action is same as "default" to
1775 		 * which we fallthrough so we keep them in comments.
1776 		 *
1777 		 * case IP_ADD_MEMBERSHIP:
1778 		 * case IP_DROP_MEMBERSHIP:
1779 		 * case IP_BLOCK_SOURCE:
1780 		 * case IP_UNBLOCK_SOURCE:
1781 		 * case IP_ADD_SOURCE_MEMBERSHIP:
1782 		 * case IP_DROP_SOURCE_MEMBERSHIP:
1783 		 * case MCAST_JOIN_GROUP:
1784 		 * case MCAST_LEAVE_GROUP:
1785 		 * case MCAST_BLOCK_SOURCE:
1786 		 * case MCAST_UNBLOCK_SOURCE:
1787 		 * case MCAST_JOIN_SOURCE_GROUP:
1788 		 * case MCAST_LEAVE_SOURCE_GROUP:
1789 		 * case MRT_INIT:
1790 		 * case MRT_DONE:
1791 		 * case MRT_ADD_VIF:
1792 		 * case MRT_DEL_VIF:
1793 		 * case MRT_ADD_MFC:
1794 		 * case MRT_DEL_MFC:
1795 		 * case MRT_VERSION:
1796 		 * case MRT_ASSERT:
1797 		 * case IP_SEC_OPT:
1798 		 * case IP_DONTFAILOVER_IF:
1799 		 * case IP_NEXTHOP:
1800 		 */
1801 		default:
1802 			return (-1);
1803 		}
1804 		break;
1805 	case IPPROTO_IPV6:
1806 		/*
1807 		 * Only allow IPv6 option processing on native IPv6 sockets.
1808 		 */
1809 		if (icmp->icmp_family != AF_INET6)
1810 			return (-1);
1811 		switch (name) {
1812 		case IPV6_UNICAST_HOPS:
1813 			*i1 = (unsigned int)icmp->icmp_ttl;
1814 			break;
1815 		case IPV6_MULTICAST_IF:
1816 			/* 0 index if not set */
1817 			*i1 = icmp->icmp_multicast_if_index;
1818 			break;
1819 		case IPV6_MULTICAST_HOPS:
1820 			*i1 = icmp->icmp_multicast_ttl;
1821 			break;
1822 		case IPV6_MULTICAST_LOOP:
1823 			*i1 = connp->conn_multicast_loop;
1824 			break;
1825 		case IPV6_BOUND_IF:
1826 			/* Zero if not set */
1827 			*i1 = icmp->icmp_bound_if;
1828 			break;
1829 		case IPV6_UNSPEC_SRC:
1830 			*i1 = icmp->icmp_unspec_source;
1831 			break;
1832 		case IPV6_CHECKSUM:
1833 			/*
1834 			 * Return offset or -1 if no checksum offset.
1835 			 * Does not apply to IPPROTO_ICMPV6
1836 			 */
1837 			if (icmp->icmp_proto == IPPROTO_ICMPV6)
1838 				return (-1);
1839 
1840 			if (icmp->icmp_raw_checksum) {
1841 				*i1 = icmp->icmp_checksum_off;
1842 			} else {
1843 				*i1 = -1;
1844 			}
1845 			break;
1846 		case IPV6_JOIN_GROUP:
1847 		case IPV6_LEAVE_GROUP:
1848 		case MCAST_JOIN_GROUP:
1849 		case MCAST_LEAVE_GROUP:
1850 		case MCAST_BLOCK_SOURCE:
1851 		case MCAST_UNBLOCK_SOURCE:
1852 		case MCAST_JOIN_SOURCE_GROUP:
1853 		case MCAST_LEAVE_SOURCE_GROUP:
1854 			/* cannot "get" the value for these */
1855 			return (-1);
1856 		case IPV6_RECVPKTINFO:
1857 			*i1 = icmp->icmp_ip_recvpktinfo;
1858 			break;
1859 		case IPV6_RECVTCLASS:
1860 			*i1 = icmp->icmp_ipv6_recvtclass;
1861 			break;
1862 		case IPV6_RECVPATHMTU:
1863 			*i1 = icmp->icmp_ipv6_recvpathmtu;
1864 			break;
1865 		case IPV6_V6ONLY:
1866 			*i1 = 1;
1867 			break;
1868 		case IPV6_RECVHOPLIMIT:
1869 			*i1 = icmp->icmp_ipv6_recvhoplimit;
1870 			break;
1871 		case IPV6_RECVHOPOPTS:
1872 			*i1 = icmp->icmp_ipv6_recvhopopts;
1873 			break;
1874 		case IPV6_RECVDSTOPTS:
1875 			*i1 = icmp->icmp_ipv6_recvdstopts;
1876 			break;
1877 		case _OLD_IPV6_RECVDSTOPTS:
1878 			*i1 = icmp->icmp_old_ipv6_recvdstopts;
1879 			break;
1880 		case IPV6_RECVRTHDRDSTOPTS:
1881 			*i1 = icmp->icmp_ipv6_recvrtdstopts;
1882 			break;
1883 		case IPV6_RECVRTHDR:
1884 			*i1 = icmp->icmp_ipv6_recvrthdr;
1885 			break;
1886 		case IPV6_PKTINFO: {
1887 			/* XXX assumes that caller has room for max size! */
1888 			struct in6_pktinfo *pkti;
1889 
1890 			pkti = (struct in6_pktinfo *)ptr;
1891 			if (ipp->ipp_fields & IPPF_IFINDEX)
1892 				pkti->ipi6_ifindex = ipp->ipp_ifindex;
1893 			else
1894 				pkti->ipi6_ifindex = 0;
1895 			if (ipp->ipp_fields & IPPF_ADDR)
1896 				pkti->ipi6_addr = ipp->ipp_addr;
1897 			else
1898 				pkti->ipi6_addr = ipv6_all_zeros;
1899 			return (sizeof (struct in6_pktinfo));
1900 		}
1901 		case IPV6_NEXTHOP: {
1902 			sin6_t *sin6 = (sin6_t *)ptr;
1903 
1904 			if (!(ipp->ipp_fields & IPPF_NEXTHOP))
1905 				return (0);
1906 			*sin6 = sin6_null;
1907 			sin6->sin6_family = AF_INET6;
1908 			sin6->sin6_addr = ipp->ipp_nexthop;
1909 			return (sizeof (sin6_t));
1910 		}
1911 		case IPV6_HOPOPTS:
1912 			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
1913 				return (0);
1914 			if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6)
1915 				return (0);
1916 			bcopy((char *)ipp->ipp_hopopts +
1917 			    icmp->icmp_label_len_v6, ptr,
1918 			    ipp->ipp_hopoptslen - icmp->icmp_label_len_v6);
1919 			if (icmp->icmp_label_len_v6 > 0) {
1920 				ptr[0] = ((char *)ipp->ipp_hopopts)[0];
1921 				ptr[1] = (ipp->ipp_hopoptslen -
1922 				    icmp->icmp_label_len_v6 + 7) / 8 - 1;
1923 			}
1924 			return (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6);
1925 		case IPV6_RTHDRDSTOPTS:
1926 			if (!(ipp->ipp_fields & IPPF_RTDSTOPTS))
1927 				return (0);
1928 			bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
1929 			return (ipp->ipp_rtdstoptslen);
1930 		case IPV6_RTHDR:
1931 			if (!(ipp->ipp_fields & IPPF_RTHDR))
1932 				return (0);
1933 			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
1934 			return (ipp->ipp_rthdrlen);
1935 		case IPV6_DSTOPTS:
1936 			if (!(ipp->ipp_fields & IPPF_DSTOPTS))
1937 				return (0);
1938 			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
1939 			return (ipp->ipp_dstoptslen);
1940 		case IPV6_PATHMTU:
1941 			if (!(ipp->ipp_fields & IPPF_PATHMTU))
1942 				return (0);
1943 
1944 			return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0,
1945 			    (struct ip6_mtuinfo *)ptr,
1946 			    is->is_netstack));
1947 		case IPV6_TCLASS:
1948 			if (ipp->ipp_fields & IPPF_TCLASS)
1949 				*i1 = ipp->ipp_tclass;
1950 			else
1951 				*i1 = IPV6_FLOW_TCLASS(
1952 				    IPV6_DEFAULT_VERS_AND_FLOW);
1953 			break;
1954 		default:
1955 			return (-1);
1956 		}
1957 		break;
1958 	case IPPROTO_ICMPV6:
1959 		/*
1960 		 * Only allow IPv6 option processing on native IPv6 sockets.
1961 		 */
1962 		if (icmp->icmp_family != AF_INET6)
1963 			return (-1);
1964 
1965 		if (icmp->icmp_proto != IPPROTO_ICMPV6)
1966 			return (-1);
1967 
1968 		switch (name) {
1969 		case ICMP6_FILTER:
1970 			if (icmp->icmp_filter == NULL) {
1971 				/* Make it look like "pass all" */
1972 				ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
1973 			} else {
1974 				(void) bcopy(icmp->icmp_filter, ptr,
1975 				    sizeof (icmp6_filter_t));
1976 			}
1977 			return (sizeof (icmp6_filter_t));
1978 		default:
1979 			return (-1);
1980 		}
1981 	default:
1982 		return (-1);
1983 	}
1984 	return (sizeof (int));
1985 }
1986 
1987 /*
1988  * This routine retrieves the current status of socket options.
1989  * It returns the size of the option retrieved.
1990  */
1991 int
1992 icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
1993 {
1994 	icmp_t  *icmp = Q_TO_ICMP(q);
1995 	int 	err;
1996 
1997 	rw_enter(&icmp->icmp_rwlock, RW_READER);
1998 	err = icmp_opt_get_locked(q, level, name, ptr);
1999 	rw_exit(&icmp->icmp_rwlock);
2000 	return (err);
2001 }
2002 
2003 
2004 /* This routine sets socket options. */
2005 /* ARGSUSED */
2006 int
2007 icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
2008     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
2009     void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
2010 {
2011 	conn_t	*connp = Q_TO_CONN(q);
2012 	icmp_t	*icmp = connp->conn_icmp;
2013 	icmp_stack_t *is = icmp->icmp_is;
2014 	int	*i1 = (int *)invalp;
2015 	boolean_t onoff = (*i1 == 0) ? 0 : 1;
2016 	boolean_t checkonly;
2017 	int	error;
2018 
2019 	switch (optset_context) {
2020 	case SETFN_OPTCOM_CHECKONLY:
2021 		checkonly = B_TRUE;
2022 		/*
2023 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
2024 		 * inlen != 0 implies value supplied and
2025 		 * 	we have to "pretend" to set it.
2026 		 * inlen == 0 implies that there is no
2027 		 * 	value part in T_CHECK request and just validation
2028 		 * done elsewhere should be enough, we just return here.
2029 		 */
2030 		if (inlen == 0) {
2031 			*outlenp = 0;
2032 			return (0);
2033 		}
2034 		break;
2035 	case SETFN_OPTCOM_NEGOTIATE:
2036 		checkonly = B_FALSE;
2037 		break;
2038 	case SETFN_UD_NEGOTIATE:
2039 	case SETFN_CONN_NEGOTIATE:
2040 		checkonly = B_FALSE;
2041 		/*
2042 		 * Negotiating local and "association-related" options
2043 		 * through T_UNITDATA_REQ.
2044 		 *
2045 		 * Following routine can filter out ones we do not
2046 		 * want to be "set" this way.
2047 		 */
2048 		if (!icmp_opt_allow_udr_set(level, name)) {
2049 			*outlenp = 0;
2050 			return (EINVAL);
2051 		}
2052 		break;
2053 	default:
2054 		/*
2055 		 * We should never get here
2056 		 */
2057 		*outlenp = 0;
2058 		return (EINVAL);
2059 	}
2060 
2061 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
2062 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
2063 
2064 	/*
2065 	 * For fixed length options, no sanity check
2066 	 * of passed in length is done. It is assumed *_optcom_req()
2067 	 * routines do the right thing.
2068 	 */
2069 
2070 	switch (level) {
2071 	case SOL_SOCKET:
2072 		switch (name) {
2073 		case SO_DEBUG:
2074 			if (!checkonly)
2075 				icmp->icmp_debug = onoff;
2076 			break;
2077 		case SO_PROTOTYPE:
2078 			if ((*i1 & 0xFF) != IPPROTO_ICMP &&
2079 			    (*i1 & 0xFF) != IPPROTO_ICMPV6 &&
2080 			    secpolicy_net_rawaccess(cr) != 0) {
2081 				*outlenp = 0;
2082 				return (EACCES);
2083 			}
2084 			/* Can't use IPPROTO_RAW with IPv6 */
2085 			if ((*i1 & 0xFF) == IPPROTO_RAW &&
2086 			    icmp->icmp_family == AF_INET6) {
2087 				*outlenp = 0;
2088 				return (EPROTONOSUPPORT);
2089 			}
2090 			if (checkonly) {
2091 				/* T_CHECK case */
2092 				*(int *)outvalp = (*i1 & 0xFF);
2093 				break;
2094 			}
2095 			icmp->icmp_proto = *i1 & 0xFF;
2096 			if ((icmp->icmp_proto == IPPROTO_RAW ||
2097 			    icmp->icmp_proto == IPPROTO_IGMP) &&
2098 			    icmp->icmp_family == AF_INET)
2099 				icmp->icmp_hdrincl = 1;
2100 			else
2101 				icmp->icmp_hdrincl = 0;
2102 
2103 			if (icmp->icmp_family == AF_INET6 &&
2104 			    icmp->icmp_proto == IPPROTO_ICMPV6) {
2105 				/* Set offset for icmp6_cksum */
2106 				icmp->icmp_raw_checksum = 0;
2107 				icmp->icmp_checksum_off = 2;
2108 			}
2109 			if (icmp->icmp_proto == IPPROTO_UDP ||
2110 			    icmp->icmp_proto == IPPROTO_TCP ||
2111 			    icmp->icmp_proto == IPPROTO_SCTP) {
2112 				icmp->icmp_no_tp_cksum = 1;
2113 				icmp->icmp_sticky_ipp.ipp_fields |=
2114 				    IPPF_NO_CKSUM;
2115 			} else {
2116 				icmp->icmp_no_tp_cksum = 0;
2117 				icmp->icmp_sticky_ipp.ipp_fields &=
2118 				    ~IPPF_NO_CKSUM;
2119 			}
2120 
2121 			if (icmp->icmp_filter != NULL &&
2122 			    icmp->icmp_proto != IPPROTO_ICMPV6) {
2123 				kmem_free(icmp->icmp_filter,
2124 				    sizeof (icmp6_filter_t));
2125 				icmp->icmp_filter = NULL;
2126 			}
2127 
2128 			/* Rebuild the header template */
2129 			error = icmp_build_hdrs(icmp);
2130 			if (error != 0) {
2131 				*outlenp = 0;
2132 				return (error);
2133 			}
2134 
2135 			/*
2136 			 * For SCTP, we don't use icmp_bind_proto() for
2137 			 * raw socket binding.  Note that we do not need
2138 			 * to set *outlenp.
2139 			 * FIXME: how does SCTP work?
2140 			 */
2141 			if (icmp->icmp_proto == IPPROTO_SCTP)
2142 				return (0);
2143 
2144 			*outlenp = sizeof (int);
2145 			*(int *)outvalp = *i1 & 0xFF;
2146 
2147 			/* Drop lock across the bind operation */
2148 			rw_exit(&icmp->icmp_rwlock);
2149 			icmp_bind_proto(q);
2150 			rw_enter(&icmp->icmp_rwlock, RW_WRITER);
2151 			return (0);
2152 		case SO_REUSEADDR:
2153 			if (!checkonly)
2154 				icmp->icmp_reuseaddr = onoff;
2155 			break;
2156 
2157 		/*
2158 		 * The following three items are available here,
2159 		 * but are only meaningful to IP.
2160 		 */
2161 		case SO_DONTROUTE:
2162 			if (!checkonly)
2163 				icmp->icmp_dontroute = onoff;
2164 			break;
2165 		case SO_USELOOPBACK:
2166 			if (!checkonly)
2167 				icmp->icmp_useloopback = onoff;
2168 			break;
2169 		case SO_BROADCAST:
2170 			if (!checkonly)
2171 				icmp->icmp_broadcast = onoff;
2172 			break;
2173 
2174 		case SO_SNDBUF:
2175 			if (*i1 > is->is_max_buf) {
2176 				*outlenp = 0;
2177 				return (ENOBUFS);
2178 			}
2179 			if (!checkonly) {
2180 				q->q_hiwat = *i1;
2181 			}
2182 			break;
2183 		case SO_RCVBUF:
2184 			if (*i1 > is->is_max_buf) {
2185 				*outlenp = 0;
2186 				return (ENOBUFS);
2187 			}
2188 			if (!checkonly) {
2189 				RD(q)->q_hiwat = *i1;
2190 				rw_exit(&icmp->icmp_rwlock);
2191 				(void) mi_set_sth_hiwat(RD(q), *i1);
2192 				rw_enter(&icmp->icmp_rwlock, RW_WRITER);
2193 			}
2194 			break;
2195 		case SO_DGRAM_ERRIND:
2196 			if (!checkonly)
2197 				icmp->icmp_dgram_errind = onoff;
2198 			break;
2199 		case SO_ALLZONES:
2200 			/*
2201 			 * "soft" error (negative)
2202 			 * option not handled at this level
2203 			 * Note: Do not modify *outlenp
2204 			 */
2205 			return (-EINVAL);
2206 		case SO_TIMESTAMP:
2207 			if (!checkonly) {
2208 				icmp->icmp_timestamp = onoff;
2209 			}
2210 			break;
2211 		case SO_MAC_EXEMPT:
2212 			if (secpolicy_net_mac_aware(cr) != 0 ||
2213 			    icmp->icmp_state != TS_UNBND)
2214 				return (EACCES);
2215 			if (!checkonly)
2216 				icmp->icmp_mac_exempt = onoff;
2217 			break;
2218 		/*
2219 		 * Following three not meaningful for icmp
2220 		 * Action is same as "default" so we keep them
2221 		 * in comments.
2222 		 * case SO_LINGER:
2223 		 * case SO_KEEPALIVE:
2224 		 * case SO_OOBINLINE:
2225 		 */
2226 		default:
2227 			*outlenp = 0;
2228 			return (EINVAL);
2229 		}
2230 		break;
2231 	case IPPROTO_IP:
2232 		/*
2233 		 * Only allow IPv4 option processing on IPv4 sockets.
2234 		 */
2235 		if (icmp->icmp_family != AF_INET) {
2236 			*outlenp = 0;
2237 			return (ENOPROTOOPT);
2238 		}
2239 		switch (name) {
2240 		case IP_OPTIONS:
2241 		case T_IP_OPTIONS:
2242 			/* Save options for use by IP. */
2243 			if ((inlen & 0x3) ||
2244 			    inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) {
2245 				*outlenp = 0;
2246 				return (EINVAL);
2247 			}
2248 			if (checkonly)
2249 				break;
2250 
2251 			if (!tsol_option_set(&icmp->icmp_ip_snd_options,
2252 			    &icmp->icmp_ip_snd_options_len,
2253 			    icmp->icmp_label_len, invalp, inlen)) {
2254 				*outlenp = 0;
2255 				return (ENOMEM);
2256 			}
2257 
2258 			icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
2259 			    icmp->icmp_ip_snd_options_len;
2260 			rw_exit(&icmp->icmp_rwlock);
2261 			(void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len +
2262 			    is->is_wroff_extra);
2263 			rw_enter(&icmp->icmp_rwlock, RW_WRITER);
2264 			break;
2265 		case IP_HDRINCL:
2266 			if (!checkonly)
2267 				icmp->icmp_hdrincl = onoff;
2268 			break;
2269 		case IP_TOS:
2270 		case T_IP_TOS:
2271 			if (!checkonly) {
2272 				icmp->icmp_type_of_service = (uint8_t)*i1;
2273 			}
2274 			break;
2275 		case IP_TTL:
2276 			if (!checkonly) {
2277 				icmp->icmp_ttl = (uint8_t)*i1;
2278 			}
2279 			break;
2280 		case IP_MULTICAST_IF:
2281 			/*
2282 			 * TODO should check OPTMGMT reply and undo this if
2283 			 * there is an error.
2284 			 */
2285 			if (!checkonly)
2286 				icmp->icmp_multicast_if_addr = *i1;
2287 			break;
2288 		case IP_MULTICAST_TTL:
2289 			if (!checkonly)
2290 				icmp->icmp_multicast_ttl = *invalp;
2291 			break;
2292 		case IP_MULTICAST_LOOP:
2293 			if (!checkonly) {
2294 				connp->conn_multicast_loop =
2295 				    (*invalp == 0) ? 0 : 1;
2296 			}
2297 			break;
2298 		case IP_BOUND_IF:
2299 			if (!checkonly)
2300 				icmp->icmp_bound_if = *i1;
2301 			break;
2302 		case IP_UNSPEC_SRC:
2303 			if (!checkonly)
2304 				icmp->icmp_unspec_source = onoff;
2305 			break;
2306 		case IP_XMIT_IF:
2307 			if (!checkonly)
2308 				icmp->icmp_xmit_if = *i1;
2309 			break;
2310 		case IP_RECVIF:
2311 			if (!checkonly)
2312 				icmp->icmp_recvif = onoff;
2313 			/*
2314 			 * pass to ip
2315 			 */
2316 			return (-EINVAL);
2317 		case IP_PKTINFO: {
2318 			/*
2319 			 * This also handles IP_RECVPKTINFO.
2320 			 * IP_PKTINFO and IP_RECVPKTINFO have the same value.
2321 			 * Differentiation is based on the size of the argument
2322 			 * passed in.
2323 			 */
2324 			struct in_pktinfo *pktinfop;
2325 			ip4_pkt_t *attr_pktinfop;
2326 
2327 			if (checkonly)
2328 				break;
2329 
2330 			if (inlen == sizeof (int)) {
2331 				/*
2332 				 * This is IP_RECVPKTINFO option.
2333 				 * Keep a local copy of wether this option is
2334 				 * set or not and pass it down to IP for
2335 				 * processing.
2336 				 */
2337 				icmp->icmp_ip_recvpktinfo = onoff;
2338 				return (-EINVAL);
2339 			}
2340 
2341 
2342 			if (inlen != sizeof (struct in_pktinfo))
2343 				return (EINVAL);
2344 
2345 			if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs)
2346 			    == NULL) {
2347 				/*
2348 				 * sticky option is not supported
2349 				 */
2350 				return (EINVAL);
2351 			}
2352 
2353 			pktinfop = (struct in_pktinfo *)invalp;
2354 
2355 			/*
2356 			 * Atleast one of the values should be specified
2357 			 */
2358 			if (pktinfop->ipi_ifindex == 0 &&
2359 			    pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) {
2360 				return (EINVAL);
2361 			}
2362 
2363 			attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr;
2364 			attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex;
2365 		}
2366 			break;
2367 		case IP_ADD_MEMBERSHIP:
2368 		case IP_DROP_MEMBERSHIP:
2369 		case IP_BLOCK_SOURCE:
2370 		case IP_UNBLOCK_SOURCE:
2371 		case IP_ADD_SOURCE_MEMBERSHIP:
2372 		case IP_DROP_SOURCE_MEMBERSHIP:
2373 		case MCAST_JOIN_GROUP:
2374 		case MCAST_LEAVE_GROUP:
2375 		case MCAST_BLOCK_SOURCE:
2376 		case MCAST_UNBLOCK_SOURCE:
2377 		case MCAST_JOIN_SOURCE_GROUP:
2378 		case MCAST_LEAVE_SOURCE_GROUP:
2379 		case MRT_INIT:
2380 		case MRT_DONE:
2381 		case MRT_ADD_VIF:
2382 		case MRT_DEL_VIF:
2383 		case MRT_ADD_MFC:
2384 		case MRT_DEL_MFC:
2385 		case MRT_VERSION:
2386 		case MRT_ASSERT:
2387 		case IP_SEC_OPT:
2388 		case IP_DONTFAILOVER_IF:
2389 		case IP_NEXTHOP:
2390 			/*
2391 			 * "soft" error (negative)
2392 			 * option not handled at this level
2393 			 * Note: Do not modify *outlenp
2394 			 */
2395 			return (-EINVAL);
2396 		default:
2397 			*outlenp = 0;
2398 			return (EINVAL);
2399 		}
2400 		break;
2401 	case IPPROTO_IPV6: {
2402 		ip6_pkt_t		*ipp;
2403 		boolean_t		sticky;
2404 
2405 		if (icmp->icmp_family != AF_INET6) {
2406 			*outlenp = 0;
2407 			return (ENOPROTOOPT);
2408 		}
2409 		/*
2410 		 * Deal with both sticky options and ancillary data
2411 		 */
2412 		if (thisdg_attrs == NULL) {
2413 			/* sticky options, or none */
2414 			ipp = &icmp->icmp_sticky_ipp;
2415 			sticky = B_TRUE;
2416 		} else {
2417 			/* ancillary data */
2418 			ipp = (ip6_pkt_t *)thisdg_attrs;
2419 			sticky = B_FALSE;
2420 		}
2421 
2422 		switch (name) {
2423 		case IPV6_MULTICAST_IF:
2424 			if (!checkonly)
2425 				icmp->icmp_multicast_if_index = *i1;
2426 			break;
2427 		case IPV6_UNICAST_HOPS:
2428 			/* -1 means use default */
2429 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
2430 				*outlenp = 0;
2431 				return (EINVAL);
2432 			}
2433 			if (!checkonly) {
2434 				if (*i1 == -1) {
2435 					icmp->icmp_ttl = ipp->ipp_unicast_hops =
2436 					    is->is_ipv6_hoplimit;
2437 					ipp->ipp_fields &= ~IPPF_UNICAST_HOPS;
2438 					/* Pass modified value to IP. */
2439 					*i1 = ipp->ipp_hoplimit;
2440 				} else {
2441 					icmp->icmp_ttl = ipp->ipp_unicast_hops =
2442 					    (uint8_t)*i1;
2443 					ipp->ipp_fields |= IPPF_UNICAST_HOPS;
2444 				}
2445 				/* Rebuild the header template */
2446 				error = icmp_build_hdrs(icmp);
2447 				if (error != 0) {
2448 					*outlenp = 0;
2449 					return (error);
2450 				}
2451 			}
2452 			break;
2453 		case IPV6_MULTICAST_HOPS:
2454 			/* -1 means use default */
2455 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
2456 				*outlenp = 0;
2457 				return (EINVAL);
2458 			}
2459 			if (!checkonly) {
2460 				if (*i1 == -1) {
2461 					icmp->icmp_multicast_ttl =
2462 					    ipp->ipp_multicast_hops =
2463 					    IP_DEFAULT_MULTICAST_TTL;
2464 					ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS;
2465 					/* Pass modified value to IP. */
2466 					*i1 = icmp->icmp_multicast_ttl;
2467 				} else {
2468 					icmp->icmp_multicast_ttl =
2469 					    ipp->ipp_multicast_hops =
2470 					    (uint8_t)*i1;
2471 					ipp->ipp_fields |= IPPF_MULTICAST_HOPS;
2472 				}
2473 			}
2474 			break;
2475 		case IPV6_MULTICAST_LOOP:
2476 			if (*i1 != 0 && *i1 != 1) {
2477 				*outlenp = 0;
2478 				return (EINVAL);
2479 			}
2480 			if (!checkonly)
2481 				connp->conn_multicast_loop = *i1;
2482 			break;
2483 		case IPV6_CHECKSUM:
2484 			/*
2485 			 * Integer offset into the user data of where the
2486 			 * checksum is located.
2487 			 * Offset of -1 disables option.
2488 			 * Does not apply to IPPROTO_ICMPV6.
2489 			 */
2490 			if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) {
2491 				*outlenp = 0;
2492 				return (EINVAL);
2493 			}
2494 			if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) {
2495 				/* Negative or not 16 bit aligned offset */
2496 				*outlenp = 0;
2497 				return (EINVAL);
2498 			}
2499 			if (checkonly)
2500 				break;
2501 
2502 			if (*i1 == -1) {
2503 				icmp->icmp_raw_checksum = 0;
2504 				ipp->ipp_fields &= ~IPPF_RAW_CKSUM;
2505 			} else {
2506 				icmp->icmp_raw_checksum = 1;
2507 				icmp->icmp_checksum_off = *i1;
2508 				ipp->ipp_fields |= IPPF_RAW_CKSUM;
2509 			}
2510 			/* Rebuild the header template */
2511 			error = icmp_build_hdrs(icmp);
2512 			if (error != 0) {
2513 				*outlenp = 0;
2514 				return (error);
2515 			}
2516 			break;
2517 		case IPV6_JOIN_GROUP:
2518 		case IPV6_LEAVE_GROUP:
2519 		case MCAST_JOIN_GROUP:
2520 		case MCAST_LEAVE_GROUP:
2521 		case MCAST_BLOCK_SOURCE:
2522 		case MCAST_UNBLOCK_SOURCE:
2523 		case MCAST_JOIN_SOURCE_GROUP:
2524 		case MCAST_LEAVE_SOURCE_GROUP:
2525 			/*
2526 			 * "soft" error (negative)
2527 			 * option not handled at this level
2528 			 * Note: Do not modify *outlenp
2529 			 */
2530 			return (-EINVAL);
2531 		case IPV6_BOUND_IF:
2532 			if (!checkonly)
2533 				icmp->icmp_bound_if = *i1;
2534 			break;
2535 		case IPV6_UNSPEC_SRC:
2536 			if (!checkonly)
2537 				icmp->icmp_unspec_source = onoff;
2538 			break;
2539 		case IPV6_RECVTCLASS:
2540 			if (!checkonly)
2541 				icmp->icmp_ipv6_recvtclass = onoff;
2542 			break;
2543 		/*
2544 		 * Set boolean switches for ancillary data delivery
2545 		 */
2546 		case IPV6_RECVPKTINFO:
2547 			if (!checkonly)
2548 				icmp->icmp_ip_recvpktinfo = onoff;
2549 			break;
2550 		case IPV6_RECVPATHMTU:
2551 			if (!checkonly)
2552 				icmp->icmp_ipv6_recvpathmtu = onoff;
2553 			break;
2554 		case IPV6_RECVHOPLIMIT:
2555 			if (!checkonly)
2556 				icmp->icmp_ipv6_recvhoplimit = onoff;
2557 			break;
2558 		case IPV6_RECVHOPOPTS:
2559 			if (!checkonly)
2560 				icmp->icmp_ipv6_recvhopopts = onoff;
2561 			break;
2562 		case IPV6_RECVDSTOPTS:
2563 			if (!checkonly)
2564 				icmp->icmp_ipv6_recvdstopts = onoff;
2565 			break;
2566 		case _OLD_IPV6_RECVDSTOPTS:
2567 			if (!checkonly)
2568 				icmp->icmp_old_ipv6_recvdstopts = onoff;
2569 			break;
2570 		case IPV6_RECVRTHDRDSTOPTS:
2571 			if (!checkonly)
2572 				icmp->icmp_ipv6_recvrtdstopts = onoff;
2573 			break;
2574 		case IPV6_RECVRTHDR:
2575 			if (!checkonly)
2576 				icmp->icmp_ipv6_recvrthdr = onoff;
2577 			break;
2578 		/*
2579 		 * Set sticky options or ancillary data.
2580 		 * If sticky options, (re)build any extension headers
2581 		 * that might be needed as a result.
2582 		 */
2583 		case IPV6_PKTINFO:
2584 			/*
2585 			 * The source address and ifindex are verified
2586 			 * in ip_opt_set(). For ancillary data the
2587 			 * source address is checked in ip_wput_v6.
2588 			 */
2589 			if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
2590 				return (EINVAL);
2591 			if (checkonly)
2592 				break;
2593 
2594 			if (inlen == 0) {
2595 				ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR);
2596 				ipp->ipp_sticky_ignored |=
2597 				    (IPPF_IFINDEX|IPPF_ADDR);
2598 			} else {
2599 				struct in6_pktinfo *pkti;
2600 
2601 				pkti = (struct in6_pktinfo *)invalp;
2602 				ipp->ipp_ifindex = pkti->ipi6_ifindex;
2603 				ipp->ipp_addr = pkti->ipi6_addr;
2604 				if (ipp->ipp_ifindex != 0)
2605 					ipp->ipp_fields |= IPPF_IFINDEX;
2606 				else
2607 					ipp->ipp_fields &= ~IPPF_IFINDEX;
2608 				if (!IN6_IS_ADDR_UNSPECIFIED(
2609 				    &ipp->ipp_addr))
2610 					ipp->ipp_fields |= IPPF_ADDR;
2611 				else
2612 					ipp->ipp_fields &= ~IPPF_ADDR;
2613 			}
2614 			if (sticky) {
2615 				error = icmp_build_hdrs(icmp);
2616 				if (error != 0)
2617 					return (error);
2618 			}
2619 			break;
2620 		case IPV6_HOPLIMIT:
2621 			/* This option can only be used as ancillary data. */
2622 			if (sticky)
2623 				return (EINVAL);
2624 			if (inlen != 0 && inlen != sizeof (int))
2625 				return (EINVAL);
2626 			if (checkonly)
2627 				break;
2628 
2629 			if (inlen == 0) {
2630 				ipp->ipp_fields &= ~IPPF_HOPLIMIT;
2631 				ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT;
2632 			} else {
2633 				if (*i1 > 255 || *i1 < -1)
2634 					return (EINVAL);
2635 				if (*i1 == -1)
2636 					ipp->ipp_hoplimit =
2637 					    is->is_ipv6_hoplimit;
2638 				else
2639 					ipp->ipp_hoplimit = *i1;
2640 				ipp->ipp_fields |= IPPF_HOPLIMIT;
2641 			}
2642 			break;
2643 		case IPV6_TCLASS:
2644 			/*
2645 			 * IPV6_RECVTCLASS accepts -1 as use kernel default
2646 			 * and [0, 255] as the actualy traffic class.
2647 			 */
2648 			if (inlen != 0 && inlen != sizeof (int))
2649 				return (EINVAL);
2650 			if (checkonly)
2651 				break;
2652 
2653 			if (inlen == 0) {
2654 				ipp->ipp_fields &= ~IPPF_TCLASS;
2655 				ipp->ipp_sticky_ignored |= IPPF_TCLASS;
2656 			} else {
2657 				if (*i1 >= 256 || *i1 < -1)
2658 					return (EINVAL);
2659 				if (*i1 == -1) {
2660 					ipp->ipp_tclass =
2661 					    IPV6_FLOW_TCLASS(
2662 					    IPV6_DEFAULT_VERS_AND_FLOW);
2663 				} else {
2664 					ipp->ipp_tclass = *i1;
2665 				}
2666 				ipp->ipp_fields |= IPPF_TCLASS;
2667 			}
2668 			if (sticky) {
2669 				error = icmp_build_hdrs(icmp);
2670 				if (error != 0)
2671 					return (error);
2672 			}
2673 			break;
2674 		case IPV6_NEXTHOP:
2675 			/*
2676 			 * IP will verify that the nexthop is reachable
2677 			 * and fail for sticky options.
2678 			 */
2679 			if (inlen != 0 && inlen != sizeof (sin6_t))
2680 				return (EINVAL);
2681 			if (checkonly)
2682 				break;
2683 
2684 			if (inlen == 0) {
2685 				ipp->ipp_fields &= ~IPPF_NEXTHOP;
2686 				ipp->ipp_sticky_ignored |= IPPF_NEXTHOP;
2687 			} else {
2688 				sin6_t *sin6 = (sin6_t *)invalp;
2689 
2690 				if (sin6->sin6_family != AF_INET6)
2691 					return (EAFNOSUPPORT);
2692 				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
2693 					return (EADDRNOTAVAIL);
2694 				ipp->ipp_nexthop = sin6->sin6_addr;
2695 				if (!IN6_IS_ADDR_UNSPECIFIED(
2696 				    &ipp->ipp_nexthop))
2697 					ipp->ipp_fields |= IPPF_NEXTHOP;
2698 				else
2699 					ipp->ipp_fields &= ~IPPF_NEXTHOP;
2700 			}
2701 			if (sticky) {
2702 				error = icmp_build_hdrs(icmp);
2703 				if (error != 0)
2704 					return (error);
2705 			}
2706 			break;
2707 		case IPV6_HOPOPTS: {
2708 			ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
2709 			/*
2710 			 * Sanity checks - minimum size, size a multiple of
2711 			 * eight bytes, and matching size passed in.
2712 			 */
2713 			if (inlen != 0 &&
2714 			    inlen != (8 * (hopts->ip6h_len + 1)))
2715 				return (EINVAL);
2716 
2717 			if (checkonly)
2718 				break;
2719 			error = optcom_pkt_set(invalp, inlen, sticky,
2720 			    (uchar_t **)&ipp->ipp_hopopts,
2721 			    &ipp->ipp_hopoptslen,
2722 			    sticky ? icmp->icmp_label_len_v6 : 0);
2723 			if (error != 0)
2724 				return (error);
2725 			if (ipp->ipp_hopoptslen == 0) {
2726 				ipp->ipp_fields &= ~IPPF_HOPOPTS;
2727 				ipp->ipp_sticky_ignored |= IPPF_HOPOPTS;
2728 			} else {
2729 				ipp->ipp_fields |= IPPF_HOPOPTS;
2730 			}
2731 			if (sticky) {
2732 				error = icmp_build_hdrs(icmp);
2733 				if (error != 0)
2734 					return (error);
2735 			}
2736 			break;
2737 		}
2738 		case IPV6_RTHDRDSTOPTS: {
2739 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
2740 
2741 			/*
2742 			 * Sanity checks - minimum size, size a multiple of
2743 			 * eight bytes, and matching size passed in.
2744 			 */
2745 			if (inlen != 0 &&
2746 			    inlen != (8 * (dopts->ip6d_len + 1)))
2747 				return (EINVAL);
2748 
2749 			if (checkonly)
2750 				break;
2751 
2752 			if (inlen == 0) {
2753 				if (sticky &&
2754 				    (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) {
2755 					kmem_free(ipp->ipp_rtdstopts,
2756 					    ipp->ipp_rtdstoptslen);
2757 					ipp->ipp_rtdstopts = NULL;
2758 					ipp->ipp_rtdstoptslen = 0;
2759 				}
2760 				ipp->ipp_fields &= ~IPPF_RTDSTOPTS;
2761 				ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS;
2762 			} else {
2763 				error = optcom_pkt_set(invalp, inlen, sticky,
2764 				    (uchar_t **)&ipp->ipp_rtdstopts,
2765 				    &ipp->ipp_rtdstoptslen, 0);
2766 				if (error != 0)
2767 					return (error);
2768 				ipp->ipp_fields |= IPPF_RTDSTOPTS;
2769 			}
2770 			if (sticky) {
2771 				error = icmp_build_hdrs(icmp);
2772 				if (error != 0)
2773 					return (error);
2774 			}
2775 			break;
2776 		}
2777 		case IPV6_DSTOPTS: {
2778 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
2779 
2780 			/*
2781 			 * Sanity checks - minimum size, size a multiple of
2782 			 * eight bytes, and matching size passed in.
2783 			 */
2784 			if (inlen != 0 &&
2785 			    inlen != (8 * (dopts->ip6d_len + 1)))
2786 				return (EINVAL);
2787 
2788 			if (checkonly)
2789 				break;
2790 
2791 			if (inlen == 0) {
2792 				if (sticky &&
2793 				    (ipp->ipp_fields & IPPF_DSTOPTS) != 0) {
2794 					kmem_free(ipp->ipp_dstopts,
2795 					    ipp->ipp_dstoptslen);
2796 					ipp->ipp_dstopts = NULL;
2797 					ipp->ipp_dstoptslen = 0;
2798 				}
2799 				ipp->ipp_fields &= ~IPPF_DSTOPTS;
2800 				ipp->ipp_sticky_ignored |= IPPF_DSTOPTS;
2801 			} else {
2802 				error = optcom_pkt_set(invalp, inlen, sticky,
2803 				    (uchar_t **)&ipp->ipp_dstopts,
2804 				    &ipp->ipp_dstoptslen, 0);
2805 				if (error != 0)
2806 					return (error);
2807 				ipp->ipp_fields |= IPPF_DSTOPTS;
2808 			}
2809 			if (sticky) {
2810 				error = icmp_build_hdrs(icmp);
2811 				if (error != 0)
2812 					return (error);
2813 			}
2814 			break;
2815 		}
2816 		case IPV6_RTHDR: {
2817 			ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp;
2818 
2819 			/*
2820 			 * Sanity checks - minimum size, size a multiple of
2821 			 * eight bytes, and matching size passed in.
2822 			 */
2823 			if (inlen != 0 &&
2824 			    inlen != (8 * (rt->ip6r_len + 1)))
2825 				return (EINVAL);
2826 
2827 			if (checkonly)
2828 				break;
2829 
2830 			if (inlen == 0) {
2831 				if (sticky &&
2832 				    (ipp->ipp_fields & IPPF_RTHDR) != 0) {
2833 					kmem_free(ipp->ipp_rthdr,
2834 					    ipp->ipp_rthdrlen);
2835 					ipp->ipp_rthdr = NULL;
2836 					ipp->ipp_rthdrlen = 0;
2837 				}
2838 				ipp->ipp_fields &= ~IPPF_RTHDR;
2839 				ipp->ipp_sticky_ignored |= IPPF_RTHDR;
2840 			} else {
2841 				error = optcom_pkt_set(invalp, inlen, sticky,
2842 				    (uchar_t **)&ipp->ipp_rthdr,
2843 				    &ipp->ipp_rthdrlen, 0);
2844 				if (error != 0)
2845 					return (error);
2846 				ipp->ipp_fields |= IPPF_RTHDR;
2847 			}
2848 			if (sticky) {
2849 				error = icmp_build_hdrs(icmp);
2850 				if (error != 0)
2851 					return (error);
2852 			}
2853 			break;
2854 		}
2855 
2856 		case IPV6_DONTFRAG:
2857 			if (checkonly)
2858 				break;
2859 
2860 			if (onoff) {
2861 				ipp->ipp_fields |= IPPF_DONTFRAG;
2862 			} else {
2863 				ipp->ipp_fields &= ~IPPF_DONTFRAG;
2864 			}
2865 			break;
2866 
2867 		case IPV6_USE_MIN_MTU:
2868 			if (inlen != sizeof (int))
2869 				return (EINVAL);
2870 
2871 			if (*i1 < -1 || *i1 > 1)
2872 				return (EINVAL);
2873 
2874 			if (checkonly)
2875 				break;
2876 
2877 			ipp->ipp_fields |= IPPF_USE_MIN_MTU;
2878 			ipp->ipp_use_min_mtu = *i1;
2879 			break;
2880 
2881 		/*
2882 		 * This option can't be set.  Its only returned via
2883 		 * getsockopt() or ancillary data.
2884 		 */
2885 		case IPV6_PATHMTU:
2886 			return (EINVAL);
2887 
2888 		case IPV6_BOUND_PIF:
2889 		case IPV6_SEC_OPT:
2890 		case IPV6_DONTFAILOVER_IF:
2891 		case IPV6_SRC_PREFERENCES:
2892 		case IPV6_V6ONLY:
2893 			/* Handled at IP level */
2894 			return (-EINVAL);
2895 		default:
2896 			*outlenp = 0;
2897 			return (EINVAL);
2898 		}
2899 		break;
2900 	}		/* end IPPROTO_IPV6 */
2901 
2902 	case IPPROTO_ICMPV6:
2903 		/*
2904 		 * Only allow IPv6 option processing on IPv6 sockets.
2905 		 */
2906 		if (icmp->icmp_family != AF_INET6) {
2907 			*outlenp = 0;
2908 			return (ENOPROTOOPT);
2909 		}
2910 		if (icmp->icmp_proto != IPPROTO_ICMPV6) {
2911 			*outlenp = 0;
2912 			return (ENOPROTOOPT);
2913 		}
2914 		switch (name) {
2915 		case ICMP6_FILTER:
2916 			if (!checkonly) {
2917 				if ((inlen != 0) &&
2918 				    (inlen != sizeof (icmp6_filter_t)))
2919 					return (EINVAL);
2920 
2921 				if (inlen == 0) {
2922 					if (icmp->icmp_filter != NULL) {
2923 						kmem_free(icmp->icmp_filter,
2924 						    sizeof (icmp6_filter_t));
2925 						icmp->icmp_filter = NULL;
2926 					}
2927 				} else {
2928 					if (icmp->icmp_filter == NULL) {
2929 						icmp->icmp_filter = kmem_alloc(
2930 						    sizeof (icmp6_filter_t),
2931 						    KM_NOSLEEP);
2932 						if (icmp->icmp_filter == NULL) {
2933 							*outlenp = 0;
2934 							return (ENOBUFS);
2935 						}
2936 					}
2937 					(void) bcopy(invalp, icmp->icmp_filter,
2938 					    inlen);
2939 				}
2940 			}
2941 			break;
2942 
2943 		default:
2944 			*outlenp = 0;
2945 			return (EINVAL);
2946 		}
2947 		break;
2948 	default:
2949 		*outlenp = 0;
2950 		return (EINVAL);
2951 	}
2952 	/*
2953 	 * Common case of OK return with outval same as inval.
2954 	 */
2955 	if (invalp != outvalp) {
2956 		/* don't trust bcopy for identical src/dst */
2957 		(void) bcopy(invalp, outvalp, inlen);
2958 	}
2959 	*outlenp = inlen;
2960 	return (0);
2961 }
2962 /* This routine sets socket options. */
2963 /* ARGSUSED */
2964 int
2965 icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
2966     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
2967     void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
2968 {
2969 	icmp_t	*icmp;
2970 	int	err;
2971 
2972 	icmp = Q_TO_ICMP(q);
2973 
2974 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
2975 	err = icmp_opt_set_locked(q, optset_context, level, name, inlen, invalp,
2976 	    outlenp, outvalp, thisdg_attrs, cr, mblk);
2977 	rw_exit(&icmp->icmp_rwlock);
2978 	return (err);
2979 }
2980 
2981 /*
2982  * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl,
2983  * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum.
2984  * The headers include ip6i_t (if needed), ip6_t, and any sticky extension
2985  * headers.
2986  * Returns failure if can't allocate memory.
2987  */
2988 static int
2989 icmp_build_hdrs(icmp_t *icmp)
2990 {
2991 	icmp_stack_t *is = icmp->icmp_is;
2992 	uchar_t	*hdrs;
2993 	uint_t	hdrs_len;
2994 	ip6_t	*ip6h;
2995 	ip6i_t	*ip6i;
2996 	ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp;
2997 
2998 	ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock));
2999 	hdrs_len = ip_total_hdrs_len_v6(ipp);
3000 	ASSERT(hdrs_len != 0);
3001 	if (hdrs_len != icmp->icmp_sticky_hdrs_len) {
3002 		/* Need to reallocate */
3003 		if (hdrs_len != 0) {
3004 			hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
3005 			if (hdrs == NULL)
3006 				return (ENOMEM);
3007 		} else {
3008 			hdrs = NULL;
3009 		}
3010 		if (icmp->icmp_sticky_hdrs_len != 0) {
3011 			kmem_free(icmp->icmp_sticky_hdrs,
3012 			    icmp->icmp_sticky_hdrs_len);
3013 		}
3014 		icmp->icmp_sticky_hdrs = hdrs;
3015 		icmp->icmp_sticky_hdrs_len = hdrs_len;
3016 	}
3017 	ip_build_hdrs_v6(icmp->icmp_sticky_hdrs,
3018 	    icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto);
3019 
3020 	/* Set header fields not in ipp */
3021 	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
3022 		ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs;
3023 		ip6h = (ip6_t *)&ip6i[1];
3024 
3025 		if (ipp->ipp_fields & IPPF_RAW_CKSUM) {
3026 			ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM;
3027 			ip6i->ip6i_checksum_off = icmp->icmp_checksum_off;
3028 		}
3029 		if (ipp->ipp_fields & IPPF_NO_CKSUM) {
3030 			ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM;
3031 		}
3032 	} else {
3033 		ip6h = (ip6_t *)icmp->icmp_sticky_hdrs;
3034 	}
3035 
3036 	if (!(ipp->ipp_fields & IPPF_ADDR))
3037 		ip6h->ip6_src = icmp->icmp_v6src;
3038 
3039 	/* Try to get everything in a single mblk */
3040 	if (hdrs_len > icmp->icmp_max_hdr_len) {
3041 		icmp->icmp_max_hdr_len = hdrs_len;
3042 		rw_exit(&icmp->icmp_rwlock);
3043 		(void) mi_set_sth_wroff(icmp->icmp_connp->conn_rq,
3044 		    icmp->icmp_max_hdr_len + is->is_wroff_extra);
3045 		rw_enter(&icmp->icmp_rwlock, RW_WRITER);
3046 	}
3047 	return (0);
3048 }
3049 
3050 /*
3051  * This routine retrieves the value of an ND variable in a icmpparam_t
3052  * structure.  It is called through nd_getset when a user reads the
3053  * variable.
3054  */
3055 /* ARGSUSED */
3056 static int
3057 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
3058 {
3059 	icmpparam_t	*icmppa = (icmpparam_t *)cp;
3060 
3061 	(void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value);
3062 	return (0);
3063 }
3064 
3065 /*
3066  * Walk through the param array specified registering each element with the
3067  * named dispatch (ND) handler.
3068  */
3069 static boolean_t
3070 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt)
3071 {
3072 	for (; cnt-- > 0; icmppa++) {
3073 		if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) {
3074 			if (!nd_load(ndp, icmppa->icmp_param_name,
3075 			    icmp_param_get, icmp_param_set,
3076 			    (caddr_t)icmppa)) {
3077 				nd_free(ndp);
3078 				return (B_FALSE);
3079 			}
3080 		}
3081 	}
3082 	if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL,
3083 	    NULL)) {
3084 		nd_free(ndp);
3085 		return (B_FALSE);
3086 	}
3087 	return (B_TRUE);
3088 }
3089 
3090 /* This routine sets an ND variable in a icmpparam_t structure. */
3091 /* ARGSUSED */
3092 static int
3093 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
3094 {
3095 	long		new_value;
3096 	icmpparam_t	*icmppa = (icmpparam_t *)cp;
3097 
3098 	/*
3099 	 * Fail the request if the new value does not lie within the
3100 	 * required bounds.
3101 	 */
3102 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
3103 	    new_value < icmppa->icmp_param_min ||
3104 	    new_value > icmppa->icmp_param_max) {
3105 		return (EINVAL);
3106 	}
3107 	/* Set the new value */
3108 	icmppa->icmp_param_value = new_value;
3109 	return (0);
3110 }
3111 /*ARGSUSED2*/
3112 static void
3113 icmp_input(void *arg1, mblk_t *mp, void *arg2)
3114 {
3115 	conn_t *connp = (conn_t *)arg1;
3116 	struct T_unitdata_ind	*tudi;
3117 	uchar_t			*rptr;
3118 	icmp_t			*icmp;
3119 	icmp_stack_t		*is;
3120 	sin_t			*sin;
3121 	sin6_t			*sin6;
3122 	ip6_t			*ip6h;
3123 	ip6i_t			*ip6i;
3124 	mblk_t			*mp1;
3125 	int			hdr_len;
3126 	ipha_t			*ipha;
3127 	int			udi_size;	/* Size of T_unitdata_ind */
3128 	uint_t			ipvers;
3129 	ip6_pkt_t		ipp;
3130 	uint8_t			nexthdr;
3131 	ip_pktinfo_t		*pinfo = NULL;
3132 	mblk_t			*options_mp = NULL;
3133 	uint_t			icmp_opt = 0;
3134 	boolean_t		icmp_ipv6_recvhoplimit = B_FALSE;
3135 	uint_t			hopstrip;
3136 
3137 	ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
3138 
3139 	icmp = connp->conn_icmp;
3140 	is = icmp->icmp_is;
3141 	rptr = mp->b_rptr;
3142 	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL);
3143 	ASSERT(OK_32PTR(rptr));
3144 
3145 	/*
3146 	 * IP should have prepended the options data in an M_CTL
3147 	 * Check M_CTL "type" to make sure are not here bcos of
3148 	 * a valid ICMP message
3149 	 */
3150 	if (DB_TYPE(mp) == M_CTL) {
3151 		/*
3152 		 * FIXME: does IP still do this?
3153 		 * IP sends up the IPSEC_IN message for handling IPSEC
3154 		 * policy at the TCP level. We don't need it here.
3155 		 */
3156 		if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) {
3157 			mp1 = mp->b_cont;
3158 			freeb(mp);
3159 			mp = mp1;
3160 			rptr = mp->b_rptr;
3161 		} else if (MBLKL(mp) == sizeof (ip_pktinfo_t) &&
3162 		    ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type ==
3163 		    IN_PKTINFO) {
3164 			/*
3165 			 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information
3166 			 * has been prepended to the packet by IP. We need to
3167 			 * extract the mblk and adjust the rptr
3168 			 */
3169 			pinfo = (ip_pktinfo_t *)mp->b_rptr;
3170 			options_mp = mp;
3171 			mp = mp->b_cont;
3172 			rptr = mp->b_rptr;
3173 		} else {
3174 			/*
3175 			 * ICMP messages.
3176 			 */
3177 			icmp_icmp_error(connp->conn_rq, mp);
3178 			return;
3179 		}
3180 	}
3181 
3182 	/*
3183 	 * Discard message if it is misaligned or smaller than the IP header.
3184 	 */
3185 	if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) {
3186 		freemsg(mp);
3187 		if (options_mp != NULL)
3188 			freeb(options_mp);
3189 		BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
3190 		return;
3191 	}
3192 	ipvers = IPH_HDR_VERSION((ipha_t *)rptr);
3193 
3194 	/* Handle M_DATA messages containing IP packets messages */
3195 	if (ipvers == IPV4_VERSION) {
3196 		/*
3197 		 * Special case where IP attaches
3198 		 * the IRE needs to be handled so that we don't send up
3199 		 * IRE to the user land.
3200 		 */
3201 		ipha = (ipha_t *)rptr;
3202 		hdr_len = IPH_HDR_LENGTH(ipha);
3203 
3204 		if (ipha->ipha_protocol == IPPROTO_TCP) {
3205 			tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len];
3206 
3207 			if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) ==
3208 			    TH_SYN) && mp->b_cont != NULL) {
3209 				mp1 = mp->b_cont;
3210 				if (mp1->b_datap->db_type == IRE_DB_TYPE) {
3211 					freeb(mp1);
3212 					mp->b_cont = NULL;
3213 				}
3214 			}
3215 		}
3216 		if (is->is_bsd_compat) {
3217 			ushort_t len;
3218 			len = ntohs(ipha->ipha_length);
3219 
3220 			if (mp->b_datap->db_ref > 1) {
3221 				/*
3222 				 * Allocate a new IP header so that we can
3223 				 * modify ipha_length.
3224 				 */
3225 				mblk_t	*mp1;
3226 
3227 				mp1 = allocb(hdr_len, BPRI_MED);
3228 				if (!mp1) {
3229 					freemsg(mp);
3230 					if (options_mp != NULL)
3231 						freeb(options_mp);
3232 					BUMP_MIB(&is->is_rawip_mib,
3233 					    rawipInErrors);
3234 					return;
3235 				}
3236 				bcopy(rptr, mp1->b_rptr, hdr_len);
3237 				mp->b_rptr = rptr + hdr_len;
3238 				rptr = mp1->b_rptr;
3239 				ipha = (ipha_t *)rptr;
3240 				mp1->b_cont = mp;
3241 				mp1->b_wptr = rptr + hdr_len;
3242 				mp = mp1;
3243 			}
3244 			len -= hdr_len;
3245 			ipha->ipha_length = htons(len);
3246 		}
3247 	}
3248 
3249 	/*
3250 	 * This is the inbound data path.  Packets are passed upstream as
3251 	 * T_UNITDATA_IND messages with full IP headers still attached.
3252 	 */
3253 	if (icmp->icmp_family == AF_INET) {
3254 		ASSERT(ipvers == IPV4_VERSION);
3255 		udi_size =  sizeof (struct T_unitdata_ind) + sizeof (sin_t);
3256 		if (icmp->icmp_recvif && (pinfo != NULL) &&
3257 		    (pinfo->ip_pkt_flags & IPF_RECVIF)) {
3258 			udi_size += sizeof (struct T_opthdr) +
3259 			    sizeof (uint_t);
3260 		}
3261 
3262 		if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) &&
3263 		    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
3264 			udi_size += sizeof (struct T_opthdr) +
3265 			    sizeof (struct in_pktinfo);
3266 		}
3267 
3268 		/*
3269 		 * If SO_TIMESTAMP is set allocate the appropriate sized
3270 		 * buffer. Since gethrestime() expects a pointer aligned
3271 		 * argument, we allocate space necessary for extra
3272 		 * alignment (even though it might not be used).
3273 		 */
3274 		if (icmp->icmp_timestamp) {
3275 			udi_size += sizeof (struct T_opthdr) +
3276 			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
3277 		}
3278 		mp1 = allocb(udi_size, BPRI_MED);
3279 		if (mp1 == NULL) {
3280 			freemsg(mp);
3281 			if (options_mp != NULL)
3282 				freeb(options_mp);
3283 			BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
3284 			return;
3285 		}
3286 		mp1->b_cont = mp;
3287 		mp = mp1;
3288 		tudi = (struct T_unitdata_ind *)mp->b_rptr;
3289 		mp->b_datap->db_type = M_PROTO;
3290 		mp->b_wptr = (uchar_t *)tudi + udi_size;
3291 		tudi->PRIM_type = T_UNITDATA_IND;
3292 		tudi->SRC_length = sizeof (sin_t);
3293 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
3294 		sin = (sin_t *)&tudi[1];
3295 		*sin = sin_null;
3296 		sin->sin_family = AF_INET;
3297 		sin->sin_addr.s_addr = ipha->ipha_src;
3298 		tudi->OPT_offset =  sizeof (struct T_unitdata_ind) +
3299 		    sizeof (sin_t);
3300 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
3301 		tudi->OPT_length = udi_size;
3302 
3303 		/*
3304 		 * Add options if IP_RECVIF is set
3305 		 */
3306 		if (udi_size != 0) {
3307 			char *dstopt;
3308 
3309 			dstopt = (char *)&sin[1];
3310 			if (icmp->icmp_recvif && (pinfo != NULL) &&
3311 			    (pinfo->ip_pkt_flags & IPF_RECVIF)) {
3312 
3313 				struct T_opthdr *toh;
3314 				uint_t		*dstptr;
3315 
3316 				toh = (struct T_opthdr *)dstopt;
3317 				toh->level = IPPROTO_IP;
3318 				toh->name = IP_RECVIF;
3319 				toh->len = sizeof (struct T_opthdr) +
3320 				    sizeof (uint_t);
3321 				toh->status = 0;
3322 				dstopt += sizeof (struct T_opthdr);
3323 				dstptr = (uint_t *)dstopt;
3324 				*dstptr = pinfo->ip_pkt_ifindex;
3325 				dstopt += sizeof (uint_t);
3326 				udi_size -= toh->len;
3327 			}
3328 			if (icmp->icmp_timestamp) {
3329 				struct	T_opthdr *toh;
3330 
3331 				toh = (struct T_opthdr *)dstopt;
3332 				toh->level = SOL_SOCKET;
3333 				toh->name = SCM_TIMESTAMP;
3334 				toh->len = sizeof (struct T_opthdr) +
3335 				    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
3336 				toh->status = 0;
3337 				dstopt += sizeof (struct T_opthdr);
3338 				/* Align for gethrestime() */
3339 				dstopt = (char *)P2ROUNDUP((intptr_t)dstopt,
3340 				    sizeof (intptr_t));
3341 				gethrestime((timestruc_t *)dstopt);
3342 				dstopt = (char *)toh + toh->len;
3343 				udi_size -= toh->len;
3344 			}
3345 			if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) &&
3346 			    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
3347 				struct	T_opthdr *toh;
3348 				struct	in_pktinfo *pktinfop;
3349 
3350 				toh = (struct T_opthdr *)dstopt;
3351 				toh->level = IPPROTO_IP;
3352 				toh->name = IP_PKTINFO;
3353 				toh->len = sizeof (struct T_opthdr) +
3354 				    sizeof (in_pktinfo_t);
3355 				toh->status = 0;
3356 				dstopt += sizeof (struct T_opthdr);
3357 				pktinfop = (struct in_pktinfo *)dstopt;
3358 				pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex;
3359 				pktinfop->ipi_spec_dst =
3360 				    pinfo->ip_pkt_match_addr;
3361 
3362 				pktinfop->ipi_addr.s_addr = ipha->ipha_dst;
3363 
3364 				dstopt += sizeof (struct in_pktinfo);
3365 				udi_size -= toh->len;
3366 			}
3367 
3368 			/* Consumed all of allocated space */
3369 			ASSERT(udi_size == 0);
3370 		}
3371 
3372 		if (options_mp != NULL)
3373 			freeb(options_mp);
3374 
3375 		BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams);
3376 		putnext(connp->conn_rq, mp);
3377 		return;
3378 	}
3379 
3380 	/*
3381 	 * We don't need options_mp in the IPv6 path.
3382 	 */
3383 	if (options_mp != NULL) {
3384 		freeb(options_mp);
3385 		options_mp = NULL;
3386 	}
3387 
3388 	/*
3389 	 * Discard message if it is smaller than the IPv6 header
3390 	 * or if the header is malformed.
3391 	 */
3392 	if ((mp->b_wptr - rptr) < sizeof (ip6_t) ||
3393 	    IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION ||
3394 	    icmp->icmp_family != AF_INET6) {
3395 		freemsg(mp);
3396 		BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
3397 		return;
3398 	}
3399 
3400 	/* Initialize */
3401 	ipp.ipp_fields = 0;
3402 	hopstrip = 0;
3403 
3404 	ip6h = (ip6_t *)rptr;
3405 	/*
3406 	 * Call on ip_find_hdr_v6 which gets the total hdr len
3407 	 * as well as individual lenghts of ext hdrs (and ptrs to
3408 	 * them).
3409 	 */
3410 	if (ip6h->ip6_nxt != icmp->icmp_proto) {
3411 		/* Look for ifindex information */
3412 		if (ip6h->ip6_nxt == IPPROTO_RAW) {
3413 			ip6i = (ip6i_t *)ip6h;
3414 			if (ip6i->ip6i_flags & IP6I_IFINDEX) {
3415 				ASSERT(ip6i->ip6i_ifindex != 0);
3416 				ipp.ipp_fields |= IPPF_IFINDEX;
3417 				ipp.ipp_ifindex = ip6i->ip6i_ifindex;
3418 			}
3419 			rptr = (uchar_t *)&ip6i[1];
3420 			mp->b_rptr = rptr;
3421 			if (rptr == mp->b_wptr) {
3422 				mp1 = mp->b_cont;
3423 				freeb(mp);
3424 				mp = mp1;
3425 				rptr = mp->b_rptr;
3426 			}
3427 			ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN);
3428 			ip6h = (ip6_t *)rptr;
3429 		}
3430 		hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr);
3431 
3432 		/*
3433 		 * We need to lie a bit to the user because users inside
3434 		 * labeled compartments should not see their own labels.  We
3435 		 * assume that in all other respects IP has checked the label,
3436 		 * and that the label is always first among the options.  (If
3437 		 * it's not first, then this code won't see it, and the option
3438 		 * will be passed along to the user.)
3439 		 *
3440 		 * If we had multilevel ICMP sockets, then the following code
3441 		 * should be skipped for them to allow the user to see the
3442 		 * label.
3443 		 *
3444 		 * Alignment restrictions in the definition of IP options
3445 		 * (namely, the requirement that the 4-octet DOI goes on a
3446 		 * 4-octet boundary) mean that we know exactly where the option
3447 		 * should start, but we're lenient for other hosts.
3448 		 *
3449 		 * Note that there are no multilevel ICMP or raw IP sockets
3450 		 * yet, thus nobody ever sees the IP6OPT_LS option.
3451 		 */
3452 		if ((ipp.ipp_fields & IPPF_HOPOPTS) &&
3453 		    ipp.ipp_hopoptslen > 5 && is_system_labeled()) {
3454 			const uchar_t *ucp =
3455 			    (const uchar_t *)ipp.ipp_hopopts + 2;
3456 			int remlen = ipp.ipp_hopoptslen - 2;
3457 
3458 			while (remlen > 0) {
3459 				if (*ucp == IP6OPT_PAD1) {
3460 					remlen--;
3461 					ucp++;
3462 				} else if (*ucp == IP6OPT_PADN) {
3463 					remlen -= ucp[1] + 2;
3464 					ucp += ucp[1] + 2;
3465 				} else if (*ucp == ip6opt_ls) {
3466 					hopstrip = (ucp -
3467 					    (const uchar_t *)ipp.ipp_hopopts) +
3468 					    ucp[1] + 2;
3469 					hopstrip = (hopstrip + 7) & ~7;
3470 					break;
3471 				} else {
3472 					/* label option must be first */
3473 					break;
3474 				}
3475 			}
3476 		}
3477 	} else {
3478 		hdr_len = IPV6_HDR_LEN;
3479 		ip6i = NULL;
3480 		nexthdr = ip6h->ip6_nxt;
3481 	}
3482 	/*
3483 	 * One special case where IP attaches the IRE needs to
3484 	 * be handled so that we don't send up IRE to the user land.
3485 	 */
3486 	if (nexthdr == IPPROTO_TCP) {
3487 		tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len];
3488 
3489 		if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) &&
3490 		    mp->b_cont != NULL) {
3491 			mp1 = mp->b_cont;
3492 			if (mp1->b_datap->db_type == IRE_DB_TYPE) {
3493 				freeb(mp1);
3494 				mp->b_cont = NULL;
3495 			}
3496 		}
3497 	}
3498 	/*
3499 	 * Check a filter for ICMPv6 types if needed.
3500 	 * Verify raw checksums if needed.
3501 	 */
3502 	if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) {
3503 		if (icmp->icmp_filter != NULL) {
3504 			int type;
3505 
3506 			/* Assumes that IP has done the pullupmsg */
3507 			type = mp->b_rptr[hdr_len];
3508 
3509 			ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr);
3510 			if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) {
3511 				freemsg(mp);
3512 				return;
3513 			}
3514 		} else {
3515 			/* Checksum */
3516 			uint16_t	*up;
3517 			uint32_t	sum;
3518 			int		remlen;
3519 
3520 			up = (uint16_t *)&ip6h->ip6_src;
3521 
3522 			remlen = msgdsize(mp) - hdr_len;
3523 			sum = htons(icmp->icmp_proto + remlen)
3524 			    + up[0] + up[1] + up[2] + up[3]
3525 			    + up[4] + up[5] + up[6] + up[7]
3526 			    + up[8] + up[9] + up[10] + up[11]
3527 			    + up[12] + up[13] + up[14] + up[15];
3528 			sum = (sum & 0xffff) + (sum >> 16);
3529 			sum = IP_CSUM(mp, hdr_len, sum);
3530 			if (sum != 0) {
3531 				/* IPv6 RAW checksum failed */
3532 				ip0dbg(("icmp_rput: RAW checksum "
3533 				    "failed %x\n", sum));
3534 				freemsg(mp);
3535 				BUMP_MIB(&is->is_rawip_mib,
3536 				    rawipInCksumErrs);
3537 				return;
3538 			}
3539 		}
3540 	}
3541 	/* Skip all the IPv6 headers per API */
3542 	mp->b_rptr += hdr_len;
3543 
3544 	udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
3545 
3546 	/*
3547 	 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to
3548 	 * maintain state information, instead of relying on icmp_t
3549 	 * structure, since there arent any locks protecting these members
3550 	 * and there is a window where there might be a race between a
3551 	 * thread setting options on the write side and a thread reading
3552 	 * these options on the read size.
3553 	 */
3554 	if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS|
3555 	    IPPF_RTHDR|IPPF_IFINDEX)) {
3556 		if (icmp->icmp_ipv6_recvhopopts &&
3557 		    (ipp.ipp_fields & IPPF_HOPOPTS) &&
3558 		    ipp.ipp_hopoptslen > hopstrip) {
3559 			udi_size += sizeof (struct T_opthdr) +
3560 			    ipp.ipp_hopoptslen - hopstrip;
3561 			icmp_opt |= IPPF_HOPOPTS;
3562 		}
3563 		if ((icmp->icmp_ipv6_recvdstopts ||
3564 		    icmp->icmp_old_ipv6_recvdstopts) &&
3565 		    (ipp.ipp_fields & IPPF_DSTOPTS)) {
3566 			udi_size += sizeof (struct T_opthdr) +
3567 			    ipp.ipp_dstoptslen;
3568 			icmp_opt |= IPPF_DSTOPTS;
3569 		}
3570 		if (((icmp->icmp_ipv6_recvdstopts &&
3571 		    icmp->icmp_ipv6_recvrthdr &&
3572 		    (ipp.ipp_fields & IPPF_RTHDR)) ||
3573 		    icmp->icmp_ipv6_recvrtdstopts) &&
3574 		    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
3575 			udi_size += sizeof (struct T_opthdr) +
3576 			    ipp.ipp_rtdstoptslen;
3577 			icmp_opt |= IPPF_RTDSTOPTS;
3578 		}
3579 		if (icmp->icmp_ipv6_recvrthdr &&
3580 		    (ipp.ipp_fields & IPPF_RTHDR)) {
3581 			udi_size += sizeof (struct T_opthdr) +
3582 			    ipp.ipp_rthdrlen;
3583 			icmp_opt |= IPPF_RTHDR;
3584 		}
3585 		if (icmp->icmp_ip_recvpktinfo &&
3586 		    (ipp.ipp_fields & IPPF_IFINDEX)) {
3587 			udi_size += sizeof (struct T_opthdr) +
3588 			    sizeof (struct in6_pktinfo);
3589 			icmp_opt |= IPPF_IFINDEX;
3590 		}
3591 	}
3592 	if (icmp->icmp_ipv6_recvhoplimit) {
3593 		udi_size += sizeof (struct T_opthdr) + sizeof (int);
3594 		icmp_ipv6_recvhoplimit = B_TRUE;
3595 	}
3596 
3597 	if (icmp->icmp_ipv6_recvtclass)
3598 		udi_size += sizeof (struct T_opthdr) + sizeof (int);
3599 
3600 	mp1 = allocb(udi_size, BPRI_MED);
3601 	if (mp1 == NULL) {
3602 		freemsg(mp);
3603 		BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
3604 		return;
3605 	}
3606 	mp1->b_cont = mp;
3607 	mp = mp1;
3608 	mp->b_datap->db_type = M_PROTO;
3609 	tudi = (struct T_unitdata_ind *)mp->b_rptr;
3610 	mp->b_wptr = (uchar_t *)tudi + udi_size;
3611 	tudi->PRIM_type = T_UNITDATA_IND;
3612 	tudi->SRC_length = sizeof (sin6_t);
3613 	tudi->SRC_offset = sizeof (struct T_unitdata_ind);
3614 	tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
3615 	udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
3616 	tudi->OPT_length = udi_size;
3617 	sin6 = (sin6_t *)&tudi[1];
3618 	sin6->sin6_port = 0;
3619 	sin6->sin6_family = AF_INET6;
3620 
3621 	sin6->sin6_addr = ip6h->ip6_src;
3622 	/* No sin6_flowinfo per API */
3623 	sin6->sin6_flowinfo = 0;
3624 	/* For link-scope source pass up scope id */
3625 	if ((ipp.ipp_fields & IPPF_IFINDEX) &&
3626 	    IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
3627 		sin6->sin6_scope_id = ipp.ipp_ifindex;
3628 	else
3629 		sin6->sin6_scope_id = 0;
3630 
3631 	sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst,
3632 	    icmp->icmp_zoneid, is->is_netstack);
3633 
3634 	if (udi_size != 0) {
3635 		uchar_t *dstopt;
3636 
3637 		dstopt = (uchar_t *)&sin6[1];
3638 		if (icmp_opt & IPPF_IFINDEX) {
3639 			struct T_opthdr *toh;
3640 			struct in6_pktinfo *pkti;
3641 
3642 			toh = (struct T_opthdr *)dstopt;
3643 			toh->level = IPPROTO_IPV6;
3644 			toh->name = IPV6_PKTINFO;
3645 			toh->len = sizeof (struct T_opthdr) +
3646 			    sizeof (*pkti);
3647 			toh->status = 0;
3648 			dstopt += sizeof (struct T_opthdr);
3649 			pkti = (struct in6_pktinfo *)dstopt;
3650 			pkti->ipi6_addr = ip6h->ip6_dst;
3651 			pkti->ipi6_ifindex = ipp.ipp_ifindex;
3652 			dstopt += sizeof (*pkti);
3653 			udi_size -= toh->len;
3654 		}
3655 		if (icmp_ipv6_recvhoplimit) {
3656 			struct T_opthdr *toh;
3657 
3658 			toh = (struct T_opthdr *)dstopt;
3659 			toh->level = IPPROTO_IPV6;
3660 			toh->name = IPV6_HOPLIMIT;
3661 			toh->len = sizeof (struct T_opthdr) +
3662 			    sizeof (uint_t);
3663 			toh->status = 0;
3664 			dstopt += sizeof (struct T_opthdr);
3665 			*(uint_t *)dstopt = ip6h->ip6_hops;
3666 			dstopt += sizeof (uint_t);
3667 			udi_size -= toh->len;
3668 		}
3669 		if (icmp->icmp_ipv6_recvtclass) {
3670 			struct T_opthdr *toh;
3671 
3672 			toh = (struct T_opthdr *)dstopt;
3673 			toh->level = IPPROTO_IPV6;
3674 			toh->name = IPV6_TCLASS;
3675 			toh->len = sizeof (struct T_opthdr) +
3676 			    sizeof (uint_t);
3677 			toh->status = 0;
3678 			dstopt += sizeof (struct T_opthdr);
3679 			*(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
3680 			dstopt += sizeof (uint_t);
3681 			udi_size -= toh->len;
3682 		}
3683 		if (icmp_opt & IPPF_HOPOPTS) {
3684 			struct T_opthdr *toh;
3685 
3686 			toh = (struct T_opthdr *)dstopt;
3687 			toh->level = IPPROTO_IPV6;
3688 			toh->name = IPV6_HOPOPTS;
3689 			toh->len = sizeof (struct T_opthdr) +
3690 			    ipp.ipp_hopoptslen - hopstrip;
3691 			toh->status = 0;
3692 			dstopt += sizeof (struct T_opthdr);
3693 			bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt,
3694 			    ipp.ipp_hopoptslen - hopstrip);
3695 			if (hopstrip > 0) {
3696 				/* copy next header value and fake length */
3697 				dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0];
3698 				dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] -
3699 				    hopstrip / 8;
3700 			}
3701 			dstopt += ipp.ipp_hopoptslen - hopstrip;
3702 			udi_size -= toh->len;
3703 		}
3704 		if (icmp_opt & IPPF_RTDSTOPTS) {
3705 			struct T_opthdr *toh;
3706 
3707 			toh = (struct T_opthdr *)dstopt;
3708 			toh->level = IPPROTO_IPV6;
3709 			toh->name = IPV6_DSTOPTS;
3710 			toh->len = sizeof (struct T_opthdr) +
3711 			    ipp.ipp_rtdstoptslen;
3712 			toh->status = 0;
3713 			dstopt += sizeof (struct T_opthdr);
3714 			bcopy(ipp.ipp_rtdstopts, dstopt,
3715 			    ipp.ipp_rtdstoptslen);
3716 			dstopt += ipp.ipp_rtdstoptslen;
3717 			udi_size -= toh->len;
3718 		}
3719 		if (icmp_opt & IPPF_RTHDR) {
3720 			struct T_opthdr *toh;
3721 
3722 			toh = (struct T_opthdr *)dstopt;
3723 			toh->level = IPPROTO_IPV6;
3724 			toh->name = IPV6_RTHDR;
3725 			toh->len = sizeof (struct T_opthdr) +
3726 			    ipp.ipp_rthdrlen;
3727 			toh->status = 0;
3728 			dstopt += sizeof (struct T_opthdr);
3729 			bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen);
3730 			dstopt += ipp.ipp_rthdrlen;
3731 			udi_size -= toh->len;
3732 		}
3733 		if (icmp_opt & IPPF_DSTOPTS) {
3734 			struct T_opthdr *toh;
3735 
3736 			toh = (struct T_opthdr *)dstopt;
3737 			toh->level = IPPROTO_IPV6;
3738 			toh->name = IPV6_DSTOPTS;
3739 			toh->len = sizeof (struct T_opthdr) +
3740 			    ipp.ipp_dstoptslen;
3741 			toh->status = 0;
3742 			dstopt += sizeof (struct T_opthdr);
3743 			bcopy(ipp.ipp_dstopts, dstopt,
3744 			    ipp.ipp_dstoptslen);
3745 			dstopt += ipp.ipp_dstoptslen;
3746 			udi_size -= toh->len;
3747 		}
3748 		/* Consumed all of allocated space */
3749 		ASSERT(udi_size == 0);
3750 	}
3751 	BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams);
3752 	putnext(connp->conn_rq, mp);
3753 }
3754 
3755 /*
3756  * Handle the results of a T_BIND_REQ whether deferred by IP or handled
3757  * immediately.
3758  */
3759 static void
3760 icmp_bind_result(conn_t *connp, mblk_t *mp)
3761 {
3762 	struct T_error_ack	*tea;
3763 
3764 	switch (mp->b_datap->db_type) {
3765 	case M_PROTO:
3766 	case M_PCPROTO:
3767 		/* M_PROTO messages contain some type of TPI message. */
3768 		if ((mp->b_wptr - mp->b_rptr) < sizeof (t_scalar_t)) {
3769 			freemsg(mp);
3770 			return;
3771 		}
3772 		tea = (struct T_error_ack *)mp->b_rptr;
3773 
3774 		switch (tea->PRIM_type) {
3775 		case T_ERROR_ACK:
3776 			switch (tea->ERROR_prim) {
3777 			case O_T_BIND_REQ:
3778 			case T_BIND_REQ:
3779 				icmp_bind_error(connp, mp);
3780 				return;
3781 			default:
3782 				break;
3783 			}
3784 			ASSERT(0);
3785 			freemsg(mp);
3786 			return;
3787 
3788 		case T_BIND_ACK:
3789 			icmp_bind_ack(connp, mp);
3790 			return;
3791 
3792 		default:
3793 			break;
3794 		}
3795 		freemsg(mp);
3796 		return;
3797 	default:
3798 		/* FIXME: other cases? */
3799 		ASSERT(0);
3800 		freemsg(mp);
3801 		return;
3802 	}
3803 }
3804 
3805 /*
3806  * Process a T_BIND_ACK
3807  */
3808 static void
3809 icmp_bind_ack(conn_t *connp, mblk_t *mp)
3810 {
3811 	icmp_t	*icmp = connp->conn_icmp;
3812 	mblk_t	*mp1;
3813 	ire_t	*ire;
3814 	struct T_bind_ack *tba;
3815 	uchar_t *addrp;
3816 	ipa_conn_t	*ac;
3817 	ipa6_conn_t	*ac6;
3818 
3819 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
3820 	/*
3821 	 * We know if headers are included or not so we can
3822 	 * safely do this.
3823 	 */
3824 	if (icmp->icmp_state == TS_UNBND) {
3825 		/*
3826 		 * TPI has not yet bound - bind sent by
3827 		 * icmp_bind_proto.
3828 		 */
3829 		freemsg(mp);
3830 		rw_exit(&icmp->icmp_rwlock);
3831 		return;
3832 	}
3833 	ASSERT(icmp->icmp_pending_op != -1);
3834 
3835 	/*
3836 	 * If a broadcast/multicast address was bound set
3837 	 * the source address to 0.
3838 	 * This ensures no datagrams with broadcast address
3839 	 * as source address are emitted (which would violate
3840 	 * RFC1122 - Hosts requirements)
3841 	 *
3842 	 * Note that when connecting the returned IRE is
3843 	 * for the destination address and we only perform
3844 	 * the broadcast check for the source address (it
3845 	 * is OK to connect to a broadcast/multicast address.)
3846 	 */
3847 	mp1 = mp->b_cont;
3848 	if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) {
3849 		ire = (ire_t *)mp1->b_rptr;
3850 
3851 		/*
3852 		 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast
3853 		 * local address.
3854 		 */
3855 		if (ire->ire_type == IRE_BROADCAST &&
3856 		    icmp->icmp_state != TS_DATA_XFER) {
3857 			ASSERT(icmp->icmp_pending_op == T_BIND_REQ ||
3858 			    icmp->icmp_pending_op == O_T_BIND_REQ);
3859 			/* This was just a local bind to a MC/broadcast addr */
3860 			V6_SET_ZERO(icmp->icmp_v6src);
3861 			if (icmp->icmp_family == AF_INET6)
3862 				(void) icmp_build_hdrs(icmp);
3863 		} else if (V6_OR_V4_INADDR_ANY(icmp->icmp_v6src)) {
3864 			/*
3865 			 * Local address not yet set - pick it from the
3866 			 * T_bind_ack
3867 			 */
3868 			tba = (struct T_bind_ack *)mp->b_rptr;
3869 			addrp = &mp->b_rptr[tba->ADDR_offset];
3870 			switch (icmp->icmp_family) {
3871 			case AF_INET:
3872 				if (tba->ADDR_length == sizeof (ipa_conn_t)) {
3873 					ac = (ipa_conn_t *)addrp;
3874 				} else {
3875 					ASSERT(tba->ADDR_length ==
3876 					    sizeof (ipa_conn_x_t));
3877 					ac = &((ipa_conn_x_t *)addrp)->acx_conn;
3878 				}
3879 				IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr,
3880 				    &icmp->icmp_v6src);
3881 				break;
3882 			case AF_INET6:
3883 				if (tba->ADDR_length == sizeof (ipa6_conn_t)) {
3884 					ac6 = (ipa6_conn_t *)addrp;
3885 				} else {
3886 					ASSERT(tba->ADDR_length ==
3887 					    sizeof (ipa6_conn_x_t));
3888 					ac6 = &((ipa6_conn_x_t *)
3889 					    addrp)->ac6x_conn;
3890 				}
3891 				icmp->icmp_v6src = ac6->ac6_laddr;
3892 				(void) icmp_build_hdrs(icmp);
3893 			}
3894 		}
3895 		mp1 = mp1->b_cont;
3896 	}
3897 	icmp->icmp_pending_op = -1;
3898 	rw_exit(&icmp->icmp_rwlock);
3899 	/*
3900 	 * Look for one or more appended ACK message added by
3901 	 * icmp_connect or icmp_disconnect.
3902 	 * If none found just send up the T_BIND_ACK.
3903 	 * icmp_connect has appended a T_OK_ACK and a
3904 	 * T_CONN_CON.
3905 	 * icmp_disconnect has appended a T_OK_ACK.
3906 	 */
3907 	if (mp1 != NULL) {
3908 		if (mp->b_cont == mp1)
3909 			mp->b_cont = NULL;
3910 		else {
3911 			ASSERT(mp->b_cont->b_cont == mp1);
3912 			mp->b_cont->b_cont = NULL;
3913 		}
3914 		freemsg(mp);
3915 		mp = mp1;
3916 		while (mp != NULL) {
3917 			mp1 = mp->b_cont;
3918 			mp->b_cont = NULL;
3919 			putnext(connp->conn_rq, mp);
3920 			mp = mp1;
3921 		}
3922 		return;
3923 	}
3924 	freemsg(mp->b_cont);
3925 	mp->b_cont = NULL;
3926 	putnext(connp->conn_rq, mp);
3927 }
3928 
3929 static void
3930 icmp_bind_error(conn_t *connp, mblk_t *mp)
3931 {
3932 	icmp_t	*icmp = connp->conn_icmp;
3933 	struct T_error_ack *tea;
3934 
3935 	tea = (struct T_error_ack *)mp->b_rptr;
3936 	/*
3937 	 * If our O_T_BIND_REQ/T_BIND_REQ fails,
3938 	 * clear out the source address before
3939 	 * passing the message upstream.
3940 	 * If this was caused by a T_CONN_REQ
3941 	 * revert back to bound state.
3942 	 */
3943 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
3944 	if (icmp->icmp_state == TS_UNBND) {
3945 		/*
3946 		 * TPI has not yet bound - bind sent by icmp_bind_proto.
3947 		 */
3948 		freemsg(mp);
3949 		rw_exit(&icmp->icmp_rwlock);
3950 		return;
3951 	}
3952 	ASSERT(icmp->icmp_pending_op != -1);
3953 	tea->ERROR_prim = icmp->icmp_pending_op;
3954 	icmp->icmp_pending_op = -1;
3955 
3956 	switch (tea->ERROR_prim) {
3957 	case T_CONN_REQ:
3958 		ASSERT(icmp->icmp_state == TS_DATA_XFER);
3959 		/* Connect failed */
3960 		/* Revert back to the bound source */
3961 		icmp->icmp_v6src = icmp->icmp_bound_v6src;
3962 		icmp->icmp_state = TS_IDLE;
3963 		if (icmp->icmp_family == AF_INET6)
3964 			(void) icmp_build_hdrs(icmp);
3965 		break;
3966 
3967 	case T_DISCON_REQ:
3968 	case T_BIND_REQ:
3969 	case O_T_BIND_REQ:
3970 		V6_SET_ZERO(icmp->icmp_v6src);
3971 		V6_SET_ZERO(icmp->icmp_bound_v6src);
3972 		icmp->icmp_state = TS_UNBND;
3973 		if (icmp->icmp_family == AF_INET6)
3974 			(void) icmp_build_hdrs(icmp);
3975 		break;
3976 	default:
3977 		break;
3978 	}
3979 	rw_exit(&icmp->icmp_rwlock);
3980 	putnext(connp->conn_rq, mp);
3981 }
3982 
3983 /*
3984  * return SNMP stuff in buffer in mpdata
3985  */
3986 mblk_t *
3987 icmp_snmp_get(queue_t *q, mblk_t *mpctl)
3988 {
3989 	mblk_t			*mpdata;
3990 	struct opthdr		*optp;
3991 	conn_t			*connp = Q_TO_CONN(q);
3992 	icmp_stack_t		*is = connp->conn_netstack->netstack_icmp;
3993 	mblk_t			*mp2ctl;
3994 
3995 	/*
3996 	 * make a copy of the original message
3997 	 */
3998 	mp2ctl = copymsg(mpctl);
3999 
4000 	if (mpctl == NULL ||
4001 	    (mpdata = mpctl->b_cont) == NULL) {
4002 		freemsg(mpctl);
4003 		freemsg(mp2ctl);
4004 		return (0);
4005 	}
4006 
4007 	/* fixed length structure for IPv4 and IPv6 counters */
4008 	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
4009 	optp->level = EXPER_RAWIP;
4010 	optp->name = 0;
4011 	(void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib,
4012 	    sizeof (is->is_rawip_mib));
4013 	optp->len = msgdsize(mpdata);
4014 	qreply(q, mpctl);
4015 
4016 	return (mp2ctl);
4017 }
4018 
4019 /*
4020  * Return 0 if invalid set request, 1 otherwise, including non-rawip requests.
4021  * TODO:  If this ever actually tries to set anything, it needs to be
4022  * to do the appropriate locking.
4023  */
4024 /* ARGSUSED */
4025 int
4026 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
4027     uchar_t *ptr, int len)
4028 {
4029 	switch (level) {
4030 	case EXPER_RAWIP:
4031 		return (0);
4032 	default:
4033 		return (1);
4034 	}
4035 }
4036 
4037 /* Report for ndd "icmp_status" */
4038 /* ARGSUSED */
4039 static int
4040 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
4041 {
4042 	conn_t  *connp;
4043 	ip_stack_t *ipst;
4044 	char	laddrbuf[INET6_ADDRSTRLEN];
4045 	char	faddrbuf[INET6_ADDRSTRLEN];
4046 	int	i;
4047 
4048 	(void) mi_mpprintf(mp,
4049 	    "RAWIP    " MI_COL_HDRPAD_STR
4050 	/*   01234567[89ABCDEF] */
4051 	    "  src addr        dest addr       state");
4052 	/*   xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */
4053 
4054 	connp = Q_TO_CONN(q);
4055 	ipst = connp->conn_netstack->netstack_ip;
4056 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
4057 		connf_t *connfp;
4058 		char	*state;
4059 
4060 		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
4061 		connp = NULL;
4062 
4063 		while ((connp = ipcl_get_next_conn(connfp, connp,
4064 		    IPCL_RAWIPCONN)) != NULL) {
4065 			icmp_t  *icmp;
4066 
4067 			mutex_enter(&(connp)->conn_lock);
4068 			icmp = connp->conn_icmp;
4069 
4070 			if (icmp->icmp_state == TS_UNBND)
4071 				state = "UNBOUND";
4072 			else if (icmp->icmp_state == TS_IDLE)
4073 				state = "IDLE";
4074 			else if (icmp->icmp_state == TS_DATA_XFER)
4075 				state = "CONNECTED";
4076 			else
4077 				state = "UnkState";
4078 
4079 			(void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s",
4080 			    (void *)icmp,
4081 			    inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf,
4082 			    sizeof (faddrbuf)),
4083 			    inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf,
4084 			    sizeof (laddrbuf)),
4085 			    state);
4086 			mutex_exit(&(connp)->conn_lock);
4087 		}
4088 	}
4089 	return (0);
4090 }
4091 
4092 /*
4093  * This routine creates a T_UDERROR_IND message and passes it upstream.
4094  * The address and options are copied from the T_UNITDATA_REQ message
4095  * passed in mp.  This message is freed.
4096  */
4097 static void
4098 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
4099 {
4100 	mblk_t	*mp1;
4101 	uchar_t	*rptr = mp->b_rptr;
4102 	struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr;
4103 
4104 	mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset],
4105 	    tudr->DEST_length, (char *)&rptr[tudr->OPT_offset],
4106 	    tudr->OPT_length, err);
4107 	if (mp1)
4108 		qreply(q, mp1);
4109 	freemsg(mp);
4110 }
4111 
4112 /*
4113  * This routine is called by icmp_wput to handle T_UNBIND_REQ messages.
4114  * After some error checking, the message is passed downstream to ip.
4115  */
4116 static void
4117 icmp_unbind(queue_t *q, mblk_t *mp)
4118 {
4119 	icmp_t	*icmp = Q_TO_ICMP(q);
4120 
4121 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
4122 	/* If a bind has not been done, we can't unbind. */
4123 	if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) {
4124 		rw_exit(&icmp->icmp_rwlock);
4125 		icmp_err_ack(q, mp, TOUTSTATE, 0);
4126 		return;
4127 	}
4128 	icmp->icmp_pending_op = T_UNBIND_REQ;
4129 	rw_exit(&icmp->icmp_rwlock);
4130 
4131 	/*
4132 	 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK
4133 	 * and therefore ip_unbind must never return NULL.
4134 	 */
4135 	mp = ip_unbind(q, mp);
4136 	ASSERT(mp != NULL);
4137 	ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
4138 
4139 	/*
4140 	 * Once we're unbound from IP, the pending operation may be cleared
4141 	 * here.
4142 	 */
4143 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
4144 	V6_SET_ZERO(icmp->icmp_v6src);
4145 	V6_SET_ZERO(icmp->icmp_bound_v6src);
4146 	icmp->icmp_pending_op = -1;
4147 	icmp->icmp_state = TS_UNBND;
4148 	if (icmp->icmp_family == AF_INET6)
4149 		(void) icmp_build_hdrs(icmp);
4150 	rw_exit(&icmp->icmp_rwlock);
4151 
4152 	qreply(q, mp);
4153 }
4154 
4155 /*
4156  * Process IPv4 packets that already include an IP header.
4157  * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and
4158  * IPPROTO_IGMP).
4159  */
4160 static void
4161 icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop)
4162 {
4163 	icmp_stack_t *is = icmp->icmp_is;
4164 	ipha_t	*ipha;
4165 	int	ip_hdr_length;
4166 	int	tp_hdr_len;
4167 	mblk_t	*mp1;
4168 	uint_t	pkt_len;
4169 	ip_opt_info_t optinfo;
4170 	conn_t	*connp = icmp->icmp_connp;
4171 
4172 	optinfo.ip_opt_flags = 0;
4173 	optinfo.ip_opt_ill_index = 0;
4174 	ipha = (ipha_t *)mp->b_rptr;
4175 	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len;
4176 	if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) {
4177 		if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) {
4178 			ASSERT(icmp != NULL);
4179 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4180 			freemsg(mp);
4181 			return;
4182 		}
4183 		ipha = (ipha_t *)mp->b_rptr;
4184 	}
4185 	ipha->ipha_version_and_hdr_length =
4186 	    (IP_VERSION<<4) | (ip_hdr_length>>2);
4187 
4188 	/*
4189 	 * For the socket of SOCK_RAW type, the checksum is provided in the
4190 	 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to
4191 	 * tell IP that the application has sent a complete IP header and not
4192 	 * to compute the transport checksum nor change the DF flag.
4193 	 */
4194 	ipha->ipha_ident = IP_HDR_INCLUDED;
4195 	ipha->ipha_hdr_checksum = 0;
4196 	ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF);
4197 	/* Insert options if any */
4198 	if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) {
4199 		/*
4200 		 * Put the IP header plus any transport header that is
4201 		 * checksumed by ip_wput into the first mblk. (ip_wput assumes
4202 		 * that at least the checksum field is in the first mblk.)
4203 		 */
4204 		switch (ipha->ipha_protocol) {
4205 		case IPPROTO_UDP:
4206 			tp_hdr_len = 8;
4207 			break;
4208 		case IPPROTO_TCP:
4209 			tp_hdr_len = 20;
4210 			break;
4211 		default:
4212 			tp_hdr_len = 0;
4213 			break;
4214 		}
4215 		/*
4216 		 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus
4217 		 * tp_hdr_len bytes will be in a single mblk.
4218 		 */
4219 		if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH +
4220 		    tp_hdr_len)) {
4221 			if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH +
4222 			    tp_hdr_len)) {
4223 				BUMP_MIB(&is->is_rawip_mib,
4224 				    rawipOutErrors);
4225 				freemsg(mp);
4226 				return;
4227 			}
4228 			ipha = (ipha_t *)mp->b_rptr;
4229 		}
4230 
4231 		/*
4232 		 * if the length is larger then the max allowed IP packet,
4233 		 * then send an error and abort the processing.
4234 		 */
4235 		pkt_len = ntohs(ipha->ipha_length)
4236 		    + icmp->icmp_ip_snd_options_len;
4237 		if (pkt_len > IP_MAXPACKET) {
4238 			icmp_ud_err(q, mp, EMSGSIZE);
4239 			return;
4240 		}
4241 		if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra +
4242 		    tp_hdr_len, BPRI_LO))) {
4243 			icmp_ud_err(q, mp, ENOMEM);
4244 			return;
4245 		}
4246 		mp1->b_rptr += is->is_wroff_extra;
4247 		mp1->b_wptr = mp1->b_rptr + ip_hdr_length;
4248 
4249 		ipha->ipha_length = htons((uint16_t)pkt_len);
4250 		bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH);
4251 
4252 		/* Copy transport header if any */
4253 		bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len);
4254 		mp1->b_wptr += tp_hdr_len;
4255 
4256 		/* Add options */
4257 		ipha = (ipha_t *)mp1->b_rptr;
4258 		bcopy(icmp->icmp_ip_snd_options, &ipha[1],
4259 		    icmp->icmp_ip_snd_options_len);
4260 
4261 		/* Drop IP header and transport header from original */
4262 		(void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len);
4263 
4264 		mp1->b_cont = mp;
4265 		mp = mp1;
4266 		/*
4267 		 * Massage source route putting first source
4268 		 * route in ipha_dst.
4269 		 */
4270 		(void) ip_massage_options(ipha, is->is_netstack);
4271 	}
4272 
4273 	if (pktinfop != NULL) {
4274 		/*
4275 		 * Over write the source address provided in the header
4276 		 */
4277 		if (pktinfop->ip4_addr != INADDR_ANY) {
4278 			ipha->ipha_src = pktinfop->ip4_addr;
4279 			optinfo.ip_opt_flags = IP_VERIFY_SRC;
4280 		}
4281 
4282 		if (pktinfop->ip4_ill_index != 0) {
4283 			optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
4284 		}
4285 	}
4286 
4287 	mblk_setcred(mp, connp->conn_cred);
4288 	ip_output_options(connp, mp, q, IP_WPUT,
4289 	    &optinfo);
4290 }
4291 
4292 static boolean_t
4293 icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst)
4294 {
4295 	int err;
4296 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
4297 	icmp_stack_t		*is = icmp->icmp_is;
4298 	conn_t	*connp = icmp->icmp_connp;
4299 
4300 	err = tsol_compute_label(DB_CREDDEF(mp, connp->conn_cred), dst,
4301 	    opt_storage, icmp->icmp_mac_exempt,
4302 	    is->is_netstack->netstack_ip);
4303 	if (err == 0) {
4304 		err = tsol_update_options(&icmp->icmp_ip_snd_options,
4305 		    &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len,
4306 		    opt_storage);
4307 	}
4308 	if (err != 0) {
4309 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4310 		DTRACE_PROBE4(
4311 		    tx__ip__log__drop__updatelabel__icmp,
4312 		    char *, "queue(1) failed to update options(2) on mp(3)",
4313 		    queue_t *, q, char *, opt_storage, mblk_t *, mp);
4314 		icmp_ud_err(q, mp, err);
4315 		return (B_FALSE);
4316 	}
4317 	IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst);
4318 	return (B_TRUE);
4319 }
4320 
4321 /*
4322  * This routine handles all messages passed downstream.  It either
4323  * consumes the message or passes it downstream; it never queues a
4324  * a message.
4325  */
4326 static void
4327 icmp_wput(queue_t *q, mblk_t *mp)
4328 {
4329 	uchar_t	*rptr = mp->b_rptr;
4330 	ipha_t	*ipha;
4331 	mblk_t	*mp1;
4332 	int	ip_hdr_length;
4333 #define	tudr ((struct T_unitdata_req *)rptr)
4334 	size_t	ip_len;
4335 	conn_t	*connp = Q_TO_CONN(q);
4336 	icmp_t	*icmp = connp->conn_icmp;
4337 	icmp_stack_t *is = icmp->icmp_is;
4338 	sin6_t	*sin6;
4339 	sin_t	*sin;
4340 	ipaddr_t	v4dst;
4341 	ip4_pkt_t	pktinfo;
4342 	ip4_pkt_t	*pktinfop = &pktinfo;
4343 	ip_opt_info_t	optinfo;
4344 
4345 	switch (mp->b_datap->db_type) {
4346 	case M_DATA:
4347 		if (icmp->icmp_hdrincl) {
4348 			ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
4349 			ipha = (ipha_t *)mp->b_rptr;
4350 			if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) {
4351 				if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) {
4352 					BUMP_MIB(&is->is_rawip_mib,
4353 					    rawipOutErrors);
4354 					freemsg(mp);
4355 					return;
4356 				}
4357 				ipha = (ipha_t *)mp->b_rptr;
4358 			}
4359 			/*
4360 			 * If this connection was used for v6 (inconceivable!)
4361 			 * or if we have a new destination, then it's time to
4362 			 * figure a new label.
4363 			 */
4364 			if (is_system_labeled() &&
4365 			    (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) ||
4366 			    V4_PART_OF_V6(icmp->icmp_v6lastdst) !=
4367 			    ipha->ipha_dst) &&
4368 			    !icmp_update_label(q, icmp, mp, ipha->ipha_dst)) {
4369 				return;
4370 			}
4371 			icmp_wput_hdrincl(q, mp, icmp, NULL);
4372 			return;
4373 		}
4374 		freemsg(mp);
4375 		return;
4376 	case M_PROTO:
4377 	case M_PCPROTO:
4378 		ip_len = mp->b_wptr - rptr;
4379 		if (ip_len >= sizeof (struct T_unitdata_req)) {
4380 			/* Expedite valid T_UNITDATA_REQ to below the switch */
4381 			if (((union T_primitives *)rptr)->type
4382 			    == T_UNITDATA_REQ)
4383 				break;
4384 		}
4385 		/* FALLTHRU */
4386 	default:
4387 		icmp_wput_other(q, mp);
4388 		return;
4389 	}
4390 
4391 	/* Handle T_UNITDATA_REQ messages here. */
4392 
4393 
4394 
4395 	if (icmp->icmp_state == TS_UNBND) {
4396 		/* If a port has not been bound to the stream, fail. */
4397 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4398 		icmp_ud_err(q, mp, EPROTO);
4399 		return;
4400 	}
4401 	mp1 = mp->b_cont;
4402 	if (mp1 == NULL) {
4403 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4404 		icmp_ud_err(q, mp, EPROTO);
4405 		return;
4406 	}
4407 
4408 	if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) {
4409 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4410 		icmp_ud_err(q, mp, EADDRNOTAVAIL);
4411 		return;
4412 	}
4413 
4414 	switch (icmp->icmp_family) {
4415 	case AF_INET6:
4416 		sin6 = (sin6_t *)&rptr[tudr->DEST_offset];
4417 		if (!OK_32PTR((char *)sin6) ||
4418 		    tudr->DEST_length != sizeof (sin6_t) ||
4419 		    sin6->sin6_family != AF_INET6) {
4420 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4421 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
4422 			return;
4423 		}
4424 
4425 		/* No support for mapped addresses on raw sockets */
4426 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
4427 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4428 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
4429 			return;
4430 		}
4431 
4432 		/*
4433 		 * Destination is a native IPv6 address.
4434 		 * Send out an IPv6 format packet.
4435 		 */
4436 		icmp_wput_ipv6(q, mp, sin6, tudr->OPT_length);
4437 		return;
4438 
4439 	case AF_INET:
4440 		sin = (sin_t *)&rptr[tudr->DEST_offset];
4441 		if (!OK_32PTR((char *)sin) ||
4442 		    tudr->DEST_length != sizeof (sin_t) ||
4443 		    sin->sin_family != AF_INET) {
4444 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4445 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
4446 			return;
4447 		}
4448 		/* Extract and ipaddr */
4449 		v4dst = sin->sin_addr.s_addr;
4450 		break;
4451 
4452 	default:
4453 		ASSERT(0);
4454 	}
4455 
4456 	pktinfop->ip4_ill_index = 0;
4457 	pktinfop->ip4_addr = INADDR_ANY;
4458 	optinfo.ip_opt_flags = 0;
4459 	optinfo.ip_opt_ill_index = 0;
4460 
4461 
4462 	/*
4463 	 * If options passed in, feed it for verification and handling
4464 	 */
4465 	if (tudr->OPT_length != 0) {
4466 		int error;
4467 
4468 		error = 0;
4469 		if (icmp_unitdata_opt_process(q, mp, &error,
4470 		    (void *)pktinfop) < 0) {
4471 			/* failure */
4472 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4473 			icmp_ud_err(q, mp, error);
4474 			return;
4475 		}
4476 		ASSERT(error == 0);
4477 		/*
4478 		 * Note: Success in processing options.
4479 		 * mp option buffer represented by
4480 		 * OPT_length/offset now potentially modified
4481 		 * and contain option setting results
4482 		 */
4483 
4484 	}
4485 
4486 	if (v4dst == INADDR_ANY)
4487 		v4dst = htonl(INADDR_LOOPBACK);
4488 
4489 	/* Check if our saved options are valid; update if not */
4490 	if (is_system_labeled() &&
4491 	    (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) ||
4492 	    V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst) &&
4493 	    !icmp_update_label(q, icmp, mp, v4dst)) {
4494 		return;
4495 	}
4496 
4497 	/* Protocol 255 contains full IP headers */
4498 	if (icmp->icmp_hdrincl) {
4499 		freeb(mp);
4500 		icmp_wput_hdrincl(q, mp1, icmp, pktinfop);
4501 		return;
4502 	}
4503 
4504 
4505 	/* Add an IP header */
4506 	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len;
4507 	ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length];
4508 	if ((uchar_t *)ipha < mp1->b_datap->db_base ||
4509 	    mp1->b_datap->db_ref != 1 ||
4510 	    !OK_32PTR(ipha)) {
4511 		if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra,
4512 		    BPRI_LO))) {
4513 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4514 			icmp_ud_err(q, mp, ENOMEM);
4515 			return;
4516 		}
4517 		mp1->b_cont = mp->b_cont;
4518 		ipha = (ipha_t *)mp1->b_datap->db_lim;
4519 		mp1->b_wptr = (uchar_t *)ipha;
4520 		ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length);
4521 	}
4522 #ifdef	_BIG_ENDIAN
4523 	/* Set version, header length, and tos */
4524 	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
4525 	    ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) |
4526 	    icmp->icmp_type_of_service);
4527 	/* Set ttl and protocol */
4528 	*(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto;
4529 #else
4530 	/* Set version, header length, and tos */
4531 	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
4532 	    ((icmp->icmp_type_of_service << 8) |
4533 	    ((IP_VERSION << 4) | (ip_hdr_length>>2)));
4534 	/* Set ttl and protocol */
4535 	*(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl;
4536 #endif
4537 	if (pktinfop->ip4_addr != INADDR_ANY) {
4538 		ipha->ipha_src = pktinfop->ip4_addr;
4539 		optinfo.ip_opt_flags = IP_VERIFY_SRC;
4540 	} else {
4541 
4542 		/*
4543 		 * Copy our address into the packet.  If this is zero,
4544 		 * ip will fill in the real source address.
4545 		 */
4546 		IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src);
4547 	}
4548 
4549 	ipha->ipha_fragment_offset_and_flags = 0;
4550 
4551 	if (pktinfop->ip4_ill_index != 0) {
4552 		optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
4553 	}
4554 
4555 
4556 	/*
4557 	 * For the socket of SOCK_RAW type, the checksum is provided in the
4558 	 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to
4559 	 * tell IP that the application has sent a complete IP header and not
4560 	 * to compute the transport checksum nor change the DF flag.
4561 	 */
4562 	ipha->ipha_ident = IP_HDR_INCLUDED;
4563 
4564 	/* Finish common formatting of the packet. */
4565 	mp1->b_rptr = (uchar_t *)ipha;
4566 
4567 	ip_len = mp1->b_wptr - (uchar_t *)ipha;
4568 	if (mp1->b_cont != NULL)
4569 		ip_len += msgdsize(mp1->b_cont);
4570 
4571 	/*
4572 	 * Set the length into the IP header.
4573 	 * If the length is greater than the maximum allowed by IP,
4574 	 * then free the message and return. Do not try and send it
4575 	 * as this can cause problems in layers below.
4576 	 */
4577 	if (ip_len > IP_MAXPACKET) {
4578 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4579 		icmp_ud_err(q, mp, EMSGSIZE);
4580 		return;
4581 	}
4582 	ipha->ipha_length = htons((uint16_t)ip_len);
4583 	/*
4584 	 * Copy in the destination address from the T_UNITDATA
4585 	 * request
4586 	 */
4587 	ipha->ipha_dst = v4dst;
4588 
4589 	/*
4590 	 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic.
4591 	 */
4592 	if (CLASSD(v4dst))
4593 		ipha->ipha_ttl = icmp->icmp_multicast_ttl;
4594 
4595 	/* Copy in options if any */
4596 	if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) {
4597 		bcopy(icmp->icmp_ip_snd_options,
4598 		    &ipha[1], icmp->icmp_ip_snd_options_len);
4599 		/*
4600 		 * Massage source route putting first source route in ipha_dst.
4601 		 * Ignore the destination in the T_unitdata_req.
4602 		 */
4603 		(void) ip_massage_options(ipha, is->is_netstack);
4604 	}
4605 
4606 	freeb(mp);
4607 	BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams);
4608 	mblk_setcred(mp1, connp->conn_cred);
4609 	ip_output_options(Q_TO_CONN(q), mp1, q, IP_WPUT, &optinfo);
4610 #undef	ipha
4611 #undef tudr
4612 }
4613 
4614 static boolean_t
4615 icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst)
4616 {
4617 	int err;
4618 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
4619 	icmp_stack_t		*is = icmp->icmp_is;
4620 	conn_t	*connp = icmp->icmp_connp;
4621 
4622 	err = tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), dst,
4623 	    opt_storage, icmp->icmp_mac_exempt,
4624 	    is->is_netstack->netstack_ip);
4625 	if (err == 0) {
4626 		err = tsol_update_sticky(&icmp->icmp_sticky_ipp,
4627 		    &icmp->icmp_label_len_v6, opt_storage);
4628 	}
4629 	if (err != 0) {
4630 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4631 		DTRACE_PROBE4(
4632 		    tx__ip__log__drop__updatelabel__icmp6,
4633 		    char *, "queue(1) failed to update options(2) on mp(3)",
4634 		    queue_t *, wq, char *, opt_storage, mblk_t *, mp);
4635 		icmp_ud_err(wq, mp, err);
4636 		return (B_FALSE);
4637 	}
4638 
4639 	icmp->icmp_v6lastdst = *dst;
4640 	return (B_TRUE);
4641 }
4642 
4643 /*
4644  * icmp_wput_ipv6():
4645  * Assumes that icmp_wput did some sanity checking on the destination
4646  * address, but that the label may not yet be correct.
4647  */
4648 void
4649 icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen)
4650 {
4651 	ip6_t			*ip6h;
4652 	ip6i_t			*ip6i;	/* mp1->b_rptr even if no ip6i_t */
4653 	mblk_t			*mp1;
4654 	int			ip_hdr_len = IPV6_HDR_LEN;
4655 	size_t			ip_len;
4656 	icmp_t			*icmp = Q_TO_ICMP(q);
4657 	icmp_stack_t		*is = icmp->icmp_is;
4658 	ip6_pkt_t		ipp_s;	/* For ancillary data options */
4659 	ip6_pkt_t		*ipp = &ipp_s;
4660 	ip6_pkt_t		*tipp;
4661 	uint32_t		csum = 0;
4662 	uint_t			ignore = 0;
4663 	uint_t			option_exists = 0, is_sticky = 0;
4664 	uint8_t			*cp;
4665 	uint8_t			*nxthdr_ptr;
4666 	in6_addr_t		ip6_dst;
4667 
4668 	/*
4669 	 * If the local address is a mapped address return
4670 	 * an error.
4671 	 * It would be possible to send an IPv6 packet but the
4672 	 * response would never make it back to the application
4673 	 * since it is bound to a mapped address.
4674 	 */
4675 	if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) {
4676 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4677 		icmp_ud_err(q, mp, EADDRNOTAVAIL);
4678 		return;
4679 	}
4680 
4681 	ipp->ipp_fields = 0;
4682 	ipp->ipp_sticky_ignored = 0;
4683 
4684 	/*
4685 	 * If TPI options passed in, feed it for verification and handling
4686 	 */
4687 	if (tudr_optlen != 0) {
4688 		int error;
4689 
4690 		if (icmp_unitdata_opt_process(q, mp, &error,
4691 		    (void *)ipp) < 0) {
4692 			/* failure */
4693 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4694 			icmp_ud_err(q, mp, error);
4695 			return;
4696 		}
4697 		ignore = ipp->ipp_sticky_ignored;
4698 		ASSERT(error == 0);
4699 	}
4700 
4701 	if (sin6->sin6_scope_id != 0 &&
4702 	    IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
4703 		/*
4704 		 * IPPF_SCOPE_ID is special.  It's neither a sticky
4705 		 * option nor ancillary data.  It needs to be
4706 		 * explicitly set in options_exists.
4707 		 */
4708 		option_exists |= IPPF_SCOPE_ID;
4709 	}
4710 
4711 	/*
4712 	 * Compute the destination address
4713 	 */
4714 	ip6_dst = sin6->sin6_addr;
4715 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
4716 		ip6_dst = ipv6_loopback;
4717 
4718 	/*
4719 	 * If we're not going to the same destination as last time, then
4720 	 * recompute the label required.  This is done in a separate routine to
4721 	 * avoid blowing up our stack here.
4722 	 */
4723 	if (is_system_labeled() &&
4724 	    !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst) &&
4725 	    !icmp_update_label_v6(q, icmp, mp, &ip6_dst)) {
4726 		return;
4727 	}
4728 
4729 	/*
4730 	 * If there's a security label here, then we ignore any options the
4731 	 * user may try to set.  We keep the peer's label as a hidden sticky
4732 	 * option.
4733 	 */
4734 	if (icmp->icmp_label_len_v6 > 0) {
4735 		ignore &= ~IPPF_HOPOPTS;
4736 		ipp->ipp_fields &= ~IPPF_HOPOPTS;
4737 	}
4738 
4739 	if ((icmp->icmp_sticky_ipp.ipp_fields == 0) &&
4740 	    (ipp->ipp_fields == 0)) {
4741 		/* No sticky options nor ancillary data. */
4742 		goto no_options;
4743 	}
4744 
4745 	/*
4746 	 * Go through the options figuring out where each is going to
4747 	 * come from and build two masks.  The first mask indicates if
4748 	 * the option exists at all.  The second mask indicates if the
4749 	 * option is sticky or ancillary.
4750 	 */
4751 	if (!(ignore & IPPF_HOPOPTS)) {
4752 		if (ipp->ipp_fields & IPPF_HOPOPTS) {
4753 			option_exists |= IPPF_HOPOPTS;
4754 			ip_hdr_len += ipp->ipp_hopoptslen;
4755 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) {
4756 			option_exists |= IPPF_HOPOPTS;
4757 			is_sticky |= IPPF_HOPOPTS;
4758 			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen;
4759 		}
4760 	}
4761 
4762 	if (!(ignore & IPPF_RTHDR)) {
4763 		if (ipp->ipp_fields & IPPF_RTHDR) {
4764 			option_exists |= IPPF_RTHDR;
4765 			ip_hdr_len += ipp->ipp_rthdrlen;
4766 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) {
4767 			option_exists |= IPPF_RTHDR;
4768 			is_sticky |= IPPF_RTHDR;
4769 			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen;
4770 		}
4771 	}
4772 
4773 	if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) {
4774 		/*
4775 		 * Need to have a router header to use these.
4776 		 */
4777 		if (ipp->ipp_fields & IPPF_RTDSTOPTS) {
4778 			option_exists |= IPPF_RTDSTOPTS;
4779 			ip_hdr_len += ipp->ipp_rtdstoptslen;
4780 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) {
4781 			option_exists |= IPPF_RTDSTOPTS;
4782 			is_sticky |= IPPF_RTDSTOPTS;
4783 			ip_hdr_len +=
4784 			    icmp->icmp_sticky_ipp.ipp_rtdstoptslen;
4785 		}
4786 	}
4787 
4788 	if (!(ignore & IPPF_DSTOPTS)) {
4789 		if (ipp->ipp_fields & IPPF_DSTOPTS) {
4790 			option_exists |= IPPF_DSTOPTS;
4791 			ip_hdr_len += ipp->ipp_dstoptslen;
4792 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) {
4793 			option_exists |= IPPF_DSTOPTS;
4794 			is_sticky |= IPPF_DSTOPTS;
4795 			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen;
4796 		}
4797 	}
4798 
4799 	if (!(ignore & IPPF_IFINDEX)) {
4800 		if (ipp->ipp_fields & IPPF_IFINDEX) {
4801 			option_exists |= IPPF_IFINDEX;
4802 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) {
4803 			option_exists |= IPPF_IFINDEX;
4804 			is_sticky |= IPPF_IFINDEX;
4805 		}
4806 	}
4807 
4808 	if (!(ignore & IPPF_ADDR)) {
4809 		if (ipp->ipp_fields & IPPF_ADDR) {
4810 			option_exists |= IPPF_ADDR;
4811 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) {
4812 			option_exists |= IPPF_ADDR;
4813 			is_sticky |= IPPF_ADDR;
4814 		}
4815 	}
4816 
4817 	if (!(ignore & IPPF_DONTFRAG)) {
4818 		if (ipp->ipp_fields & IPPF_DONTFRAG) {
4819 			option_exists |= IPPF_DONTFRAG;
4820 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) {
4821 			option_exists |= IPPF_DONTFRAG;
4822 			is_sticky |= IPPF_DONTFRAG;
4823 		}
4824 	}
4825 
4826 	if (!(ignore & IPPF_USE_MIN_MTU)) {
4827 		if (ipp->ipp_fields & IPPF_USE_MIN_MTU) {
4828 			option_exists |= IPPF_USE_MIN_MTU;
4829 		} else if (icmp->icmp_sticky_ipp.ipp_fields &
4830 		    IPPF_USE_MIN_MTU) {
4831 			option_exists |= IPPF_USE_MIN_MTU;
4832 			is_sticky |= IPPF_USE_MIN_MTU;
4833 		}
4834 	}
4835 
4836 	if (!(ignore & IPPF_NEXTHOP)) {
4837 		if (ipp->ipp_fields & IPPF_NEXTHOP) {
4838 			option_exists |= IPPF_NEXTHOP;
4839 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) {
4840 			option_exists |= IPPF_NEXTHOP;
4841 			is_sticky |= IPPF_NEXTHOP;
4842 		}
4843 	}
4844 
4845 	if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT))
4846 		option_exists |= IPPF_HOPLIMIT;
4847 	/* IPV6_HOPLIMIT can never be sticky */
4848 	ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT));
4849 
4850 	if (!(ignore & IPPF_UNICAST_HOPS) &&
4851 	    (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) {
4852 		option_exists |= IPPF_UNICAST_HOPS;
4853 		is_sticky |= IPPF_UNICAST_HOPS;
4854 	}
4855 
4856 	if (!(ignore & IPPF_MULTICAST_HOPS) &&
4857 	    (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) {
4858 		option_exists |= IPPF_MULTICAST_HOPS;
4859 		is_sticky |= IPPF_MULTICAST_HOPS;
4860 	}
4861 
4862 	if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) {
4863 		/* This is a sticky socket option only */
4864 		option_exists |= IPPF_NO_CKSUM;
4865 		is_sticky |= IPPF_NO_CKSUM;
4866 	}
4867 
4868 	if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) {
4869 		/* This is a sticky socket option only */
4870 		option_exists |= IPPF_RAW_CKSUM;
4871 		is_sticky |= IPPF_RAW_CKSUM;
4872 	}
4873 
4874 	if (!(ignore & IPPF_TCLASS)) {
4875 		if (ipp->ipp_fields & IPPF_TCLASS) {
4876 			option_exists |= IPPF_TCLASS;
4877 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) {
4878 			option_exists |= IPPF_TCLASS;
4879 			is_sticky |= IPPF_TCLASS;
4880 		}
4881 	}
4882 
4883 no_options:
4884 
4885 	/*
4886 	 * If any options carried in the ip6i_t were specified, we
4887 	 * need to account for the ip6i_t in the data we'll be sending
4888 	 * down.
4889 	 */
4890 	if (option_exists & IPPF_HAS_IP6I)
4891 		ip_hdr_len += sizeof (ip6i_t);
4892 
4893 	/* check/fix buffer config, setup pointers into it */
4894 	mp1 = mp->b_cont;
4895 	ip6h = (ip6_t *)&mp1->b_rptr[-ip_hdr_len];
4896 	if ((mp1->b_datap->db_ref != 1) ||
4897 	    ((unsigned char *)ip6h < mp1->b_datap->db_base) ||
4898 	    !OK_32PTR(ip6h)) {
4899 		/* Try to get everything in a single mblk next time */
4900 		if (ip_hdr_len > icmp->icmp_max_hdr_len) {
4901 			icmp->icmp_max_hdr_len = ip_hdr_len;
4902 			(void) mi_set_sth_wroff(RD(q),
4903 			    icmp->icmp_max_hdr_len + is->is_wroff_extra);
4904 		}
4905 		mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO);
4906 		if (!mp1) {
4907 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4908 			icmp_ud_err(q, mp, ENOMEM);
4909 			return;
4910 		}
4911 		mp1->b_cont = mp->b_cont;
4912 		mp1->b_wptr = mp1->b_datap->db_lim;
4913 		ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len);
4914 	}
4915 	mp1->b_rptr = (unsigned char *)ip6h;
4916 	ip6i = (ip6i_t *)ip6h;
4917 
4918 #define	ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &icmp->icmp_sticky_ipp : ipp)
4919 	if (option_exists & IPPF_HAS_IP6I) {
4920 		ip6h = (ip6_t *)&ip6i[1];
4921 		ip6i->ip6i_flags = 0;
4922 		ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
4923 
4924 		/* sin6_scope_id takes precendence over IPPF_IFINDEX */
4925 		if (option_exists & IPPF_SCOPE_ID) {
4926 			ip6i->ip6i_flags |= IP6I_IFINDEX;
4927 			ip6i->ip6i_ifindex = sin6->sin6_scope_id;
4928 		} else if (option_exists & IPPF_IFINDEX) {
4929 			tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX);
4930 			ASSERT(tipp->ipp_ifindex != 0);
4931 			ip6i->ip6i_flags |= IP6I_IFINDEX;
4932 			ip6i->ip6i_ifindex = tipp->ipp_ifindex;
4933 		}
4934 
4935 		if (option_exists & IPPF_RAW_CKSUM) {
4936 			ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM;
4937 			ip6i->ip6i_checksum_off = icmp->icmp_checksum_off;
4938 		}
4939 
4940 		if (option_exists & IPPF_NO_CKSUM) {
4941 			ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM;
4942 		}
4943 
4944 		if (option_exists & IPPF_ADDR) {
4945 			/*
4946 			 * Enable per-packet source address verification if
4947 			 * IPV6_PKTINFO specified the source address.
4948 			 * ip6_src is set in the transport's _wput function.
4949 			 */
4950 			ip6i->ip6i_flags |= IP6I_VERIFY_SRC;
4951 		}
4952 
4953 		if (option_exists & IPPF_DONTFRAG) {
4954 			ip6i->ip6i_flags |= IP6I_DONTFRAG;
4955 		}
4956 
4957 		if (option_exists & IPPF_USE_MIN_MTU) {
4958 			ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU(
4959 			    ip6i->ip6i_flags, ipp->ipp_use_min_mtu);
4960 		}
4961 
4962 		if (option_exists & IPPF_NEXTHOP) {
4963 			tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP);
4964 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop));
4965 			ip6i->ip6i_flags |= IP6I_NEXTHOP;
4966 			ip6i->ip6i_nexthop = tipp->ipp_nexthop;
4967 		}
4968 
4969 		/*
4970 		 * tell IP this is an ip6i_t private header
4971 		 */
4972 		ip6i->ip6i_nxt = IPPROTO_RAW;
4973 	}
4974 
4975 	/* Initialize IPv6 header */
4976 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
4977 	bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src));
4978 
4979 	/* Set the hoplimit of the outgoing packet. */
4980 	if (option_exists & IPPF_HOPLIMIT) {
4981 		/* IPV6_HOPLIMIT ancillary data overrides all other settings. */
4982 		ip6h->ip6_hops = ipp->ipp_hoplimit;
4983 		ip6i->ip6i_flags |= IP6I_HOPLIMIT;
4984 	} else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
4985 		ip6h->ip6_hops = icmp->icmp_multicast_ttl;
4986 		if (option_exists & IPPF_MULTICAST_HOPS)
4987 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
4988 	} else {
4989 		ip6h->ip6_hops = icmp->icmp_ttl;
4990 		if (option_exists & IPPF_UNICAST_HOPS)
4991 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
4992 	}
4993 
4994 	if (option_exists & IPPF_ADDR) {
4995 		tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR);
4996 		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr));
4997 		ip6h->ip6_src = tipp->ipp_addr;
4998 	} else {
4999 		/*
5000 		 * The source address was not set using IPV6_PKTINFO.
5001 		 * First look at the bound source.
5002 		 * If unspecified fallback to __sin6_src_id.
5003 		 */
5004 		ip6h->ip6_src = icmp->icmp_v6src;
5005 		if (sin6->__sin6_src_id != 0 &&
5006 		    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
5007 			ip_srcid_find_id(sin6->__sin6_src_id,
5008 			    &ip6h->ip6_src, icmp->icmp_zoneid,
5009 			    is->is_netstack);
5010 		}
5011 	}
5012 
5013 	nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
5014 	cp = (uint8_t *)&ip6h[1];
5015 
5016 	/*
5017 	 * Here's where we have to start stringing together
5018 	 * any extension headers in the right order:
5019 	 * Hop-by-hop, destination, routing, and final destination opts.
5020 	 */
5021 	if (option_exists & IPPF_HOPOPTS) {
5022 		/* Hop-by-hop options */
5023 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
5024 		tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS);
5025 
5026 		*nxthdr_ptr = IPPROTO_HOPOPTS;
5027 		nxthdr_ptr = &hbh->ip6h_nxt;
5028 
5029 		bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen);
5030 		cp += tipp->ipp_hopoptslen;
5031 	}
5032 	/*
5033 	 * En-route destination options
5034 	 * Only do them if there's a routing header as well
5035 	 */
5036 	if (option_exists & IPPF_RTDSTOPTS) {
5037 		ip6_dest_t *dst = (ip6_dest_t *)cp;
5038 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS);
5039 
5040 		*nxthdr_ptr = IPPROTO_DSTOPTS;
5041 		nxthdr_ptr = &dst->ip6d_nxt;
5042 
5043 		bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen);
5044 		cp += tipp->ipp_rtdstoptslen;
5045 	}
5046 	/*
5047 	 * Routing header next
5048 	 */
5049 	if (option_exists & IPPF_RTHDR) {
5050 		ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
5051 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR);
5052 
5053 		*nxthdr_ptr = IPPROTO_ROUTING;
5054 		nxthdr_ptr = &rt->ip6r_nxt;
5055 
5056 		bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen);
5057 		cp += tipp->ipp_rthdrlen;
5058 	}
5059 	/*
5060 	 * Do ultimate destination options
5061 	 */
5062 	if (option_exists & IPPF_DSTOPTS) {
5063 		ip6_dest_t *dest = (ip6_dest_t *)cp;
5064 		tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS);
5065 
5066 		*nxthdr_ptr = IPPROTO_DSTOPTS;
5067 		nxthdr_ptr = &dest->ip6d_nxt;
5068 
5069 		bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen);
5070 		cp += tipp->ipp_dstoptslen;
5071 	}
5072 
5073 	/*
5074 	 * Now set the last header pointer to the proto passed in
5075 	 */
5076 	ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len);
5077 	*nxthdr_ptr = icmp->icmp_proto;
5078 
5079 	/*
5080 	 * Copy in the destination address
5081 	 */
5082 	ip6h->ip6_dst = ip6_dst;
5083 
5084 	ip6h->ip6_vcf =
5085 	    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
5086 	    (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
5087 
5088 	if (option_exists & IPPF_TCLASS) {
5089 		tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS);
5090 		ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
5091 		    tipp->ipp_tclass);
5092 	}
5093 	if (option_exists & IPPF_RTHDR) {
5094 		ip6_rthdr_t	*rth;
5095 
5096 		/*
5097 		 * Perform any processing needed for source routing.
5098 		 * We know that all extension headers will be in the same mblk
5099 		 * as the IPv6 header.
5100 		 */
5101 		rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr);
5102 		if (rth != NULL && rth->ip6r_segleft != 0) {
5103 			if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) {
5104 				/*
5105 				 * Drop packet - only support Type 0 routing.
5106 				 * Notify the application as well.
5107 				 */
5108 				icmp_ud_err(q, mp, EPROTO);
5109 				BUMP_MIB(&is->is_rawip_mib,
5110 				    rawipOutErrors);
5111 				return;
5112 			}
5113 			/*
5114 			 * rth->ip6r_len is twice the number of
5115 			 * addresses in the header
5116 			 */
5117 			if (rth->ip6r_len & 0x1) {
5118 				icmp_ud_err(q, mp, EPROTO);
5119 				BUMP_MIB(&is->is_rawip_mib,
5120 				    rawipOutErrors);
5121 				return;
5122 			}
5123 			/*
5124 			 * Shuffle the routing header and ip6_dst
5125 			 * addresses, and get the checksum difference
5126 			 * between the first hop (in ip6_dst) and
5127 			 * the destination (in the last routing hdr entry).
5128 			 */
5129 			csum = ip_massage_options_v6(ip6h, rth,
5130 			    is->is_netstack);
5131 			/*
5132 			 * Verify that the first hop isn't a mapped address.
5133 			 * Routers along the path need to do this verification
5134 			 * for subsequent hops.
5135 			 */
5136 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
5137 				icmp_ud_err(q, mp, EADDRNOTAVAIL);
5138 				BUMP_MIB(&is->is_rawip_mib,
5139 				    rawipOutErrors);
5140 				return;
5141 			}
5142 		}
5143 	}
5144 
5145 	ip_len = mp1->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN;
5146 	if (mp1->b_cont != NULL)
5147 		ip_len += msgdsize(mp1->b_cont);
5148 
5149 	/*
5150 	 * Set the length into the IP header.
5151 	 * If the length is greater than the maximum allowed by IP,
5152 	 * then free the message and return. Do not try and send it
5153 	 * as this can cause problems in layers below.
5154 	 */
5155 	if (ip_len > IP_MAXPACKET) {
5156 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
5157 		icmp_ud_err(q, mp, EMSGSIZE);
5158 		return;
5159 	}
5160 	if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) {
5161 		uint_t	cksum_off;	/* From ip6i == mp1->b_rptr */
5162 		uint16_t *cksum_ptr;
5163 		uint_t	ext_hdrs_len;
5164 
5165 		/* ICMPv6 must have an offset matching icmp6_cksum offset */
5166 		ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 ||
5167 		    icmp->icmp_checksum_off == 2);
5168 
5169 		/*
5170 		 * We make it easy for IP to include our pseudo header
5171 		 * by putting our length in uh_checksum, modified (if
5172 		 * we have a routing header) by the checksum difference
5173 		 * between the ultimate destination and first hop addresses.
5174 		 * Note: ICMPv6 must always checksum the packet.
5175 		 */
5176 		cksum_off = ip_hdr_len + icmp->icmp_checksum_off;
5177 		if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) {
5178 			if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) {
5179 				BUMP_MIB(&is->is_rawip_mib,
5180 				    rawipOutErrors);
5181 				freemsg(mp);
5182 				return;
5183 			}
5184 			ip6i = (ip6i_t *)mp1->b_rptr;
5185 			if (ip6i->ip6i_nxt == IPPROTO_RAW)
5186 				ip6h = (ip6_t *)&ip6i[1];
5187 			else
5188 				ip6h = (ip6_t *)ip6i;
5189 		}
5190 		/* Add payload length to checksum */
5191 		ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN -
5192 		    (int)((uchar_t *)ip6h - (uchar_t *)ip6i);
5193 		csum += htons(ip_len - ext_hdrs_len);
5194 
5195 		cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off);
5196 		csum = (csum & 0xFFFF) + (csum >> 16);
5197 		*cksum_ptr = (uint16_t)csum;
5198 	}
5199 
5200 #ifdef _LITTLE_ENDIAN
5201 	ip_len = htons(ip_len);
5202 #endif
5203 	ip6h->ip6_plen = (uint16_t)ip_len;
5204 
5205 	freeb(mp);
5206 
5207 	/* We're done. Pass the packet to IP */
5208 	BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams);
5209 	ip_output_v6(icmp->icmp_connp, mp1, q, IP_WPUT);
5210 }
5211 
5212 static void
5213 icmp_wput_other(queue_t *q, mblk_t *mp)
5214 {
5215 	uchar_t	*rptr = mp->b_rptr;
5216 	struct iocblk *iocp;
5217 #define	tudr ((struct T_unitdata_req *)rptr)
5218 	conn_t	*connp = Q_TO_CONN(q);
5219 	icmp_t	*icmp = connp->conn_icmp;
5220 	icmp_stack_t *is = icmp->icmp_is;
5221 	cred_t *cr;
5222 
5223 	cr = DB_CREDDEF(mp, connp->conn_cred);
5224 
5225 	switch (mp->b_datap->db_type) {
5226 	case M_PROTO:
5227 	case M_PCPROTO:
5228 		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
5229 			/*
5230 			 * If the message does not contain a PRIM_type,
5231 			 * throw it away.
5232 			 */
5233 			freemsg(mp);
5234 			return;
5235 		}
5236 		switch (((union T_primitives *)rptr)->type) {
5237 		case T_ADDR_REQ:
5238 			icmp_addr_req(q, mp);
5239 			return;
5240 		case O_T_BIND_REQ:
5241 		case T_BIND_REQ:
5242 			icmp_bind(q, mp);
5243 			return;
5244 		case T_CONN_REQ:
5245 			icmp_connect(q, mp);
5246 			return;
5247 		case T_CAPABILITY_REQ:
5248 			icmp_capability_req(q, mp);
5249 			return;
5250 		case T_INFO_REQ:
5251 			icmp_info_req(q, mp);
5252 			return;
5253 		case T_UNITDATA_REQ:
5254 			/*
5255 			 * If a T_UNITDATA_REQ gets here, the address must
5256 			 * be bad.  Valid T_UNITDATA_REQs are found above
5257 			 * and break to below this switch.
5258 			 */
5259 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
5260 			return;
5261 		case T_UNBIND_REQ:
5262 			icmp_unbind(q, mp);
5263 			return;
5264 
5265 		case T_SVR4_OPTMGMT_REQ:
5266 			if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get,
5267 			    cr)) {
5268 				/* Only IP can return anything meaningful */
5269 				(void) svr4_optcom_req(q, mp, cr,
5270 				    &icmp_opt_obj, B_TRUE);
5271 			}
5272 			return;
5273 
5274 		case T_OPTMGMT_REQ:
5275 			/* Only IP can return anything meaningful */
5276 			(void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE);
5277 			return;
5278 
5279 		case T_DISCON_REQ:
5280 			icmp_disconnect(q, mp);
5281 			return;
5282 
5283 		/* The following TPI message is not supported by icmp. */
5284 		case O_T_CONN_RES:
5285 		case T_CONN_RES:
5286 			icmp_err_ack(q, mp, TNOTSUPPORT, 0);
5287 			return;
5288 
5289 		/* The following 3 TPI requests are illegal for icmp. */
5290 		case T_DATA_REQ:
5291 		case T_EXDATA_REQ:
5292 		case T_ORDREL_REQ:
5293 			freemsg(mp);
5294 			(void) putctl1(RD(q), M_ERROR, EPROTO);
5295 			return;
5296 		default:
5297 			break;
5298 		}
5299 		break;
5300 	case M_IOCTL:
5301 		iocp = (struct iocblk *)mp->b_rptr;
5302 		switch (iocp->ioc_cmd) {
5303 		case TI_GETPEERNAME:
5304 			if (icmp->icmp_state != TS_DATA_XFER) {
5305 				/*
5306 				 * If a default destination address has not
5307 				 * been associated with the stream, then we
5308 				 * don't know the peer's name.
5309 				 */
5310 				iocp->ioc_error = ENOTCONN;
5311 		err_ret:;
5312 				iocp->ioc_count = 0;
5313 				mp->b_datap->db_type = M_IOCACK;
5314 				qreply(q, mp);
5315 				return;
5316 			}
5317 			/* FALLTHRU */
5318 		case TI_GETMYNAME:
5319 			/*
5320 			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
5321 			 * need to copyin the user's strbuf structure.
5322 			 * Processing will continue in the M_IOCDATA case
5323 			 * below.
5324 			 */
5325 			mi_copyin(q, mp, NULL,
5326 			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
5327 			return;
5328 		case ND_SET:
5329 			/* nd_getset performs the necessary error checking */
5330 		case ND_GET:
5331 			if (nd_getset(q, is->is_nd, mp)) {
5332 				qreply(q, mp);
5333 				return;
5334 			}
5335 			break;
5336 		default:
5337 			break;
5338 		}
5339 		break;
5340 	case M_IOCDATA:
5341 		icmp_wput_iocdata(q, mp);
5342 		return;
5343 	default:
5344 		break;
5345 	}
5346 	ip_wput(q, mp);
5347 }
5348 
5349 /*
5350  * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA
5351  * messages.
5352  */
5353 static void
5354 icmp_wput_iocdata(queue_t *q, mblk_t *mp)
5355 {
5356 	mblk_t	*mp1;
5357 	STRUCT_HANDLE(strbuf, sb);
5358 	icmp_t	*icmp;
5359 	in6_addr_t	v6addr;
5360 	ipaddr_t	v4addr;
5361 	uint32_t	flowinfo = 0;
5362 	int		addrlen;
5363 
5364 	/* Make sure it is one of ours. */
5365 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
5366 	case TI_GETMYNAME:
5367 	case TI_GETPEERNAME:
5368 		break;
5369 	default:
5370 		icmp = Q_TO_ICMP(q);
5371 		ip_output(icmp->icmp_connp, mp, q, IP_WPUT);
5372 		return;
5373 	}
5374 	switch (mi_copy_state(q, mp, &mp1)) {
5375 	case -1:
5376 		return;
5377 	case MI_COPY_CASE(MI_COPY_IN, 1):
5378 		break;
5379 	case MI_COPY_CASE(MI_COPY_OUT, 1):
5380 		/*
5381 		 * The address has been copied out, so now
5382 		 * copyout the strbuf.
5383 		 */
5384 		mi_copyout(q, mp);
5385 		return;
5386 	case MI_COPY_CASE(MI_COPY_OUT, 2):
5387 		/*
5388 		 * The address and strbuf have been copied out.
5389 		 * We're done, so just acknowledge the original
5390 		 * M_IOCTL.
5391 		 */
5392 		mi_copy_done(q, mp, 0);
5393 		return;
5394 	default:
5395 		/*
5396 		 * Something strange has happened, so acknowledge
5397 		 * the original M_IOCTL with an EPROTO error.
5398 		 */
5399 		mi_copy_done(q, mp, EPROTO);
5400 		return;
5401 	}
5402 	/*
5403 	 * Now we have the strbuf structure for TI_GETMYNAME
5404 	 * and TI_GETPEERNAME.  Next we copyout the requested
5405 	 * address and then we'll copyout the strbuf.
5406 	 */
5407 	STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag,
5408 	    (void *)mp1->b_rptr);
5409 	icmp = Q_TO_ICMP(q);
5410 	if (icmp->icmp_family == AF_INET)
5411 		addrlen = sizeof (sin_t);
5412 	else
5413 		addrlen = sizeof (sin6_t);
5414 
5415 	if (STRUCT_FGET(sb, maxlen) < addrlen) {
5416 		mi_copy_done(q, mp, EINVAL);
5417 		return;
5418 	}
5419 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
5420 	case TI_GETMYNAME:
5421 		if (icmp->icmp_family == AF_INET) {
5422 			ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
5423 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) &&
5424 			    !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
5425 				v4addr = V4_PART_OF_V6(icmp->icmp_v6src);
5426 			} else {
5427 				/*
5428 				 * INADDR_ANY
5429 				 * icmp_v6src is not set, we might be bound to
5430 				 * broadcast/multicast. Use icmp_bound_v6src as
5431 				 * local address instead (that could
5432 				 * also still be INADDR_ANY)
5433 				 */
5434 				v4addr = V4_PART_OF_V6(icmp->icmp_bound_v6src);
5435 			}
5436 		} else {
5437 			/* icmp->icmp_family == AF_INET6 */
5438 			if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
5439 				v6addr = icmp->icmp_v6src;
5440 			} else {
5441 				/*
5442 				 * UNSPECIFIED
5443 				 * icmp_v6src is not set, we might be bound to
5444 				 * broadcast/multicast. Use icmp_bound_v6src as
5445 				 * local address instead (that could
5446 				 * also still be UNSPECIFIED)
5447 				 */
5448 				v6addr = icmp->icmp_bound_v6src;
5449 			}
5450 		}
5451 		break;
5452 	case TI_GETPEERNAME:
5453 		if (icmp->icmp_family == AF_INET) {
5454 			ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
5455 			v4addr = V4_PART_OF_V6(icmp->icmp_v6dst);
5456 		} else {
5457 			/* icmp->icmp_family == AF_INET6) */
5458 			v6addr = icmp->icmp_v6dst;
5459 			flowinfo = icmp->icmp_flowinfo;
5460 		}
5461 		break;
5462 	default:
5463 		mi_copy_done(q, mp, EPROTO);
5464 		return;
5465 	}
5466 	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
5467 	if (!mp1)
5468 		return;
5469 
5470 	if (icmp->icmp_family == AF_INET) {
5471 		sin_t *sin;
5472 
5473 		STRUCT_FSET(sb, len, (int)sizeof (sin_t));
5474 		sin = (sin_t *)mp1->b_rptr;
5475 		mp1->b_wptr = (uchar_t *)&sin[1];
5476 		*sin = sin_null;
5477 		sin->sin_family = AF_INET;
5478 		sin->sin_addr.s_addr = v4addr;
5479 	} else {
5480 		/* icmp->icmp_family == AF_INET6 */
5481 		sin6_t *sin6;
5482 
5483 		ASSERT(icmp->icmp_family == AF_INET6);
5484 		STRUCT_FSET(sb, len, (int)sizeof (sin6_t));
5485 		sin6 = (sin6_t *)mp1->b_rptr;
5486 		mp1->b_wptr = (uchar_t *)&sin6[1];
5487 		*sin6 = sin6_null;
5488 		sin6->sin6_family = AF_INET6;
5489 		sin6->sin6_flowinfo = flowinfo;
5490 		sin6->sin6_addr = v6addr;
5491 	}
5492 	/* Copy out the address */
5493 	mi_copyout(q, mp);
5494 }
5495 
5496 static int
5497 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
5498     void *thisdg_attrs)
5499 {
5500 	conn_t	*connp = Q_TO_CONN(q);
5501 	struct T_unitdata_req *udreqp;
5502 	int is_absreq_failure;
5503 	cred_t *cr;
5504 
5505 	udreqp = (struct T_unitdata_req *)mp->b_rptr;
5506 	*errorp = 0;
5507 
5508 	cr = DB_CREDDEF(mp, connp->conn_cred);
5509 
5510 	*errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length,
5511 	    udreqp->OPT_offset, cr, &icmp_opt_obj,
5512 	    thisdg_attrs, &is_absreq_failure);
5513 
5514 	if (*errorp != 0) {
5515 		/*
5516 		 * Note: No special action needed in this
5517 		 * module for "is_absreq_failure"
5518 		 */
5519 		return (-1);		/* failure */
5520 	}
5521 	ASSERT(is_absreq_failure == 0);
5522 	return (0);	/* success */
5523 }
5524 
5525 void
5526 icmp_ddi_init(void)
5527 {
5528 	icmp_max_optsize =
5529 	    optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr,
5530 	    icmp_opt_obj.odb_opt_arr_cnt);
5531 
5532 	/*
5533 	 * We want to be informed each time a stack is created or
5534 	 * destroyed in the kernel, so we can maintain the
5535 	 * set of icmp_stack_t's.
5536 	 */
5537 	netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini);
5538 }
5539 
5540 void
5541 icmp_ddi_destroy(void)
5542 {
5543 	netstack_unregister(NS_ICMP);
5544 }
5545 
5546 /*
5547  * Initialize the ICMP stack instance.
5548  */
5549 static void *
5550 rawip_stack_init(netstackid_t stackid, netstack_t *ns)
5551 {
5552 	icmp_stack_t	*is;
5553 	icmpparam_t	*pa;
5554 
5555 	is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP);
5556 	is->is_netstack = ns;
5557 
5558 	pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP);
5559 	is->is_param_arr = pa;
5560 	bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr));
5561 
5562 	(void) icmp_param_register(&is->is_nd,
5563 	    is->is_param_arr, A_CNT(icmp_param_arr));
5564 	is->is_ksp = rawip_kstat_init(stackid);
5565 	return (is);
5566 }
5567 
5568 /*
5569  * Free the ICMP stack instance.
5570  */
5571 static void
5572 rawip_stack_fini(netstackid_t stackid, void *arg)
5573 {
5574 	icmp_stack_t *is = (icmp_stack_t *)arg;
5575 
5576 	nd_free(&is->is_nd);
5577 	kmem_free(is->is_param_arr, sizeof (icmp_param_arr));
5578 	is->is_param_arr = NULL;
5579 
5580 	rawip_kstat_fini(stackid, is->is_ksp);
5581 	is->is_ksp = NULL;
5582 	kmem_free(is, sizeof (*is));
5583 }
5584 
5585 static void *
5586 rawip_kstat_init(netstackid_t stackid) {
5587 	kstat_t	*ksp;
5588 
5589 	rawip_named_kstat_t template = {
5590 		{ "inDatagrams",	KSTAT_DATA_UINT32, 0 },
5591 		{ "inCksumErrs",	KSTAT_DATA_UINT32, 0 },
5592 		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
5593 		{ "outDatagrams",	KSTAT_DATA_UINT32, 0 },
5594 		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
5595 	};
5596 
5597 	ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2",
5598 					KSTAT_TYPE_NAMED,
5599 					NUM_OF_FIELDS(rawip_named_kstat_t),
5600 					0, stackid);
5601 	if (ksp == NULL || ksp->ks_data == NULL)
5602 		return (NULL);
5603 
5604 	bcopy(&template, ksp->ks_data, sizeof (template));
5605 	ksp->ks_update = rawip_kstat_update;
5606 	ksp->ks_private = (void *)(uintptr_t)stackid;
5607 
5608 	kstat_install(ksp);
5609 	return (ksp);
5610 }
5611 
5612 static void
5613 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp)
5614 {
5615 	if (ksp != NULL) {
5616 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
5617 		kstat_delete_netstack(ksp, stackid);
5618 	}
5619 }
5620 
5621 static int
5622 rawip_kstat_update(kstat_t *ksp, int rw)
5623 {
5624 	rawip_named_kstat_t *rawipkp;
5625 	netstackid_t	stackid = (netstackid_t)(uintptr_t)ksp->ks_private;
5626 	netstack_t	*ns;
5627 	icmp_stack_t	*is;
5628 
5629 	if ((ksp == NULL) || (ksp->ks_data == NULL))
5630 		return (EIO);
5631 
5632 	if (rw == KSTAT_WRITE)
5633 		return (EACCES);
5634 
5635 	rawipkp = (rawip_named_kstat_t *)ksp->ks_data;
5636 
5637 	ns = netstack_find_by_stackid(stackid);
5638 	if (ns == NULL)
5639 		return (-1);
5640 	is = ns->netstack_icmp;
5641 	if (is == NULL) {
5642 		netstack_rele(ns);
5643 		return (-1);
5644 	}
5645 	rawipkp->inDatagrams.value.ui32 =  is->is_rawip_mib.rawipInDatagrams;
5646 	rawipkp->inCksumErrs.value.ui32 =  is->is_rawip_mib.rawipInCksumErrs;
5647 	rawipkp->inErrors.value.ui32 =	   is->is_rawip_mib.rawipInErrors;
5648 	rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams;
5649 	rawipkp->outErrors.value.ui32 =	   is->is_rawip_mib.rawipOutErrors;
5650 	netstack_rele(ns);
5651 	return (0);
5652 }
5653