xref: /titanic_51/usr/src/uts/common/inet/ip/icmp.c (revision 5f87cd85650b75d56c0833d286b882ee5ffb280a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/stropts.h>
30 #include <sys/strlog.h>
31 #include <sys/strsun.h>
32 #define	_SUN_TPI_VERSION 2
33 #include <sys/tihdr.h>
34 #include <sys/timod.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/strsubr.h>
38 #include <sys/cmn_err.h>
39 #include <sys/debug.h>
40 #include <sys/kmem.h>
41 #include <sys/policy.h>
42 #include <sys/priv.h>
43 #include <sys/zone.h>
44 #include <sys/time.h>
45 
46 #include <sys/socket.h>
47 #include <sys/isa_defs.h>
48 #include <sys/suntpi.h>
49 #include <sys/xti_inet.h>
50 #include <sys/netstack.h>
51 
52 #include <net/route.h>
53 #include <net/if.h>
54 
55 #include <netinet/in.h>
56 #include <netinet/ip6.h>
57 #include <netinet/icmp6.h>
58 #include <inet/common.h>
59 #include <inet/ip.h>
60 #include <inet/ip6.h>
61 #include <inet/mi.h>
62 #include <inet/nd.h>
63 #include <inet/optcom.h>
64 #include <inet/snmpcom.h>
65 #include <inet/kstatcom.h>
66 #include <inet/rawip_impl.h>
67 
68 #include <netinet/ip_mroute.h>
69 #include <inet/tcp.h>
70 #include <net/pfkeyv2.h>
71 #include <inet/ipsec_info.h>
72 #include <inet/ipclassifier.h>
73 
74 #include <sys/tsol/label.h>
75 #include <sys/tsol/tnet.h>
76 
77 #include <inet/ip_ire.h>
78 #include <inet/ip_if.h>
79 
80 #include <inet/ip_impl.h>
81 
82 /*
83  * Synchronization notes:
84  *
85  * RAWIP is MT and uses the usual kernel synchronization primitives. There is
86  * one lock, icmp_rwlock. We also use conn_lock when updating things
87  * which affect the IP classifier lookup.
88  * The lock order is icmp_rwlock -> conn_lock.
89  *
90  * The icmp_rwlock:
91  * This protects most of the other fields in the icmp_t. The exact list of
92  * fields which are protected by each of the above locks is documented in
93  * the icmp_t structure definition.
94  *
95  * Plumbing notes:
96  * ICMP is always a device driver. For compatibility with mibopen() code
97  * it is possible to I_PUSH "icmp", but that results in pushing a passthrough
98  * dummy module.
99  */
100 
101 static void	icmp_addr_req(queue_t *q, mblk_t *mp);
102 static void	icmp_bind(queue_t *q, mblk_t *mp);
103 static void	icmp_bind_proto(queue_t *q);
104 static void	icmp_bind_result(conn_t *, mblk_t *);
105 static void	icmp_bind_ack(conn_t *, mblk_t *mp);
106 static void	icmp_bind_error(conn_t *, mblk_t *mp);
107 static int	icmp_build_hdrs(icmp_t *icmp);
108 static void	icmp_capability_req(queue_t *q, mblk_t *mp);
109 static int	icmp_close(queue_t *q);
110 static void	icmp_connect(queue_t *q, mblk_t *mp);
111 static void	icmp_disconnect(queue_t *q, mblk_t *mp);
112 static void	icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
113 		    int sys_error);
114 static void	icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
115 		    t_scalar_t t_error, int sys_error);
116 static void	icmp_icmp_error(queue_t *q, mblk_t *mp);
117 static void	icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
118 static void	icmp_info_req(queue_t *q, mblk_t *mp);
119 static void	icmp_input(void *, mblk_t *, void *);
120 static mblk_t	*icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim,
121 		    t_scalar_t addr_length, in_port_t);
122 static int	icmp_open(queue_t *q, dev_t *devp, int flag, int sflag,
123 		    cred_t *credp, boolean_t isv6);
124 static int	icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
125 		    cred_t *credp);
126 static int	icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
127 		    cred_t *credp);
128 static void	icmp_output(queue_t *q, mblk_t *mp);
129 static int	icmp_unitdata_opt_process(queue_t *q, mblk_t *mp,
130 		    int *errorp, void *thisdg_attrs);
131 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
132 int		icmp_opt_set(queue_t *q, uint_t optset_context,
133 		    int level, int name, uint_t inlen,
134 		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
135 		    void *thisdg_attrs, cred_t *cr, mblk_t *mblk);
136 int		icmp_opt_get(queue_t *q, int level, int name,
137 		    uchar_t *ptr);
138 static int	icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
139 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt);
140 static int	icmp_param_set(queue_t *q, mblk_t *mp, char *value,
141 		    caddr_t cp, cred_t *cr);
142 static int	icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
143 		    uchar_t *ptr, int len);
144 static int	icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
145 		    cred_t *cr);
146 static void	icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
147 static void	icmp_unbind(queue_t *q, mblk_t *mp);
148 static void	icmp_wput(queue_t *q, mblk_t *mp);
149 static void	icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6,
150 		    t_scalar_t tudr_optlen);
151 static void	icmp_wput_other(queue_t *q, mblk_t *mp);
152 static void	icmp_wput_iocdata(queue_t *q, mblk_t *mp);
153 static void	icmp_wput_restricted(queue_t *q, mblk_t *mp);
154 
155 static void	*rawip_stack_init(netstackid_t stackid, netstack_t *ns);
156 static void	rawip_stack_fini(netstackid_t stackid, void *arg);
157 
158 static void	*rawip_kstat_init(netstackid_t stackid);
159 static void	rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp);
160 static int	rawip_kstat_update(kstat_t *kp, int rw);
161 
162 
163 static struct module_info icmp_mod_info =  {
164 	5707, "icmp", 1, INFPSZ, 512, 128
165 };
166 
167 /*
168  * Entry points for ICMP as a device.
169  * We have separate open functions for the /dev/icmp and /dev/icmp6 devices.
170  */
171 static struct qinit icmprinitv4 = {
172 	NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info
173 };
174 
175 static struct qinit icmprinitv6 = {
176 	NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info
177 };
178 
179 static struct qinit icmpwinit = {
180 	(pfi_t)icmp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &icmp_mod_info
181 };
182 
183 /* For AF_INET aka /dev/icmp */
184 struct streamtab icmpinfov4 = {
185 	&icmprinitv4, &icmpwinit
186 };
187 
188 /* For AF_INET6 aka /dev/icmp6 */
189 struct streamtab icmpinfov6 = {
190 	&icmprinitv6, &icmpwinit
191 };
192 
193 static sin_t	sin_null;	/* Zero address for quick clears */
194 static sin6_t	sin6_null;	/* Zero address for quick clears */
195 
196 /* Default structure copied into T_INFO_ACK messages */
197 static struct T_info_ack icmp_g_t_info_ack = {
198 	T_INFO_ACK,
199 	IP_MAXPACKET,	 /* TSDU_size.  icmp allows maximum size messages. */
200 	T_INVALID,	/* ETSDU_size.  icmp does not support expedited data. */
201 	T_INVALID,	/* CDATA_size. icmp does not support connect data. */
202 	T_INVALID,	/* DDATA_size. icmp does not support disconnect data. */
203 	0,		/* ADDR_size - filled in later. */
204 	0,		/* OPT_size - not initialized here */
205 	IP_MAXPACKET,	/* TIDU_size.  icmp allows maximum size messages. */
206 	T_CLTS,		/* SERV_type.  icmp supports connection-less. */
207 	TS_UNBND,	/* CURRENT_state.  This is set from icmp_state. */
208 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
209 };
210 
211 /*
212  * Table of ND variables supported by icmp.  These are loaded into is_nd
213  * when the stack instance is created.
214  * All of these are alterable, within the min/max values given, at run time.
215  */
216 static icmpparam_t	icmp_param_arr[] = {
217 	/* min	max	value	name */
218 	{ 0,	128,	32,	"icmp_wroff_extra" },
219 	{ 1,	255,	255,	"icmp_ipv4_ttl" },
220 	{ 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS,	"icmp_ipv6_hoplimit"},
221 	{ 0,	1,	1,	"icmp_bsd_compat" },
222 	{ 4096,	65536,	8192,	"icmp_xmit_hiwat"},
223 	{ 0,	65536,	1024,	"icmp_xmit_lowat"},
224 	{ 4096,	65536,	8192,	"icmp_recv_hiwat"},
225 	{ 65536, 1024*1024*1024, 256*1024,	"icmp_max_buf"},
226 };
227 #define	is_wroff_extra			is_param_arr[0].icmp_param_value
228 #define	is_ipv4_ttl			is_param_arr[1].icmp_param_value
229 #define	is_ipv6_hoplimit		is_param_arr[2].icmp_param_value
230 #define	is_bsd_compat			is_param_arr[3].icmp_param_value
231 #define	is_xmit_hiwat			is_param_arr[4].icmp_param_value
232 #define	is_xmit_lowat			is_param_arr[5].icmp_param_value
233 #define	is_recv_hiwat			is_param_arr[6].icmp_param_value
234 #define	is_max_buf			is_param_arr[7].icmp_param_value
235 
236 /*
237  * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
238  * passed to icmp_wput.
239  * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP
240  * protocol type placed in the message following the address. A T_BIND_ACK
241  * message is returned by ip_bind_v4/v6.
242  */
243 static void
244 icmp_bind(queue_t *q, mblk_t *mp)
245 {
246 	sin_t	*sin;
247 	sin6_t	*sin6;
248 	mblk_t	*mp1;
249 	struct T_bind_req	*tbr;
250 	icmp_t	*icmp;
251 	conn_t	*connp = Q_TO_CONN(q);
252 
253 	icmp = connp->conn_icmp;
254 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
255 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
256 		    "icmp_bind: bad req, len %u",
257 		    (uint_t)(mp->b_wptr - mp->b_rptr));
258 		icmp_err_ack(q, mp, TPROTO, 0);
259 		return;
260 	}
261 	if (icmp->icmp_state != TS_UNBND) {
262 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
263 		    "icmp_bind: bad state, %d", icmp->icmp_state);
264 		icmp_err_ack(q, mp, TOUTSTATE, 0);
265 		return;
266 	}
267 	/*
268 	 * Reallocate the message to make sure we have enough room for an
269 	 * address and the protocol type.
270 	 */
271 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
272 	if (!mp1) {
273 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
274 		return;
275 	}
276 	mp = mp1;
277 	tbr = (struct T_bind_req *)mp->b_rptr;
278 	switch (tbr->ADDR_length) {
279 	case 0:			/* Generic request */
280 		tbr->ADDR_offset = sizeof (struct T_bind_req);
281 		if (icmp->icmp_family == AF_INET) {
282 			tbr->ADDR_length = sizeof (sin_t);
283 			sin = (sin_t *)&tbr[1];
284 			*sin = sin_null;
285 			sin->sin_family = AF_INET;
286 			mp->b_wptr = (uchar_t *)&sin[1];
287 		} else {
288 			ASSERT(icmp->icmp_family == AF_INET6);
289 			tbr->ADDR_length = sizeof (sin6_t);
290 			sin6 = (sin6_t *)&tbr[1];
291 			*sin6 = sin6_null;
292 			sin6->sin6_family = AF_INET6;
293 			mp->b_wptr = (uchar_t *)&sin6[1];
294 		}
295 		break;
296 	case sizeof (sin_t):	/* Complete IP address */
297 		sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset,
298 		    sizeof (sin_t));
299 		if (sin == NULL || !OK_32PTR((char *)sin)) {
300 			icmp_err_ack(q, mp, TSYSERR, EINVAL);
301 			return;
302 		}
303 		if (icmp->icmp_family != AF_INET ||
304 		    sin->sin_family != AF_INET) {
305 			icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
306 			return;
307 		}
308 		break;
309 	case sizeof (sin6_t):	/* Complete IP address */
310 		sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset,
311 		    sizeof (sin6_t));
312 		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
313 			icmp_err_ack(q, mp, TSYSERR, EINVAL);
314 			return;
315 		}
316 		if (icmp->icmp_family != AF_INET6 ||
317 		    sin6->sin6_family != AF_INET6) {
318 			icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
319 			return;
320 		}
321 		/* No support for mapped addresses on raw sockets */
322 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
323 			icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL);
324 			return;
325 		}
326 		break;
327 	default:
328 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
329 		    "icmp_bind: bad ADDR_length %d", tbr->ADDR_length);
330 		icmp_err_ack(q, mp, TBADADDR, 0);
331 		return;
332 	}
333 
334 	/*
335 	 * The state must be TS_UNBND. TPI mandates that users must send
336 	 * TPI primitives only 1 at a time and wait for the response before
337 	 * sending the next primitive.
338 	 */
339 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
340 	if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) {
341 		rw_exit(&icmp->icmp_rwlock);
342 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
343 		    "icmp_bind: bad state, %d", icmp->icmp_state);
344 		icmp_err_ack(q, mp, TOUTSTATE, 0);
345 		return;
346 	}
347 
348 	icmp->icmp_pending_op = tbr->PRIM_type;
349 
350 	/*
351 	 * Copy the source address into our icmp structure.  This address
352 	 * may still be zero; if so, ip will fill in the correct address
353 	 * each time an outbound packet is passed to it.
354 	 * If we are binding to a broadcast or multicast address then
355 	 * icmp_bind_ack will clear the source address when it receives
356 	 * the T_BIND_ACK.
357 	 */
358 	icmp->icmp_state = TS_IDLE;
359 
360 	if (icmp->icmp_family == AF_INET) {
361 		ASSERT(sin != NULL);
362 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
363 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr,
364 		    &icmp->icmp_v6src);
365 		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
366 		    icmp->icmp_ip_snd_options_len;
367 		icmp->icmp_bound_v6src = icmp->icmp_v6src;
368 	} else {
369 		int error;
370 
371 		ASSERT(sin6 != NULL);
372 		ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
373 		icmp->icmp_v6src = sin6->sin6_addr;
374 		icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len;
375 		icmp->icmp_bound_v6src = icmp->icmp_v6src;
376 
377 		/* Rebuild the header template */
378 		error = icmp_build_hdrs(icmp);
379 		if (error != 0) {
380 			icmp->icmp_pending_op = -1;
381 			rw_exit(&icmp->icmp_rwlock);
382 			icmp_err_ack(q, mp, TSYSERR, error);
383 			return;
384 		}
385 	}
386 	/*
387 	 * Place protocol type in the O_T_BIND_REQ/T_BIND_REQ following
388 	 * the address.
389 	 */
390 	*mp->b_wptr++ = icmp->icmp_proto;
391 	if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) {
392 		/*
393 		 * Append a request for an IRE if src not 0 (INADDR_ANY)
394 		 */
395 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
396 		if (!mp->b_cont) {
397 			icmp->icmp_pending_op = -1;
398 			rw_exit(&icmp->icmp_rwlock);
399 			icmp_err_ack(q, mp, TSYSERR, ENOMEM);
400 			return;
401 		}
402 		mp->b_cont->b_wptr += sizeof (ire_t);
403 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
404 	}
405 	rw_exit(&icmp->icmp_rwlock);
406 
407 	/* Pass the O_T_BIND_REQ/T_BIND_REQ to ip. */
408 	if (icmp->icmp_family == AF_INET6)
409 		mp = ip_bind_v6(q, mp, connp, NULL);
410 	else
411 		mp = ip_bind_v4(q, mp, connp);
412 
413 	/* The above return NULL if the bind needs to be deferred */
414 	if (mp != NULL)
415 		icmp_bind_result(connp, mp);
416 	else
417 		CONN_INC_REF(connp);
418 }
419 
420 /*
421  * Send message to IP to just bind to the protocol.
422  */
423 static void
424 icmp_bind_proto(queue_t *q)
425 {
426 	mblk_t	*mp;
427 	struct T_bind_req	*tbr;
428 	icmp_t	*icmp;
429 	conn_t	*connp = Q_TO_CONN(q);
430 
431 	icmp = connp->conn_icmp;
432 
433 	mp = allocb(sizeof (struct T_bind_req) + sizeof (sin6_t) + 1,
434 	    BPRI_MED);
435 	if (!mp) {
436 		return;
437 	}
438 	mp->b_datap->db_type = M_PROTO;
439 	tbr = (struct T_bind_req *)mp->b_rptr;
440 	tbr->PRIM_type = O_T_BIND_REQ; /* change to T_BIND_REQ ? */
441 	tbr->ADDR_offset = sizeof (struct T_bind_req);
442 
443 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
444 	if (icmp->icmp_ipversion == IPV4_VERSION) {
445 		sin_t	*sin;
446 
447 		tbr->ADDR_length = sizeof (sin_t);
448 		sin = (sin_t *)&tbr[1];
449 		*sin = sin_null;
450 		sin->sin_family = AF_INET;
451 		mp->b_wptr = (uchar_t *)&sin[1];
452 	} else {
453 		sin6_t	*sin6;
454 
455 		ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
456 		tbr->ADDR_length = sizeof (sin6_t);
457 		sin6 = (sin6_t *)&tbr[1];
458 		*sin6 = sin6_null;
459 		sin6->sin6_family = AF_INET6;
460 		mp->b_wptr = (uchar_t *)&sin6[1];
461 	}
462 
463 	/* Place protocol type in the O_T_BIND_REQ following the address. */
464 	*mp->b_wptr++ = icmp->icmp_proto;
465 	rw_exit(&icmp->icmp_rwlock);
466 
467 	/* Pass the O_T_BIND_REQ to ip. */
468 	if (icmp->icmp_family == AF_INET6)
469 		mp = ip_bind_v6(q, mp, connp, NULL);
470 	else
471 		mp = ip_bind_v4(q, mp, connp);
472 
473 	/* The above return NULL if the bind needs to be deferred */
474 	if (mp != NULL)
475 		icmp_bind_result(connp, mp);
476 	else
477 		CONN_INC_REF(connp);
478 }
479 
480 /*
481  * This is called from ip_wput_nondata to handle the results of a
482  * deferred RAWIP bind.  It is called once the bind has been completed.
483  */
484 void
485 rawip_resume_bind(conn_t *connp, mblk_t *mp)
486 {
487 	ASSERT(connp != NULL && IPCL_IS_RAWIP(connp));
488 
489 	icmp_bind_result(connp, mp);
490 
491 	CONN_OPER_PENDING_DONE(connp);
492 }
493 
494 /*
495  * This routine handles each T_CONN_REQ message passed to icmp.  It
496  * associates a default destination address with the stream.
497  *
498  * This routine sends down a T_BIND_REQ to IP with the following mblks:
499  *	T_BIND_REQ	- specifying local and remote address.
500  *	IRE_DB_REQ_TYPE	- to get an IRE back containing ire_type and src
501  *	T_OK_ACK	- for the T_CONN_REQ
502  *	T_CONN_CON	- to keep the TPI user happy
503  *
504  * The connect completes in icmp_bind_result.
505  * When a T_BIND_ACK is received information is extracted from the IRE
506  * and the two appended messages are sent to the TPI user.
507  * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
508  * convert it to an error ack for the appropriate primitive.
509  */
510 static void
511 icmp_connect(queue_t *q, mblk_t *mp)
512 {
513 	sin_t	*sin;
514 	sin6_t	*sin6;
515 	mblk_t	*mp1, *mp2;
516 	struct T_conn_req	*tcr;
517 	icmp_t	*icmp;
518 	ipaddr_t	v4dst;
519 	in6_addr_t	v6dst;
520 	uint32_t	flowinfo;
521 	conn_t	*connp = Q_TO_CONN(q);
522 
523 	icmp = connp->conn_icmp;
524 	tcr = (struct T_conn_req *)mp->b_rptr;
525 	/* Sanity checks */
526 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
527 		icmp_err_ack(q, mp, TPROTO, 0);
528 		return;
529 	}
530 
531 	if (tcr->OPT_length != 0) {
532 		icmp_err_ack(q, mp, TBADOPT, 0);
533 		return;
534 	}
535 
536 	switch (tcr->DEST_length) {
537 	default:
538 		icmp_err_ack(q, mp, TBADADDR, 0);
539 		return;
540 
541 	case sizeof (sin_t):
542 		sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
543 		    sizeof (sin_t));
544 		if (sin == NULL || !OK_32PTR((char *)sin)) {
545 			icmp_err_ack(q, mp, TSYSERR, EINVAL);
546 			return;
547 		}
548 		if (icmp->icmp_family != AF_INET ||
549 		    sin->sin_family != AF_INET) {
550 			icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
551 			return;
552 		}
553 		v4dst = sin->sin_addr.s_addr;
554 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
555 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
556 		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
557 		    icmp->icmp_ip_snd_options_len;
558 		break;
559 
560 	case sizeof (sin6_t):
561 		sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset,
562 		    sizeof (sin6_t));
563 		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
564 			icmp_err_ack(q, mp, TSYSERR, EINVAL);
565 			return;
566 		}
567 		if (icmp->icmp_family != AF_INET6 ||
568 		    sin6->sin6_family != AF_INET6) {
569 			icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
570 			return;
571 		}
572 		/* No support for mapped addresses on raw sockets */
573 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
574 			icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL);
575 			return;
576 		}
577 		v6dst = sin6->sin6_addr;
578 		ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
579 		icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len;
580 		flowinfo = sin6->sin6_flowinfo;
581 		break;
582 	}
583 	if (icmp->icmp_ipversion == IPV4_VERSION) {
584 		/*
585 		 * Interpret a zero destination to mean loopback.
586 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
587 		 * generate the T_CONN_CON.
588 		 */
589 		if (v4dst == INADDR_ANY) {
590 			v4dst = htonl(INADDR_LOOPBACK);
591 			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
592 			if (icmp->icmp_family == AF_INET) {
593 				sin->sin_addr.s_addr = v4dst;
594 			} else {
595 				sin6->sin6_addr = v6dst;
596 			}
597 		}
598 		icmp->icmp_v6dst = v6dst;
599 		icmp->icmp_flowinfo = 0;
600 
601 		/*
602 		 * If the destination address is multicast and
603 		 * an outgoing multicast interface has been set,
604 		 * use the address of that interface as our
605 		 * source address if no source address has been set.
606 		 */
607 		if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY &&
608 		    CLASSD(v4dst) &&
609 		    icmp->icmp_multicast_if_addr != INADDR_ANY) {
610 			IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr,
611 			    &icmp->icmp_v6src);
612 		}
613 	} else {
614 		ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
615 		/*
616 		 * Interpret a zero destination to mean loopback.
617 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
618 		 * generate the T_CONN_CON.
619 		 */
620 		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
621 			v6dst = ipv6_loopback;
622 			sin6->sin6_addr = v6dst;
623 		}
624 		icmp->icmp_v6dst = v6dst;
625 		icmp->icmp_flowinfo = flowinfo;
626 		/*
627 		 * If the destination address is multicast and
628 		 * an outgoing multicast interface has been set,
629 		 * then the ip bind logic will pick the correct source
630 		 * address (i.e. matching the outgoing multicast interface).
631 		 */
632 	}
633 
634 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
635 	if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) {
636 		rw_exit(&icmp->icmp_rwlock);
637 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
638 		    "icmp_connect: bad state, %d", icmp->icmp_state);
639 		icmp_err_ack(q, mp, TOUTSTATE, 0);
640 		return;
641 	}
642 	icmp->icmp_pending_op = T_CONN_REQ;
643 
644 	if (icmp->icmp_state == TS_DATA_XFER) {
645 		/* Already connected - clear out state */
646 		icmp->icmp_v6src = icmp->icmp_bound_v6src;
647 		icmp->icmp_state = TS_IDLE;
648 	}
649 
650 	/*
651 	 * Send down bind to IP to verify that there is a route
652 	 * and to determine the source address.
653 	 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput.
654 	 */
655 	if (icmp->icmp_family == AF_INET) {
656 		mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa_conn_t),
657 		    sin->sin_port);
658 	} else {
659 		ASSERT(icmp->icmp_family == AF_INET6);
660 		mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa6_conn_t),
661 		    sin6->sin6_port);
662 	}
663 	if (mp1 == NULL) {
664 		icmp->icmp_pending_op = -1;
665 		rw_exit(&icmp->icmp_rwlock);
666 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
667 		return;
668 	}
669 
670 	/*
671 	 * We also have to send a connection confirmation to
672 	 * keep TLI happy. Prepare it for icmp_bind_result.
673 	 */
674 	if (icmp->icmp_family == AF_INET) {
675 		mp2 = mi_tpi_conn_con(NULL, (char *)sin, sizeof (*sin), NULL,
676 		    0);
677 	} else {
678 		ASSERT(icmp->icmp_family == AF_INET6);
679 		mp2 = mi_tpi_conn_con(NULL, (char *)sin6, sizeof (*sin6), NULL,
680 		    0);
681 	}
682 	if (mp2 == NULL) {
683 		freemsg(mp1);
684 		icmp->icmp_pending_op = -1;
685 		rw_exit(&icmp->icmp_rwlock);
686 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
687 		return;
688 	}
689 
690 	mp = mi_tpi_ok_ack_alloc(mp);
691 	if (mp == NULL) {
692 		/* Unable to reuse the T_CONN_REQ for the ack. */
693 		freemsg(mp2);
694 		icmp->icmp_pending_op = -1;
695 		rw_exit(&icmp->icmp_rwlock);
696 		icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
697 		return;
698 	}
699 
700 	icmp->icmp_state = TS_DATA_XFER;
701 	rw_exit(&icmp->icmp_rwlock);
702 
703 	/* Hang onto the T_OK_ACK and T_CONN_CON for later. */
704 	linkb(mp1, mp);
705 	linkb(mp1, mp2);
706 
707 	mblk_setcred(mp1, connp->conn_cred);
708 	if (icmp->icmp_family == AF_INET)
709 		mp1 = ip_bind_v4(q, mp1, connp);
710 	else
711 		mp1 = ip_bind_v6(q, mp1, connp, NULL);
712 
713 	/* The above return NULL if the bind needs to be deferred */
714 	if (mp1 != NULL)
715 		icmp_bind_result(connp, mp1);
716 	else
717 		CONN_INC_REF(connp);
718 }
719 
720 static void
721 icmp_close_free(conn_t *connp)
722 {
723 	icmp_t *icmp = connp->conn_icmp;
724 
725 	/* If there are any options associated with the stream, free them. */
726 	if (icmp->icmp_ip_snd_options != NULL) {
727 		mi_free((char *)icmp->icmp_ip_snd_options);
728 		icmp->icmp_ip_snd_options = NULL;
729 		icmp->icmp_ip_snd_options_len = 0;
730 	}
731 
732 	if (icmp->icmp_filter != NULL) {
733 		kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t));
734 		icmp->icmp_filter = NULL;
735 	}
736 	/* Free memory associated with sticky options */
737 	if (icmp->icmp_sticky_hdrs_len != 0) {
738 		kmem_free(icmp->icmp_sticky_hdrs,
739 		    icmp->icmp_sticky_hdrs_len);
740 		icmp->icmp_sticky_hdrs = NULL;
741 		icmp->icmp_sticky_hdrs_len = 0;
742 	}
743 	ip6_pkt_free(&icmp->icmp_sticky_ipp);
744 
745 	/*
746 	 * Clear any fields which the kmem_cache constructor clears.
747 	 * Only icmp_connp needs to be preserved.
748 	 * TBD: We should make this more efficient to avoid clearing
749 	 * everything.
750 	 */
751 	ASSERT(icmp->icmp_connp == connp);
752 	bzero(icmp, sizeof (icmp_t));
753 	icmp->icmp_connp = connp;
754 }
755 
756 static int
757 icmp_close(queue_t *q)
758 {
759 	conn_t	*connp = (conn_t *)q->q_ptr;
760 
761 	ASSERT(connp != NULL && IPCL_IS_RAWIP(connp));
762 
763 	ip_quiesce_conn(connp);
764 
765 	qprocsoff(connp->conn_rq);
766 
767 	icmp_close_free(connp);
768 
769 	/*
770 	 * Now we are truly single threaded on this stream, and can
771 	 * delete the things hanging off the connp, and finally the connp.
772 	 * We removed this connp from the fanout list, it cannot be
773 	 * accessed thru the fanouts, and we already waited for the
774 	 * conn_ref to drop to 0. We are already in close, so
775 	 * there cannot be any other thread from the top. qprocsoff
776 	 * has completed, and service has completed or won't run in
777 	 * future.
778 	 */
779 	ASSERT(connp->conn_ref == 1);
780 
781 	inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
782 
783 	connp->conn_ref--;
784 	ipcl_conn_destroy(connp);
785 
786 	q->q_ptr = WR(q)->q_ptr = NULL;
787 	return (0);
788 }
789 
790 /*
791  * This routine handles each T_DISCON_REQ message passed to icmp
792  * as an indicating that ICMP is no longer connected. This results
793  * in sending a T_BIND_REQ to IP to restore the binding to just
794  * the local address.
795  *
796  * This routine sends down a T_BIND_REQ to IP with the following mblks:
797  *	T_BIND_REQ	- specifying just the local address.
798  *	T_OK_ACK	- for the T_DISCON_REQ
799  *
800  * The disconnect completes in icmp_bind_result.
801  * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user.
802  * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
803  * convert it to an error ack for the appropriate primitive.
804  */
805 static void
806 icmp_disconnect(queue_t *q, mblk_t *mp)
807 {
808 	icmp_t	*icmp;
809 	mblk_t	*mp1;
810 	conn_t	*connp = Q_TO_CONN(q);
811 
812 	icmp = connp->conn_icmp;
813 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
814 	if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) {
815 		rw_exit(&icmp->icmp_rwlock);
816 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
817 		    "icmp_disconnect: bad state, %d", icmp->icmp_state);
818 		icmp_err_ack(q, mp, TOUTSTATE, 0);
819 		return;
820 	}
821 	icmp->icmp_pending_op = T_DISCON_REQ;
822 	icmp->icmp_v6src = icmp->icmp_bound_v6src;
823 	icmp->icmp_state = TS_IDLE;
824 
825 	/*
826 	 * Send down bind to IP to remove the full binding and revert
827 	 * to the local address binding.
828 	 */
829 	if (icmp->icmp_family == AF_INET) {
830 		mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin_t), 0);
831 	} else {
832 		ASSERT(icmp->icmp_family == AF_INET6);
833 		mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin6_t), 0);
834 	}
835 	if (mp1 == NULL) {
836 		icmp->icmp_pending_op = -1;
837 		rw_exit(&icmp->icmp_rwlock);
838 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
839 		return;
840 	}
841 	mp = mi_tpi_ok_ack_alloc(mp);
842 	if (mp == NULL) {
843 		/* Unable to reuse the T_DISCON_REQ for the ack. */
844 		icmp->icmp_pending_op = -1;
845 		rw_exit(&icmp->icmp_rwlock);
846 		icmp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM);
847 		return;
848 	}
849 
850 	if (icmp->icmp_family == AF_INET6) {
851 		int error;
852 
853 		/* Rebuild the header template */
854 		error = icmp_build_hdrs(icmp);
855 		if (error != 0) {
856 			icmp->icmp_pending_op = -1;
857 			rw_exit(&icmp->icmp_rwlock);
858 			icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error);
859 			freemsg(mp1);
860 			return;
861 		}
862 	}
863 
864 	rw_exit(&icmp->icmp_rwlock);
865 	/* Append the T_OK_ACK to the T_BIND_REQ for icmp_bind_result */
866 	linkb(mp1, mp);
867 
868 	if (icmp->icmp_family == AF_INET6)
869 		mp1 = ip_bind_v6(q, mp1, connp, NULL);
870 	else
871 		mp1 = ip_bind_v4(q, mp1, connp);
872 
873 	/* The above return NULL if the bind needs to be deferred */
874 	if (mp1 != NULL)
875 		icmp_bind_result(connp, mp1);
876 	else
877 		CONN_INC_REF(connp);
878 }
879 
880 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
881 static void
882 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
883 {
884 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
885 		qreply(q, mp);
886 }
887 
888 /* Shorthand to generate and send TPI error acks to our client */
889 static void
890 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
891     t_scalar_t t_error, int sys_error)
892 {
893 	struct T_error_ack	*teackp;
894 
895 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
896 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
897 		teackp = (struct T_error_ack *)mp->b_rptr;
898 		teackp->ERROR_prim = primitive;
899 		teackp->TLI_error = t_error;
900 		teackp->UNIX_error = sys_error;
901 		qreply(q, mp);
902 	}
903 }
904 
905 /*
906  * icmp_icmp_error is called by icmp_input to process ICMP
907  * messages passed up by IP.
908  * Generates the appropriate T_UDERROR_IND for permanent
909  * (non-transient) errors.
910  * Assumes that IP has pulled up everything up to and including
911  * the ICMP header.
912  */
913 static void
914 icmp_icmp_error(queue_t *q, mblk_t *mp)
915 {
916 	icmph_t *icmph;
917 	ipha_t	*ipha;
918 	int	iph_hdr_length;
919 	sin_t	sin;
920 	sin6_t	sin6;
921 	mblk_t	*mp1;
922 	int	error = 0;
923 	icmp_t	*icmp = Q_TO_ICMP(q);
924 
925 	ipha = (ipha_t *)mp->b_rptr;
926 
927 	ASSERT(OK_32PTR(mp->b_rptr));
928 
929 	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
930 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
931 		icmp_icmp_error_ipv6(q, mp);
932 		return;
933 	}
934 	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
935 
936 	/* Skip past the outer IP and ICMP headers */
937 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
938 	icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]);
939 	ipha = (ipha_t *)&icmph[1];
940 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
941 
942 	switch (icmph->icmph_type) {
943 	case ICMP_DEST_UNREACHABLE:
944 		switch (icmph->icmph_code) {
945 		case ICMP_FRAGMENTATION_NEEDED:
946 			/*
947 			 * IP has already adjusted the path MTU.
948 			 */
949 			break;
950 		case ICMP_PORT_UNREACHABLE:
951 		case ICMP_PROTOCOL_UNREACHABLE:
952 			error = ECONNREFUSED;
953 			break;
954 		default:
955 			/* Transient errors */
956 			break;
957 		}
958 		break;
959 	default:
960 		/* Transient errors */
961 		break;
962 	}
963 	if (error == 0) {
964 		freemsg(mp);
965 		return;
966 	}
967 
968 	/*
969 	 * Deliver T_UDERROR_IND when the application has asked for it.
970 	 * The socket layer enables this automatically when connected.
971 	 */
972 	if (!icmp->icmp_dgram_errind) {
973 		freemsg(mp);
974 		return;
975 	}
976 
977 	switch (icmp->icmp_family) {
978 	case AF_INET:
979 		sin = sin_null;
980 		sin.sin_family = AF_INET;
981 		sin.sin_addr.s_addr = ipha->ipha_dst;
982 		mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0,
983 		    error);
984 		break;
985 	case AF_INET6:
986 		sin6 = sin6_null;
987 		sin6.sin6_family = AF_INET6;
988 		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
989 
990 		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
991 		    NULL, 0, error);
992 		break;
993 	}
994 	if (mp1)
995 		putnext(q, mp1);
996 	freemsg(mp);
997 }
998 
/*
 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6
 * for IPv6 packets.
 * Send permanent (non-transient) errors upstream.
 * Assumes that IP has pulled up all the extension headers as well
 * as the ICMPv6 header.
 *
 * Outcomes, as implemented below:
 *  - ICMP6_DST_UNREACH/NOPORT, and ICMP6_PARAM_PROB/NEXTHEADER whose
 *    pointer lands on the embedded packet's next-header field, are
 *    reported as ECONNREFUSED in a T_UDERROR_IND, provided
 *    icmp_dgram_errind is set.
 *  - ICMP6_PACKET_TOO_BIG produces a data-less T_UNITDATA_IND carrying an
 *    IPV6_PATHMTU option, provided icmp_ipv6_recvpathmtu is set.
 *  - Everything else is dropped.
 * The message 'mp' is consumed on every path.
 */
static void
icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp)
{
	icmp6_t		*icmp6;
	ip6_t		*ip6h, *outer_ip6h;
	uint16_t	iph_hdr_length;
	uint8_t		*nexthdrp;
	sin6_t		sin6;
	mblk_t		*mp1;
	int		error = 0;
	icmp_t		*icmp = Q_TO_ICMP(q);

	/*
	 * Locate the ICMPv6 header.  When something other than ICMPv6
	 * directly follows the fixed IPv6 header, ip_hdr_length_v6() is
	 * used to obtain the full outer header length.
	 */
	outer_ip6h = (ip6_t *)mp->b_rptr;
	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
	else
		iph_hdr_length = IPV6_HDR_LEN;

	/* ip6h is the IPv6 header embedded right after the ICMPv6 header. */
	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
	ip6h = (ip6_t *)&icmp6[1];
	/*
	 * Presumably fills in the embedded packet's header length and a
	 * pointer to its final next-header byte; the packet is dropped if
	 * the call fails.  Only nexthdrp is used afterwards.
	 */
	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
		freemsg(mp);
		return;
	}

	switch (icmp6->icmp6_type) {
	case ICMP6_DST_UNREACH:
		switch (icmp6->icmp6_code) {
		case ICMP6_DST_UNREACH_NOPORT:
			error = ECONNREFUSED;
			break;
		case ICMP6_DST_UNREACH_ADMIN:
		case ICMP6_DST_UNREACH_NOROUTE:
		case ICMP6_DST_UNREACH_BEYONDSCOPE:
		case ICMP6_DST_UNREACH_ADDR:
			/* Transient errors */
			break;
		default:
			break;
		}
		break;
	case ICMP6_PACKET_TOO_BIG: {
		struct T_unitdata_ind	*tudi;
		struct T_opthdr		*toh;
		size_t			udi_size;
		mblk_t			*newmp;
		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
		    sizeof (struct ip6_mtuinfo);
		/* Note: shadows the function-scope sin6 within this case. */
		sin6_t			*sin6;
		struct ip6_mtuinfo	*mtuinfo;

		/*
		 * If the application has requested to receive path mtu
		 * information, send up an empty message containing an
		 * IPV6_PATHMTU ancillary data item.
		 */
		if (!icmp->icmp_ipv6_recvpathmtu)
			break;

		/* Layout: T_unitdata_ind | sin6_t source | T_opthdr | mtuinfo */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
		    opt_length;
		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
			/* Count the failure; mp is freed after the switch. */
			BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors);
			break;
		}

		/*
		 * newmp->b_cont is left to NULL on purpose.  This is an
		 * empty message containing only ancillary data.
		 */
		newmp->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
		newmp->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
		tudi->OPT_length = opt_length;

		/* The source address reported is the endpoint's icmp_v6dst. */
		sin6 = (sin6_t *)&tudi[1];
		bzero(sin6, sizeof (sin6_t));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_addr = icmp->icmp_v6dst;

		toh = (struct T_opthdr *)&sin6[1];
		toh->level = IPPROTO_IPV6;
		toh->name = IPV6_PATHMTU;
		toh->len = opt_length;
		toh->status = 0;

		/*
		 * The mtuinfo address is the destination of the embedded
		 * packet.
		 * NOTE(review): icmp6_mtu is copied as found in the packet;
		 * confirm the byte order consumers of ip6m_mtu expect.
		 */
		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
		/*
		 * We've consumed everything we need from the original
		 * message.  Free it, then send our empty message.
		 */
		freemsg(mp);
		putnext(q, newmp);
		return;
	}
	case ICMP6_TIME_EXCEEDED:
		/* Transient errors */
		break;
	case ICMP6_PARAM_PROB:
		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
		    (uchar_t *)nexthdrp) {
			error = ECONNREFUSED;
			break;
		}
		break;
	}
	/* Nothing to report: drop the message. */
	if (error == 0) {
		freemsg(mp);
		return;
	}

	/*
	 * Deliver T_UDERROR_IND when the application has asked for it.
	 * The socket layer enables this automatically when connected.
	 */
	if (!icmp->icmp_dgram_errind) {
		freemsg(mp);
		return;
	}

	/*
	 * Report the destination of the embedded packet, together with the
	 * bits of its ip6_vcf word outside IPV6_VERS_AND_FLOW_MASK.
	 */
	sin6 = sin6_null;
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = ip6h->ip6_dst;
	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;

	mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0,
	    error);
	if (mp1)
		putnext(q, mp1);
	freemsg(mp);
}
1147 
1148 /*
1149  * This routine responds to T_ADDR_REQ messages.  It is called by icmp_wput.
1150  * The local address is filled in if endpoint is bound. The remote address
1151  * is filled in if remote address has been precified ("connected endpoint")
1152  * (The concept of connected CLTS sockets is alien to published TPI
1153  *  but we support it anyway).
1154  */
1155 static void
1156 icmp_addr_req(queue_t *q, mblk_t *mp)
1157 {
1158 	icmp_t	*icmp = Q_TO_ICMP(q);
1159 	mblk_t	*ackmp;
1160 	struct T_addr_ack *taa;
1161 
1162 	/* Make it large enough for worst case */
1163 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1164 	    2 * sizeof (sin6_t), 1);
1165 	if (ackmp == NULL) {
1166 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
1167 		return;
1168 	}
1169 	taa = (struct T_addr_ack *)ackmp->b_rptr;
1170 
1171 	bzero(taa, sizeof (struct T_addr_ack));
1172 	ackmp->b_wptr = (uchar_t *)&taa[1];
1173 
1174 	taa->PRIM_type = T_ADDR_ACK;
1175 	ackmp->b_datap->db_type = M_PCPROTO;
1176 	rw_enter(&icmp->icmp_rwlock, RW_READER);
1177 	/*
1178 	 * Note: Following code assumes 32 bit alignment of basic
1179 	 * data structures like sin_t and struct T_addr_ack.
1180 	 */
1181 	if (icmp->icmp_state != TS_UNBND) {
1182 		/*
1183 		 * Fill in local address
1184 		 */
1185 		taa->LOCADDR_offset = sizeof (*taa);
1186 		if (icmp->icmp_family == AF_INET) {
1187 			sin_t	*sin;
1188 
1189 			taa->LOCADDR_length = sizeof (sin_t);
1190 			sin = (sin_t *)&taa[1];
1191 			/* Fill zeroes and then intialize non-zero fields */
1192 			*sin = sin_null;
1193 			sin->sin_family = AF_INET;
1194 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) &&
1195 			    !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
1196 				IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src,
1197 				    sin->sin_addr.s_addr);
1198 			} else {
1199 				/*
1200 				 * INADDR_ANY
1201 				 * icmp_v6src is not set, we might be bound to
1202 				 * broadcast/multicast. Use icmp_bound_v6src as
1203 				 * local address instead (that could
1204 				 * also still be INADDR_ANY)
1205 				 */
1206 				IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src,
1207 				    sin->sin_addr.s_addr);
1208 			}
1209 			ackmp->b_wptr = (uchar_t *)&sin[1];
1210 		} else {
1211 			sin6_t	*sin6;
1212 
1213 			ASSERT(icmp->icmp_family == AF_INET6);
1214 			taa->LOCADDR_length = sizeof (sin6_t);
1215 			sin6 = (sin6_t *)&taa[1];
1216 			/* Fill zeroes and then intialize non-zero fields */
1217 			*sin6 = sin6_null;
1218 			sin6->sin6_family = AF_INET6;
1219 			if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
1220 				sin6->sin6_addr = icmp->icmp_v6src;
1221 			} else {
1222 				/*
1223 				 * UNSPECIFIED
1224 				 * icmp_v6src is not set, we might be bound to
1225 				 * broadcast/multicast. Use icmp_bound_v6src as
1226 				 * local address instead (that could
1227 				 * also still be UNSPECIFIED)
1228 				 */
1229 				sin6->sin6_addr = icmp->icmp_bound_v6src;
1230 			}
1231 			ackmp->b_wptr = (uchar_t *)&sin6[1];
1232 		}
1233 	}
1234 	rw_exit(&icmp->icmp_rwlock);
1235 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1236 	qreply(q, ackmp);
1237 }
1238 
1239 static void
1240 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp)
1241 {
1242 	*tap = icmp_g_t_info_ack;
1243 
1244 	if (icmp->icmp_family == AF_INET6)
1245 		tap->ADDR_size = sizeof (sin6_t);
1246 	else
1247 		tap->ADDR_size = sizeof (sin_t);
1248 	tap->CURRENT_state = icmp->icmp_state;
1249 	tap->OPT_size = icmp_max_optsize;
1250 }
1251 
1252 /*
1253  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
1254  * icmp_wput.  Much of the T_CAPABILITY_ACK information is copied from
1255  * icmp_g_t_info_ack.  The current state of the stream is copied from
1256  * icmp_state.
1257  */
1258 static void
1259 icmp_capability_req(queue_t *q, mblk_t *mp)
1260 {
1261 	icmp_t			*icmp = Q_TO_ICMP(q);
1262 	t_uscalar_t		cap_bits1;
1263 	struct T_capability_ack	*tcap;
1264 
1265 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1266 
1267 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1268 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
1269 	if (!mp)
1270 		return;
1271 
1272 	tcap = (struct T_capability_ack *)mp->b_rptr;
1273 	tcap->CAP_bits1 = 0;
1274 
1275 	if (cap_bits1 & TC1_INFO) {
1276 		icmp_copy_info(&tcap->INFO_ack, icmp);
1277 		tcap->CAP_bits1 |= TC1_INFO;
1278 	}
1279 
1280 	qreply(q, mp);
1281 }
1282 
1283 /*
1284  * This routine responds to T_INFO_REQ messages.  It is called by icmp_wput.
1285  * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack.
1286  * The current state of the stream is copied from icmp_state.
1287  */
1288 static void
1289 icmp_info_req(queue_t *q, mblk_t *mp)
1290 {
1291 	icmp_t	*icmp = Q_TO_ICMP(q);
1292 
1293 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1294 	    T_INFO_ACK);
1295 	if (!mp)
1296 		return;
1297 	icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp);
1298 	qreply(q, mp);
1299 }
1300 
1301 /*
1302  * IP recognizes seven kinds of bind requests:
1303  *
1304  * - A zero-length address binds only to the protocol number.
1305  *
1306  * - A 4-byte address is treated as a request to
1307  * validate that the address is a valid local IPv4
1308  * address, appropriate for an application to bind to.
1309  * IP does the verification, but does not make any note
1310  * of the address at this time.
1311  *
1312  * - A 16-byte address contains is treated as a request
1313  * to validate a local IPv6 address, as the 4-byte
1314  * address case above.
1315  *
1316  * - A 16-byte sockaddr_in to validate the local IPv4 address and also
1317  * use it for the inbound fanout of packets.
1318  *
1319  * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also
1320  * use it for the inbound fanout of packets.
1321  *
1322  * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout
1323  * information consisting of local and remote addresses
1324  * and ports (unused for raw sockets).  In this case, the addresses are both
1325  * validated as appropriate for this operation, and, if
1326  * so, the information is retained for use in the
1327  * inbound fanout.
1328  *
1329  * - A 36-byte address address (ipa6_conn_t) containing complete IPv6
1330  * fanout information, like the 12-byte case above.
1331  *
1332  * IP will also fill in the IRE request mblk with information
1333  * regarding our peer.  In all cases, we notify IP of our protocol
1334  * type by appending a single protocol byte to the bind request.
1335  */
1336 static mblk_t *
1337 icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length,
1338     in_port_t fport)
1339 {
1340 	char	*cp;
1341 	mblk_t	*mp;
1342 	struct T_bind_req *tbr;
1343 	ipa_conn_t	*ac;
1344 	ipa6_conn_t	*ac6;
1345 	sin_t		*sin;
1346 	sin6_t		*sin6;
1347 
1348 	ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ);
1349 	ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock));
1350 	mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI);
1351 	if (mp == NULL)
1352 		return (NULL);
1353 	mp->b_datap->db_type = M_PROTO;
1354 	tbr = (struct T_bind_req *)mp->b_rptr;
1355 	tbr->PRIM_type = bind_prim;
1356 	tbr->ADDR_offset = sizeof (*tbr);
1357 	tbr->CONIND_number = 0;
1358 	tbr->ADDR_length = addr_length;
1359 	cp = (char *)&tbr[1];
1360 	switch (addr_length) {
1361 	case sizeof (ipa_conn_t):
1362 		ASSERT(icmp->icmp_family == AF_INET);
1363 		/* Append a request for an IRE */
1364 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1365 		if (mp->b_cont == NULL) {
1366 			freemsg(mp);
1367 			return (NULL);
1368 		}
1369 		mp->b_cont->b_wptr += sizeof (ire_t);
1370 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1371 
1372 		/* cp known to be 32 bit aligned */
1373 		ac = (ipa_conn_t *)cp;
1374 		ac->ac_laddr = V4_PART_OF_V6(icmp->icmp_v6src);
1375 		ac->ac_faddr = V4_PART_OF_V6(icmp->icmp_v6dst);
1376 		ac->ac_fport = fport;
1377 		ac->ac_lport = 0;
1378 		break;
1379 
1380 	case sizeof (ipa6_conn_t):
1381 		ASSERT(icmp->icmp_family == AF_INET6);
1382 		/* Append a request for an IRE */
1383 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1384 		if (mp->b_cont == NULL) {
1385 			freemsg(mp);
1386 			return (NULL);
1387 		}
1388 		mp->b_cont->b_wptr += sizeof (ire_t);
1389 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1390 
1391 		/* cp known to be 32 bit aligned */
1392 		ac6 = (ipa6_conn_t *)cp;
1393 		ac6->ac6_laddr = icmp->icmp_v6src;
1394 		ac6->ac6_faddr = icmp->icmp_v6dst;
1395 		ac6->ac6_fport = fport;
1396 		ac6->ac6_lport = 0;
1397 		break;
1398 
1399 	case sizeof (sin_t):
1400 		ASSERT(icmp->icmp_family == AF_INET);
1401 		/* Append a request for an IRE */
1402 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1403 		if (!mp->b_cont) {
1404 			freemsg(mp);
1405 			return (NULL);
1406 		}
1407 		mp->b_cont->b_wptr += sizeof (ire_t);
1408 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1409 
1410 		sin = (sin_t *)cp;
1411 		*sin = sin_null;
1412 		sin->sin_family = AF_INET;
1413 		sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_bound_v6src);
1414 		break;
1415 
1416 	case sizeof (sin6_t):
1417 		ASSERT(icmp->icmp_family == AF_INET6);
1418 		/* Append a request for an IRE */
1419 		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
1420 		if (!mp->b_cont) {
1421 			freemsg(mp);
1422 			return (NULL);
1423 		}
1424 		mp->b_cont->b_wptr += sizeof (ire_t);
1425 		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
1426 
1427 		sin6 = (sin6_t *)cp;
1428 		*sin6 = sin6_null;
1429 		sin6->sin6_family = AF_INET6;
1430 		sin6->sin6_addr = icmp->icmp_bound_v6src;
1431 		break;
1432 	}
1433 	/* Add protocol number to end */
1434 	cp[addr_length] = icmp->icmp_proto;
1435 	mp->b_wptr = (uchar_t *)&cp[addr_length + 1];
1436 	return (mp);
1437 }
1438 
1439 /* For /dev/icmp aka AF_INET open */
1440 static int
1441 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1442 {
1443 	return (icmp_open(q, devp, flag, sflag, credp, B_FALSE));
1444 }
1445 
1446 /* For /dev/icmp6 aka AF_INET6 open */
1447 static int
1448 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1449 {
1450 	return (icmp_open(q, devp, flag, sflag, credp, B_TRUE));
1451 }
1452 
/*
 * This is the open routine for icmp, shared by icmp_openv4() and
 * icmp_openv6().  It creates a raw-IP conn_t (and the icmp_t embedded in
 * it) for the stream, assigns a minor device number, and initializes the
 * per-endpoint defaults for the selected IP version.
 *
 * Returns 0 on success (including when the stream is already open),
 * EINVAL for a module open, EBUSY when no minor number is available, or
 * the error from icmp_build_hdrs() for IPv6 endpoints.
 */
/*ARGSUSED2*/
static int
icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
    boolean_t isv6)
{
	int	err;
	icmp_t	*icmp;
	conn_t *connp;
	dev_t	conn_dev;
	zoneid_t zoneid;
	netstack_t *ns;
	icmp_stack_t *is;

	/* If the stream is already open, return immediately. */
	if (q->q_ptr != NULL)
		return (0);

	/* icmp can only be opened as a device, not pushed as a module. */
	if (sflag == MODOPEN)
		return (EINVAL);

	/* Takes a hold on the netstack; released below in every path. */
	ns = netstack_find_by_cred(credp);
	ASSERT(ns != NULL);
	is = ns->netstack_icmp;
	ASSERT(is != NULL);

	/*
	 * For exclusive stacks we set the zoneid to zero
	 * to make ICMP operate as if in the global zone.
	 */
	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = crgetzoneid(credp);

	/*
	 * Since ICMP is not used so heavily, allocating from the small
	 * arena should be sufficient.
	 */
	if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
		netstack_rele(ns);
		return (EBUSY);
	}
	/* Hand the new minor number back to the caller. */
	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);

	/* Remember the minor and its arena on the conn. */
	connp = ipcl_conn_create(IPCL_RAWIPCONN, KM_SLEEP, ns);
	connp->conn_dev = conn_dev;
	connp->conn_minor_arena = ip_minor_arena_sa;
	icmp = connp->conn_icmp;

	/*
	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
	 * done by netstack_find_by_cred()
	 */
	netstack_rele(ns);

	/*
	 * Initialize the icmp_t structure for this stream.
	 * Both queues of the pair point at the conn.
	 */
	q->q_ptr = connp;
	WR(q)->q_ptr = connp;
	connp->conn_rq = q;
	connp->conn_wq = WR(q);

	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
	ASSERT(connp->conn_ulp == IPPROTO_ICMP);
	ASSERT(connp->conn_icmp == icmp);
	ASSERT(icmp->icmp_connp == connp);

	/* Set the initial state of the stream and the privilege status. */
	icmp->icmp_state = TS_UNBND;
	if (isv6) {
		icmp->icmp_ipversion = IPV6_VERSION;
		icmp->icmp_family = AF_INET6;
		connp->conn_ulp = IPPROTO_ICMPV6;
		/* May be changed by a SO_PROTOTYPE socket option. */
		icmp->icmp_proto = IPPROTO_ICMPV6;
		icmp->icmp_checksum_off = 2;	/* Offset for icmp6_cksum */
		icmp->icmp_max_hdr_len = IPV6_HDR_LEN;
		icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit;
		connp->conn_af_isv6 = B_TRUE;
		connp->conn_flags |= IPCL_ISV6;
	} else {
		icmp->icmp_ipversion = IPV4_VERSION;
		icmp->icmp_family = AF_INET;
		/* May be changed by a SO_PROTOTYPE socket option. */
		icmp->icmp_proto = IPPROTO_ICMP;
		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH;
		icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl;
		connp->conn_af_isv6 = B_FALSE;
		connp->conn_flags &= ~IPCL_ISV6;
	}
	/* Defaults common to both IP versions. */
	icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
	icmp->icmp_pending_op = -1;
	connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
	connp->conn_zoneid = zoneid;

	/*
	 * If the caller has the process-wide flag set, then default to MAC
	 * exempt mode.  This allows read-down to unlabeled hosts.
	 */
	if (getpflags(NET_MAC_AWARE, credp) != 0)
		connp->conn_mac_exempt = B_TRUE;

	connp->conn_ulp_labeled = is_system_labeled();

	icmp->icmp_is = is;

	/* Queue flow-control limits come from the per-stack settings. */
	q->q_hiwat = is->is_recv_hiwat;
	WR(q)->q_hiwat = is->is_xmit_hiwat;
	WR(q)->q_lowat = is->is_xmit_lowat;

	/* Set the inbound handler; the conn keeps its own hold on the cred. */
	connp->conn_recv = icmp_input;
	crhold(credp);
	connp->conn_cred = credp;

	/* Setup is complete: clear the incipient flag under conn_lock. */
	mutex_enter(&connp->conn_lock);
	connp->conn_state_flags &= ~CONN_INCIPIENT;
	mutex_exit(&connp->conn_lock);

	qprocson(q);

	if (icmp->icmp_family == AF_INET6) {
		/* Build initial header template for transmit */
		if ((err = icmp_build_hdrs(icmp)) != 0) {
			/*
			 * NOTE(review): this path does not visibly return
			 * conn_dev to its minor arena or clear q->q_ptr;
			 * confirm that ipcl_conn_destroy() or the caller
			 * takes care of that.
			 */
			rw_exit(&icmp->icmp_rwlock);
			qprocsoff(q);
			ipcl_conn_destroy(connp);
			return (err);
		}
	}
	rw_exit(&icmp->icmp_rwlock);

	/* Set the Stream head write offset. */
	(void) mi_set_sth_wroff(q,
	    icmp->icmp_max_hdr_len + is->is_wroff_extra);
	/* Set the Stream head high-water mark to the read queue's. */
	(void) mi_set_sth_hiwat(q, q->q_hiwat);

	return (0);
}
1596 
1597 /*
1598  * Which ICMP options OK to set through T_UNITDATA_REQ...
1599  */
1600 /* ARGSUSED */
1601 static boolean_t
1602 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1603 {
1604 	return (B_TRUE);
1605 }
1606 
1607 /*
1608  * This routine gets default values of certain options whose default
1609  * values are maintained by protcol specific code
1610  */
1611 /* ARGSUSED */
1612 int
1613 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
1614 {
1615 	icmp_t *icmp = Q_TO_ICMP(q);
1616 	icmp_stack_t *is = icmp->icmp_is;
1617 	int *i1 = (int *)ptr;
1618 
1619 	switch (level) {
1620 	case IPPROTO_IP:
1621 		switch (name) {
1622 		case IP_MULTICAST_TTL:
1623 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1624 			return (sizeof (uchar_t));
1625 		case IP_MULTICAST_LOOP:
1626 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1627 			return (sizeof (uchar_t));
1628 		}
1629 		break;
1630 	case IPPROTO_IPV6:
1631 		switch (name) {
1632 		case IPV6_MULTICAST_HOPS:
1633 			*i1 = IP_DEFAULT_MULTICAST_TTL;
1634 			return (sizeof (int));
1635 		case IPV6_MULTICAST_LOOP:
1636 			*i1 = IP_DEFAULT_MULTICAST_LOOP;
1637 			return (sizeof (int));
1638 		case IPV6_UNICAST_HOPS:
1639 			*i1 = is->is_ipv6_hoplimit;
1640 			return (sizeof (int));
1641 		}
1642 		break;
1643 	case IPPROTO_ICMPV6:
1644 		switch (name) {
1645 		case ICMP6_FILTER:
1646 			/* Make it look like "pass all" */
1647 			ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
1648 			return (sizeof (icmp6_filter_t));
1649 		}
1650 		break;
1651 	}
1652 	return (-1);
1653 }
1654 
1655 /*
1656  * This routine retrieves the current status of socket options.
1657  * It returns the size of the option retrieved.
1658  */
1659 int
1660 icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr)
1661 {
1662 	conn_t	*connp = Q_TO_CONN(q);
1663 	icmp_t	*icmp = connp->conn_icmp;
1664 	icmp_stack_t *is = icmp->icmp_is;
1665 	int	*i1 = (int *)ptr;
1666 	ip6_pkt_t	*ipp = &icmp->icmp_sticky_ipp;
1667 
1668 	switch (level) {
1669 	case SOL_SOCKET:
1670 		switch (name) {
1671 		case SO_DEBUG:
1672 			*i1 = icmp->icmp_debug;
1673 			break;
1674 		case SO_TYPE:
1675 			*i1 = SOCK_RAW;
1676 			break;
1677 		case SO_PROTOTYPE:
1678 			*i1 = icmp->icmp_proto;
1679 			break;
1680 		case SO_REUSEADDR:
1681 			*i1 = icmp->icmp_reuseaddr;
1682 			break;
1683 
1684 		/*
1685 		 * The following three items are available here,
1686 		 * but are only meaningful to IP.
1687 		 */
1688 		case SO_DONTROUTE:
1689 			*i1 = icmp->icmp_dontroute;
1690 			break;
1691 		case SO_USELOOPBACK:
1692 			*i1 = icmp->icmp_useloopback;
1693 			break;
1694 		case SO_BROADCAST:
1695 			*i1 = icmp->icmp_broadcast;
1696 			break;
1697 
1698 		case SO_SNDBUF:
1699 			ASSERT(q->q_hiwat <= INT_MAX);
1700 			*i1 = (int)q->q_hiwat;
1701 			break;
1702 		case SO_RCVBUF:
1703 			ASSERT(RD(q)->q_hiwat <= INT_MAX);
1704 			*i1 = (int)RD(q)->q_hiwat;
1705 			break;
1706 		case SO_DGRAM_ERRIND:
1707 			*i1 = icmp->icmp_dgram_errind;
1708 			break;
1709 		case SO_TIMESTAMP:
1710 			*i1 = icmp->icmp_timestamp;
1711 			break;
1712 		case SO_MAC_EXEMPT:
1713 			*i1 = connp->conn_mac_exempt;
1714 			break;
1715 		case SO_DOMAIN:
1716 			*i1 = icmp->icmp_family;
1717 			break;
1718 
1719 		/*
1720 		 * Following four not meaningful for icmp
1721 		 * Action is same as "default" to which we fallthrough
1722 		 * so we keep them in comments.
1723 		 * case SO_LINGER:
1724 		 * case SO_KEEPALIVE:
1725 		 * case SO_OOBINLINE:
1726 		 * case SO_ALLZONES:
1727 		 */
1728 		default:
1729 			return (-1);
1730 		}
1731 		break;
1732 	case IPPROTO_IP:
1733 		/*
1734 		 * Only allow IPv4 option processing on IPv4 sockets.
1735 		 */
1736 		if (icmp->icmp_family != AF_INET)
1737 			return (-1);
1738 
1739 		switch (name) {
1740 		case IP_OPTIONS:
1741 		case T_IP_OPTIONS:
1742 			/* Options are passed up with each packet */
1743 			return (0);
1744 		case IP_HDRINCL:
1745 			*i1 = (int)icmp->icmp_hdrincl;
1746 			break;
1747 		case IP_TOS:
1748 		case T_IP_TOS:
1749 			*i1 = (int)icmp->icmp_type_of_service;
1750 			break;
1751 		case IP_TTL:
1752 			*i1 = (int)icmp->icmp_ttl;
1753 			break;
1754 		case IP_MULTICAST_IF:
1755 			/* 0 address if not set */
1756 			*(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr;
1757 			return (sizeof (ipaddr_t));
1758 		case IP_MULTICAST_TTL:
1759 			*(uchar_t *)ptr = icmp->icmp_multicast_ttl;
1760 			return (sizeof (uchar_t));
1761 		case IP_MULTICAST_LOOP:
1762 			*ptr = connp->conn_multicast_loop;
1763 			return (sizeof (uint8_t));
1764 		case IP_BOUND_IF:
1765 			/* Zero if not set */
1766 			*i1 = icmp->icmp_bound_if;
1767 			break;	/* goto sizeof (int) option return */
1768 		case IP_UNSPEC_SRC:
1769 			*ptr = icmp->icmp_unspec_source;
1770 			break;	/* goto sizeof (int) option return */
1771 		case IP_BROADCAST_TTL:
1772 			*(uchar_t *)ptr = connp->conn_broadcast_ttl;
1773 			return (sizeof (uchar_t));
1774 		case IP_RECVIF:
1775 			*ptr = icmp->icmp_recvif;
1776 			break;	/* goto sizeof (int) option return */
1777 		case IP_RECVPKTINFO:
1778 			/*
1779 			 * This also handles IP_PKTINFO.
1780 			 * IP_PKTINFO and IP_RECVPKTINFO have the same value.
1781 			 * Differentiation is based on the size of the argument
1782 			 * passed in.
1783 			 * This option is handled in IP which will return an
1784 			 * error for IP_PKTINFO as it's not supported as a
1785 			 * sticky option.
1786 			 */
1787 			return (-EINVAL);
1788 		/*
1789 		 * Cannot "get" the value of following options
1790 		 * at this level. Action is same as "default" to
1791 		 * which we fallthrough so we keep them in comments.
1792 		 *
1793 		 * case IP_ADD_MEMBERSHIP:
1794 		 * case IP_DROP_MEMBERSHIP:
1795 		 * case IP_BLOCK_SOURCE:
1796 		 * case IP_UNBLOCK_SOURCE:
1797 		 * case IP_ADD_SOURCE_MEMBERSHIP:
1798 		 * case IP_DROP_SOURCE_MEMBERSHIP:
1799 		 * case MCAST_JOIN_GROUP:
1800 		 * case MCAST_LEAVE_GROUP:
1801 		 * case MCAST_BLOCK_SOURCE:
1802 		 * case MCAST_UNBLOCK_SOURCE:
1803 		 * case MCAST_JOIN_SOURCE_GROUP:
1804 		 * case MCAST_LEAVE_SOURCE_GROUP:
1805 		 * case MRT_INIT:
1806 		 * case MRT_DONE:
1807 		 * case MRT_ADD_VIF:
1808 		 * case MRT_DEL_VIF:
1809 		 * case MRT_ADD_MFC:
1810 		 * case MRT_DEL_MFC:
1811 		 * case MRT_VERSION:
1812 		 * case MRT_ASSERT:
1813 		 * case IP_SEC_OPT:
1814 		 * case IP_DONTFAILOVER_IF:
1815 		 * case IP_NEXTHOP:
1816 		 */
1817 		default:
1818 			return (-1);
1819 		}
1820 		break;
1821 	case IPPROTO_IPV6:
1822 		/*
1823 		 * Only allow IPv6 option processing on native IPv6 sockets.
1824 		 */
1825 		if (icmp->icmp_family != AF_INET6)
1826 			return (-1);
1827 		switch (name) {
1828 		case IPV6_UNICAST_HOPS:
1829 			*i1 = (unsigned int)icmp->icmp_ttl;
1830 			break;
1831 		case IPV6_MULTICAST_IF:
1832 			/* 0 index if not set */
1833 			*i1 = icmp->icmp_multicast_if_index;
1834 			break;
1835 		case IPV6_MULTICAST_HOPS:
1836 			*i1 = icmp->icmp_multicast_ttl;
1837 			break;
1838 		case IPV6_MULTICAST_LOOP:
1839 			*i1 = connp->conn_multicast_loop;
1840 			break;
1841 		case IPV6_BOUND_IF:
1842 			/* Zero if not set */
1843 			*i1 = icmp->icmp_bound_if;
1844 			break;
1845 		case IPV6_UNSPEC_SRC:
1846 			*i1 = icmp->icmp_unspec_source;
1847 			break;
1848 		case IPV6_CHECKSUM:
1849 			/*
1850 			 * Return offset or -1 if no checksum offset.
1851 			 * Does not apply to IPPROTO_ICMPV6
1852 			 */
1853 			if (icmp->icmp_proto == IPPROTO_ICMPV6)
1854 				return (-1);
1855 
1856 			if (icmp->icmp_raw_checksum) {
1857 				*i1 = icmp->icmp_checksum_off;
1858 			} else {
1859 				*i1 = -1;
1860 			}
1861 			break;
1862 		case IPV6_JOIN_GROUP:
1863 		case IPV6_LEAVE_GROUP:
1864 		case MCAST_JOIN_GROUP:
1865 		case MCAST_LEAVE_GROUP:
1866 		case MCAST_BLOCK_SOURCE:
1867 		case MCAST_UNBLOCK_SOURCE:
1868 		case MCAST_JOIN_SOURCE_GROUP:
1869 		case MCAST_LEAVE_SOURCE_GROUP:
1870 			/* cannot "get" the value for these */
1871 			return (-1);
1872 		case IPV6_RECVPKTINFO:
1873 			*i1 = icmp->icmp_ip_recvpktinfo;
1874 			break;
1875 		case IPV6_RECVTCLASS:
1876 			*i1 = icmp->icmp_ipv6_recvtclass;
1877 			break;
1878 		case IPV6_RECVPATHMTU:
1879 			*i1 = icmp->icmp_ipv6_recvpathmtu;
1880 			break;
1881 		case IPV6_V6ONLY:
1882 			*i1 = 1;
1883 			break;
1884 		case IPV6_RECVHOPLIMIT:
1885 			*i1 = icmp->icmp_ipv6_recvhoplimit;
1886 			break;
1887 		case IPV6_RECVHOPOPTS:
1888 			*i1 = icmp->icmp_ipv6_recvhopopts;
1889 			break;
1890 		case IPV6_RECVDSTOPTS:
1891 			*i1 = icmp->icmp_ipv6_recvdstopts;
1892 			break;
1893 		case _OLD_IPV6_RECVDSTOPTS:
1894 			*i1 = icmp->icmp_old_ipv6_recvdstopts;
1895 			break;
1896 		case IPV6_RECVRTHDRDSTOPTS:
1897 			*i1 = icmp->icmp_ipv6_recvrtdstopts;
1898 			break;
1899 		case IPV6_RECVRTHDR:
1900 			*i1 = icmp->icmp_ipv6_recvrthdr;
1901 			break;
1902 		case IPV6_PKTINFO: {
1903 			/* XXX assumes that caller has room for max size! */
1904 			struct in6_pktinfo *pkti;
1905 
1906 			pkti = (struct in6_pktinfo *)ptr;
1907 			if (ipp->ipp_fields & IPPF_IFINDEX)
1908 				pkti->ipi6_ifindex = ipp->ipp_ifindex;
1909 			else
1910 				pkti->ipi6_ifindex = 0;
1911 			if (ipp->ipp_fields & IPPF_ADDR)
1912 				pkti->ipi6_addr = ipp->ipp_addr;
1913 			else
1914 				pkti->ipi6_addr = ipv6_all_zeros;
1915 			return (sizeof (struct in6_pktinfo));
1916 		}
1917 		case IPV6_NEXTHOP: {
1918 			sin6_t *sin6 = (sin6_t *)ptr;
1919 
1920 			if (!(ipp->ipp_fields & IPPF_NEXTHOP))
1921 				return (0);
1922 			*sin6 = sin6_null;
1923 			sin6->sin6_family = AF_INET6;
1924 			sin6->sin6_addr = ipp->ipp_nexthop;
1925 			return (sizeof (sin6_t));
1926 		}
1927 		case IPV6_HOPOPTS:
1928 			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
1929 				return (0);
1930 			if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6)
1931 				return (0);
1932 			bcopy((char *)ipp->ipp_hopopts +
1933 			    icmp->icmp_label_len_v6, ptr,
1934 			    ipp->ipp_hopoptslen - icmp->icmp_label_len_v6);
1935 			if (icmp->icmp_label_len_v6 > 0) {
1936 				ptr[0] = ((char *)ipp->ipp_hopopts)[0];
1937 				ptr[1] = (ipp->ipp_hopoptslen -
1938 				    icmp->icmp_label_len_v6 + 7) / 8 - 1;
1939 			}
1940 			return (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6);
1941 		case IPV6_RTHDRDSTOPTS:
1942 			if (!(ipp->ipp_fields & IPPF_RTDSTOPTS))
1943 				return (0);
1944 			bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
1945 			return (ipp->ipp_rtdstoptslen);
1946 		case IPV6_RTHDR:
1947 			if (!(ipp->ipp_fields & IPPF_RTHDR))
1948 				return (0);
1949 			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
1950 			return (ipp->ipp_rthdrlen);
1951 		case IPV6_DSTOPTS:
1952 			if (!(ipp->ipp_fields & IPPF_DSTOPTS))
1953 				return (0);
1954 			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
1955 			return (ipp->ipp_dstoptslen);
1956 		case IPV6_PATHMTU:
1957 			if (!(ipp->ipp_fields & IPPF_PATHMTU))
1958 				return (0);
1959 
1960 			return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0,
1961 			    (struct ip6_mtuinfo *)ptr, is->is_netstack));
1962 		case IPV6_TCLASS:
1963 			if (ipp->ipp_fields & IPPF_TCLASS)
1964 				*i1 = ipp->ipp_tclass;
1965 			else
1966 				*i1 = IPV6_FLOW_TCLASS(
1967 				    IPV6_DEFAULT_VERS_AND_FLOW);
1968 			break;
1969 		default:
1970 			return (-1);
1971 		}
1972 		break;
1973 	case IPPROTO_ICMPV6:
1974 		/*
1975 		 * Only allow IPv6 option processing on native IPv6 sockets.
1976 		 */
1977 		if (icmp->icmp_family != AF_INET6)
1978 			return (-1);
1979 
1980 		if (icmp->icmp_proto != IPPROTO_ICMPV6)
1981 			return (-1);
1982 
1983 		switch (name) {
1984 		case ICMP6_FILTER:
1985 			if (icmp->icmp_filter == NULL) {
1986 				/* Make it look like "pass all" */
1987 				ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
1988 			} else {
1989 				(void) bcopy(icmp->icmp_filter, ptr,
1990 				    sizeof (icmp6_filter_t));
1991 			}
1992 			return (sizeof (icmp6_filter_t));
1993 		default:
1994 			return (-1);
1995 		}
1996 	default:
1997 		return (-1);
1998 	}
1999 	return (sizeof (int));
2000 }
2001 
2002 /*
2003  * This routine retrieves the current status of socket options.
2004  * It returns the size of the option retrieved.
2005  */
2006 int
2007 icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
2008 {
2009 	icmp_t  *icmp = Q_TO_ICMP(q);
2010 	int 	err;
2011 
2012 	rw_enter(&icmp->icmp_rwlock, RW_READER);
2013 	err = icmp_opt_get_locked(q, level, name, ptr);
2014 	rw_exit(&icmp->icmp_rwlock);
2015 	return (err);
2016 }
2017 
2018 
2019 /* This routine sets socket options. */
2020 /* ARGSUSED */
2021 int
2022 icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name,
2023     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
2024     void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
2025 {
2026 	conn_t	*connp = Q_TO_CONN(q);
2027 	icmp_t	*icmp = connp->conn_icmp;
2028 	icmp_stack_t *is = icmp->icmp_is;
2029 	int	*i1 = (int *)invalp;
2030 	boolean_t onoff = (*i1 == 0) ? 0 : 1;
2031 	boolean_t checkonly;
2032 	int	error;
2033 
2034 	switch (optset_context) {
2035 	case SETFN_OPTCOM_CHECKONLY:
2036 		checkonly = B_TRUE;
2037 		/*
2038 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
2039 		 * inlen != 0 implies value supplied and
2040 		 * 	we have to "pretend" to set it.
2041 		 * inlen == 0 implies that there is no
2042 		 * 	value part in T_CHECK request and just validation
2043 		 * done elsewhere should be enough, we just return here.
2044 		 */
2045 		if (inlen == 0) {
2046 			*outlenp = 0;
2047 			return (0);
2048 		}
2049 		break;
2050 	case SETFN_OPTCOM_NEGOTIATE:
2051 		checkonly = B_FALSE;
2052 		break;
2053 	case SETFN_UD_NEGOTIATE:
2054 	case SETFN_CONN_NEGOTIATE:
2055 		checkonly = B_FALSE;
2056 		/*
2057 		 * Negotiating local and "association-related" options
2058 		 * through T_UNITDATA_REQ.
2059 		 *
2060 		 * Following routine can filter out ones we do not
2061 		 * want to be "set" this way.
2062 		 */
2063 		if (!icmp_opt_allow_udr_set(level, name)) {
2064 			*outlenp = 0;
2065 			return (EINVAL);
2066 		}
2067 		break;
2068 	default:
2069 		/*
2070 		 * We should never get here
2071 		 */
2072 		*outlenp = 0;
2073 		return (EINVAL);
2074 	}
2075 
2076 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
2077 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
2078 
2079 	/*
2080 	 * For fixed length options, no sanity check
2081 	 * of passed in length is done. It is assumed *_optcom_req()
2082 	 * routines do the right thing.
2083 	 */
2084 
2085 	switch (level) {
2086 	case SOL_SOCKET:
2087 		switch (name) {
2088 		case SO_DEBUG:
2089 			if (!checkonly)
2090 				icmp->icmp_debug = onoff;
2091 			break;
2092 		case SO_PROTOTYPE:
2093 			if ((*i1 & 0xFF) != IPPROTO_ICMP &&
2094 			    (*i1 & 0xFF) != IPPROTO_ICMPV6 &&
2095 			    secpolicy_net_rawaccess(cr) != 0) {
2096 				*outlenp = 0;
2097 				return (EACCES);
2098 			}
2099 			/* Can't use IPPROTO_RAW with IPv6 */
2100 			if ((*i1 & 0xFF) == IPPROTO_RAW &&
2101 			    icmp->icmp_family == AF_INET6) {
2102 				*outlenp = 0;
2103 				return (EPROTONOSUPPORT);
2104 			}
2105 			if (checkonly) {
2106 				/* T_CHECK case */
2107 				*(int *)outvalp = (*i1 & 0xFF);
2108 				break;
2109 			}
2110 			icmp->icmp_proto = *i1 & 0xFF;
2111 			if ((icmp->icmp_proto == IPPROTO_RAW ||
2112 			    icmp->icmp_proto == IPPROTO_IGMP) &&
2113 			    icmp->icmp_family == AF_INET)
2114 				icmp->icmp_hdrincl = 1;
2115 			else
2116 				icmp->icmp_hdrincl = 0;
2117 
2118 			if (icmp->icmp_family == AF_INET6 &&
2119 			    icmp->icmp_proto == IPPROTO_ICMPV6) {
2120 				/* Set offset for icmp6_cksum */
2121 				icmp->icmp_raw_checksum = 0;
2122 				icmp->icmp_checksum_off = 2;
2123 			}
2124 			if (icmp->icmp_proto == IPPROTO_UDP ||
2125 			    icmp->icmp_proto == IPPROTO_TCP ||
2126 			    icmp->icmp_proto == IPPROTO_SCTP) {
2127 				icmp->icmp_no_tp_cksum = 1;
2128 				icmp->icmp_sticky_ipp.ipp_fields |=
2129 				    IPPF_NO_CKSUM;
2130 			} else {
2131 				icmp->icmp_no_tp_cksum = 0;
2132 				icmp->icmp_sticky_ipp.ipp_fields &=
2133 				    ~IPPF_NO_CKSUM;
2134 			}
2135 
2136 			if (icmp->icmp_filter != NULL &&
2137 			    icmp->icmp_proto != IPPROTO_ICMPV6) {
2138 				kmem_free(icmp->icmp_filter,
2139 				    sizeof (icmp6_filter_t));
2140 				icmp->icmp_filter = NULL;
2141 			}
2142 
2143 			/* Rebuild the header template */
2144 			error = icmp_build_hdrs(icmp);
2145 			if (error != 0) {
2146 				*outlenp = 0;
2147 				return (error);
2148 			}
2149 
2150 			/*
2151 			 * For SCTP, we don't use icmp_bind_proto() for
2152 			 * raw socket binding.  Note that we do not need
2153 			 * to set *outlenp.
2154 			 * FIXME: how does SCTP work?
2155 			 */
2156 			if (icmp->icmp_proto == IPPROTO_SCTP)
2157 				return (0);
2158 
2159 			*outlenp = sizeof (int);
2160 			*(int *)outvalp = *i1 & 0xFF;
2161 
2162 			/* Drop lock across the bind operation */
2163 			rw_exit(&icmp->icmp_rwlock);
2164 			icmp_bind_proto(q);
2165 			rw_enter(&icmp->icmp_rwlock, RW_WRITER);
2166 			return (0);
2167 		case SO_REUSEADDR:
2168 			if (!checkonly)
2169 				icmp->icmp_reuseaddr = onoff;
2170 			break;
2171 
2172 		/*
2173 		 * The following three items are available here,
2174 		 * but are only meaningful to IP.
2175 		 */
2176 		case SO_DONTROUTE:
2177 			if (!checkonly)
2178 				icmp->icmp_dontroute = onoff;
2179 			break;
2180 		case SO_USELOOPBACK:
2181 			if (!checkonly)
2182 				icmp->icmp_useloopback = onoff;
2183 			break;
2184 		case SO_BROADCAST:
2185 			if (!checkonly)
2186 				icmp->icmp_broadcast = onoff;
2187 			break;
2188 
2189 		case SO_SNDBUF:
2190 			if (*i1 > is->is_max_buf) {
2191 				*outlenp = 0;
2192 				return (ENOBUFS);
2193 			}
2194 			if (!checkonly) {
2195 				q->q_hiwat = *i1;
2196 			}
2197 			break;
2198 		case SO_RCVBUF:
2199 			if (*i1 > is->is_max_buf) {
2200 				*outlenp = 0;
2201 				return (ENOBUFS);
2202 			}
2203 			if (!checkonly) {
2204 				RD(q)->q_hiwat = *i1;
2205 				rw_exit(&icmp->icmp_rwlock);
2206 				(void) mi_set_sth_hiwat(RD(q), *i1);
2207 				rw_enter(&icmp->icmp_rwlock, RW_WRITER);
2208 			}
2209 			break;
2210 		case SO_DGRAM_ERRIND:
2211 			if (!checkonly)
2212 				icmp->icmp_dgram_errind = onoff;
2213 			break;
2214 		case SO_ALLZONES:
2215 			/*
2216 			 * "soft" error (negative)
2217 			 * option not handled at this level
2218 			 * Note: Do not modify *outlenp
2219 			 */
2220 			return (-EINVAL);
2221 		case SO_TIMESTAMP:
2222 			if (!checkonly) {
2223 				icmp->icmp_timestamp = onoff;
2224 			}
2225 			break;
2226 		case SO_MAC_EXEMPT:
2227 			/*
2228 			 * "soft" error (negative)
2229 			 * option not handled at this level
2230 			 * Note: Do not modify *outlenp
2231 			 */
2232 			return (-EINVAL);
2233 		/*
2234 		 * Following three not meaningful for icmp
2235 		 * Action is same as "default" so we keep them
2236 		 * in comments.
2237 		 * case SO_LINGER:
2238 		 * case SO_KEEPALIVE:
2239 		 * case SO_OOBINLINE:
2240 		 */
2241 		default:
2242 			*outlenp = 0;
2243 			return (EINVAL);
2244 		}
2245 		break;
2246 	case IPPROTO_IP:
2247 		/*
2248 		 * Only allow IPv4 option processing on IPv4 sockets.
2249 		 */
2250 		if (icmp->icmp_family != AF_INET) {
2251 			*outlenp = 0;
2252 			return (ENOPROTOOPT);
2253 		}
2254 		switch (name) {
2255 		case IP_OPTIONS:
2256 		case T_IP_OPTIONS:
2257 			/* Save options for use by IP. */
2258 			if ((inlen & 0x3) ||
2259 			    inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) {
2260 				*outlenp = 0;
2261 				return (EINVAL);
2262 			}
2263 			if (checkonly)
2264 				break;
2265 
2266 			if (!tsol_option_set(&icmp->icmp_ip_snd_options,
2267 			    &icmp->icmp_ip_snd_options_len,
2268 			    icmp->icmp_label_len, invalp, inlen)) {
2269 				*outlenp = 0;
2270 				return (ENOMEM);
2271 			}
2272 
2273 			icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
2274 			    icmp->icmp_ip_snd_options_len;
2275 			rw_exit(&icmp->icmp_rwlock);
2276 			(void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len +
2277 			    is->is_wroff_extra);
2278 			rw_enter(&icmp->icmp_rwlock, RW_WRITER);
2279 			break;
2280 		case IP_HDRINCL:
2281 			if (!checkonly)
2282 				icmp->icmp_hdrincl = onoff;
2283 			break;
2284 		case IP_TOS:
2285 		case T_IP_TOS:
2286 			if (!checkonly) {
2287 				icmp->icmp_type_of_service = (uint8_t)*i1;
2288 			}
2289 			break;
2290 		case IP_TTL:
2291 			if (!checkonly) {
2292 				icmp->icmp_ttl = (uint8_t)*i1;
2293 			}
2294 			break;
2295 		case IP_MULTICAST_IF:
2296 			/*
2297 			 * TODO should check OPTMGMT reply and undo this if
2298 			 * there is an error.
2299 			 */
2300 			if (!checkonly)
2301 				icmp->icmp_multicast_if_addr = *i1;
2302 			break;
2303 		case IP_MULTICAST_TTL:
2304 			if (!checkonly)
2305 				icmp->icmp_multicast_ttl = *invalp;
2306 			break;
2307 		case IP_MULTICAST_LOOP:
2308 			if (!checkonly) {
2309 				connp->conn_multicast_loop =
2310 				    (*invalp == 0) ? 0 : 1;
2311 			}
2312 			break;
2313 		case IP_BOUND_IF:
2314 			if (!checkonly)
2315 				icmp->icmp_bound_if = *i1;
2316 			break;
2317 		case IP_UNSPEC_SRC:
2318 			if (!checkonly)
2319 				icmp->icmp_unspec_source = onoff;
2320 			break;
2321 		case IP_BROADCAST_TTL:
2322 			if (!checkonly)
2323 				connp->conn_broadcast_ttl = *invalp;
2324 			break;
2325 		case IP_RECVIF:
2326 			if (!checkonly)
2327 				icmp->icmp_recvif = onoff;
2328 			/*
2329 			 * pass to ip
2330 			 */
2331 			return (-EINVAL);
2332 		case IP_PKTINFO: {
2333 			/*
2334 			 * This also handles IP_RECVPKTINFO.
2335 			 * IP_PKTINFO and IP_RECVPKTINFO have the same value.
2336 			 * Differentiation is based on the size of the argument
2337 			 * passed in.
2338 			 */
2339 			struct in_pktinfo *pktinfop;
2340 			ip4_pkt_t *attr_pktinfop;
2341 
2342 			if (checkonly)
2343 				break;
2344 
2345 			if (inlen == sizeof (int)) {
2346 				/*
2347 				 * This is IP_RECVPKTINFO option.
2348 				 * Keep a local copy of wether this option is
2349 				 * set or not and pass it down to IP for
2350 				 * processing.
2351 				 */
2352 				icmp->icmp_ip_recvpktinfo = onoff;
2353 				return (-EINVAL);
2354 			}
2355 
2356 
2357 			if (inlen != sizeof (struct in_pktinfo))
2358 				return (EINVAL);
2359 
2360 			if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs)
2361 			    == NULL) {
2362 				/*
2363 				 * sticky option is not supported
2364 				 */
2365 				return (EINVAL);
2366 			}
2367 
2368 			pktinfop = (struct in_pktinfo *)invalp;
2369 
2370 			/*
2371 			 * Atleast one of the values should be specified
2372 			 */
2373 			if (pktinfop->ipi_ifindex == 0 &&
2374 			    pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) {
2375 				return (EINVAL);
2376 			}
2377 
2378 			attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr;
2379 			attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex;
2380 		}
2381 			break;
2382 		case IP_ADD_MEMBERSHIP:
2383 		case IP_DROP_MEMBERSHIP:
2384 		case IP_BLOCK_SOURCE:
2385 		case IP_UNBLOCK_SOURCE:
2386 		case IP_ADD_SOURCE_MEMBERSHIP:
2387 		case IP_DROP_SOURCE_MEMBERSHIP:
2388 		case MCAST_JOIN_GROUP:
2389 		case MCAST_LEAVE_GROUP:
2390 		case MCAST_BLOCK_SOURCE:
2391 		case MCAST_UNBLOCK_SOURCE:
2392 		case MCAST_JOIN_SOURCE_GROUP:
2393 		case MCAST_LEAVE_SOURCE_GROUP:
2394 		case MRT_INIT:
2395 		case MRT_DONE:
2396 		case MRT_ADD_VIF:
2397 		case MRT_DEL_VIF:
2398 		case MRT_ADD_MFC:
2399 		case MRT_DEL_MFC:
2400 		case MRT_VERSION:
2401 		case MRT_ASSERT:
2402 		case IP_SEC_OPT:
2403 		case IP_DONTFAILOVER_IF:
2404 		case IP_NEXTHOP:
2405 			/*
2406 			 * "soft" error (negative)
2407 			 * option not handled at this level
2408 			 * Note: Do not modify *outlenp
2409 			 */
2410 			return (-EINVAL);
2411 		default:
2412 			*outlenp = 0;
2413 			return (EINVAL);
2414 		}
2415 		break;
2416 	case IPPROTO_IPV6: {
2417 		ip6_pkt_t		*ipp;
2418 		boolean_t		sticky;
2419 
2420 		if (icmp->icmp_family != AF_INET6) {
2421 			*outlenp = 0;
2422 			return (ENOPROTOOPT);
2423 		}
2424 		/*
2425 		 * Deal with both sticky options and ancillary data
2426 		 */
2427 		if (thisdg_attrs == NULL) {
2428 			/* sticky options, or none */
2429 			ipp = &icmp->icmp_sticky_ipp;
2430 			sticky = B_TRUE;
2431 		} else {
2432 			/* ancillary data */
2433 			ipp = (ip6_pkt_t *)thisdg_attrs;
2434 			sticky = B_FALSE;
2435 		}
2436 
2437 		switch (name) {
2438 		case IPV6_MULTICAST_IF:
2439 			if (!checkonly)
2440 				icmp->icmp_multicast_if_index = *i1;
2441 			break;
2442 		case IPV6_UNICAST_HOPS:
2443 			/* -1 means use default */
2444 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
2445 				*outlenp = 0;
2446 				return (EINVAL);
2447 			}
2448 			if (!checkonly) {
2449 				if (*i1 == -1) {
2450 					icmp->icmp_ttl = ipp->ipp_unicast_hops =
2451 					    is->is_ipv6_hoplimit;
2452 					ipp->ipp_fields &= ~IPPF_UNICAST_HOPS;
2453 					/* Pass modified value to IP. */
2454 					*i1 = ipp->ipp_hoplimit;
2455 				} else {
2456 					icmp->icmp_ttl = ipp->ipp_unicast_hops =
2457 					    (uint8_t)*i1;
2458 					ipp->ipp_fields |= IPPF_UNICAST_HOPS;
2459 				}
2460 				/* Rebuild the header template */
2461 				error = icmp_build_hdrs(icmp);
2462 				if (error != 0) {
2463 					*outlenp = 0;
2464 					return (error);
2465 				}
2466 			}
2467 			break;
2468 		case IPV6_MULTICAST_HOPS:
2469 			/* -1 means use default */
2470 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
2471 				*outlenp = 0;
2472 				return (EINVAL);
2473 			}
2474 			if (!checkonly) {
2475 				if (*i1 == -1) {
2476 					icmp->icmp_multicast_ttl =
2477 					    ipp->ipp_multicast_hops =
2478 					    IP_DEFAULT_MULTICAST_TTL;
2479 					ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS;
2480 					/* Pass modified value to IP. */
2481 					*i1 = icmp->icmp_multicast_ttl;
2482 				} else {
2483 					icmp->icmp_multicast_ttl =
2484 					    ipp->ipp_multicast_hops =
2485 					    (uint8_t)*i1;
2486 					ipp->ipp_fields |= IPPF_MULTICAST_HOPS;
2487 				}
2488 			}
2489 			break;
2490 		case IPV6_MULTICAST_LOOP:
2491 			if (*i1 != 0 && *i1 != 1) {
2492 				*outlenp = 0;
2493 				return (EINVAL);
2494 			}
2495 			if (!checkonly)
2496 				connp->conn_multicast_loop = *i1;
2497 			break;
2498 		case IPV6_CHECKSUM:
2499 			/*
2500 			 * Integer offset into the user data of where the
2501 			 * checksum is located.
2502 			 * Offset of -1 disables option.
2503 			 * Does not apply to IPPROTO_ICMPV6.
2504 			 */
2505 			if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) {
2506 				*outlenp = 0;
2507 				return (EINVAL);
2508 			}
2509 			if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) {
2510 				/* Negative or not 16 bit aligned offset */
2511 				*outlenp = 0;
2512 				return (EINVAL);
2513 			}
2514 			if (checkonly)
2515 				break;
2516 
2517 			if (*i1 == -1) {
2518 				icmp->icmp_raw_checksum = 0;
2519 				ipp->ipp_fields &= ~IPPF_RAW_CKSUM;
2520 			} else {
2521 				icmp->icmp_raw_checksum = 1;
2522 				icmp->icmp_checksum_off = *i1;
2523 				ipp->ipp_fields |= IPPF_RAW_CKSUM;
2524 			}
2525 			/* Rebuild the header template */
2526 			error = icmp_build_hdrs(icmp);
2527 			if (error != 0) {
2528 				*outlenp = 0;
2529 				return (error);
2530 			}
2531 			break;
2532 		case IPV6_JOIN_GROUP:
2533 		case IPV6_LEAVE_GROUP:
2534 		case MCAST_JOIN_GROUP:
2535 		case MCAST_LEAVE_GROUP:
2536 		case MCAST_BLOCK_SOURCE:
2537 		case MCAST_UNBLOCK_SOURCE:
2538 		case MCAST_JOIN_SOURCE_GROUP:
2539 		case MCAST_LEAVE_SOURCE_GROUP:
2540 			/*
2541 			 * "soft" error (negative)
2542 			 * option not handled at this level
2543 			 * Note: Do not modify *outlenp
2544 			 */
2545 			return (-EINVAL);
2546 		case IPV6_BOUND_IF:
2547 			if (!checkonly)
2548 				icmp->icmp_bound_if = *i1;
2549 			break;
2550 		case IPV6_UNSPEC_SRC:
2551 			if (!checkonly)
2552 				icmp->icmp_unspec_source = onoff;
2553 			break;
2554 		case IPV6_RECVTCLASS:
2555 			if (!checkonly)
2556 				icmp->icmp_ipv6_recvtclass = onoff;
2557 			break;
2558 		/*
2559 		 * Set boolean switches for ancillary data delivery
2560 		 */
2561 		case IPV6_RECVPKTINFO:
2562 			if (!checkonly)
2563 				icmp->icmp_ip_recvpktinfo = onoff;
2564 			break;
2565 		case IPV6_RECVPATHMTU:
2566 			if (!checkonly)
2567 				icmp->icmp_ipv6_recvpathmtu = onoff;
2568 			break;
2569 		case IPV6_RECVHOPLIMIT:
2570 			if (!checkonly)
2571 				icmp->icmp_ipv6_recvhoplimit = onoff;
2572 			break;
2573 		case IPV6_RECVHOPOPTS:
2574 			if (!checkonly)
2575 				icmp->icmp_ipv6_recvhopopts = onoff;
2576 			break;
2577 		case IPV6_RECVDSTOPTS:
2578 			if (!checkonly)
2579 				icmp->icmp_ipv6_recvdstopts = onoff;
2580 			break;
2581 		case _OLD_IPV6_RECVDSTOPTS:
2582 			if (!checkonly)
2583 				icmp->icmp_old_ipv6_recvdstopts = onoff;
2584 			break;
2585 		case IPV6_RECVRTHDRDSTOPTS:
2586 			if (!checkonly)
2587 				icmp->icmp_ipv6_recvrtdstopts = onoff;
2588 			break;
2589 		case IPV6_RECVRTHDR:
2590 			if (!checkonly)
2591 				icmp->icmp_ipv6_recvrthdr = onoff;
2592 			break;
2593 		/*
2594 		 * Set sticky options or ancillary data.
2595 		 * If sticky options, (re)build any extension headers
2596 		 * that might be needed as a result.
2597 		 */
2598 		case IPV6_PKTINFO:
2599 			/*
2600 			 * The source address and ifindex are verified
2601 			 * in ip_opt_set(). For ancillary data the
2602 			 * source address is checked in ip_wput_v6.
2603 			 */
2604 			if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
2605 				return (EINVAL);
2606 			if (checkonly)
2607 				break;
2608 
2609 			if (inlen == 0) {
2610 				ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR);
2611 				ipp->ipp_sticky_ignored |=
2612 				    (IPPF_IFINDEX|IPPF_ADDR);
2613 			} else {
2614 				struct in6_pktinfo *pkti;
2615 
2616 				pkti = (struct in6_pktinfo *)invalp;
2617 				ipp->ipp_ifindex = pkti->ipi6_ifindex;
2618 				ipp->ipp_addr = pkti->ipi6_addr;
2619 				if (ipp->ipp_ifindex != 0)
2620 					ipp->ipp_fields |= IPPF_IFINDEX;
2621 				else
2622 					ipp->ipp_fields &= ~IPPF_IFINDEX;
2623 				if (!IN6_IS_ADDR_UNSPECIFIED(
2624 				    &ipp->ipp_addr))
2625 					ipp->ipp_fields |= IPPF_ADDR;
2626 				else
2627 					ipp->ipp_fields &= ~IPPF_ADDR;
2628 			}
2629 			if (sticky) {
2630 				error = icmp_build_hdrs(icmp);
2631 				if (error != 0)
2632 					return (error);
2633 			}
2634 			break;
2635 		case IPV6_HOPLIMIT:
2636 			/* This option can only be used as ancillary data. */
2637 			if (sticky)
2638 				return (EINVAL);
2639 			if (inlen != 0 && inlen != sizeof (int))
2640 				return (EINVAL);
2641 			if (checkonly)
2642 				break;
2643 
2644 			if (inlen == 0) {
2645 				ipp->ipp_fields &= ~IPPF_HOPLIMIT;
2646 				ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT;
2647 			} else {
2648 				if (*i1 > 255 || *i1 < -1)
2649 					return (EINVAL);
2650 				if (*i1 == -1)
2651 					ipp->ipp_hoplimit =
2652 					    is->is_ipv6_hoplimit;
2653 				else
2654 					ipp->ipp_hoplimit = *i1;
2655 				ipp->ipp_fields |= IPPF_HOPLIMIT;
2656 			}
2657 			break;
2658 		case IPV6_TCLASS:
2659 			/*
2660 			 * IPV6_RECVTCLASS accepts -1 as use kernel default
2661 			 * and [0, 255] as the actualy traffic class.
2662 			 */
2663 			if (inlen != 0 && inlen != sizeof (int))
2664 				return (EINVAL);
2665 			if (checkonly)
2666 				break;
2667 
2668 			if (inlen == 0) {
2669 				ipp->ipp_fields &= ~IPPF_TCLASS;
2670 				ipp->ipp_sticky_ignored |= IPPF_TCLASS;
2671 			} else {
2672 				if (*i1 >= 256 || *i1 < -1)
2673 					return (EINVAL);
2674 				if (*i1 == -1) {
2675 					ipp->ipp_tclass =
2676 					    IPV6_FLOW_TCLASS(
2677 					    IPV6_DEFAULT_VERS_AND_FLOW);
2678 				} else {
2679 					ipp->ipp_tclass = *i1;
2680 				}
2681 				ipp->ipp_fields |= IPPF_TCLASS;
2682 			}
2683 			if (sticky) {
2684 				error = icmp_build_hdrs(icmp);
2685 				if (error != 0)
2686 					return (error);
2687 			}
2688 			break;
2689 		case IPV6_NEXTHOP:
2690 			/*
2691 			 * IP will verify that the nexthop is reachable
2692 			 * and fail for sticky options.
2693 			 */
2694 			if (inlen != 0 && inlen != sizeof (sin6_t))
2695 				return (EINVAL);
2696 			if (checkonly)
2697 				break;
2698 
2699 			if (inlen == 0) {
2700 				ipp->ipp_fields &= ~IPPF_NEXTHOP;
2701 				ipp->ipp_sticky_ignored |= IPPF_NEXTHOP;
2702 			} else {
2703 				sin6_t *sin6 = (sin6_t *)invalp;
2704 
2705 				if (sin6->sin6_family != AF_INET6)
2706 					return (EAFNOSUPPORT);
2707 				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
2708 					return (EADDRNOTAVAIL);
2709 				ipp->ipp_nexthop = sin6->sin6_addr;
2710 				if (!IN6_IS_ADDR_UNSPECIFIED(
2711 				    &ipp->ipp_nexthop))
2712 					ipp->ipp_fields |= IPPF_NEXTHOP;
2713 				else
2714 					ipp->ipp_fields &= ~IPPF_NEXTHOP;
2715 			}
2716 			if (sticky) {
2717 				error = icmp_build_hdrs(icmp);
2718 				if (error != 0)
2719 					return (error);
2720 			}
2721 			break;
2722 		case IPV6_HOPOPTS: {
2723 			ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
2724 			/*
2725 			 * Sanity checks - minimum size, size a multiple of
2726 			 * eight bytes, and matching size passed in.
2727 			 */
2728 			if (inlen != 0 &&
2729 			    inlen != (8 * (hopts->ip6h_len + 1)))
2730 				return (EINVAL);
2731 
2732 			if (checkonly)
2733 				break;
2734 			error = optcom_pkt_set(invalp, inlen, sticky,
2735 			    (uchar_t **)&ipp->ipp_hopopts,
2736 			    &ipp->ipp_hopoptslen,
2737 			    sticky ? icmp->icmp_label_len_v6 : 0);
2738 			if (error != 0)
2739 				return (error);
2740 			if (ipp->ipp_hopoptslen == 0) {
2741 				ipp->ipp_fields &= ~IPPF_HOPOPTS;
2742 				ipp->ipp_sticky_ignored |= IPPF_HOPOPTS;
2743 			} else {
2744 				ipp->ipp_fields |= IPPF_HOPOPTS;
2745 			}
2746 			if (sticky) {
2747 				error = icmp_build_hdrs(icmp);
2748 				if (error != 0)
2749 					return (error);
2750 			}
2751 			break;
2752 		}
2753 		case IPV6_RTHDRDSTOPTS: {
2754 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
2755 
2756 			/*
2757 			 * Sanity checks - minimum size, size a multiple of
2758 			 * eight bytes, and matching size passed in.
2759 			 */
2760 			if (inlen != 0 &&
2761 			    inlen != (8 * (dopts->ip6d_len + 1)))
2762 				return (EINVAL);
2763 
2764 			if (checkonly)
2765 				break;
2766 
2767 			if (inlen == 0) {
2768 				if (sticky &&
2769 				    (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) {
2770 					kmem_free(ipp->ipp_rtdstopts,
2771 					    ipp->ipp_rtdstoptslen);
2772 					ipp->ipp_rtdstopts = NULL;
2773 					ipp->ipp_rtdstoptslen = 0;
2774 				}
2775 				ipp->ipp_fields &= ~IPPF_RTDSTOPTS;
2776 				ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS;
2777 			} else {
2778 				error = optcom_pkt_set(invalp, inlen, sticky,
2779 				    (uchar_t **)&ipp->ipp_rtdstopts,
2780 				    &ipp->ipp_rtdstoptslen, 0);
2781 				if (error != 0)
2782 					return (error);
2783 				ipp->ipp_fields |= IPPF_RTDSTOPTS;
2784 			}
2785 			if (sticky) {
2786 				error = icmp_build_hdrs(icmp);
2787 				if (error != 0)
2788 					return (error);
2789 			}
2790 			break;
2791 		}
2792 		case IPV6_DSTOPTS: {
2793 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
2794 
2795 			/*
2796 			 * Sanity checks - minimum size, size a multiple of
2797 			 * eight bytes, and matching size passed in.
2798 			 */
2799 			if (inlen != 0 &&
2800 			    inlen != (8 * (dopts->ip6d_len + 1)))
2801 				return (EINVAL);
2802 
2803 			if (checkonly)
2804 				break;
2805 
2806 			if (inlen == 0) {
2807 				if (sticky &&
2808 				    (ipp->ipp_fields & IPPF_DSTOPTS) != 0) {
2809 					kmem_free(ipp->ipp_dstopts,
2810 					    ipp->ipp_dstoptslen);
2811 					ipp->ipp_dstopts = NULL;
2812 					ipp->ipp_dstoptslen = 0;
2813 				}
2814 				ipp->ipp_fields &= ~IPPF_DSTOPTS;
2815 				ipp->ipp_sticky_ignored |= IPPF_DSTOPTS;
2816 			} else {
2817 				error = optcom_pkt_set(invalp, inlen, sticky,
2818 				    (uchar_t **)&ipp->ipp_dstopts,
2819 				    &ipp->ipp_dstoptslen, 0);
2820 				if (error != 0)
2821 					return (error);
2822 				ipp->ipp_fields |= IPPF_DSTOPTS;
2823 			}
2824 			if (sticky) {
2825 				error = icmp_build_hdrs(icmp);
2826 				if (error != 0)
2827 					return (error);
2828 			}
2829 			break;
2830 		}
2831 		case IPV6_RTHDR: {
2832 			ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp;
2833 
2834 			/*
2835 			 * Sanity checks - minimum size, size a multiple of
2836 			 * eight bytes, and matching size passed in.
2837 			 */
2838 			if (inlen != 0 &&
2839 			    inlen != (8 * (rt->ip6r_len + 1)))
2840 				return (EINVAL);
2841 
2842 			if (checkonly)
2843 				break;
2844 
2845 			if (inlen == 0) {
2846 				if (sticky &&
2847 				    (ipp->ipp_fields & IPPF_RTHDR) != 0) {
2848 					kmem_free(ipp->ipp_rthdr,
2849 					    ipp->ipp_rthdrlen);
2850 					ipp->ipp_rthdr = NULL;
2851 					ipp->ipp_rthdrlen = 0;
2852 				}
2853 				ipp->ipp_fields &= ~IPPF_RTHDR;
2854 				ipp->ipp_sticky_ignored |= IPPF_RTHDR;
2855 			} else {
2856 				error = optcom_pkt_set(invalp, inlen, sticky,
2857 				    (uchar_t **)&ipp->ipp_rthdr,
2858 				    &ipp->ipp_rthdrlen, 0);
2859 				if (error != 0)
2860 					return (error);
2861 				ipp->ipp_fields |= IPPF_RTHDR;
2862 			}
2863 			if (sticky) {
2864 				error = icmp_build_hdrs(icmp);
2865 				if (error != 0)
2866 					return (error);
2867 			}
2868 			break;
2869 		}
2870 
2871 		case IPV6_DONTFRAG:
2872 			if (checkonly)
2873 				break;
2874 
2875 			if (onoff) {
2876 				ipp->ipp_fields |= IPPF_DONTFRAG;
2877 			} else {
2878 				ipp->ipp_fields &= ~IPPF_DONTFRAG;
2879 			}
2880 			break;
2881 
2882 		case IPV6_USE_MIN_MTU:
2883 			if (inlen != sizeof (int))
2884 				return (EINVAL);
2885 
2886 			if (*i1 < -1 || *i1 > 1)
2887 				return (EINVAL);
2888 
2889 			if (checkonly)
2890 				break;
2891 
2892 			ipp->ipp_fields |= IPPF_USE_MIN_MTU;
2893 			ipp->ipp_use_min_mtu = *i1;
2894 			break;
2895 
2896 		/*
2897 		 * This option can't be set.  Its only returned via
2898 		 * getsockopt() or ancillary data.
2899 		 */
2900 		case IPV6_PATHMTU:
2901 			return (EINVAL);
2902 
2903 		case IPV6_BOUND_PIF:
2904 		case IPV6_SEC_OPT:
2905 		case IPV6_DONTFAILOVER_IF:
2906 		case IPV6_SRC_PREFERENCES:
2907 		case IPV6_V6ONLY:
2908 			/* Handled at IP level */
2909 			return (-EINVAL);
2910 		default:
2911 			*outlenp = 0;
2912 			return (EINVAL);
2913 		}
2914 		break;
2915 	}		/* end IPPROTO_IPV6 */
2916 
2917 	case IPPROTO_ICMPV6:
2918 		/*
2919 		 * Only allow IPv6 option processing on IPv6 sockets.
2920 		 */
2921 		if (icmp->icmp_family != AF_INET6) {
2922 			*outlenp = 0;
2923 			return (ENOPROTOOPT);
2924 		}
2925 		if (icmp->icmp_proto != IPPROTO_ICMPV6) {
2926 			*outlenp = 0;
2927 			return (ENOPROTOOPT);
2928 		}
2929 		switch (name) {
2930 		case ICMP6_FILTER:
2931 			if (!checkonly) {
2932 				if ((inlen != 0) &&
2933 				    (inlen != sizeof (icmp6_filter_t)))
2934 					return (EINVAL);
2935 
2936 				if (inlen == 0) {
2937 					if (icmp->icmp_filter != NULL) {
2938 						kmem_free(icmp->icmp_filter,
2939 						    sizeof (icmp6_filter_t));
2940 						icmp->icmp_filter = NULL;
2941 					}
2942 				} else {
2943 					if (icmp->icmp_filter == NULL) {
2944 						icmp->icmp_filter = kmem_alloc(
2945 						    sizeof (icmp6_filter_t),
2946 						    KM_NOSLEEP);
2947 						if (icmp->icmp_filter == NULL) {
2948 							*outlenp = 0;
2949 							return (ENOBUFS);
2950 						}
2951 					}
2952 					(void) bcopy(invalp, icmp->icmp_filter,
2953 					    inlen);
2954 				}
2955 			}
2956 			break;
2957 
2958 		default:
2959 			*outlenp = 0;
2960 			return (EINVAL);
2961 		}
2962 		break;
2963 	default:
2964 		*outlenp = 0;
2965 		return (EINVAL);
2966 	}
2967 	/*
2968 	 * Common case of OK return with outval same as inval.
2969 	 */
2970 	if (invalp != outvalp) {
2971 		/* don't trust bcopy for identical src/dst */
2972 		(void) bcopy(invalp, outvalp, inlen);
2973 	}
2974 	*outlenp = inlen;
2975 	return (0);
2976 }
2977 /* This routine sets socket options. */
2978 /* ARGSUSED */
2979 int
2980 icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name,
2981     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
2982     void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
2983 {
2984 	icmp_t	*icmp;
2985 	int	err;
2986 
2987 	icmp = Q_TO_ICMP(q);
2988 
2989 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
2990 	err = icmp_opt_set_locked(q, optset_context, level, name, inlen, invalp,
2991 	    outlenp, outvalp, thisdg_attrs, cr, mblk);
2992 	rw_exit(&icmp->icmp_rwlock);
2993 	return (err);
2994 }
2995 
2996 /*
2997  * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl,
2998  * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum.
2999  * The headers include ip6i_t (if needed), ip6_t, and any sticky extension
3000  * headers.
3001  * Returns failure if can't allocate memory.
3002  */
3003 static int
3004 icmp_build_hdrs(icmp_t *icmp)
3005 {
3006 	icmp_stack_t *is = icmp->icmp_is;
3007 	uchar_t	*hdrs;
3008 	uint_t	hdrs_len;
3009 	ip6_t	*ip6h;
3010 	ip6i_t	*ip6i;
3011 	ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp;
3012 
3013 	ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock));
3014 	hdrs_len = ip_total_hdrs_len_v6(ipp);
3015 	ASSERT(hdrs_len != 0);
3016 	if (hdrs_len != icmp->icmp_sticky_hdrs_len) {
3017 		/* Need to reallocate */
3018 		if (hdrs_len != 0) {
3019 			hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
3020 			if (hdrs == NULL)
3021 				return (ENOMEM);
3022 		} else {
3023 			hdrs = NULL;
3024 		}
3025 		if (icmp->icmp_sticky_hdrs_len != 0) {
3026 			kmem_free(icmp->icmp_sticky_hdrs,
3027 			    icmp->icmp_sticky_hdrs_len);
3028 		}
3029 		icmp->icmp_sticky_hdrs = hdrs;
3030 		icmp->icmp_sticky_hdrs_len = hdrs_len;
3031 	}
3032 	ip_build_hdrs_v6(icmp->icmp_sticky_hdrs,
3033 	    icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto);
3034 
3035 	/* Set header fields not in ipp */
3036 	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
3037 		ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs;
3038 		ip6h = (ip6_t *)&ip6i[1];
3039 
3040 		if (ipp->ipp_fields & IPPF_RAW_CKSUM) {
3041 			ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM;
3042 			ip6i->ip6i_checksum_off = icmp->icmp_checksum_off;
3043 		}
3044 		if (ipp->ipp_fields & IPPF_NO_CKSUM) {
3045 			ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM;
3046 		}
3047 	} else {
3048 		ip6h = (ip6_t *)icmp->icmp_sticky_hdrs;
3049 	}
3050 
3051 	if (!(ipp->ipp_fields & IPPF_ADDR))
3052 		ip6h->ip6_src = icmp->icmp_v6src;
3053 
3054 	/* Try to get everything in a single mblk */
3055 	if (hdrs_len > icmp->icmp_max_hdr_len) {
3056 		icmp->icmp_max_hdr_len = hdrs_len;
3057 		rw_exit(&icmp->icmp_rwlock);
3058 		(void) mi_set_sth_wroff(icmp->icmp_connp->conn_rq,
3059 		    icmp->icmp_max_hdr_len + is->is_wroff_extra);
3060 		rw_enter(&icmp->icmp_rwlock, RW_WRITER);
3061 	}
3062 	return (0);
3063 }
3064 
3065 /*
3066  * This routine retrieves the value of an ND variable in a icmpparam_t
3067  * structure.  It is called through nd_getset when a user reads the
3068  * variable.
3069  */
3070 /* ARGSUSED */
3071 static int
3072 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
3073 {
3074 	icmpparam_t	*icmppa = (icmpparam_t *)cp;
3075 
3076 	(void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value);
3077 	return (0);
3078 }
3079 
3080 /*
3081  * Walk through the param array specified registering each element with the
3082  * named dispatch (ND) handler.
3083  */
3084 static boolean_t
3085 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt)
3086 {
3087 	for (; cnt-- > 0; icmppa++) {
3088 		if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) {
3089 			if (!nd_load(ndp, icmppa->icmp_param_name,
3090 			    icmp_param_get, icmp_param_set,
3091 			    (caddr_t)icmppa)) {
3092 				nd_free(ndp);
3093 				return (B_FALSE);
3094 			}
3095 		}
3096 	}
3097 	if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL,
3098 	    NULL)) {
3099 		nd_free(ndp);
3100 		return (B_FALSE);
3101 	}
3102 	return (B_TRUE);
3103 }
3104 
3105 /* This routine sets an ND variable in a icmpparam_t structure. */
3106 /* ARGSUSED */
3107 static int
3108 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
3109 {
3110 	long		new_value;
3111 	icmpparam_t	*icmppa = (icmpparam_t *)cp;
3112 
3113 	/*
3114 	 * Fail the request if the new value does not lie within the
3115 	 * required bounds.
3116 	 */
3117 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
3118 	    new_value < icmppa->icmp_param_min ||
3119 	    new_value > icmppa->icmp_param_max) {
3120 		return (EINVAL);
3121 	}
3122 	/* Set the new value */
3123 	icmppa->icmp_param_value = new_value;
3124 	return (0);
3125 }
3126 /*ARGSUSED2*/
3127 static void
3128 icmp_input(void *arg1, mblk_t *mp, void *arg2)
3129 {
3130 	conn_t *connp = (conn_t *)arg1;
3131 	struct T_unitdata_ind	*tudi;
3132 	uchar_t			*rptr;
3133 	icmp_t			*icmp;
3134 	icmp_stack_t		*is;
3135 	sin_t			*sin;
3136 	sin6_t			*sin6;
3137 	ip6_t			*ip6h;
3138 	ip6i_t			*ip6i;
3139 	mblk_t			*mp1;
3140 	int			hdr_len;
3141 	ipha_t			*ipha;
3142 	int			udi_size;	/* Size of T_unitdata_ind */
3143 	uint_t			ipvers;
3144 	ip6_pkt_t		ipp;
3145 	uint8_t			nexthdr;
3146 	ip_pktinfo_t		*pinfo = NULL;
3147 	mblk_t			*options_mp = NULL;
3148 	uint_t			icmp_opt = 0;
3149 	boolean_t		icmp_ipv6_recvhoplimit = B_FALSE;
3150 	uint_t			hopstrip;
3151 
3152 	ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
3153 
3154 	icmp = connp->conn_icmp;
3155 	is = icmp->icmp_is;
3156 	rptr = mp->b_rptr;
3157 	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL);
3158 	ASSERT(OK_32PTR(rptr));
3159 
3160 	/*
3161 	 * IP should have prepended the options data in an M_CTL
3162 	 * Check M_CTL "type" to make sure are not here bcos of
3163 	 * a valid ICMP message
3164 	 */
3165 	if (DB_TYPE(mp) == M_CTL) {
3166 		/*
3167 		 * FIXME: does IP still do this?
3168 		 * IP sends up the IPSEC_IN message for handling IPSEC
3169 		 * policy at the TCP level. We don't need it here.
3170 		 */
3171 		if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) {
3172 			mp1 = mp->b_cont;
3173 			freeb(mp);
3174 			mp = mp1;
3175 			rptr = mp->b_rptr;
3176 		} else if (MBLKL(mp) == sizeof (ip_pktinfo_t) &&
3177 		    ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type ==
3178 		    IN_PKTINFO) {
3179 			/*
3180 			 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information
3181 			 * has been prepended to the packet by IP. We need to
3182 			 * extract the mblk and adjust the rptr
3183 			 */
3184 			pinfo = (ip_pktinfo_t *)mp->b_rptr;
3185 			options_mp = mp;
3186 			mp = mp->b_cont;
3187 			rptr = mp->b_rptr;
3188 		} else {
3189 			/*
3190 			 * ICMP messages.
3191 			 */
3192 			icmp_icmp_error(connp->conn_rq, mp);
3193 			return;
3194 		}
3195 	}
3196 
3197 	/*
3198 	 * Discard message if it is misaligned or smaller than the IP header.
3199 	 */
3200 	if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) {
3201 		freemsg(mp);
3202 		if (options_mp != NULL)
3203 			freeb(options_mp);
3204 		BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
3205 		return;
3206 	}
3207 	ipvers = IPH_HDR_VERSION((ipha_t *)rptr);
3208 
3209 	/* Handle M_DATA messages containing IP packets messages */
3210 	if (ipvers == IPV4_VERSION) {
3211 		/*
3212 		 * Special case where IP attaches
3213 		 * the IRE needs to be handled so that we don't send up
3214 		 * IRE to the user land.
3215 		 */
3216 		ipha = (ipha_t *)rptr;
3217 		hdr_len = IPH_HDR_LENGTH(ipha);
3218 
3219 		if (ipha->ipha_protocol == IPPROTO_TCP) {
3220 			tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len];
3221 
3222 			if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) ==
3223 			    TH_SYN) && mp->b_cont != NULL) {
3224 				mp1 = mp->b_cont;
3225 				if (mp1->b_datap->db_type == IRE_DB_TYPE) {
3226 					freeb(mp1);
3227 					mp->b_cont = NULL;
3228 				}
3229 			}
3230 		}
3231 		if (is->is_bsd_compat) {
3232 			ushort_t len;
3233 			len = ntohs(ipha->ipha_length);
3234 
3235 			if (mp->b_datap->db_ref > 1) {
3236 				/*
3237 				 * Allocate a new IP header so that we can
3238 				 * modify ipha_length.
3239 				 */
3240 				mblk_t	*mp1;
3241 
3242 				mp1 = allocb(hdr_len, BPRI_MED);
3243 				if (!mp1) {
3244 					freemsg(mp);
3245 					if (options_mp != NULL)
3246 						freeb(options_mp);
3247 					BUMP_MIB(&is->is_rawip_mib,
3248 					    rawipInErrors);
3249 					return;
3250 				}
3251 				bcopy(rptr, mp1->b_rptr, hdr_len);
3252 				mp->b_rptr = rptr + hdr_len;
3253 				rptr = mp1->b_rptr;
3254 				ipha = (ipha_t *)rptr;
3255 				mp1->b_cont = mp;
3256 				mp1->b_wptr = rptr + hdr_len;
3257 				mp = mp1;
3258 			}
3259 			len -= hdr_len;
3260 			ipha->ipha_length = htons(len);
3261 		}
3262 	}
3263 
3264 	/*
3265 	 * This is the inbound data path.  Packets are passed upstream as
3266 	 * T_UNITDATA_IND messages with full IP headers still attached.
3267 	 */
3268 	if (icmp->icmp_family == AF_INET) {
3269 		ASSERT(ipvers == IPV4_VERSION);
3270 		udi_size =  sizeof (struct T_unitdata_ind) + sizeof (sin_t);
3271 		if (icmp->icmp_recvif && (pinfo != NULL) &&
3272 		    (pinfo->ip_pkt_flags & IPF_RECVIF)) {
3273 			udi_size += sizeof (struct T_opthdr) +
3274 			    sizeof (uint_t);
3275 		}
3276 
3277 		if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) &&
3278 		    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
3279 			udi_size += sizeof (struct T_opthdr) +
3280 			    sizeof (struct in_pktinfo);
3281 		}
3282 
3283 		/*
3284 		 * If SO_TIMESTAMP is set allocate the appropriate sized
3285 		 * buffer. Since gethrestime() expects a pointer aligned
3286 		 * argument, we allocate space necessary for extra
3287 		 * alignment (even though it might not be used).
3288 		 */
3289 		if (icmp->icmp_timestamp) {
3290 			udi_size += sizeof (struct T_opthdr) +
3291 			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
3292 		}
3293 		mp1 = allocb(udi_size, BPRI_MED);
3294 		if (mp1 == NULL) {
3295 			freemsg(mp);
3296 			if (options_mp != NULL)
3297 				freeb(options_mp);
3298 			BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
3299 			return;
3300 		}
3301 		mp1->b_cont = mp;
3302 		mp = mp1;
3303 		tudi = (struct T_unitdata_ind *)mp->b_rptr;
3304 		mp->b_datap->db_type = M_PROTO;
3305 		mp->b_wptr = (uchar_t *)tudi + udi_size;
3306 		tudi->PRIM_type = T_UNITDATA_IND;
3307 		tudi->SRC_length = sizeof (sin_t);
3308 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
3309 		sin = (sin_t *)&tudi[1];
3310 		*sin = sin_null;
3311 		sin->sin_family = AF_INET;
3312 		sin->sin_addr.s_addr = ipha->ipha_src;
3313 		tudi->OPT_offset =  sizeof (struct T_unitdata_ind) +
3314 		    sizeof (sin_t);
3315 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
3316 		tudi->OPT_length = udi_size;
3317 
3318 		/*
3319 		 * Add options if IP_RECVIF is set
3320 		 */
3321 		if (udi_size != 0) {
3322 			char *dstopt;
3323 
3324 			dstopt = (char *)&sin[1];
3325 			if (icmp->icmp_recvif && (pinfo != NULL) &&
3326 			    (pinfo->ip_pkt_flags & IPF_RECVIF)) {
3327 
3328 				struct T_opthdr *toh;
3329 				uint_t		*dstptr;
3330 
3331 				toh = (struct T_opthdr *)dstopt;
3332 				toh->level = IPPROTO_IP;
3333 				toh->name = IP_RECVIF;
3334 				toh->len = sizeof (struct T_opthdr) +
3335 				    sizeof (uint_t);
3336 				toh->status = 0;
3337 				dstopt += sizeof (struct T_opthdr);
3338 				dstptr = (uint_t *)dstopt;
3339 				*dstptr = pinfo->ip_pkt_ifindex;
3340 				dstopt += sizeof (uint_t);
3341 				udi_size -= toh->len;
3342 			}
3343 			if (icmp->icmp_timestamp) {
3344 				struct	T_opthdr *toh;
3345 
3346 				toh = (struct T_opthdr *)dstopt;
3347 				toh->level = SOL_SOCKET;
3348 				toh->name = SCM_TIMESTAMP;
3349 				toh->len = sizeof (struct T_opthdr) +
3350 				    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
3351 				toh->status = 0;
3352 				dstopt += sizeof (struct T_opthdr);
3353 				/* Align for gethrestime() */
3354 				dstopt = (char *)P2ROUNDUP((intptr_t)dstopt,
3355 				    sizeof (intptr_t));
3356 				gethrestime((timestruc_t *)dstopt);
3357 				dstopt = (char *)toh + toh->len;
3358 				udi_size -= toh->len;
3359 			}
3360 			if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) &&
3361 			    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
3362 				struct	T_opthdr *toh;
3363 				struct	in_pktinfo *pktinfop;
3364 
3365 				toh = (struct T_opthdr *)dstopt;
3366 				toh->level = IPPROTO_IP;
3367 				toh->name = IP_PKTINFO;
3368 				toh->len = sizeof (struct T_opthdr) +
3369 				    sizeof (in_pktinfo_t);
3370 				toh->status = 0;
3371 				dstopt += sizeof (struct T_opthdr);
3372 				pktinfop = (struct in_pktinfo *)dstopt;
3373 				pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex;
3374 				pktinfop->ipi_spec_dst =
3375 				    pinfo->ip_pkt_match_addr;
3376 
3377 				pktinfop->ipi_addr.s_addr = ipha->ipha_dst;
3378 
3379 				dstopt += sizeof (struct in_pktinfo);
3380 				udi_size -= toh->len;
3381 			}
3382 
3383 			/* Consumed all of allocated space */
3384 			ASSERT(udi_size == 0);
3385 		}
3386 
3387 		if (options_mp != NULL)
3388 			freeb(options_mp);
3389 
3390 		BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams);
3391 		putnext(connp->conn_rq, mp);
3392 		return;
3393 	}
3394 
3395 	/*
3396 	 * We don't need options_mp in the IPv6 path.
3397 	 */
3398 	if (options_mp != NULL) {
3399 		freeb(options_mp);
3400 		options_mp = NULL;
3401 	}
3402 
3403 	/*
3404 	 * Discard message if it is smaller than the IPv6 header
3405 	 * or if the header is malformed.
3406 	 */
3407 	if ((mp->b_wptr - rptr) < sizeof (ip6_t) ||
3408 	    IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION ||
3409 	    icmp->icmp_family != AF_INET6) {
3410 		freemsg(mp);
3411 		BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
3412 		return;
3413 	}
3414 
3415 	/* Initialize */
3416 	ipp.ipp_fields = 0;
3417 	hopstrip = 0;
3418 
3419 	ip6h = (ip6_t *)rptr;
3420 	/*
3421 	 * Call on ip_find_hdr_v6 which gets the total hdr len
3422 	 * as well as individual lenghts of ext hdrs (and ptrs to
3423 	 * them).
3424 	 */
3425 	if (ip6h->ip6_nxt != icmp->icmp_proto) {
3426 		/* Look for ifindex information */
3427 		if (ip6h->ip6_nxt == IPPROTO_RAW) {
3428 			ip6i = (ip6i_t *)ip6h;
3429 			if (ip6i->ip6i_flags & IP6I_IFINDEX) {
3430 				ASSERT(ip6i->ip6i_ifindex != 0);
3431 				ipp.ipp_fields |= IPPF_IFINDEX;
3432 				ipp.ipp_ifindex = ip6i->ip6i_ifindex;
3433 			}
3434 			rptr = (uchar_t *)&ip6i[1];
3435 			mp->b_rptr = rptr;
3436 			if (rptr == mp->b_wptr) {
3437 				mp1 = mp->b_cont;
3438 				freeb(mp);
3439 				mp = mp1;
3440 				rptr = mp->b_rptr;
3441 			}
3442 			ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN);
3443 			ip6h = (ip6_t *)rptr;
3444 		}
3445 		hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr);
3446 
3447 		/*
3448 		 * We need to lie a bit to the user because users inside
3449 		 * labeled compartments should not see their own labels.  We
3450 		 * assume that in all other respects IP has checked the label,
3451 		 * and that the label is always first among the options.  (If
3452 		 * it's not first, then this code won't see it, and the option
3453 		 * will be passed along to the user.)
3454 		 *
3455 		 * If we had multilevel ICMP sockets, then the following code
3456 		 * should be skipped for them to allow the user to see the
3457 		 * label.
3458 		 *
3459 		 * Alignment restrictions in the definition of IP options
3460 		 * (namely, the requirement that the 4-octet DOI goes on a
3461 		 * 4-octet boundary) mean that we know exactly where the option
3462 		 * should start, but we're lenient for other hosts.
3463 		 *
3464 		 * Note that there are no multilevel ICMP or raw IP sockets
3465 		 * yet, thus nobody ever sees the IP6OPT_LS option.
3466 		 */
3467 		if ((ipp.ipp_fields & IPPF_HOPOPTS) &&
3468 		    ipp.ipp_hopoptslen > 5 && is_system_labeled()) {
3469 			const uchar_t *ucp =
3470 			    (const uchar_t *)ipp.ipp_hopopts + 2;
3471 			int remlen = ipp.ipp_hopoptslen - 2;
3472 
3473 			while (remlen > 0) {
3474 				if (*ucp == IP6OPT_PAD1) {
3475 					remlen--;
3476 					ucp++;
3477 				} else if (*ucp == IP6OPT_PADN) {
3478 					remlen -= ucp[1] + 2;
3479 					ucp += ucp[1] + 2;
3480 				} else if (*ucp == ip6opt_ls) {
3481 					hopstrip = (ucp -
3482 					    (const uchar_t *)ipp.ipp_hopopts) +
3483 					    ucp[1] + 2;
3484 					hopstrip = (hopstrip + 7) & ~7;
3485 					break;
3486 				} else {
3487 					/* label option must be first */
3488 					break;
3489 				}
3490 			}
3491 		}
3492 	} else {
3493 		hdr_len = IPV6_HDR_LEN;
3494 		ip6i = NULL;
3495 		nexthdr = ip6h->ip6_nxt;
3496 	}
3497 	/*
3498 	 * One special case where IP attaches the IRE needs to
3499 	 * be handled so that we don't send up IRE to the user land.
3500 	 */
3501 	if (nexthdr == IPPROTO_TCP) {
3502 		tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len];
3503 
3504 		if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) &&
3505 		    mp->b_cont != NULL) {
3506 			mp1 = mp->b_cont;
3507 			if (mp1->b_datap->db_type == IRE_DB_TYPE) {
3508 				freeb(mp1);
3509 				mp->b_cont = NULL;
3510 			}
3511 		}
3512 	}
3513 	/*
3514 	 * Check a filter for ICMPv6 types if needed.
3515 	 * Verify raw checksums if needed.
3516 	 */
3517 	if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) {
3518 		if (icmp->icmp_filter != NULL) {
3519 			int type;
3520 
3521 			/* Assumes that IP has done the pullupmsg */
3522 			type = mp->b_rptr[hdr_len];
3523 
3524 			ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr);
3525 			if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) {
3526 				freemsg(mp);
3527 				return;
3528 			}
3529 		} else {
3530 			/* Checksum */
3531 			uint16_t	*up;
3532 			uint32_t	sum;
3533 			int		remlen;
3534 
3535 			up = (uint16_t *)&ip6h->ip6_src;
3536 
3537 			remlen = msgdsize(mp) - hdr_len;
3538 			sum = htons(icmp->icmp_proto + remlen)
3539 			    + up[0] + up[1] + up[2] + up[3]
3540 			    + up[4] + up[5] + up[6] + up[7]
3541 			    + up[8] + up[9] + up[10] + up[11]
3542 			    + up[12] + up[13] + up[14] + up[15];
3543 			sum = (sum & 0xffff) + (sum >> 16);
3544 			sum = IP_CSUM(mp, hdr_len, sum);
3545 			if (sum != 0) {
3546 				/* IPv6 RAW checksum failed */
3547 				ip0dbg(("icmp_rput: RAW checksum "
3548 				    "failed %x\n", sum));
3549 				freemsg(mp);
3550 				BUMP_MIB(&is->is_rawip_mib,
3551 				    rawipInCksumErrs);
3552 				return;
3553 			}
3554 		}
3555 	}
3556 	/* Skip all the IPv6 headers per API */
3557 	mp->b_rptr += hdr_len;
3558 
3559 	udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
3560 
3561 	/*
3562 	 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to
3563 	 * maintain state information, instead of relying on icmp_t
3564 	 * structure, since there arent any locks protecting these members
3565 	 * and there is a window where there might be a race between a
3566 	 * thread setting options on the write side and a thread reading
3567 	 * these options on the read size.
3568 	 */
3569 	if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS|
3570 	    IPPF_RTHDR|IPPF_IFINDEX)) {
3571 		if (icmp->icmp_ipv6_recvhopopts &&
3572 		    (ipp.ipp_fields & IPPF_HOPOPTS) &&
3573 		    ipp.ipp_hopoptslen > hopstrip) {
3574 			udi_size += sizeof (struct T_opthdr) +
3575 			    ipp.ipp_hopoptslen - hopstrip;
3576 			icmp_opt |= IPPF_HOPOPTS;
3577 		}
3578 		if ((icmp->icmp_ipv6_recvdstopts ||
3579 		    icmp->icmp_old_ipv6_recvdstopts) &&
3580 		    (ipp.ipp_fields & IPPF_DSTOPTS)) {
3581 			udi_size += sizeof (struct T_opthdr) +
3582 			    ipp.ipp_dstoptslen;
3583 			icmp_opt |= IPPF_DSTOPTS;
3584 		}
3585 		if (((icmp->icmp_ipv6_recvdstopts &&
3586 		    icmp->icmp_ipv6_recvrthdr &&
3587 		    (ipp.ipp_fields & IPPF_RTHDR)) ||
3588 		    icmp->icmp_ipv6_recvrtdstopts) &&
3589 		    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
3590 			udi_size += sizeof (struct T_opthdr) +
3591 			    ipp.ipp_rtdstoptslen;
3592 			icmp_opt |= IPPF_RTDSTOPTS;
3593 		}
3594 		if (icmp->icmp_ipv6_recvrthdr &&
3595 		    (ipp.ipp_fields & IPPF_RTHDR)) {
3596 			udi_size += sizeof (struct T_opthdr) +
3597 			    ipp.ipp_rthdrlen;
3598 			icmp_opt |= IPPF_RTHDR;
3599 		}
3600 		if (icmp->icmp_ip_recvpktinfo &&
3601 		    (ipp.ipp_fields & IPPF_IFINDEX)) {
3602 			udi_size += sizeof (struct T_opthdr) +
3603 			    sizeof (struct in6_pktinfo);
3604 			icmp_opt |= IPPF_IFINDEX;
3605 		}
3606 	}
3607 	if (icmp->icmp_ipv6_recvhoplimit) {
3608 		udi_size += sizeof (struct T_opthdr) + sizeof (int);
3609 		icmp_ipv6_recvhoplimit = B_TRUE;
3610 	}
3611 
3612 	if (icmp->icmp_ipv6_recvtclass)
3613 		udi_size += sizeof (struct T_opthdr) + sizeof (int);
3614 
3615 	/*
3616 	 * If SO_TIMESTAMP is set allocate the appropriate sized
3617 	 * buffer. Since gethrestime() expects a pointer aligned
3618 	 * argument, we allocate space necessary for extra
3619 	 * alignment (even though it might not be used).
3620 	 */
3621 	if (icmp->icmp_timestamp) {
3622 		udi_size += sizeof (struct T_opthdr) +
3623 		    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
3624 	}
3625 
3626 	mp1 = allocb(udi_size, BPRI_MED);
3627 	if (mp1 == NULL) {
3628 		freemsg(mp);
3629 		BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
3630 		return;
3631 	}
3632 	mp1->b_cont = mp;
3633 	mp = mp1;
3634 	mp->b_datap->db_type = M_PROTO;
3635 	tudi = (struct T_unitdata_ind *)mp->b_rptr;
3636 	mp->b_wptr = (uchar_t *)tudi + udi_size;
3637 	tudi->PRIM_type = T_UNITDATA_IND;
3638 	tudi->SRC_length = sizeof (sin6_t);
3639 	tudi->SRC_offset = sizeof (struct T_unitdata_ind);
3640 	tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
3641 	udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
3642 	tudi->OPT_length = udi_size;
3643 	sin6 = (sin6_t *)&tudi[1];
3644 	sin6->sin6_port = 0;
3645 	sin6->sin6_family = AF_INET6;
3646 
3647 	sin6->sin6_addr = ip6h->ip6_src;
3648 	/* No sin6_flowinfo per API */
3649 	sin6->sin6_flowinfo = 0;
3650 	/* For link-scope source pass up scope id */
3651 	if ((ipp.ipp_fields & IPPF_IFINDEX) &&
3652 	    IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
3653 		sin6->sin6_scope_id = ipp.ipp_ifindex;
3654 	else
3655 		sin6->sin6_scope_id = 0;
3656 
3657 	sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst,
3658 	    icmp->icmp_zoneid, is->is_netstack);
3659 
3660 	if (udi_size != 0) {
3661 		uchar_t *dstopt;
3662 
3663 		dstopt = (uchar_t *)&sin6[1];
3664 		if (icmp_opt & IPPF_IFINDEX) {
3665 			struct T_opthdr *toh;
3666 			struct in6_pktinfo *pkti;
3667 
3668 			toh = (struct T_opthdr *)dstopt;
3669 			toh->level = IPPROTO_IPV6;
3670 			toh->name = IPV6_PKTINFO;
3671 			toh->len = sizeof (struct T_opthdr) +
3672 			    sizeof (*pkti);
3673 			toh->status = 0;
3674 			dstopt += sizeof (struct T_opthdr);
3675 			pkti = (struct in6_pktinfo *)dstopt;
3676 			pkti->ipi6_addr = ip6h->ip6_dst;
3677 			pkti->ipi6_ifindex = ipp.ipp_ifindex;
3678 			dstopt += sizeof (*pkti);
3679 			udi_size -= toh->len;
3680 		}
3681 		if (icmp_ipv6_recvhoplimit) {
3682 			struct T_opthdr *toh;
3683 
3684 			toh = (struct T_opthdr *)dstopt;
3685 			toh->level = IPPROTO_IPV6;
3686 			toh->name = IPV6_HOPLIMIT;
3687 			toh->len = sizeof (struct T_opthdr) +
3688 			    sizeof (uint_t);
3689 			toh->status = 0;
3690 			dstopt += sizeof (struct T_opthdr);
3691 			*(uint_t *)dstopt = ip6h->ip6_hops;
3692 			dstopt += sizeof (uint_t);
3693 			udi_size -= toh->len;
3694 		}
3695 		if (icmp->icmp_ipv6_recvtclass) {
3696 			struct T_opthdr *toh;
3697 
3698 			toh = (struct T_opthdr *)dstopt;
3699 			toh->level = IPPROTO_IPV6;
3700 			toh->name = IPV6_TCLASS;
3701 			toh->len = sizeof (struct T_opthdr) +
3702 			    sizeof (uint_t);
3703 			toh->status = 0;
3704 			dstopt += sizeof (struct T_opthdr);
3705 			*(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
3706 			dstopt += sizeof (uint_t);
3707 			udi_size -= toh->len;
3708 		}
3709 		if (icmp->icmp_timestamp) {
3710 			struct	T_opthdr *toh;
3711 
3712 			toh = (struct T_opthdr *)dstopt;
3713 			toh->level = SOL_SOCKET;
3714 			toh->name = SCM_TIMESTAMP;
3715 			toh->len = sizeof (struct T_opthdr) +
3716 			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
3717 			toh->status = 0;
3718 			dstopt += sizeof (struct T_opthdr);
3719 			/* Align for gethrestime() */
3720 			dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt,
3721 			    sizeof (intptr_t));
3722 			gethrestime((timestruc_t *)dstopt);
3723 			dstopt = (uchar_t *)toh + toh->len;
3724 			udi_size -= toh->len;
3725 		}
3726 		if (icmp_opt & IPPF_HOPOPTS) {
3727 			struct T_opthdr *toh;
3728 
3729 			toh = (struct T_opthdr *)dstopt;
3730 			toh->level = IPPROTO_IPV6;
3731 			toh->name = IPV6_HOPOPTS;
3732 			toh->len = sizeof (struct T_opthdr) +
3733 			    ipp.ipp_hopoptslen - hopstrip;
3734 			toh->status = 0;
3735 			dstopt += sizeof (struct T_opthdr);
3736 			bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt,
3737 			    ipp.ipp_hopoptslen - hopstrip);
3738 			if (hopstrip > 0) {
3739 				/* copy next header value and fake length */
3740 				dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0];
3741 				dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] -
3742 				    hopstrip / 8;
3743 			}
3744 			dstopt += ipp.ipp_hopoptslen - hopstrip;
3745 			udi_size -= toh->len;
3746 		}
3747 		if (icmp_opt & IPPF_RTDSTOPTS) {
3748 			struct T_opthdr *toh;
3749 
3750 			toh = (struct T_opthdr *)dstopt;
3751 			toh->level = IPPROTO_IPV6;
3752 			toh->name = IPV6_DSTOPTS;
3753 			toh->len = sizeof (struct T_opthdr) +
3754 			    ipp.ipp_rtdstoptslen;
3755 			toh->status = 0;
3756 			dstopt += sizeof (struct T_opthdr);
3757 			bcopy(ipp.ipp_rtdstopts, dstopt,
3758 			    ipp.ipp_rtdstoptslen);
3759 			dstopt += ipp.ipp_rtdstoptslen;
3760 			udi_size -= toh->len;
3761 		}
3762 		if (icmp_opt & IPPF_RTHDR) {
3763 			struct T_opthdr *toh;
3764 
3765 			toh = (struct T_opthdr *)dstopt;
3766 			toh->level = IPPROTO_IPV6;
3767 			toh->name = IPV6_RTHDR;
3768 			toh->len = sizeof (struct T_opthdr) +
3769 			    ipp.ipp_rthdrlen;
3770 			toh->status = 0;
3771 			dstopt += sizeof (struct T_opthdr);
3772 			bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen);
3773 			dstopt += ipp.ipp_rthdrlen;
3774 			udi_size -= toh->len;
3775 		}
3776 		if (icmp_opt & IPPF_DSTOPTS) {
3777 			struct T_opthdr *toh;
3778 
3779 			toh = (struct T_opthdr *)dstopt;
3780 			toh->level = IPPROTO_IPV6;
3781 			toh->name = IPV6_DSTOPTS;
3782 			toh->len = sizeof (struct T_opthdr) +
3783 			    ipp.ipp_dstoptslen;
3784 			toh->status = 0;
3785 			dstopt += sizeof (struct T_opthdr);
3786 			bcopy(ipp.ipp_dstopts, dstopt,
3787 			    ipp.ipp_dstoptslen);
3788 			dstopt += ipp.ipp_dstoptslen;
3789 			udi_size -= toh->len;
3790 		}
3791 		/* Consumed all of allocated space */
3792 		ASSERT(udi_size == 0);
3793 	}
3794 	BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams);
3795 	putnext(connp->conn_rq, mp);
3796 }
3797 
3798 /*
3799  * Handle the results of a T_BIND_REQ whether deferred by IP or handled
3800  * immediately.
3801  */
3802 static void
3803 icmp_bind_result(conn_t *connp, mblk_t *mp)
3804 {
3805 	struct T_error_ack	*tea;
3806 
3807 	switch (mp->b_datap->db_type) {
3808 	case M_PROTO:
3809 	case M_PCPROTO:
3810 		/* M_PROTO messages contain some type of TPI message. */
3811 		if ((mp->b_wptr - mp->b_rptr) < sizeof (t_scalar_t)) {
3812 			freemsg(mp);
3813 			return;
3814 		}
3815 		tea = (struct T_error_ack *)mp->b_rptr;
3816 
3817 		switch (tea->PRIM_type) {
3818 		case T_ERROR_ACK:
3819 			switch (tea->ERROR_prim) {
3820 			case O_T_BIND_REQ:
3821 			case T_BIND_REQ:
3822 				icmp_bind_error(connp, mp);
3823 				return;
3824 			default:
3825 				break;
3826 			}
3827 			ASSERT(0);
3828 			freemsg(mp);
3829 			return;
3830 
3831 		case T_BIND_ACK:
3832 			icmp_bind_ack(connp, mp);
3833 			return;
3834 
3835 		default:
3836 			break;
3837 		}
3838 		freemsg(mp);
3839 		return;
3840 	default:
3841 		/* FIXME: other cases? */
3842 		ASSERT(0);
3843 		freemsg(mp);
3844 		return;
3845 	}
3846 }
3847 
3848 /*
3849  * Process a T_BIND_ACK
3850  */
3851 static void
3852 icmp_bind_ack(conn_t *connp, mblk_t *mp)
3853 {
3854 	icmp_t	*icmp = connp->conn_icmp;
3855 	mblk_t	*mp1;
3856 	ire_t	*ire;
3857 	struct T_bind_ack *tba;
3858 	uchar_t *addrp;
3859 	ipa_conn_t	*ac;
3860 	ipa6_conn_t	*ac6;
3861 
3862 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
3863 	/*
3864 	 * We know if headers are included or not so we can
3865 	 * safely do this.
3866 	 */
3867 	if (icmp->icmp_state == TS_UNBND) {
3868 		/*
3869 		 * TPI has not yet bound - bind sent by
3870 		 * icmp_bind_proto.
3871 		 */
3872 		freemsg(mp);
3873 		rw_exit(&icmp->icmp_rwlock);
3874 		return;
3875 	}
3876 	ASSERT(icmp->icmp_pending_op != -1);
3877 
3878 	/*
3879 	 * If a broadcast/multicast address was bound set
3880 	 * the source address to 0.
3881 	 * This ensures no datagrams with broadcast address
3882 	 * as source address are emitted (which would violate
3883 	 * RFC1122 - Hosts requirements)
3884 	 *
3885 	 * Note that when connecting the returned IRE is
3886 	 * for the destination address and we only perform
3887 	 * the broadcast check for the source address (it
3888 	 * is OK to connect to a broadcast/multicast address.)
3889 	 */
3890 	mp1 = mp->b_cont;
3891 	if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) {
3892 		ire = (ire_t *)mp1->b_rptr;
3893 
3894 		/*
3895 		 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast
3896 		 * local address.
3897 		 */
3898 		if (ire->ire_type == IRE_BROADCAST &&
3899 		    icmp->icmp_state != TS_DATA_XFER) {
3900 			ASSERT(icmp->icmp_pending_op == T_BIND_REQ ||
3901 			    icmp->icmp_pending_op == O_T_BIND_REQ);
3902 			/* This was just a local bind to a MC/broadcast addr */
3903 			V6_SET_ZERO(icmp->icmp_v6src);
3904 			if (icmp->icmp_family == AF_INET6)
3905 				(void) icmp_build_hdrs(icmp);
3906 		} else if (V6_OR_V4_INADDR_ANY(icmp->icmp_v6src)) {
3907 			/*
3908 			 * Local address not yet set - pick it from the
3909 			 * T_bind_ack
3910 			 */
3911 			tba = (struct T_bind_ack *)mp->b_rptr;
3912 			addrp = &mp->b_rptr[tba->ADDR_offset];
3913 			switch (icmp->icmp_family) {
3914 			case AF_INET:
3915 				if (tba->ADDR_length == sizeof (ipa_conn_t)) {
3916 					ac = (ipa_conn_t *)addrp;
3917 				} else {
3918 					ASSERT(tba->ADDR_length ==
3919 					    sizeof (ipa_conn_x_t));
3920 					ac = &((ipa_conn_x_t *)addrp)->acx_conn;
3921 				}
3922 				IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr,
3923 				    &icmp->icmp_v6src);
3924 				break;
3925 			case AF_INET6:
3926 				if (tba->ADDR_length == sizeof (ipa6_conn_t)) {
3927 					ac6 = (ipa6_conn_t *)addrp;
3928 				} else {
3929 					ASSERT(tba->ADDR_length ==
3930 					    sizeof (ipa6_conn_x_t));
3931 					ac6 = &((ipa6_conn_x_t *)
3932 					    addrp)->ac6x_conn;
3933 				}
3934 				icmp->icmp_v6src = ac6->ac6_laddr;
3935 				(void) icmp_build_hdrs(icmp);
3936 			}
3937 		}
3938 		mp1 = mp1->b_cont;
3939 	}
3940 	icmp->icmp_pending_op = -1;
3941 	rw_exit(&icmp->icmp_rwlock);
3942 	/*
3943 	 * Look for one or more appended ACK message added by
3944 	 * icmp_connect or icmp_disconnect.
3945 	 * If none found just send up the T_BIND_ACK.
3946 	 * icmp_connect has appended a T_OK_ACK and a
3947 	 * T_CONN_CON.
3948 	 * icmp_disconnect has appended a T_OK_ACK.
3949 	 */
3950 	if (mp1 != NULL) {
3951 		if (mp->b_cont == mp1)
3952 			mp->b_cont = NULL;
3953 		else {
3954 			ASSERT(mp->b_cont->b_cont == mp1);
3955 			mp->b_cont->b_cont = NULL;
3956 		}
3957 		freemsg(mp);
3958 		mp = mp1;
3959 		while (mp != NULL) {
3960 			mp1 = mp->b_cont;
3961 			mp->b_cont = NULL;
3962 			putnext(connp->conn_rq, mp);
3963 			mp = mp1;
3964 		}
3965 		return;
3966 	}
3967 	freemsg(mp->b_cont);
3968 	mp->b_cont = NULL;
3969 	putnext(connp->conn_rq, mp);
3970 }
3971 
3972 static void
3973 icmp_bind_error(conn_t *connp, mblk_t *mp)
3974 {
3975 	icmp_t	*icmp = connp->conn_icmp;
3976 	struct T_error_ack *tea;
3977 
3978 	tea = (struct T_error_ack *)mp->b_rptr;
3979 	/*
3980 	 * If our O_T_BIND_REQ/T_BIND_REQ fails,
3981 	 * clear out the source address before
3982 	 * passing the message upstream.
3983 	 * If this was caused by a T_CONN_REQ
3984 	 * revert back to bound state.
3985 	 */
3986 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
3987 	if (icmp->icmp_state == TS_UNBND) {
3988 		/*
3989 		 * TPI has not yet bound - bind sent by icmp_bind_proto.
3990 		 */
3991 		freemsg(mp);
3992 		rw_exit(&icmp->icmp_rwlock);
3993 		return;
3994 	}
3995 	ASSERT(icmp->icmp_pending_op != -1);
3996 	tea->ERROR_prim = icmp->icmp_pending_op;
3997 	icmp->icmp_pending_op = -1;
3998 
3999 	switch (tea->ERROR_prim) {
4000 	case T_CONN_REQ:
4001 		ASSERT(icmp->icmp_state == TS_DATA_XFER);
4002 		/* Connect failed */
4003 		/* Revert back to the bound source */
4004 		icmp->icmp_v6src = icmp->icmp_bound_v6src;
4005 		icmp->icmp_state = TS_IDLE;
4006 		if (icmp->icmp_family == AF_INET6)
4007 			(void) icmp_build_hdrs(icmp);
4008 		break;
4009 
4010 	case T_DISCON_REQ:
4011 	case T_BIND_REQ:
4012 	case O_T_BIND_REQ:
4013 		V6_SET_ZERO(icmp->icmp_v6src);
4014 		V6_SET_ZERO(icmp->icmp_bound_v6src);
4015 		icmp->icmp_state = TS_UNBND;
4016 		if (icmp->icmp_family == AF_INET6)
4017 			(void) icmp_build_hdrs(icmp);
4018 		break;
4019 	default:
4020 		break;
4021 	}
4022 	rw_exit(&icmp->icmp_rwlock);
4023 	putnext(connp->conn_rq, mp);
4024 }
4025 
4026 /*
4027  * return SNMP stuff in buffer in mpdata
4028  */
4029 mblk_t *
4030 icmp_snmp_get(queue_t *q, mblk_t *mpctl)
4031 {
4032 	mblk_t			*mpdata;
4033 	struct opthdr		*optp;
4034 	conn_t			*connp = Q_TO_CONN(q);
4035 	icmp_stack_t		*is = connp->conn_netstack->netstack_icmp;
4036 	mblk_t			*mp2ctl;
4037 
4038 	/*
4039 	 * make a copy of the original message
4040 	 */
4041 	mp2ctl = copymsg(mpctl);
4042 
4043 	if (mpctl == NULL ||
4044 	    (mpdata = mpctl->b_cont) == NULL) {
4045 		freemsg(mpctl);
4046 		freemsg(mp2ctl);
4047 		return (0);
4048 	}
4049 
4050 	/* fixed length structure for IPv4 and IPv6 counters */
4051 	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
4052 	optp->level = EXPER_RAWIP;
4053 	optp->name = 0;
4054 	(void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib,
4055 	    sizeof (is->is_rawip_mib));
4056 	optp->len = msgdsize(mpdata);
4057 	qreply(q, mpctl);
4058 
4059 	return (mp2ctl);
4060 }
4061 
4062 /*
4063  * Return 0 if invalid set request, 1 otherwise, including non-rawip requests.
4064  * TODO:  If this ever actually tries to set anything, it needs to be
4065  * to do the appropriate locking.
4066  */
4067 /* ARGSUSED */
4068 int
4069 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
4070     uchar_t *ptr, int len)
4071 {
4072 	switch (level) {
4073 	case EXPER_RAWIP:
4074 		return (0);
4075 	default:
4076 		return (1);
4077 	}
4078 }
4079 
4080 /* Report for ndd "icmp_status" */
4081 /* ARGSUSED */
4082 static int
4083 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
4084 {
4085 	conn_t  *connp;
4086 	ip_stack_t *ipst;
4087 	char	laddrbuf[INET6_ADDRSTRLEN];
4088 	char	faddrbuf[INET6_ADDRSTRLEN];
4089 	int	i;
4090 
4091 	(void) mi_mpprintf(mp,
4092 	    "RAWIP    " MI_COL_HDRPAD_STR
4093 	/*   01234567[89ABCDEF] */
4094 	    "  src addr        dest addr       state");
4095 	/*   xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */
4096 
4097 	connp = Q_TO_CONN(q);
4098 	ipst = connp->conn_netstack->netstack_ip;
4099 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
4100 		connf_t *connfp;
4101 		char	*state;
4102 
4103 		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
4104 		connp = NULL;
4105 
4106 		while ((connp = ipcl_get_next_conn(connfp, connp,
4107 		    IPCL_RAWIPCONN)) != NULL) {
4108 			icmp_t  *icmp;
4109 
4110 			mutex_enter(&(connp)->conn_lock);
4111 			icmp = connp->conn_icmp;
4112 
4113 			if (icmp->icmp_state == TS_UNBND)
4114 				state = "UNBOUND";
4115 			else if (icmp->icmp_state == TS_IDLE)
4116 				state = "IDLE";
4117 			else if (icmp->icmp_state == TS_DATA_XFER)
4118 				state = "CONNECTED";
4119 			else
4120 				state = "UnkState";
4121 
4122 			(void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s",
4123 			    (void *)icmp,
4124 			    inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf,
4125 			    sizeof (faddrbuf)),
4126 			    inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf,
4127 			    sizeof (laddrbuf)),
4128 			    state);
4129 			mutex_exit(&(connp)->conn_lock);
4130 		}
4131 	}
4132 	return (0);
4133 }
4134 
4135 /*
4136  * This routine creates a T_UDERROR_IND message and passes it upstream.
4137  * The address and options are copied from the T_UNITDATA_REQ message
4138  * passed in mp.  This message is freed.
4139  */
4140 static void
4141 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
4142 {
4143 	mblk_t	*mp1;
4144 	uchar_t	*rptr = mp->b_rptr;
4145 	struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr;
4146 
4147 	mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset],
4148 	    tudr->DEST_length, (char *)&rptr[tudr->OPT_offset],
4149 	    tudr->OPT_length, err);
4150 	if (mp1)
4151 		qreply(q, mp1);
4152 	freemsg(mp);
4153 }
4154 
4155 /*
4156  * This routine is called by icmp_wput to handle T_UNBIND_REQ messages.
4157  * After some error checking, the message is passed downstream to ip.
4158  */
4159 static void
4160 icmp_unbind(queue_t *q, mblk_t *mp)
4161 {
4162 	icmp_t	*icmp = Q_TO_ICMP(q);
4163 
4164 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
4165 	/* If a bind has not been done, we can't unbind. */
4166 	if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) {
4167 		rw_exit(&icmp->icmp_rwlock);
4168 		icmp_err_ack(q, mp, TOUTSTATE, 0);
4169 		return;
4170 	}
4171 	icmp->icmp_pending_op = T_UNBIND_REQ;
4172 	rw_exit(&icmp->icmp_rwlock);
4173 
4174 	/*
4175 	 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK
4176 	 * and therefore ip_unbind must never return NULL.
4177 	 */
4178 	mp = ip_unbind(q, mp);
4179 	ASSERT(mp != NULL);
4180 	ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
4181 
4182 	/*
4183 	 * Once we're unbound from IP, the pending operation may be cleared
4184 	 * here.
4185 	 */
4186 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
4187 	V6_SET_ZERO(icmp->icmp_v6src);
4188 	V6_SET_ZERO(icmp->icmp_bound_v6src);
4189 	icmp->icmp_pending_op = -1;
4190 	icmp->icmp_state = TS_UNBND;
4191 	if (icmp->icmp_family == AF_INET6)
4192 		(void) icmp_build_hdrs(icmp);
4193 	rw_exit(&icmp->icmp_rwlock);
4194 
4195 	qreply(q, mp);
4196 }
4197 
4198 /*
4199  * Process IPv4 packets that already include an IP header.
4200  * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and
4201  * IPPROTO_IGMP).
4202  */
4203 static void
4204 icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop)
4205 {
4206 	icmp_stack_t *is = icmp->icmp_is;
4207 	ipha_t	*ipha;
4208 	int	ip_hdr_length;
4209 	int	tp_hdr_len;
4210 	mblk_t	*mp1;
4211 	uint_t	pkt_len;
4212 	ip_opt_info_t optinfo;
4213 	conn_t	*connp = icmp->icmp_connp;
4214 
4215 	optinfo.ip_opt_flags = 0;
4216 	optinfo.ip_opt_ill_index = 0;
4217 	ipha = (ipha_t *)mp->b_rptr;
4218 	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len;
4219 	if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) {
4220 		if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) {
4221 			ASSERT(icmp != NULL);
4222 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4223 			freemsg(mp);
4224 			return;
4225 		}
4226 		ipha = (ipha_t *)mp->b_rptr;
4227 	}
4228 	ipha->ipha_version_and_hdr_length =
4229 	    (IP_VERSION<<4) | (ip_hdr_length>>2);
4230 
4231 	/*
4232 	 * For the socket of SOCK_RAW type, the checksum is provided in the
4233 	 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to
4234 	 * tell IP that the application has sent a complete IP header and not
4235 	 * to compute the transport checksum nor change the DF flag.
4236 	 */
4237 	ipha->ipha_ident = IP_HDR_INCLUDED;
4238 	ipha->ipha_hdr_checksum = 0;
4239 	ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF);
4240 	/* Insert options if any */
4241 	if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) {
4242 		/*
4243 		 * Put the IP header plus any transport header that is
4244 		 * checksumed by ip_wput into the first mblk. (ip_wput assumes
4245 		 * that at least the checksum field is in the first mblk.)
4246 		 */
4247 		switch (ipha->ipha_protocol) {
4248 		case IPPROTO_UDP:
4249 			tp_hdr_len = 8;
4250 			break;
4251 		case IPPROTO_TCP:
4252 			tp_hdr_len = 20;
4253 			break;
4254 		default:
4255 			tp_hdr_len = 0;
4256 			break;
4257 		}
4258 		/*
4259 		 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus
4260 		 * tp_hdr_len bytes will be in a single mblk.
4261 		 */
4262 		if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH +
4263 		    tp_hdr_len)) {
4264 			if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH +
4265 			    tp_hdr_len)) {
4266 				BUMP_MIB(&is->is_rawip_mib,
4267 				    rawipOutErrors);
4268 				freemsg(mp);
4269 				return;
4270 			}
4271 			ipha = (ipha_t *)mp->b_rptr;
4272 		}
4273 
4274 		/*
4275 		 * if the length is larger then the max allowed IP packet,
4276 		 * then send an error and abort the processing.
4277 		 */
4278 		pkt_len = ntohs(ipha->ipha_length)
4279 		    + icmp->icmp_ip_snd_options_len;
4280 		if (pkt_len > IP_MAXPACKET) {
4281 			icmp_ud_err(q, mp, EMSGSIZE);
4282 			return;
4283 		}
4284 		if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra +
4285 		    tp_hdr_len, BPRI_LO))) {
4286 			icmp_ud_err(q, mp, ENOMEM);
4287 			return;
4288 		}
4289 		mp1->b_rptr += is->is_wroff_extra;
4290 		mp1->b_wptr = mp1->b_rptr + ip_hdr_length;
4291 
4292 		ipha->ipha_length = htons((uint16_t)pkt_len);
4293 		bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH);
4294 
4295 		/* Copy transport header if any */
4296 		bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len);
4297 		mp1->b_wptr += tp_hdr_len;
4298 
4299 		/* Add options */
4300 		ipha = (ipha_t *)mp1->b_rptr;
4301 		bcopy(icmp->icmp_ip_snd_options, &ipha[1],
4302 		    icmp->icmp_ip_snd_options_len);
4303 
4304 		/* Drop IP header and transport header from original */
4305 		(void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len);
4306 
4307 		mp1->b_cont = mp;
4308 		mp = mp1;
4309 		/*
4310 		 * Massage source route putting first source
4311 		 * route in ipha_dst.
4312 		 */
4313 		(void) ip_massage_options(ipha, is->is_netstack);
4314 	}
4315 
4316 	if (pktinfop != NULL) {
4317 		/*
4318 		 * Over write the source address provided in the header
4319 		 */
4320 		if (pktinfop->ip4_addr != INADDR_ANY) {
4321 			ipha->ipha_src = pktinfop->ip4_addr;
4322 			optinfo.ip_opt_flags = IP_VERIFY_SRC;
4323 		}
4324 
4325 		if (pktinfop->ip4_ill_index != 0) {
4326 			optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
4327 		}
4328 	}
4329 
4330 	mblk_setcred(mp, connp->conn_cred);
4331 	ip_output_options(connp, mp, q, IP_WPUT, &optinfo);
4332 }
4333 
4334 static boolean_t
4335 icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst)
4336 {
4337 	int err;
4338 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
4339 	icmp_stack_t		*is = icmp->icmp_is;
4340 	conn_t	*connp = icmp->icmp_connp;
4341 
4342 	err = tsol_compute_label(DB_CREDDEF(mp, connp->conn_cred), dst,
4343 	    opt_storage, connp->conn_mac_exempt,
4344 	    is->is_netstack->netstack_ip);
4345 	if (err == 0) {
4346 		err = tsol_update_options(&icmp->icmp_ip_snd_options,
4347 		    &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len,
4348 		    opt_storage);
4349 	}
4350 	if (err != 0) {
4351 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4352 		DTRACE_PROBE4(
4353 		    tx__ip__log__drop__updatelabel__icmp,
4354 		    char *, "queue(1) failed to update options(2) on mp(3)",
4355 		    queue_t *, q, char *, opt_storage, mblk_t *, mp);
4356 		icmp_ud_err(q, mp, err);
4357 		return (B_FALSE);
4358 	}
4359 	IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst);
4360 	return (B_TRUE);
4361 }
4362 
4363 /*
4364  * This routine handles all messages passed downstream.  It either
4365  * consumes the message or passes it downstream; it never queues a
4366  * a message.
4367  */
4368 static void
4369 icmp_wput(queue_t *q, mblk_t *mp)
4370 {
4371 	uchar_t	*rptr = mp->b_rptr;
4372 	ipha_t	*ipha;
4373 	mblk_t	*mp1;
4374 	int	ip_hdr_length;
4375 #define	tudr ((struct T_unitdata_req *)rptr)
4376 	size_t	ip_len;
4377 	conn_t	*connp = Q_TO_CONN(q);
4378 	icmp_t	*icmp = connp->conn_icmp;
4379 	icmp_stack_t *is = icmp->icmp_is;
4380 	sin6_t	*sin6;
4381 	sin_t	*sin;
4382 	ipaddr_t	v4dst;
4383 	ip4_pkt_t	pktinfo;
4384 	ip4_pkt_t	*pktinfop = &pktinfo;
4385 	ip_opt_info_t	optinfo;
4386 
4387 	switch (mp->b_datap->db_type) {
4388 	case M_DATA:
4389 		if (icmp->icmp_hdrincl) {
4390 			ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
4391 			ipha = (ipha_t *)mp->b_rptr;
4392 			if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) {
4393 				if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) {
4394 					BUMP_MIB(&is->is_rawip_mib,
4395 					    rawipOutErrors);
4396 					freemsg(mp);
4397 					return;
4398 				}
4399 				ipha = (ipha_t *)mp->b_rptr;
4400 			}
4401 			/*
4402 			 * If this connection was used for v6 (inconceivable!)
4403 			 * or if we have a new destination, then it's time to
4404 			 * figure a new label.
4405 			 */
4406 			if (is_system_labeled() &&
4407 			    (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) ||
4408 			    V4_PART_OF_V6(icmp->icmp_v6lastdst) !=
4409 			    ipha->ipha_dst) &&
4410 			    !icmp_update_label(q, icmp, mp, ipha->ipha_dst)) {
4411 				return;
4412 			}
4413 			icmp_wput_hdrincl(q, mp, icmp, NULL);
4414 			return;
4415 		}
4416 		freemsg(mp);
4417 		return;
4418 	case M_PROTO:
4419 	case M_PCPROTO:
4420 		ip_len = mp->b_wptr - rptr;
4421 		if (ip_len >= sizeof (struct T_unitdata_req)) {
4422 			/* Expedite valid T_UNITDATA_REQ to below the switch */
4423 			if (((union T_primitives *)rptr)->type
4424 			    == T_UNITDATA_REQ)
4425 				break;
4426 		}
4427 		/* FALLTHRU */
4428 	default:
4429 		icmp_wput_other(q, mp);
4430 		return;
4431 	}
4432 
4433 	/* Handle T_UNITDATA_REQ messages here. */
4434 
4435 
4436 
4437 	if (icmp->icmp_state == TS_UNBND) {
4438 		/* If a port has not been bound to the stream, fail. */
4439 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4440 		icmp_ud_err(q, mp, EPROTO);
4441 		return;
4442 	}
4443 	mp1 = mp->b_cont;
4444 	if (mp1 == NULL) {
4445 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4446 		icmp_ud_err(q, mp, EPROTO);
4447 		return;
4448 	}
4449 
4450 	if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) {
4451 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4452 		icmp_ud_err(q, mp, EADDRNOTAVAIL);
4453 		return;
4454 	}
4455 
4456 	switch (icmp->icmp_family) {
4457 	case AF_INET6:
4458 		sin6 = (sin6_t *)&rptr[tudr->DEST_offset];
4459 		if (!OK_32PTR((char *)sin6) ||
4460 		    tudr->DEST_length != sizeof (sin6_t) ||
4461 		    sin6->sin6_family != AF_INET6) {
4462 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4463 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
4464 			return;
4465 		}
4466 
4467 		/* No support for mapped addresses on raw sockets */
4468 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
4469 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4470 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
4471 			return;
4472 		}
4473 
4474 		/*
4475 		 * Destination is a native IPv6 address.
4476 		 * Send out an IPv6 format packet.
4477 		 */
4478 		icmp_wput_ipv6(q, mp, sin6, tudr->OPT_length);
4479 		return;
4480 
4481 	case AF_INET:
4482 		sin = (sin_t *)&rptr[tudr->DEST_offset];
4483 		if (!OK_32PTR((char *)sin) ||
4484 		    tudr->DEST_length != sizeof (sin_t) ||
4485 		    sin->sin_family != AF_INET) {
4486 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4487 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
4488 			return;
4489 		}
4490 		/* Extract and ipaddr */
4491 		v4dst = sin->sin_addr.s_addr;
4492 		break;
4493 
4494 	default:
4495 		ASSERT(0);
4496 	}
4497 
4498 	pktinfop->ip4_ill_index = 0;
4499 	pktinfop->ip4_addr = INADDR_ANY;
4500 	optinfo.ip_opt_flags = 0;
4501 	optinfo.ip_opt_ill_index = 0;
4502 
4503 
4504 	/*
4505 	 * If options passed in, feed it for verification and handling
4506 	 */
4507 	if (tudr->OPT_length != 0) {
4508 		int error;
4509 
4510 		error = 0;
4511 		if (icmp_unitdata_opt_process(q, mp, &error,
4512 		    (void *)pktinfop) < 0) {
4513 			/* failure */
4514 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4515 			icmp_ud_err(q, mp, error);
4516 			return;
4517 		}
4518 		ASSERT(error == 0);
4519 		/*
4520 		 * Note: Success in processing options.
4521 		 * mp option buffer represented by
4522 		 * OPT_length/offset now potentially modified
4523 		 * and contain option setting results
4524 		 */
4525 
4526 	}
4527 
4528 	if (v4dst == INADDR_ANY)
4529 		v4dst = htonl(INADDR_LOOPBACK);
4530 
4531 	/* Check if our saved options are valid; update if not */
4532 	if (is_system_labeled() &&
4533 	    (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) ||
4534 	    V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst) &&
4535 	    !icmp_update_label(q, icmp, mp, v4dst)) {
4536 		return;
4537 	}
4538 
4539 	/* Protocol 255 contains full IP headers */
4540 	if (icmp->icmp_hdrincl) {
4541 		freeb(mp);
4542 		icmp_wput_hdrincl(q, mp1, icmp, pktinfop);
4543 		return;
4544 	}
4545 
4546 
4547 	/* Add an IP header */
4548 	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len;
4549 	ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length];
4550 	if ((uchar_t *)ipha < mp1->b_datap->db_base ||
4551 	    mp1->b_datap->db_ref != 1 ||
4552 	    !OK_32PTR(ipha)) {
4553 		if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra,
4554 		    BPRI_LO))) {
4555 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4556 			icmp_ud_err(q, mp, ENOMEM);
4557 			return;
4558 		}
4559 		mp1->b_cont = mp->b_cont;
4560 		ipha = (ipha_t *)mp1->b_datap->db_lim;
4561 		mp1->b_wptr = (uchar_t *)ipha;
4562 		ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length);
4563 	}
4564 #ifdef	_BIG_ENDIAN
4565 	/* Set version, header length, and tos */
4566 	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
4567 	    ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) |
4568 	    icmp->icmp_type_of_service);
4569 	/* Set ttl and protocol */
4570 	*(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto;
4571 #else
4572 	/* Set version, header length, and tos */
4573 	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
4574 	    ((icmp->icmp_type_of_service << 8) |
4575 	    ((IP_VERSION << 4) | (ip_hdr_length>>2)));
4576 	/* Set ttl and protocol */
4577 	*(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl;
4578 #endif
4579 	if (pktinfop->ip4_addr != INADDR_ANY) {
4580 		ipha->ipha_src = pktinfop->ip4_addr;
4581 		optinfo.ip_opt_flags = IP_VERIFY_SRC;
4582 	} else {
4583 
4584 		/*
4585 		 * Copy our address into the packet.  If this is zero,
4586 		 * ip will fill in the real source address.
4587 		 */
4588 		IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src);
4589 	}
4590 
4591 	ipha->ipha_fragment_offset_and_flags = 0;
4592 
4593 	if (pktinfop->ip4_ill_index != 0) {
4594 		optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
4595 	}
4596 
4597 
4598 	/*
4599 	 * For the socket of SOCK_RAW type, the checksum is provided in the
4600 	 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to
4601 	 * tell IP that the application has sent a complete IP header and not
4602 	 * to compute the transport checksum nor change the DF flag.
4603 	 */
4604 	ipha->ipha_ident = IP_HDR_INCLUDED;
4605 
4606 	/* Finish common formatting of the packet. */
4607 	mp1->b_rptr = (uchar_t *)ipha;
4608 
4609 	ip_len = mp1->b_wptr - (uchar_t *)ipha;
4610 	if (mp1->b_cont != NULL)
4611 		ip_len += msgdsize(mp1->b_cont);
4612 
4613 	/*
4614 	 * Set the length into the IP header.
4615 	 * If the length is greater than the maximum allowed by IP,
4616 	 * then free the message and return. Do not try and send it
4617 	 * as this can cause problems in layers below.
4618 	 */
4619 	if (ip_len > IP_MAXPACKET) {
4620 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4621 		icmp_ud_err(q, mp, EMSGSIZE);
4622 		return;
4623 	}
4624 	ipha->ipha_length = htons((uint16_t)ip_len);
4625 	/*
4626 	 * Copy in the destination address from the T_UNITDATA
4627 	 * request
4628 	 */
4629 	ipha->ipha_dst = v4dst;
4630 
4631 	/*
4632 	 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic.
4633 	 */
4634 	if (CLASSD(v4dst))
4635 		ipha->ipha_ttl = icmp->icmp_multicast_ttl;
4636 
4637 	/* Copy in options if any */
4638 	if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) {
4639 		bcopy(icmp->icmp_ip_snd_options,
4640 		    &ipha[1], icmp->icmp_ip_snd_options_len);
4641 		/*
4642 		 * Massage source route putting first source route in ipha_dst.
4643 		 * Ignore the destination in the T_unitdata_req.
4644 		 */
4645 		(void) ip_massage_options(ipha, is->is_netstack);
4646 	}
4647 
4648 	freeb(mp);
4649 	BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams);
4650 	mblk_setcred(mp1, connp->conn_cred);
4651 	ip_output_options(Q_TO_CONN(q), mp1, q, IP_WPUT, &optinfo);
4652 #undef	ipha
4653 #undef tudr
4654 }
4655 
4656 static boolean_t
4657 icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst)
4658 {
4659 	int err;
4660 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
4661 	icmp_stack_t		*is = icmp->icmp_is;
4662 	conn_t	*connp = icmp->icmp_connp;
4663 
4664 	err = tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), dst,
4665 	    opt_storage, connp->conn_mac_exempt,
4666 	    is->is_netstack->netstack_ip);
4667 	if (err == 0) {
4668 		err = tsol_update_sticky(&icmp->icmp_sticky_ipp,
4669 		    &icmp->icmp_label_len_v6, opt_storage);
4670 	}
4671 	if (err != 0) {
4672 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4673 		DTRACE_PROBE4(
4674 		    tx__ip__log__drop__updatelabel__icmp6,
4675 		    char *, "queue(1) failed to update options(2) on mp(3)",
4676 		    queue_t *, wq, char *, opt_storage, mblk_t *, mp);
4677 		icmp_ud_err(wq, mp, err);
4678 		return (B_FALSE);
4679 	}
4680 
4681 	icmp->icmp_v6lastdst = *dst;
4682 	return (B_TRUE);
4683 }
4684 
4685 /*
4686  * icmp_wput_ipv6():
4687  * Assumes that icmp_wput did some sanity checking on the destination
4688  * address, but that the label may not yet be correct.
4689  */
4690 void
4691 icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen)
4692 {
4693 	ip6_t			*ip6h;
4694 	ip6i_t			*ip6i;	/* mp1->b_rptr even if no ip6i_t */
4695 	mblk_t			*mp1;
4696 	int			ip_hdr_len = IPV6_HDR_LEN;
4697 	size_t			ip_len;
4698 	icmp_t			*icmp = Q_TO_ICMP(q);
4699 	icmp_stack_t		*is = icmp->icmp_is;
4700 	ip6_pkt_t		ipp_s;	/* For ancillary data options */
4701 	ip6_pkt_t		*ipp = &ipp_s;
4702 	ip6_pkt_t		*tipp;
4703 	uint32_t		csum = 0;
4704 	uint_t			ignore = 0;
4705 	uint_t			option_exists = 0, is_sticky = 0;
4706 	uint8_t			*cp;
4707 	uint8_t			*nxthdr_ptr;
4708 	in6_addr_t		ip6_dst;
4709 
4710 	/*
4711 	 * If the local address is a mapped address return
4712 	 * an error.
4713 	 * It would be possible to send an IPv6 packet but the
4714 	 * response would never make it back to the application
4715 	 * since it is bound to a mapped address.
4716 	 */
4717 	if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) {
4718 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4719 		icmp_ud_err(q, mp, EADDRNOTAVAIL);
4720 		return;
4721 	}
4722 
4723 	ipp->ipp_fields = 0;
4724 	ipp->ipp_sticky_ignored = 0;
4725 
4726 	/*
4727 	 * If TPI options passed in, feed it for verification and handling
4728 	 */
4729 	if (tudr_optlen != 0) {
4730 		int error;
4731 
4732 		if (icmp_unitdata_opt_process(q, mp, &error,
4733 		    (void *)ipp) < 0) {
4734 			/* failure */
4735 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4736 			icmp_ud_err(q, mp, error);
4737 			return;
4738 		}
4739 		ignore = ipp->ipp_sticky_ignored;
4740 		ASSERT(error == 0);
4741 	}
4742 
4743 	if (sin6->sin6_scope_id != 0 &&
4744 	    IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
4745 		/*
4746 		 * IPPF_SCOPE_ID is special.  It's neither a sticky
4747 		 * option nor ancillary data.  It needs to be
4748 		 * explicitly set in options_exists.
4749 		 */
4750 		option_exists |= IPPF_SCOPE_ID;
4751 	}
4752 
4753 	/*
4754 	 * Compute the destination address
4755 	 */
4756 	ip6_dst = sin6->sin6_addr;
4757 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
4758 		ip6_dst = ipv6_loopback;
4759 
4760 	/*
4761 	 * If we're not going to the same destination as last time, then
4762 	 * recompute the label required.  This is done in a separate routine to
4763 	 * avoid blowing up our stack here.
4764 	 */
4765 	if (is_system_labeled() &&
4766 	    !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst) &&
4767 	    !icmp_update_label_v6(q, icmp, mp, &ip6_dst)) {
4768 		return;
4769 	}
4770 
4771 	/*
4772 	 * If there's a security label here, then we ignore any options the
4773 	 * user may try to set.  We keep the peer's label as a hidden sticky
4774 	 * option.
4775 	 */
4776 	if (icmp->icmp_label_len_v6 > 0) {
4777 		ignore &= ~IPPF_HOPOPTS;
4778 		ipp->ipp_fields &= ~IPPF_HOPOPTS;
4779 	}
4780 
4781 	if ((icmp->icmp_sticky_ipp.ipp_fields == 0) &&
4782 	    (ipp->ipp_fields == 0)) {
4783 		/* No sticky options nor ancillary data. */
4784 		goto no_options;
4785 	}
4786 
4787 	/*
4788 	 * Go through the options figuring out where each is going to
4789 	 * come from and build two masks.  The first mask indicates if
4790 	 * the option exists at all.  The second mask indicates if the
4791 	 * option is sticky or ancillary.
4792 	 */
4793 	if (!(ignore & IPPF_HOPOPTS)) {
4794 		if (ipp->ipp_fields & IPPF_HOPOPTS) {
4795 			option_exists |= IPPF_HOPOPTS;
4796 			ip_hdr_len += ipp->ipp_hopoptslen;
4797 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) {
4798 			option_exists |= IPPF_HOPOPTS;
4799 			is_sticky |= IPPF_HOPOPTS;
4800 			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen;
4801 		}
4802 	}
4803 
4804 	if (!(ignore & IPPF_RTHDR)) {
4805 		if (ipp->ipp_fields & IPPF_RTHDR) {
4806 			option_exists |= IPPF_RTHDR;
4807 			ip_hdr_len += ipp->ipp_rthdrlen;
4808 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) {
4809 			option_exists |= IPPF_RTHDR;
4810 			is_sticky |= IPPF_RTHDR;
4811 			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen;
4812 		}
4813 	}
4814 
4815 	if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) {
4816 		/*
4817 		 * Need to have a router header to use these.
4818 		 */
4819 		if (ipp->ipp_fields & IPPF_RTDSTOPTS) {
4820 			option_exists |= IPPF_RTDSTOPTS;
4821 			ip_hdr_len += ipp->ipp_rtdstoptslen;
4822 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) {
4823 			option_exists |= IPPF_RTDSTOPTS;
4824 			is_sticky |= IPPF_RTDSTOPTS;
4825 			ip_hdr_len +=
4826 			    icmp->icmp_sticky_ipp.ipp_rtdstoptslen;
4827 		}
4828 	}
4829 
4830 	if (!(ignore & IPPF_DSTOPTS)) {
4831 		if (ipp->ipp_fields & IPPF_DSTOPTS) {
4832 			option_exists |= IPPF_DSTOPTS;
4833 			ip_hdr_len += ipp->ipp_dstoptslen;
4834 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) {
4835 			option_exists |= IPPF_DSTOPTS;
4836 			is_sticky |= IPPF_DSTOPTS;
4837 			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen;
4838 		}
4839 	}
4840 
4841 	if (!(ignore & IPPF_IFINDEX)) {
4842 		if (ipp->ipp_fields & IPPF_IFINDEX) {
4843 			option_exists |= IPPF_IFINDEX;
4844 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) {
4845 			option_exists |= IPPF_IFINDEX;
4846 			is_sticky |= IPPF_IFINDEX;
4847 		}
4848 	}
4849 
4850 	if (!(ignore & IPPF_ADDR)) {
4851 		if (ipp->ipp_fields & IPPF_ADDR) {
4852 			option_exists |= IPPF_ADDR;
4853 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) {
4854 			option_exists |= IPPF_ADDR;
4855 			is_sticky |= IPPF_ADDR;
4856 		}
4857 	}
4858 
4859 	if (!(ignore & IPPF_DONTFRAG)) {
4860 		if (ipp->ipp_fields & IPPF_DONTFRAG) {
4861 			option_exists |= IPPF_DONTFRAG;
4862 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) {
4863 			option_exists |= IPPF_DONTFRAG;
4864 			is_sticky |= IPPF_DONTFRAG;
4865 		}
4866 	}
4867 
4868 	if (!(ignore & IPPF_USE_MIN_MTU)) {
4869 		if (ipp->ipp_fields & IPPF_USE_MIN_MTU) {
4870 			option_exists |= IPPF_USE_MIN_MTU;
4871 		} else if (icmp->icmp_sticky_ipp.ipp_fields &
4872 		    IPPF_USE_MIN_MTU) {
4873 			option_exists |= IPPF_USE_MIN_MTU;
4874 			is_sticky |= IPPF_USE_MIN_MTU;
4875 		}
4876 	}
4877 
4878 	if (!(ignore & IPPF_NEXTHOP)) {
4879 		if (ipp->ipp_fields & IPPF_NEXTHOP) {
4880 			option_exists |= IPPF_NEXTHOP;
4881 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) {
4882 			option_exists |= IPPF_NEXTHOP;
4883 			is_sticky |= IPPF_NEXTHOP;
4884 		}
4885 	}
4886 
4887 	if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT))
4888 		option_exists |= IPPF_HOPLIMIT;
4889 	/* IPV6_HOPLIMIT can never be sticky */
4890 	ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT));
4891 
4892 	if (!(ignore & IPPF_UNICAST_HOPS) &&
4893 	    (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) {
4894 		option_exists |= IPPF_UNICAST_HOPS;
4895 		is_sticky |= IPPF_UNICAST_HOPS;
4896 	}
4897 
4898 	if (!(ignore & IPPF_MULTICAST_HOPS) &&
4899 	    (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) {
4900 		option_exists |= IPPF_MULTICAST_HOPS;
4901 		is_sticky |= IPPF_MULTICAST_HOPS;
4902 	}
4903 
4904 	if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) {
4905 		/* This is a sticky socket option only */
4906 		option_exists |= IPPF_NO_CKSUM;
4907 		is_sticky |= IPPF_NO_CKSUM;
4908 	}
4909 
4910 	if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) {
4911 		/* This is a sticky socket option only */
4912 		option_exists |= IPPF_RAW_CKSUM;
4913 		is_sticky |= IPPF_RAW_CKSUM;
4914 	}
4915 
4916 	if (!(ignore & IPPF_TCLASS)) {
4917 		if (ipp->ipp_fields & IPPF_TCLASS) {
4918 			option_exists |= IPPF_TCLASS;
4919 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) {
4920 			option_exists |= IPPF_TCLASS;
4921 			is_sticky |= IPPF_TCLASS;
4922 		}
4923 	}
4924 
4925 no_options:
4926 
4927 	/*
4928 	 * If any options carried in the ip6i_t were specified, we
4929 	 * need to account for the ip6i_t in the data we'll be sending
4930 	 * down.
4931 	 */
4932 	if (option_exists & IPPF_HAS_IP6I)
4933 		ip_hdr_len += sizeof (ip6i_t);
4934 
4935 	/* check/fix buffer config, setup pointers into it */
4936 	mp1 = mp->b_cont;
4937 	ip6h = (ip6_t *)&mp1->b_rptr[-ip_hdr_len];
4938 	if ((mp1->b_datap->db_ref != 1) ||
4939 	    ((unsigned char *)ip6h < mp1->b_datap->db_base) ||
4940 	    !OK_32PTR(ip6h)) {
4941 		/* Try to get everything in a single mblk next time */
4942 		if (ip_hdr_len > icmp->icmp_max_hdr_len) {
4943 			icmp->icmp_max_hdr_len = ip_hdr_len;
4944 			(void) mi_set_sth_wroff(RD(q),
4945 			    icmp->icmp_max_hdr_len + is->is_wroff_extra);
4946 		}
4947 		mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO);
4948 		if (!mp1) {
4949 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4950 			icmp_ud_err(q, mp, ENOMEM);
4951 			return;
4952 		}
4953 		mp1->b_cont = mp->b_cont;
4954 		mp1->b_wptr = mp1->b_datap->db_lim;
4955 		ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len);
4956 	}
4957 	mp1->b_rptr = (unsigned char *)ip6h;
4958 	ip6i = (ip6i_t *)ip6h;
4959 
4960 #define	ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &icmp->icmp_sticky_ipp : ipp)
4961 	if (option_exists & IPPF_HAS_IP6I) {
4962 		ip6h = (ip6_t *)&ip6i[1];
4963 		ip6i->ip6i_flags = 0;
4964 		ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
4965 
4966 		/* sin6_scope_id takes precendence over IPPF_IFINDEX */
4967 		if (option_exists & IPPF_SCOPE_ID) {
4968 			ip6i->ip6i_flags |= IP6I_IFINDEX;
4969 			ip6i->ip6i_ifindex = sin6->sin6_scope_id;
4970 		} else if (option_exists & IPPF_IFINDEX) {
4971 			tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX);
4972 			ASSERT(tipp->ipp_ifindex != 0);
4973 			ip6i->ip6i_flags |= IP6I_IFINDEX;
4974 			ip6i->ip6i_ifindex = tipp->ipp_ifindex;
4975 		}
4976 
4977 		if (option_exists & IPPF_RAW_CKSUM) {
4978 			ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM;
4979 			ip6i->ip6i_checksum_off = icmp->icmp_checksum_off;
4980 		}
4981 
4982 		if (option_exists & IPPF_NO_CKSUM) {
4983 			ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM;
4984 		}
4985 
4986 		if (option_exists & IPPF_ADDR) {
4987 			/*
4988 			 * Enable per-packet source address verification if
4989 			 * IPV6_PKTINFO specified the source address.
4990 			 * ip6_src is set in the transport's _wput function.
4991 			 */
4992 			ip6i->ip6i_flags |= IP6I_VERIFY_SRC;
4993 		}
4994 
4995 		if (option_exists & IPPF_DONTFRAG) {
4996 			ip6i->ip6i_flags |= IP6I_DONTFRAG;
4997 		}
4998 
4999 		if (option_exists & IPPF_USE_MIN_MTU) {
5000 			ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU(
5001 			    ip6i->ip6i_flags, ipp->ipp_use_min_mtu);
5002 		}
5003 
5004 		if (option_exists & IPPF_NEXTHOP) {
5005 			tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP);
5006 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop));
5007 			ip6i->ip6i_flags |= IP6I_NEXTHOP;
5008 			ip6i->ip6i_nexthop = tipp->ipp_nexthop;
5009 		}
5010 
5011 		/*
5012 		 * tell IP this is an ip6i_t private header
5013 		 */
5014 		ip6i->ip6i_nxt = IPPROTO_RAW;
5015 	}
5016 
5017 	/* Initialize IPv6 header */
5018 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
5019 	bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src));
5020 
5021 	/* Set the hoplimit of the outgoing packet. */
5022 	if (option_exists & IPPF_HOPLIMIT) {
5023 		/* IPV6_HOPLIMIT ancillary data overrides all other settings. */
5024 		ip6h->ip6_hops = ipp->ipp_hoplimit;
5025 		ip6i->ip6i_flags |= IP6I_HOPLIMIT;
5026 	} else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
5027 		ip6h->ip6_hops = icmp->icmp_multicast_ttl;
5028 		if (option_exists & IPPF_MULTICAST_HOPS)
5029 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
5030 	} else {
5031 		ip6h->ip6_hops = icmp->icmp_ttl;
5032 		if (option_exists & IPPF_UNICAST_HOPS)
5033 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
5034 	}
5035 
5036 	if (option_exists & IPPF_ADDR) {
5037 		tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR);
5038 		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr));
5039 		ip6h->ip6_src = tipp->ipp_addr;
5040 	} else {
5041 		/*
5042 		 * The source address was not set using IPV6_PKTINFO.
5043 		 * First look at the bound source.
5044 		 * If unspecified fallback to __sin6_src_id.
5045 		 */
5046 		ip6h->ip6_src = icmp->icmp_v6src;
5047 		if (sin6->__sin6_src_id != 0 &&
5048 		    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
5049 			ip_srcid_find_id(sin6->__sin6_src_id,
5050 			    &ip6h->ip6_src, icmp->icmp_zoneid,
5051 			    is->is_netstack);
5052 		}
5053 	}
5054 
5055 	nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
5056 	cp = (uint8_t *)&ip6h[1];
5057 
5058 	/*
5059 	 * Here's where we have to start stringing together
5060 	 * any extension headers in the right order:
5061 	 * Hop-by-hop, destination, routing, and final destination opts.
5062 	 */
5063 	if (option_exists & IPPF_HOPOPTS) {
5064 		/* Hop-by-hop options */
5065 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
5066 		tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS);
5067 
5068 		*nxthdr_ptr = IPPROTO_HOPOPTS;
5069 		nxthdr_ptr = &hbh->ip6h_nxt;
5070 
5071 		bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen);
5072 		cp += tipp->ipp_hopoptslen;
5073 	}
5074 	/*
5075 	 * En-route destination options
5076 	 * Only do them if there's a routing header as well
5077 	 */
5078 	if (option_exists & IPPF_RTDSTOPTS) {
5079 		ip6_dest_t *dst = (ip6_dest_t *)cp;
5080 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS);
5081 
5082 		*nxthdr_ptr = IPPROTO_DSTOPTS;
5083 		nxthdr_ptr = &dst->ip6d_nxt;
5084 
5085 		bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen);
5086 		cp += tipp->ipp_rtdstoptslen;
5087 	}
5088 	/*
5089 	 * Routing header next
5090 	 */
5091 	if (option_exists & IPPF_RTHDR) {
5092 		ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
5093 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR);
5094 
5095 		*nxthdr_ptr = IPPROTO_ROUTING;
5096 		nxthdr_ptr = &rt->ip6r_nxt;
5097 
5098 		bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen);
5099 		cp += tipp->ipp_rthdrlen;
5100 	}
5101 	/*
5102 	 * Do ultimate destination options
5103 	 */
5104 	if (option_exists & IPPF_DSTOPTS) {
5105 		ip6_dest_t *dest = (ip6_dest_t *)cp;
5106 		tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS);
5107 
5108 		*nxthdr_ptr = IPPROTO_DSTOPTS;
5109 		nxthdr_ptr = &dest->ip6d_nxt;
5110 
5111 		bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen);
5112 		cp += tipp->ipp_dstoptslen;
5113 	}
5114 
5115 	/*
5116 	 * Now set the last header pointer to the proto passed in
5117 	 */
5118 	ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len);
5119 	*nxthdr_ptr = icmp->icmp_proto;
5120 
5121 	/*
5122 	 * Copy in the destination address
5123 	 */
5124 	ip6h->ip6_dst = ip6_dst;
5125 
5126 	ip6h->ip6_vcf =
5127 	    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
5128 	    (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
5129 
5130 	if (option_exists & IPPF_TCLASS) {
5131 		tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS);
5132 		ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
5133 		    tipp->ipp_tclass);
5134 	}
5135 	if (option_exists & IPPF_RTHDR) {
5136 		ip6_rthdr_t	*rth;
5137 
5138 		/*
5139 		 * Perform any processing needed for source routing.
5140 		 * We know that all extension headers will be in the same mblk
5141 		 * as the IPv6 header.
5142 		 */
5143 		rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr);
5144 		if (rth != NULL && rth->ip6r_segleft != 0) {
5145 			if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) {
5146 				/*
5147 				 * Drop packet - only support Type 0 routing.
5148 				 * Notify the application as well.
5149 				 */
5150 				icmp_ud_err(q, mp, EPROTO);
5151 				BUMP_MIB(&is->is_rawip_mib,
5152 				    rawipOutErrors);
5153 				return;
5154 			}
5155 			/*
5156 			 * rth->ip6r_len is twice the number of
5157 			 * addresses in the header
5158 			 */
5159 			if (rth->ip6r_len & 0x1) {
5160 				icmp_ud_err(q, mp, EPROTO);
5161 				BUMP_MIB(&is->is_rawip_mib,
5162 				    rawipOutErrors);
5163 				return;
5164 			}
5165 			/*
5166 			 * Shuffle the routing header and ip6_dst
5167 			 * addresses, and get the checksum difference
5168 			 * between the first hop (in ip6_dst) and
5169 			 * the destination (in the last routing hdr entry).
5170 			 */
5171 			csum = ip_massage_options_v6(ip6h, rth,
5172 			    is->is_netstack);
5173 			/*
5174 			 * Verify that the first hop isn't a mapped address.
5175 			 * Routers along the path need to do this verification
5176 			 * for subsequent hops.
5177 			 */
5178 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
5179 				icmp_ud_err(q, mp, EADDRNOTAVAIL);
5180 				BUMP_MIB(&is->is_rawip_mib,
5181 				    rawipOutErrors);
5182 				return;
5183 			}
5184 		}
5185 	}
5186 
5187 	ip_len = mp1->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN;
5188 	if (mp1->b_cont != NULL)
5189 		ip_len += msgdsize(mp1->b_cont);
5190 
5191 	/*
5192 	 * Set the length into the IP header.
5193 	 * If the length is greater than the maximum allowed by IP,
5194 	 * then free the message and return. Do not try and send it
5195 	 * as this can cause problems in layers below.
5196 	 */
5197 	if (ip_len > IP_MAXPACKET) {
5198 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
5199 		icmp_ud_err(q, mp, EMSGSIZE);
5200 		return;
5201 	}
5202 	if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) {
5203 		uint_t	cksum_off;	/* From ip6i == mp1->b_rptr */
5204 		uint16_t *cksum_ptr;
5205 		uint_t	ext_hdrs_len;
5206 
5207 		/* ICMPv6 must have an offset matching icmp6_cksum offset */
5208 		ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 ||
5209 		    icmp->icmp_checksum_off == 2);
5210 
5211 		/*
5212 		 * We make it easy for IP to include our pseudo header
5213 		 * by putting our length in uh_checksum, modified (if
5214 		 * we have a routing header) by the checksum difference
5215 		 * between the ultimate destination and first hop addresses.
5216 		 * Note: ICMPv6 must always checksum the packet.
5217 		 */
5218 		cksum_off = ip_hdr_len + icmp->icmp_checksum_off;
5219 		if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) {
5220 			if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) {
5221 				BUMP_MIB(&is->is_rawip_mib,
5222 				    rawipOutErrors);
5223 				freemsg(mp);
5224 				return;
5225 			}
5226 			ip6i = (ip6i_t *)mp1->b_rptr;
5227 			if (ip6i->ip6i_nxt == IPPROTO_RAW)
5228 				ip6h = (ip6_t *)&ip6i[1];
5229 			else
5230 				ip6h = (ip6_t *)ip6i;
5231 		}
5232 		/* Add payload length to checksum */
5233 		ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN -
5234 		    (int)((uchar_t *)ip6h - (uchar_t *)ip6i);
5235 		csum += htons(ip_len - ext_hdrs_len);
5236 
5237 		cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off);
5238 		csum = (csum & 0xFFFF) + (csum >> 16);
5239 		*cksum_ptr = (uint16_t)csum;
5240 	}
5241 
5242 #ifdef _LITTLE_ENDIAN
5243 	ip_len = htons(ip_len);
5244 #endif
5245 	ip6h->ip6_plen = (uint16_t)ip_len;
5246 
5247 	freeb(mp);
5248 
5249 	/* We're done. Pass the packet to IP */
5250 	BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams);
5251 	ip_output_v6(icmp->icmp_connp, mp1, q, IP_WPUT);
5252 }
5253 
5254 static void
5255 icmp_wput_other(queue_t *q, mblk_t *mp)
5256 {
5257 	uchar_t	*rptr = mp->b_rptr;
5258 	struct iocblk *iocp;
5259 #define	tudr ((struct T_unitdata_req *)rptr)
5260 	conn_t	*connp = Q_TO_CONN(q);
5261 	icmp_t	*icmp = connp->conn_icmp;
5262 	icmp_stack_t *is = icmp->icmp_is;
5263 	cred_t *cr;
5264 
5265 	cr = DB_CREDDEF(mp, connp->conn_cred);
5266 
5267 	switch (mp->b_datap->db_type) {
5268 	case M_PROTO:
5269 	case M_PCPROTO:
5270 		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
5271 			/*
5272 			 * If the message does not contain a PRIM_type,
5273 			 * throw it away.
5274 			 */
5275 			freemsg(mp);
5276 			return;
5277 		}
5278 		switch (((union T_primitives *)rptr)->type) {
5279 		case T_ADDR_REQ:
5280 			icmp_addr_req(q, mp);
5281 			return;
5282 		case O_T_BIND_REQ:
5283 		case T_BIND_REQ:
5284 			icmp_bind(q, mp);
5285 			return;
5286 		case T_CONN_REQ:
5287 			icmp_connect(q, mp);
5288 			return;
5289 		case T_CAPABILITY_REQ:
5290 			icmp_capability_req(q, mp);
5291 			return;
5292 		case T_INFO_REQ:
5293 			icmp_info_req(q, mp);
5294 			return;
5295 		case T_UNITDATA_REQ:
5296 			/*
5297 			 * If a T_UNITDATA_REQ gets here, the address must
5298 			 * be bad.  Valid T_UNITDATA_REQs are found above
5299 			 * and break to below this switch.
5300 			 */
5301 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
5302 			return;
5303 		case T_UNBIND_REQ:
5304 			icmp_unbind(q, mp);
5305 			return;
5306 
5307 		case T_SVR4_OPTMGMT_REQ:
5308 			if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get,
5309 			    cr)) {
5310 				/* Only IP can return anything meaningful */
5311 				(void) svr4_optcom_req(q, mp, cr,
5312 				    &icmp_opt_obj, B_TRUE);
5313 			}
5314 			return;
5315 
5316 		case T_OPTMGMT_REQ:
5317 			/* Only IP can return anything meaningful */
5318 			(void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE);
5319 			return;
5320 
5321 		case T_DISCON_REQ:
5322 			icmp_disconnect(q, mp);
5323 			return;
5324 
5325 		/* The following TPI message is not supported by icmp. */
5326 		case O_T_CONN_RES:
5327 		case T_CONN_RES:
5328 			icmp_err_ack(q, mp, TNOTSUPPORT, 0);
5329 			return;
5330 
5331 		/* The following 3 TPI requests are illegal for icmp. */
5332 		case T_DATA_REQ:
5333 		case T_EXDATA_REQ:
5334 		case T_ORDREL_REQ:
5335 			freemsg(mp);
5336 			(void) putctl1(RD(q), M_ERROR, EPROTO);
5337 			return;
5338 		default:
5339 			break;
5340 		}
5341 		break;
5342 	case M_IOCTL:
5343 		iocp = (struct iocblk *)mp->b_rptr;
5344 		switch (iocp->ioc_cmd) {
5345 		case TI_GETPEERNAME:
5346 			if (icmp->icmp_state != TS_DATA_XFER) {
5347 				/*
5348 				 * If a default destination address has not
5349 				 * been associated with the stream, then we
5350 				 * don't know the peer's name.
5351 				 */
5352 				iocp->ioc_error = ENOTCONN;
5353 		err_ret:;
5354 				iocp->ioc_count = 0;
5355 				mp->b_datap->db_type = M_IOCACK;
5356 				qreply(q, mp);
5357 				return;
5358 			}
5359 			/* FALLTHRU */
5360 		case TI_GETMYNAME:
5361 			/*
5362 			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
5363 			 * need to copyin the user's strbuf structure.
5364 			 * Processing will continue in the M_IOCDATA case
5365 			 * below.
5366 			 */
5367 			mi_copyin(q, mp, NULL,
5368 			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
5369 			return;
5370 		case ND_SET:
5371 			/* nd_getset performs the necessary error checking */
5372 		case ND_GET:
5373 			if (nd_getset(q, is->is_nd, mp)) {
5374 				qreply(q, mp);
5375 				return;
5376 			}
5377 			break;
5378 		default:
5379 			break;
5380 		}
5381 		break;
5382 	case M_IOCDATA:
5383 		icmp_wput_iocdata(q, mp);
5384 		return;
5385 	default:
5386 		break;
5387 	}
5388 	ip_wput(q, mp);
5389 }
5390 
5391 /*
5392  * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA
5393  * messages.
5394  */
5395 static void
5396 icmp_wput_iocdata(queue_t *q, mblk_t *mp)
5397 {
5398 	mblk_t	*mp1;
5399 	STRUCT_HANDLE(strbuf, sb);
5400 	icmp_t	*icmp;
5401 	in6_addr_t	v6addr;
5402 	ipaddr_t	v4addr;
5403 	uint32_t	flowinfo = 0;
5404 	int		addrlen;
5405 
5406 	/* Make sure it is one of ours. */
5407 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
5408 	case TI_GETMYNAME:
5409 	case TI_GETPEERNAME:
5410 		break;
5411 	default:
5412 		icmp = Q_TO_ICMP(q);
5413 		ip_output(icmp->icmp_connp, mp, q, IP_WPUT);
5414 		return;
5415 	}
5416 	switch (mi_copy_state(q, mp, &mp1)) {
5417 	case -1:
5418 		return;
5419 	case MI_COPY_CASE(MI_COPY_IN, 1):
5420 		break;
5421 	case MI_COPY_CASE(MI_COPY_OUT, 1):
5422 		/*
5423 		 * The address has been copied out, so now
5424 		 * copyout the strbuf.
5425 		 */
5426 		mi_copyout(q, mp);
5427 		return;
5428 	case MI_COPY_CASE(MI_COPY_OUT, 2):
5429 		/*
5430 		 * The address and strbuf have been copied out.
5431 		 * We're done, so just acknowledge the original
5432 		 * M_IOCTL.
5433 		 */
5434 		mi_copy_done(q, mp, 0);
5435 		return;
5436 	default:
5437 		/*
5438 		 * Something strange has happened, so acknowledge
5439 		 * the original M_IOCTL with an EPROTO error.
5440 		 */
5441 		mi_copy_done(q, mp, EPROTO);
5442 		return;
5443 	}
5444 	/*
5445 	 * Now we have the strbuf structure for TI_GETMYNAME
5446 	 * and TI_GETPEERNAME.  Next we copyout the requested
5447 	 * address and then we'll copyout the strbuf.
5448 	 */
5449 	STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag,
5450 	    (void *)mp1->b_rptr);
5451 	icmp = Q_TO_ICMP(q);
5452 	if (icmp->icmp_family == AF_INET)
5453 		addrlen = sizeof (sin_t);
5454 	else
5455 		addrlen = sizeof (sin6_t);
5456 
5457 	if (STRUCT_FGET(sb, maxlen) < addrlen) {
5458 		mi_copy_done(q, mp, EINVAL);
5459 		return;
5460 	}
5461 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
5462 	case TI_GETMYNAME:
5463 		if (icmp->icmp_family == AF_INET) {
5464 			ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
5465 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) &&
5466 			    !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
5467 				v4addr = V4_PART_OF_V6(icmp->icmp_v6src);
5468 			} else {
5469 				/*
5470 				 * INADDR_ANY
5471 				 * icmp_v6src is not set, we might be bound to
5472 				 * broadcast/multicast. Use icmp_bound_v6src as
5473 				 * local address instead (that could
5474 				 * also still be INADDR_ANY)
5475 				 */
5476 				v4addr = V4_PART_OF_V6(icmp->icmp_bound_v6src);
5477 			}
5478 		} else {
5479 			/* icmp->icmp_family == AF_INET6 */
5480 			if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
5481 				v6addr = icmp->icmp_v6src;
5482 			} else {
5483 				/*
5484 				 * UNSPECIFIED
5485 				 * icmp_v6src is not set, we might be bound to
5486 				 * broadcast/multicast. Use icmp_bound_v6src as
5487 				 * local address instead (that could
5488 				 * also still be UNSPECIFIED)
5489 				 */
5490 				v6addr = icmp->icmp_bound_v6src;
5491 			}
5492 		}
5493 		break;
5494 	case TI_GETPEERNAME:
5495 		if (icmp->icmp_family == AF_INET) {
5496 			ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
5497 			v4addr = V4_PART_OF_V6(icmp->icmp_v6dst);
5498 		} else {
5499 			/* icmp->icmp_family == AF_INET6) */
5500 			v6addr = icmp->icmp_v6dst;
5501 			flowinfo = icmp->icmp_flowinfo;
5502 		}
5503 		break;
5504 	default:
5505 		mi_copy_done(q, mp, EPROTO);
5506 		return;
5507 	}
5508 	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
5509 	if (!mp1)
5510 		return;
5511 
5512 	if (icmp->icmp_family == AF_INET) {
5513 		sin_t *sin;
5514 
5515 		STRUCT_FSET(sb, len, (int)sizeof (sin_t));
5516 		sin = (sin_t *)mp1->b_rptr;
5517 		mp1->b_wptr = (uchar_t *)&sin[1];
5518 		*sin = sin_null;
5519 		sin->sin_family = AF_INET;
5520 		sin->sin_addr.s_addr = v4addr;
5521 	} else {
5522 		/* icmp->icmp_family == AF_INET6 */
5523 		sin6_t *sin6;
5524 
5525 		ASSERT(icmp->icmp_family == AF_INET6);
5526 		STRUCT_FSET(sb, len, (int)sizeof (sin6_t));
5527 		sin6 = (sin6_t *)mp1->b_rptr;
5528 		mp1->b_wptr = (uchar_t *)&sin6[1];
5529 		*sin6 = sin6_null;
5530 		sin6->sin6_family = AF_INET6;
5531 		sin6->sin6_flowinfo = flowinfo;
5532 		sin6->sin6_addr = v6addr;
5533 	}
5534 	/* Copy out the address */
5535 	mi_copyout(q, mp);
5536 }
5537 
5538 static int
5539 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
5540     void *thisdg_attrs)
5541 {
5542 	conn_t	*connp = Q_TO_CONN(q);
5543 	struct T_unitdata_req *udreqp;
5544 	int is_absreq_failure;
5545 	cred_t *cr;
5546 
5547 	udreqp = (struct T_unitdata_req *)mp->b_rptr;
5548 	*errorp = 0;
5549 
5550 	cr = DB_CREDDEF(mp, connp->conn_cred);
5551 
5552 	*errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length,
5553 	    udreqp->OPT_offset, cr, &icmp_opt_obj,
5554 	    thisdg_attrs, &is_absreq_failure);
5555 
5556 	if (*errorp != 0) {
5557 		/*
5558 		 * Note: No special action needed in this
5559 		 * module for "is_absreq_failure"
5560 		 */
5561 		return (-1);		/* failure */
5562 	}
5563 	ASSERT(is_absreq_failure == 0);
5564 	return (0);	/* success */
5565 }
5566 
5567 void
5568 icmp_ddi_init(void)
5569 {
5570 	icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr,
5571 	    icmp_opt_obj.odb_opt_arr_cnt);
5572 
5573 	/*
5574 	 * We want to be informed each time a stack is created or
5575 	 * destroyed in the kernel, so we can maintain the
5576 	 * set of icmp_stack_t's.
5577 	 */
5578 	netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini);
5579 }
5580 
5581 void
5582 icmp_ddi_destroy(void)
5583 {
5584 	netstack_unregister(NS_ICMP);
5585 }
5586 
5587 /*
5588  * Initialize the ICMP stack instance.
5589  */
5590 static void *
5591 rawip_stack_init(netstackid_t stackid, netstack_t *ns)
5592 {
5593 	icmp_stack_t	*is;
5594 	icmpparam_t	*pa;
5595 
5596 	is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP);
5597 	is->is_netstack = ns;
5598 
5599 	pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP);
5600 	is->is_param_arr = pa;
5601 	bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr));
5602 
5603 	(void) icmp_param_register(&is->is_nd,
5604 	    is->is_param_arr, A_CNT(icmp_param_arr));
5605 	is->is_ksp = rawip_kstat_init(stackid);
5606 	return (is);
5607 }
5608 
5609 /*
5610  * Free the ICMP stack instance.
5611  */
5612 static void
5613 rawip_stack_fini(netstackid_t stackid, void *arg)
5614 {
5615 	icmp_stack_t *is = (icmp_stack_t *)arg;
5616 
5617 	nd_free(&is->is_nd);
5618 	kmem_free(is->is_param_arr, sizeof (icmp_param_arr));
5619 	is->is_param_arr = NULL;
5620 
5621 	rawip_kstat_fini(stackid, is->is_ksp);
5622 	is->is_ksp = NULL;
5623 	kmem_free(is, sizeof (*is));
5624 }
5625 
5626 static void *
5627 rawip_kstat_init(netstackid_t stackid) {
5628 	kstat_t	*ksp;
5629 
5630 	rawip_named_kstat_t template = {
5631 		{ "inDatagrams",	KSTAT_DATA_UINT32, 0 },
5632 		{ "inCksumErrs",	KSTAT_DATA_UINT32, 0 },
5633 		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
5634 		{ "outDatagrams",	KSTAT_DATA_UINT32, 0 },
5635 		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
5636 	};
5637 
5638 	ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2",
5639 					KSTAT_TYPE_NAMED,
5640 					NUM_OF_FIELDS(rawip_named_kstat_t),
5641 					0, stackid);
5642 	if (ksp == NULL || ksp->ks_data == NULL)
5643 		return (NULL);
5644 
5645 	bcopy(&template, ksp->ks_data, sizeof (template));
5646 	ksp->ks_update = rawip_kstat_update;
5647 	ksp->ks_private = (void *)(uintptr_t)stackid;
5648 
5649 	kstat_install(ksp);
5650 	return (ksp);
5651 }
5652 
5653 static void
5654 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp)
5655 {
5656 	if (ksp != NULL) {
5657 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
5658 		kstat_delete_netstack(ksp, stackid);
5659 	}
5660 }
5661 
5662 static int
5663 rawip_kstat_update(kstat_t *ksp, int rw)
5664 {
5665 	rawip_named_kstat_t *rawipkp;
5666 	netstackid_t	stackid = (netstackid_t)(uintptr_t)ksp->ks_private;
5667 	netstack_t	*ns;
5668 	icmp_stack_t	*is;
5669 
5670 	if ((ksp == NULL) || (ksp->ks_data == NULL))
5671 		return (EIO);
5672 
5673 	if (rw == KSTAT_WRITE)
5674 		return (EACCES);
5675 
5676 	rawipkp = (rawip_named_kstat_t *)ksp->ks_data;
5677 
5678 	ns = netstack_find_by_stackid(stackid);
5679 	if (ns == NULL)
5680 		return (-1);
5681 	is = ns->netstack_icmp;
5682 	if (is == NULL) {
5683 		netstack_rele(ns);
5684 		return (-1);
5685 	}
5686 	rawipkp->inDatagrams.value.ui32 =  is->is_rawip_mib.rawipInDatagrams;
5687 	rawipkp->inCksumErrs.value.ui32 =  is->is_rawip_mib.rawipInCksumErrs;
5688 	rawipkp->inErrors.value.ui32 =	   is->is_rawip_mib.rawipInErrors;
5689 	rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams;
5690 	rawipkp->outErrors.value.ui32 =	   is->is_rawip_mib.rawipOutErrors;
5691 	netstack_rele(ns);
5692 	return (0);
5693 }
5694