xref: /illumos-gate/usr/src/uts/common/inet/ip/icmp.c (revision 9b4e3ac25d882519cad3fc11f0c53b07f4e60536)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/stropts.h>
30 #include <sys/strlog.h>
31 #include <sys/strsun.h>
32 #define	_SUN_TPI_VERSION 2
33 #include <sys/tihdr.h>
34 #include <sys/timod.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/strsubr.h>
38 #include <sys/cmn_err.h>
39 #include <sys/debug.h>
40 #include <sys/kmem.h>
41 #include <sys/policy.h>
42 #include <sys/priv.h>
43 #include <sys/zone.h>
44 #include <sys/time.h>
45 
46 #include <sys/sockio.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/isa_defs.h>
50 #include <sys/suntpi.h>
51 #include <sys/xti_inet.h>
52 #include <sys/netstack.h>
53 
54 #include <net/route.h>
55 #include <net/if.h>
56 
57 #include <netinet/in.h>
58 #include <netinet/ip6.h>
59 #include <netinet/icmp6.h>
60 #include <inet/common.h>
61 #include <inet/ip.h>
62 #include <inet/ip6.h>
63 #include <inet/proto_set.h>
64 #include <inet/nd.h>
65 #include <inet/optcom.h>
66 #include <inet/snmpcom.h>
67 #include <inet/kstatcom.h>
68 #include <inet/rawip_impl.h>
69 
70 #include <netinet/ip_mroute.h>
71 #include <inet/tcp.h>
72 #include <net/pfkeyv2.h>
73 #include <inet/ipsec_info.h>
74 #include <inet/ipclassifier.h>
75 
76 #include <sys/tsol/label.h>
77 #include <sys/tsol/tnet.h>
78 
79 #include <inet/ip_ire.h>
80 #include <inet/ip_if.h>
81 
82 #include <inet/ip_impl.h>
83 #include <sys/disp.h>
84 
85 /*
86  * Synchronization notes:
87  *
88  * RAWIP is MT and uses the usual kernel synchronization primitives. There is
89  * one lock, icmp_rwlock. We also use conn_lock when updating things
90  * which affect the IP classifier lookup.
91  * The lock order is icmp_rwlock -> conn_lock.
92  *
93  * The icmp_rwlock:
94  * This protects most of the other fields in the icmp_t. The exact list of
95  * fields which are protected by each of the above locks is documented in
96  * the icmp_t structure definition.
97  *
98  * Plumbing notes:
99  * ICMP is always a device driver. For compatibility with mibopen() code
100  * it is possible to I_PUSH "icmp", but that results in pushing a passthrough
101  * dummy module.
102  */
103 
104 static void	icmp_addr_req(queue_t *q, mblk_t *mp);
105 static void	icmp_tpi_bind(queue_t *q, mblk_t *mp);
106 static int	icmp_bind_proto(conn_t *connp);
107 static int	icmp_build_hdrs(icmp_t *icmp);
108 static void	icmp_capability_req(queue_t *q, mblk_t *mp);
109 static int	icmp_close(queue_t *q, int flags);
110 static void	icmp_tpi_connect(queue_t *q, mblk_t *mp);
111 static void	icmp_tpi_disconnect(queue_t *q, mblk_t *mp);
112 static void	icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
113 		    int sys_error);
114 static void	icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
115 		    t_scalar_t t_error, int sys_error);
116 static void	icmp_icmp_error(conn_t *connp, mblk_t *mp);
117 static void	icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp);
118 static void	icmp_info_req(queue_t *q, mblk_t *mp);
119 static void	icmp_input(void *, mblk_t *, void *);
120 static conn_t 	*icmp_open(int family, cred_t *credp, int *err, int flags);
121 static int	icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
122 		    cred_t *credp);
123 static int	icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
124 		    cred_t *credp);
125 static int	icmp_unitdata_opt_process(queue_t *q, mblk_t *mp,
126 		    int *errorp, void *thisdg_attrs);
127 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
128 int		icmp_opt_set(conn_t *connp, uint_t optset_context,
129 		    int level, int name, uint_t inlen,
130 		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
131 		    void *thisdg_attrs, cred_t *cr);
132 int		icmp_opt_get(conn_t *connp, int level, int name,
133 		    uchar_t *ptr);
134 static int	icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
135 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt);
136 static int	icmp_param_set(queue_t *q, mblk_t *mp, char *value,
137 		    caddr_t cp, cred_t *cr);
138 static int	icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
139 		    uchar_t *ptr, int len);
140 static int	icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
141 		    cred_t *cr);
142 static void	icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
143 static void	icmp_tpi_unbind(queue_t *q, mblk_t *mp);
144 static void	icmp_wput(queue_t *q, mblk_t *mp);
145 static void	icmp_wput_fallback(queue_t *q, mblk_t *mp);
146 static int	raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp,
147 		    sin6_t *sin6, ip6_pkt_t *ipp);
148 static int	raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp,
149 		    ipaddr_t v4dst, ip4_pkt_t *pktinfop);
150 static void	icmp_wput_other(queue_t *q, mblk_t *mp);
151 static void	icmp_wput_iocdata(queue_t *q, mblk_t *mp);
152 static void	icmp_wput_restricted(queue_t *q, mblk_t *mp);
153 
154 static void	*rawip_stack_init(netstackid_t stackid, netstack_t *ns);
155 static void	rawip_stack_fini(netstackid_t stackid, void *arg);
156 
157 static void	*rawip_kstat_init(netstackid_t stackid);
158 static void	rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp);
159 static int	rawip_kstat_update(kstat_t *kp, int rw);
160 static void	rawip_stack_shutdown(netstackid_t stackid, void *arg);
161 static int	rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa,
162 		    uint_t *salenp);
163 static int	rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa,
164 		    uint_t *salenp);
165 
166 int		rawip_getsockname(sock_lower_handle_t, struct sockaddr *,
167 		    socklen_t *, cred_t *);
168 int		rawip_getpeername(sock_lower_handle_t, struct sockaddr *,
169 		    socklen_t *, cred_t *);
170 
171 static struct module_info icmp_mod_info =  {
172 	5707, "icmp", 1, INFPSZ, 512, 128
173 };
174 
175 /*
176  * Entry points for ICMP as a device.
177  * We have separate open functions for the /dev/icmp and /dev/icmp6 devices.
178  */
179 static struct qinit icmprinitv4 = {
180 	NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info
181 };
182 
183 static struct qinit icmprinitv6 = {
184 	NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info
185 };
186 
187 static struct qinit icmpwinit = {
188 	(pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &icmp_mod_info
189 };
190 
191 /* ICMP entry point during fallback */
192 static struct qinit icmp_fallback_sock_winit = {
193 	(pfi_t)icmp_wput_fallback, NULL, NULL, NULL, NULL, &icmp_mod_info
194 };
195 
196 /* For AF_INET aka /dev/icmp */
197 struct streamtab icmpinfov4 = {
198 	&icmprinitv4, &icmpwinit
199 };
200 
201 /* For AF_INET6 aka /dev/icmp6 */
202 struct streamtab icmpinfov6 = {
203 	&icmprinitv6, &icmpwinit
204 };
205 
206 static sin_t	sin_null;	/* Zero address for quick clears */
207 static sin6_t	sin6_null;	/* Zero address for quick clears */
208 
209 /* Default structure copied into T_INFO_ACK messages */
210 static struct T_info_ack icmp_g_t_info_ack = {
211 	T_INFO_ACK,
212 	IP_MAXPACKET,	 /* TSDU_size.  icmp allows maximum size messages. */
213 	T_INVALID,	/* ETSDU_size.  icmp does not support expedited data. */
214 	T_INVALID,	/* CDATA_size. icmp does not support connect data. */
215 	T_INVALID,	/* DDATA_size. icmp does not support disconnect data. */
216 	0,		/* ADDR_size - filled in later. */
217 	0,		/* OPT_size - not initialized here */
218 	IP_MAXPACKET,	/* TIDU_size.  icmp allows maximum size messages. */
219 	T_CLTS,		/* SERV_type.  icmp supports connection-less. */
220 	TS_UNBND,	/* CURRENT_state.  This is set from icmp_state. */
221 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
222 };
223 
224 /*
225  * Table of ND variables supported by icmp.  These are loaded into is_nd
226  * when the stack instance is created.
227  * All of these are alterable, within the min/max values given, at run time.
228  */
229 static icmpparam_t	icmp_param_arr[] = {
230 	/* min	max	value	name */
231 	{ 0,	128,	32,	"icmp_wroff_extra" },
232 	{ 1,	255,	255,	"icmp_ipv4_ttl" },
233 	{ 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS,	"icmp_ipv6_hoplimit"},
234 	{ 0,	1,	1,	"icmp_bsd_compat" },
235 	{ 4096,	65536,	8192,	"icmp_xmit_hiwat"},
236 	{ 0,	65536,	1024,	"icmp_xmit_lowat"},
237 	{ 4096,	65536,	8192,	"icmp_recv_hiwat"},
238 	{ 65536, 1024*1024*1024, 256*1024,	"icmp_max_buf"},
239 };
240 #define	is_wroff_extra			is_param_arr[0].icmp_param_value
241 #define	is_ipv4_ttl			is_param_arr[1].icmp_param_value
242 #define	is_ipv6_hoplimit		is_param_arr[2].icmp_param_value
243 #define	is_bsd_compat			is_param_arr[3].icmp_param_value
244 #define	is_xmit_hiwat			is_param_arr[4].icmp_param_value
245 #define	is_xmit_lowat			is_param_arr[5].icmp_param_value
246 #define	is_recv_hiwat			is_param_arr[6].icmp_param_value
247 #define	is_max_buf			is_param_arr[7].icmp_param_value
248 
249 static int rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len);
250 static int rawip_do_connect(conn_t *connp, const struct sockaddr *sa,
251     socklen_t len);
252 static void rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error);
253 
254 /*
255  * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
256  * passed to icmp_wput.
257  * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP
258  * protocol type placed in the message following the address. A T_BIND_ACK
259  * message is returned by ip_bind_v4/v6.
260  */
261 static void
262 icmp_tpi_bind(queue_t *q, mblk_t *mp)
263 {
264 	int	error;
265 	struct sockaddr *sa;
266 	struct T_bind_req *tbr;
267 	socklen_t	len;
268 	sin_t	*sin;
269 	sin6_t	*sin6;
270 	icmp_t		*icmp;
271 	conn_t	*connp = Q_TO_CONN(q);
272 	mblk_t *mp1;
273 
274 	icmp = connp->conn_icmp;
275 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
276 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
277 		    "icmp_bind: bad req, len %u",
278 		    (uint_t)(mp->b_wptr - mp->b_rptr));
279 		icmp_err_ack(q, mp, TPROTO, 0);
280 		return;
281 	}
282 
283 	if (icmp->icmp_state != TS_UNBND) {
284 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
285 		    "icmp_bind: bad state, %d", icmp->icmp_state);
286 		icmp_err_ack(q, mp, TOUTSTATE, 0);
287 		return;
288 	}
289 
290 	/*
291 	 * Reallocate the message to make sure we have enough room for an
292 	 * address and the protocol type.
293 	 */
294 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
295 	if (!mp1) {
296 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
297 		return;
298 	}
299 	mp = mp1;
300 
301 	/* Reset the message type in preparation for shipping it back. */
302 	DB_TYPE(mp) = M_PCPROTO;
303 	tbr = (struct T_bind_req *)mp->b_rptr;
304 	len = tbr->ADDR_length;
305 	switch (len) {
306 	case 0:	/* request for a generic port */
307 		tbr->ADDR_offset = sizeof (struct T_bind_req);
308 		if (icmp->icmp_family == AF_INET) {
309 			tbr->ADDR_length = sizeof (sin_t);
310 			sin = (sin_t *)&tbr[1];
311 			*sin = sin_null;
312 			sin->sin_family = AF_INET;
313 			mp->b_wptr = (uchar_t *)&sin[1];
314 			sa = (struct sockaddr *)sin;
315 			len = sizeof (sin_t);
316 		} else {
317 			ASSERT(icmp->icmp_family == AF_INET6);
318 			tbr->ADDR_length = sizeof (sin6_t);
319 			sin6 = (sin6_t *)&tbr[1];
320 			*sin6 = sin6_null;
321 			sin6->sin6_family = AF_INET6;
322 			mp->b_wptr = (uchar_t *)&sin6[1];
323 			sa = (struct sockaddr *)sin6;
324 			len = sizeof (sin6_t);
325 		}
326 		break;
327 
328 	case sizeof (sin_t):	/* Complete IPv4 address */
329 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
330 		    sizeof (sin_t));
331 		break;
332 
333 	case sizeof (sin6_t):	/* Complete IPv6 address */
334 		sa = (struct sockaddr *)mi_offset_param(mp,
335 		    tbr->ADDR_offset, sizeof (sin6_t));
336 		break;
337 
338 	default:
339 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
340 		    "icmp_bind: bad ADDR_length %d", tbr->ADDR_length);
341 		icmp_err_ack(q, mp, TBADADDR, 0);
342 		return;
343 	}
344 
345 	error = rawip_do_bind(connp, sa, len);
346 done:
347 	ASSERT(mp->b_cont == NULL);
348 	if (error != 0) {
349 		if (error > 0) {
350 			icmp_err_ack(q, mp, TSYSERR, error);
351 		} else {
352 			icmp_err_ack(q, mp, -error, 0);
353 		}
354 	} else {
355 		tbr->PRIM_type = T_BIND_ACK;
356 		qreply(q, mp);
357 	}
358 }
359 
360 static int
361 rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len)
362 {
363 	sin_t		*sin;
364 	sin6_t		*sin6;
365 	icmp_t		*icmp;
366 	int		error = 0;
367 	mblk_t		*ire_mp;
368 
369 
370 	icmp = connp->conn_icmp;
371 
372 	if (sa == NULL || !OK_32PTR((char *)sa)) {
373 		return (EINVAL);
374 	}
375 
376 	/*
377 	 * The state must be TS_UNBND. TPI mandates that users must send
378 	 * TPI primitives only 1 at a time and wait for the response before
379 	 * sending the next primitive.
380 	 */
381 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
382 	if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) {
383 		error = -TOUTSTATE;
384 		goto done;
385 	}
386 
387 	ASSERT(len != 0);
388 	switch (len) {
389 	case sizeof (sin_t):    /* Complete IPv4 address */
390 		sin = (sin_t *)sa;
391 		if (sin->sin_family != AF_INET ||
392 		    icmp->icmp_family != AF_INET) {
393 			/* TSYSERR, EAFNOSUPPORT */
394 			error = EAFNOSUPPORT;
395 			goto done;
396 		}
397 		break;
398 	case sizeof (sin6_t): /* Complete IPv6 address */
399 		sin6 = (sin6_t *)sa;
400 		if (sin6->sin6_family != AF_INET6 ||
401 		    icmp->icmp_family != AF_INET6) {
402 			/* TSYSERR, EAFNOSUPPORT */
403 			error = EAFNOSUPPORT;
404 			goto done;
405 		}
406 		/* No support for mapped addresses on raw sockets */
407 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
408 			/* TSYSERR, EADDRNOTAVAIL */
409 			error = EADDRNOTAVAIL;
410 			goto done;
411 		}
412 		break;
413 
414 	default:
415 		/* TBADADDR */
416 		error = EADDRNOTAVAIL;
417 		goto done;
418 	}
419 
420 	icmp->icmp_pending_op = T_BIND_REQ;
421 	icmp->icmp_state = TS_IDLE;
422 
423 	/*
424 	 * Copy the source address into our icmp structure.  This address
425 	 * may still be zero; if so, ip will fill in the correct address
426 	 * each time an outbound packet is passed to it.
427 	 * If we are binding to a broadcast or multicast address then
428 	 * rawip_post_ip_bind_connect will clear the source address.
429 	 */
430 
431 	if (icmp->icmp_family == AF_INET) {
432 		ASSERT(sin != NULL);
433 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
434 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr,
435 		    &icmp->icmp_v6src);
436 		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
437 		    icmp->icmp_ip_snd_options_len;
438 		icmp->icmp_bound_v6src = icmp->icmp_v6src;
439 	} else {
440 		int error;
441 
442 		ASSERT(sin6 != NULL);
443 		ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
444 		icmp->icmp_v6src = sin6->sin6_addr;
445 		icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len;
446 		icmp->icmp_bound_v6src = icmp->icmp_v6src;
447 
448 		/* Rebuild the header template */
449 		error = icmp_build_hdrs(icmp);
450 		if (error != 0) {
451 			icmp->icmp_pending_op = -1;
452 			/*
453 			 * TSYSERR
454 			 */
455 			goto done;
456 		}
457 	}
458 
459 	ire_mp = NULL;
460 	if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) {
461 		/*
462 		 * request an IRE if src not 0 (INADDR_ANY)
463 		 */
464 		ire_mp = allocb(sizeof (ire_t), BPRI_HI);
465 		if (ire_mp == NULL) {
466 			icmp->icmp_pending_op = -1;
467 			error = ENOMEM;
468 			goto done;
469 		}
470 		DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE;
471 	}
472 done:
473 	rw_exit(&icmp->icmp_rwlock);
474 	if (error != 0)
475 		return (error);
476 
477 	if (icmp->icmp_family == AF_INET6) {
478 		error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto,
479 		    &sin6->sin6_addr, sin6->sin6_port, B_TRUE);
480 	} else {
481 		error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto,
482 		    sin->sin_addr.s_addr, sin->sin_port, B_TRUE);
483 	}
484 	rawip_post_ip_bind_connect(icmp, ire_mp, error);
485 	return (error);
486 }
487 
488 static void
489 rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error)
490 {
491 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
492 	if (icmp->icmp_state == TS_UNBND) {
493 		/*
494 		 * not yet bound - bind sent by icmp_bind_proto.
495 		 */
496 		rw_exit(&icmp->icmp_rwlock);
497 		return;
498 	}
499 	ASSERT(icmp->icmp_pending_op != -1);
500 	icmp->icmp_pending_op = -1;
501 
502 	if (error != 0) {
503 		if (icmp->icmp_state == TS_DATA_XFER) {
504 			/* Connect failed */
505 			/* Revert back to the bound source */
506 			icmp->icmp_v6src = icmp->icmp_bound_v6src;
507 			icmp->icmp_state = TS_IDLE;
508 			if (icmp->icmp_family == AF_INET6)
509 				(void) icmp_build_hdrs(icmp);
510 		} else {
511 			V6_SET_ZERO(icmp->icmp_v6src);
512 			V6_SET_ZERO(icmp->icmp_bound_v6src);
513 			icmp->icmp_state = TS_UNBND;
514 			if (icmp->icmp_family == AF_INET6)
515 				(void) icmp_build_hdrs(icmp);
516 		}
517 	} else {
518 		if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) {
519 			ire_t *ire;
520 
521 			ire = (ire_t *)ire_mp->b_rptr;
522 			/*
523 			 * If a broadcast/multicast address was bound set
524 			 * the source address to 0.
525 			 * This ensures no datagrams with broadcast address
526 			 * as source address are emitted (which would violate
527 			 * RFC1122 - Hosts requirements)
528 			 * Note: we get IRE_BROADCAST for IPv6
529 			 * to "mark" a multicast local address.
530 			 */
531 
532 
533 			if (ire->ire_type == IRE_BROADCAST &&
534 			    icmp->icmp_state != TS_DATA_XFER) {
535 				/*
536 				 * This was just a local bind to a
537 				 * MC/broadcast addr
538 				 */
539 				V6_SET_ZERO(icmp->icmp_v6src);
540 				if (icmp->icmp_family == AF_INET6)
541 					(void) icmp_build_hdrs(icmp);
542 			}
543 		}
544 
545 	}
546 	rw_exit(&icmp->icmp_rwlock);
547 	if (ire_mp != NULL)
548 		freeb(ire_mp);
549 }
550 
551 /*
552  * Send message to IP to just bind to the protocol.
553  */
554 static int
555 icmp_bind_proto(conn_t *connp)
556 {
557 	icmp_t	*icmp;
558 	int	error;
559 
560 	icmp = connp->conn_icmp;
561 
562 	if (icmp->icmp_family == AF_INET6)
563 		error = ip_proto_bind_laddr_v6(connp, NULL, icmp->icmp_proto,
564 		    &sin6_null.sin6_addr, 0, B_TRUE);
565 	else
566 		error = ip_proto_bind_laddr_v4(connp, NULL, icmp->icmp_proto,
567 		    sin_null.sin_addr.s_addr, 0, B_TRUE);
568 
569 	rawip_post_ip_bind_connect(icmp, NULL, error);
570 	return (error);
571 }
572 
573 static void
574 icmp_tpi_connect(queue_t *q, mblk_t *mp)
575 {
576 	conn_t	*connp = Q_TO_CONN(q);
577 	struct T_conn_req	*tcr;
578 	icmp_t	*icmp;
579 	struct sockaddr *sa;
580 	socklen_t len;
581 	int error;
582 
583 	icmp = connp->conn_icmp;
584 	tcr = (struct T_conn_req *)mp->b_rptr;
585 	/* Sanity checks */
586 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
587 		icmp_err_ack(q, mp, TPROTO, 0);
588 		return;
589 	}
590 
591 	if (tcr->OPT_length != 0) {
592 		icmp_err_ack(q, mp, TBADOPT, 0);
593 		return;
594 	}
595 
596 	len = tcr->DEST_length;
597 
598 	switch (len) {
599 	default:
600 		icmp_err_ack(q, mp, TBADADDR, 0);
601 		return;
602 	case sizeof (sin_t):
603 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
604 		    sizeof (sin_t));
605 		break;
606 	case sizeof (sin6_t):
607 		sa = (struct sockaddr *)mi_offset_param(mp,
608 		    tcr->DEST_offset, sizeof (sin6_t));
609 		break;
610 	}
611 
612 	error = proto_verify_ip_addr(icmp->icmp_family, sa, len);
613 	if (error != 0) {
614 		icmp_err_ack(q, mp, TSYSERR, error);
615 		return;
616 	}
617 
618 	error = rawip_do_connect(connp, sa, len);
619 	if (error != 0) {
620 		if (error < 0) {
621 			icmp_err_ack(q, mp, -error, 0);
622 		} else {
623 			icmp_err_ack(q, mp, 0, error);
624 		}
625 	} else {
626 		mblk_t *mp1;
627 
628 		/*
629 		 * We have to send a connection confirmation to
630 		 * keep TLI happy.
631 		 */
632 		if (icmp->icmp_family == AF_INET) {
633 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
634 			    sizeof (sin_t), NULL, 0);
635 		} else {
636 			ASSERT(icmp->icmp_family == AF_INET6);
637 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
638 			    sizeof (sin6_t), NULL, 0);
639 		}
640 		if (mp1 == NULL) {
641 			rw_exit(&icmp->icmp_rwlock);
642 			icmp_err_ack(q, mp, TSYSERR, ENOMEM);
643 			return;
644 		}
645 
646 		/*
647 		 * Send ok_ack for T_CONN_REQ
648 		 */
649 		mp = mi_tpi_ok_ack_alloc(mp);
650 		if (mp == NULL) {
651 			/* Unable to reuse the T_CONN_REQ for the ack. */
652 			freemsg(mp1);
653 			icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
654 			return;
655 		}
656 		putnext(connp->conn_rq, mp);
657 		putnext(connp->conn_rq, mp1);
658 	}
659 }
660 
661 static int
662 rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len)
663 {
664 	icmp_t	*icmp;
665 	sin_t	*sin;
666 	sin6_t	*sin6;
667 	mblk_t  *ire_mp;
668 	int	error;
669 	ipaddr_t	v4dst;
670 	in6_addr_t	v6dst;
671 
672 	icmp = connp->conn_icmp;
673 
674 	if (sa == NULL || !OK_32PTR((char *)sa)) {
675 		return (EINVAL);
676 	}
677 
678 	ire_mp = allocb(sizeof (ire_t), BPRI_HI);
679 	if (ire_mp == NULL)
680 		return (ENOMEM);
681 	DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE;
682 
683 
684 	ASSERT(sa != NULL && len != 0);
685 
686 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
687 	if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) {
688 		rw_exit(&icmp->icmp_rwlock);
689 		freeb(ire_mp);
690 		return (-TOUTSTATE);
691 	}
692 
693 	switch (len) {
694 	case sizeof (sin_t):
695 		sin = (sin_t *)sa;
696 
697 		ASSERT(icmp->icmp_family == AF_INET);
698 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
699 
700 		v4dst = sin->sin_addr.s_addr;
701 		/*
702 		 * Interpret a zero destination to mean loopback.
703 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
704 		 * generate the T_CONN_CON.
705 		 */
706 		if (v4dst == INADDR_ANY) {
707 			v4dst = htonl(INADDR_LOOPBACK);
708 		}
709 
710 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
711 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
712 		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
713 		    icmp->icmp_ip_snd_options_len;
714 		icmp->icmp_v6dst.sin6_addr = v6dst;
715 		icmp->icmp_v6dst.sin6_family = AF_INET6;
716 		icmp->icmp_v6dst.sin6_flowinfo = 0;
717 		icmp->icmp_v6dst.sin6_port = 0;
718 
719 		/*
720 		 * If the destination address is multicast and
721 		 * an outgoing multicast interface has been set,
722 		 * use the address of that interface as our
723 		 * source address if no source address has been set.
724 		 */
725 		if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY &&
726 		    CLASSD(v4dst) &&
727 		    icmp->icmp_multicast_if_addr != INADDR_ANY) {
728 			IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr,
729 			    &icmp->icmp_v6src);
730 		}
731 		break;
732 	case sizeof (sin6_t):
733 		sin6 = (sin6_t *)sa;
734 
735 		/* No support for mapped addresses on raw sockets */
736 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
737 			rw_exit(&icmp->icmp_rwlock);
738 			freeb(ire_mp);
739 			return (EADDRNOTAVAIL);
740 		}
741 
742 		ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
743 		ASSERT(icmp->icmp_family == AF_INET6);
744 
745 		icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len;
746 
747 		icmp->icmp_v6dst = *sin6;
748 		icmp->icmp_v6dst.sin6_port = 0;
749 
750 		/*
751 		 * Interpret a zero destination to mean loopback.
752 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
753 		 * generate the T_CONN_CON.
754 		 */
755 		if (IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6dst.sin6_addr)) {
756 			icmp->icmp_v6dst.sin6_addr = ipv6_loopback;
757 		}
758 		/*
759 		 * If the destination address is multicast and
760 		 * an outgoing multicast interface has been set,
761 		 * then the ip bind logic will pick the correct source
762 		 * address (i.e. matching the outgoing multicast interface).
763 		 */
764 		break;
765 	}
766 
767 	icmp->icmp_pending_op = T_CONN_REQ;
768 
769 	if (icmp->icmp_state == TS_DATA_XFER) {
770 		/* Already connected - clear out state */
771 		icmp->icmp_v6src = icmp->icmp_bound_v6src;
772 		icmp->icmp_state = TS_IDLE;
773 	}
774 
775 	icmp->icmp_state = TS_DATA_XFER;
776 	rw_exit(&icmp->icmp_rwlock);
777 
778 	if (icmp->icmp_family == AF_INET6) {
779 		error = ip_proto_bind_connected_v6(connp, &ire_mp,
780 		    icmp->icmp_proto, &icmp->icmp_v6src, 0,
781 		    &icmp->icmp_v6dst.sin6_addr,
782 		    NULL, sin6->sin6_port, B_TRUE, B_TRUE);
783 	} else {
784 		error = ip_proto_bind_connected_v4(connp, &ire_mp,
785 		    icmp->icmp_proto, &V4_PART_OF_V6(icmp->icmp_v6src), 0,
786 		    V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr), sin->sin_port,
787 		    B_TRUE, B_TRUE);
788 	}
789 	rawip_post_ip_bind_connect(icmp, ire_mp, error);
790 	return (error);
791 }
792 
793 static void
794 icmp_close_free(conn_t *connp)
795 {
796 	icmp_t *icmp = connp->conn_icmp;
797 
798 	/* If there are any options associated with the stream, free them. */
799 	if (icmp->icmp_ip_snd_options != NULL) {
800 		mi_free((char *)icmp->icmp_ip_snd_options);
801 		icmp->icmp_ip_snd_options = NULL;
802 		icmp->icmp_ip_snd_options_len = 0;
803 	}
804 
805 	if (icmp->icmp_filter != NULL) {
806 		kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t));
807 		icmp->icmp_filter = NULL;
808 	}
809 
810 	/* Free memory associated with sticky options */
811 	if (icmp->icmp_sticky_hdrs_len != 0) {
812 		kmem_free(icmp->icmp_sticky_hdrs,
813 		    icmp->icmp_sticky_hdrs_len);
814 		icmp->icmp_sticky_hdrs = NULL;
815 		icmp->icmp_sticky_hdrs_len = 0;
816 	}
817 	ip6_pkt_free(&icmp->icmp_sticky_ipp);
818 
819 	/*
820 	 * Clear any fields which the kmem_cache constructor clears.
821 	 * Only icmp_connp needs to be preserved.
822 	 * TBD: We should make this more efficient to avoid clearing
823 	 * everything.
824 	 */
825 	ASSERT(icmp->icmp_connp == connp);
826 	bzero(icmp, sizeof (icmp_t));
827 	icmp->icmp_connp = connp;
828 }
829 
830 static int
831 rawip_do_close(conn_t *connp)
832 {
833 	ASSERT(connp != NULL && IPCL_IS_RAWIP(connp));
834 
835 	ip_quiesce_conn(connp);
836 
837 	if (!IPCL_IS_NONSTR(connp)) {
838 		qprocsoff(connp->conn_rq);
839 	}
840 
841 	ASSERT(connp->conn_icmp->icmp_fallback_queue_head == NULL &&
842 	    connp->conn_icmp->icmp_fallback_queue_tail == NULL);
843 	icmp_close_free(connp);
844 
845 	/*
846 	 * Now we are truly single threaded on this stream, and can
847 	 * delete the things hanging off the connp, and finally the connp.
848 	 * We removed this connp from the fanout list, it cannot be
849 	 * accessed thru the fanouts, and we already waited for the
850 	 * conn_ref to drop to 0. We are already in close, so
851 	 * there cannot be any other thread from the top. qprocsoff
852 	 * has completed, and service has completed or won't run in
853 	 * future.
854 	 */
855 	ASSERT(connp->conn_ref == 1);
856 
857 	if (!IPCL_IS_NONSTR(connp)) {
858 		inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
859 	} else {
860 		ip_close_helper_stream(connp);
861 	}
862 
863 	connp->conn_ref--;
864 	ipcl_conn_destroy(connp);
865 
866 	return (0);
867 }
868 
869 static int
870 icmp_close(queue_t *q, int flags)
871 {
872 	conn_t  *connp;
873 
874 	if (flags & SO_FALLBACK) {
875 		/*
876 		 * stream is being closed while in fallback
877 		 * simply free the resources that were allocated
878 		 */
879 		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
880 		qprocsoff(q);
881 		goto done;
882 	}
883 
884 	connp = Q_TO_CONN(q);
885 	(void) rawip_do_close(connp);
886 done:
887 	q->q_ptr = WR(q)->q_ptr = NULL;
888 	return (0);
889 }
890 
891 /*
892  * This routine handles each T_DISCON_REQ message passed to icmp
893  * as an indicating that ICMP is no longer connected. This results
894  * in sending a T_BIND_REQ to IP to restore the binding to just
895  * the local address.
896  *
897  * The disconnect completes in rawip_post_ip_bind_connect.
898  */
899 static int
900 icmp_do_disconnect(conn_t *connp)
901 {
902 	icmp_t	*icmp;
903 	mblk_t	*ire_mp;
904 	int error;
905 
906 	icmp = connp->conn_icmp;
907 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
908 	if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) {
909 		rw_exit(&icmp->icmp_rwlock);
910 		return (-TOUTSTATE);
911 	}
912 	icmp->icmp_pending_op = T_DISCON_REQ;
913 	icmp->icmp_v6src = icmp->icmp_bound_v6src;
914 	icmp->icmp_state = TS_IDLE;
915 
916 
917 	if (icmp->icmp_family == AF_INET6) {
918 		/* Rebuild the header template */
919 		error = icmp_build_hdrs(icmp);
920 		if (error != 0) {
921 			icmp->icmp_pending_op = -1;
922 			rw_exit(&icmp->icmp_rwlock);
923 			return (error);
924 		}
925 	}
926 
927 	rw_exit(&icmp->icmp_rwlock);
928 	ire_mp = allocb(sizeof (ire_t), BPRI_HI);
929 	if (ire_mp == NULL) {
930 		return (ENOMEM);
931 	}
932 
933 	if (icmp->icmp_family == AF_INET6) {
934 		error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto,
935 		    &icmp->icmp_bound_v6src, 0, B_TRUE);
936 	} else {
937 
938 		error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto,
939 		    V4_PART_OF_V6(icmp->icmp_bound_v6src), 0, B_TRUE);
940 	}
941 
942 	rawip_post_ip_bind_connect(icmp, ire_mp, error);
943 
944 	return (error);
945 }
946 
947 static void
948 icmp_tpi_disconnect(queue_t *q, mblk_t *mp)
949 {
950 	conn_t	*connp = Q_TO_CONN(q);
951 	int	error;
952 
953 	/*
954 	 * Allocate the largest primitive we need to send back
955 	 * T_error_ack is > than T_ok_ack
956 	 */
957 	mp = reallocb(mp, sizeof (struct T_error_ack), 1);
958 	if (mp == NULL) {
959 		/* Unable to reuse the T_DISCON_REQ for the ack. */
960 		icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
961 		return;
962 	}
963 
964 	error = icmp_do_disconnect(connp);
965 
966 	if (error != 0) {
967 		if (error > 0) {
968 			icmp_err_ack(q, mp, 0, error);
969 		} else {
970 			icmp_err_ack(q, mp, -error, 0);
971 		}
972 	} else {
973 		mp = mi_tpi_ok_ack_alloc(mp);
974 		ASSERT(mp != NULL);
975 		qreply(q, mp);
976 	}
977 
978 }
979 
980 static int
981 icmp_disconnect(conn_t *connp)
982 {
983 	int	error;
984 	icmp_t	*icmp = connp->conn_icmp;
985 
986 	icmp->icmp_dgram_errind = B_FALSE;
987 
988 	error = icmp_do_disconnect(connp);
989 
990 	if (error < 0)
991 		error = proto_tlitosyserr(-error);
992 	return (error);
993 }
994 
995 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
996 static void
997 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
998 {
999 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
1000 		qreply(q, mp);
1001 }
1002 
1003 /* Shorthand to generate and send TPI error acks to our client */
1004 static void
1005 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
1006     t_scalar_t t_error, int sys_error)
1007 {
1008 	struct T_error_ack	*teackp;
1009 
1010 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
1011 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
1012 		teackp = (struct T_error_ack *)mp->b_rptr;
1013 		teackp->ERROR_prim = primitive;
1014 		teackp->TLI_error = t_error;
1015 		teackp->UNIX_error = sys_error;
1016 		qreply(q, mp);
1017 	}
1018 }
1019 
1020 /*
1021  * icmp_icmp_error is called by icmp_input to process ICMP
1022  * messages passed up by IP.
1023  * Generates the appropriate permanent (non-transient) errors.
1024  * Assumes that IP has pulled up everything up to and including
1025  * the ICMP header.
1026  */
1027 static void
1028 icmp_icmp_error(conn_t *connp, mblk_t *mp)
1029 {
1030 	icmph_t *icmph;
1031 	ipha_t	*ipha;
1032 	int	iph_hdr_length;
1033 	sin_t	sin;
1034 	mblk_t	*mp1;
1035 	int	error = 0;
1036 	icmp_t	*icmp = connp->conn_icmp;
1037 
1038 	ipha = (ipha_t *)mp->b_rptr;
1039 
1040 	ASSERT(OK_32PTR(mp->b_rptr));
1041 
1042 	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
1043 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
1044 		icmp_icmp_error_ipv6(connp, mp);
1045 		return;
1046 	}
1047 
1048 	/*
1049 	 * icmp does not support v4 mapped addresses
1050 	 * so we can never be here for a V6 socket
1051 	 * i.e. icmp_family == AF_INET6
1052 	 */
1053 	ASSERT((IPH_HDR_VERSION(ipha) == IPV4_VERSION) &&
1054 	    (icmp->icmp_family == AF_INET));
1055 
1056 	ASSERT(icmp->icmp_family == AF_INET);
1057 
1058 	/* Skip past the outer IP and ICMP headers */
1059 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
1060 	icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]);
1061 	ipha = (ipha_t *)&icmph[1];
1062 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
1063 
1064 	switch (icmph->icmph_type) {
1065 	case ICMP_DEST_UNREACHABLE:
1066 		switch (icmph->icmph_code) {
1067 		case ICMP_FRAGMENTATION_NEEDED:
1068 			/*
1069 			 * IP has already adjusted the path MTU.
1070 			 */
1071 			break;
1072 		case ICMP_PORT_UNREACHABLE:
1073 		case ICMP_PROTOCOL_UNREACHABLE:
1074 			error = ECONNREFUSED;
1075 			break;
1076 		default:
1077 			/* Transient errors */
1078 			break;
1079 		}
1080 		break;
1081 	default:
1082 		/* Transient errors */
1083 		break;
1084 	}
1085 	if (error == 0) {
1086 		freemsg(mp);
1087 		return;
1088 	}
1089 
1090 	/*
1091 	 * Deliver T_UDERROR_IND when the application has asked for it.
1092 	 * The socket layer enables this automatically when connected.
1093 	 */
1094 	if (!icmp->icmp_dgram_errind) {
1095 		freemsg(mp);
1096 		return;
1097 	}
1098 
1099 	sin = sin_null;
1100 	sin.sin_family = AF_INET;
1101 	sin.sin_addr.s_addr = ipha->ipha_dst;
1102 	if (IPCL_IS_NONSTR(connp)) {
1103 		rw_enter(&icmp->icmp_rwlock, RW_WRITER);
1104 		if (icmp->icmp_state == TS_DATA_XFER) {
1105 			if (sin.sin_addr.s_addr ==
1106 			    V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr)) {
1107 				rw_exit(&icmp->icmp_rwlock);
1108 				(*connp->conn_upcalls->su_set_error)
1109 				    (connp->conn_upper_handle, error);
1110 				goto done;
1111 			}
1112 		} else {
1113 			icmp->icmp_delayed_error = error;
1114 			*((sin_t *)&icmp->icmp_delayed_addr) = sin;
1115 		}
1116 		rw_exit(&icmp->icmp_rwlock);
1117 	} else {
1118 
1119 		mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL,
1120 		    0, error);
1121 		if (mp1 != NULL)
1122 			putnext(connp->conn_rq, mp1);
1123 	}
1124 done:
1125 	freemsg(mp);
1126 }
1127 
1128 /*
1129  * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6
1130  * for IPv6 packets.
1131  * Send permanent (non-transient) errors upstream.
1132  * Assumes that IP has pulled up all the extension headers as well
1133  * as the ICMPv6 header.
1134  */
1135 static void
1136 icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
1137 {
1138 	icmp6_t		*icmp6;
1139 	ip6_t		*ip6h, *outer_ip6h;
1140 	uint16_t	iph_hdr_length;
1141 	uint8_t		*nexthdrp;
1142 	sin6_t		sin6;
1143 	mblk_t		*mp1;
1144 	int		error = 0;
1145 	icmp_t		*icmp = connp->conn_icmp;
1146 
1147 	outer_ip6h = (ip6_t *)mp->b_rptr;
1148 	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1149 		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1150 	else
1151 		iph_hdr_length = IPV6_HDR_LEN;
1152 
1153 	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
1154 	ip6h = (ip6_t *)&icmp6[1];
1155 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1156 		freemsg(mp);
1157 		return;
1158 	}
1159 
1160 	switch (icmp6->icmp6_type) {
1161 	case ICMP6_DST_UNREACH:
1162 		switch (icmp6->icmp6_code) {
1163 		case ICMP6_DST_UNREACH_NOPORT:
1164 			error = ECONNREFUSED;
1165 			break;
1166 		case ICMP6_DST_UNREACH_ADMIN:
1167 		case ICMP6_DST_UNREACH_NOROUTE:
1168 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
1169 		case ICMP6_DST_UNREACH_ADDR:
1170 			/* Transient errors */
1171 			break;
1172 		default:
1173 			break;
1174 		}
1175 		break;
1176 	case ICMP6_PACKET_TOO_BIG: {
1177 		struct T_unitdata_ind	*tudi;
1178 		struct T_opthdr		*toh;
1179 		size_t			udi_size;
1180 		mblk_t			*newmp;
1181 		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
1182 		    sizeof (struct ip6_mtuinfo);
1183 		sin6_t			*sin6;
1184 		struct ip6_mtuinfo	*mtuinfo;
1185 
1186 		/*
1187 		 * If the application has requested to receive path mtu
1188 		 * information, send up an empty message containing an
1189 		 * IPV6_PATHMTU ancillary data item.
1190 		 */
1191 		if (!icmp->icmp_ipv6_recvpathmtu)
1192 			break;
1193 
1194 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1195 		    opt_length;
1196 		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
1197 			BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors);
1198 			break;
1199 		}
1200 
1201 		/*
1202 		 * newmp->b_cont is left to NULL on purpose.  This is an
1203 		 * empty message containing only ancillary data.
1204 		 */
1205 		newmp->b_datap->db_type = M_PROTO;
1206 		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1207 		newmp->b_wptr = (uchar_t *)tudi + udi_size;
1208 		tudi->PRIM_type = T_UNITDATA_IND;
1209 		tudi->SRC_length = sizeof (sin6_t);
1210 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1211 		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1212 		tudi->OPT_length = opt_length;
1213 
1214 		sin6 = (sin6_t *)&tudi[1];
1215 		bzero(sin6, sizeof (sin6_t));
1216 		sin6->sin6_family = AF_INET6;
1217 		sin6->sin6_addr = icmp->icmp_v6dst.sin6_addr;
1218 
1219 		toh = (struct T_opthdr *)&sin6[1];
1220 		toh->level = IPPROTO_IPV6;
1221 		toh->name = IPV6_PATHMTU;
1222 		toh->len = opt_length;
1223 		toh->status = 0;
1224 
1225 		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1226 		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1227 		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1228 		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1229 		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1230 		/*
1231 		 * We've consumed everything we need from the original
1232 		 * message.  Free it, then send our empty message.
1233 		 */
1234 		freemsg(mp);
1235 		if (!IPCL_IS_NONSTR(connp)) {
1236 			putnext(connp->conn_rq, newmp);
1237 		} else {
1238 			(*connp->conn_upcalls->su_recv)
1239 			    (connp->conn_upper_handle, newmp, 0, 0, &error,
1240 			    NULL);
1241 			ASSERT(error == 0);
1242 		}
1243 		return;
1244 	}
1245 	case ICMP6_TIME_EXCEEDED:
1246 		/* Transient errors */
1247 		break;
1248 	case ICMP6_PARAM_PROB:
1249 		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1250 		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1251 		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1252 		    (uchar_t *)nexthdrp) {
1253 			error = ECONNREFUSED;
1254 			break;
1255 		}
1256 		break;
1257 	}
1258 	if (error == 0) {
1259 		freemsg(mp);
1260 		return;
1261 	}
1262 
1263 	/*
1264 	 * Deliver T_UDERROR_IND when the application has asked for it.
1265 	 * The socket layer enables this automatically when connected.
1266 	 */
1267 	if (!icmp->icmp_dgram_errind) {
1268 		freemsg(mp);
1269 		return;
1270 	}
1271 
1272 	sin6 = sin6_null;
1273 	sin6.sin6_family = AF_INET6;
1274 	sin6.sin6_addr = ip6h->ip6_dst;
1275 	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
1276 
1277 	if (IPCL_IS_NONSTR(connp)) {
1278 		rw_enter(&icmp->icmp_rwlock, RW_WRITER);
1279 		if (icmp->icmp_state == TS_DATA_XFER) {
1280 			if (IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1281 			    &icmp->icmp_v6dst.sin6_addr)) {
1282 				rw_exit(&icmp->icmp_rwlock);
1283 				(*connp->conn_upcalls->su_set_error)
1284 				    (connp->conn_upper_handle, error);
1285 				goto done;
1286 			}
1287 		} else {
1288 			icmp->icmp_delayed_error = error;
1289 			*((sin6_t *)&icmp->icmp_delayed_addr) = sin6;
1290 		}
1291 		rw_exit(&icmp->icmp_rwlock);
1292 	} else {
1293 
1294 		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1295 		    NULL, 0, error);
1296 		if (mp1 != NULL)
1297 			putnext(connp->conn_rq, mp1);
1298 	}
1299 done:
1300 	freemsg(mp);
1301 }
1302 
1303 /*
1304  * This routine responds to T_ADDR_REQ messages.  It is called by icmp_wput.
1305  * The local address is filled in if endpoint is bound. The remote address
1306  * is filled in if remote address has been precified ("connected endpoint")
1307  * (The concept of connected CLTS sockets is alien to published TPI
1308  *  but we support it anyway).
1309  */
1310 static void
1311 icmp_addr_req(queue_t *q, mblk_t *mp)
1312 {
1313 	icmp_t	*icmp = Q_TO_ICMP(q);
1314 	mblk_t	*ackmp;
1315 	struct T_addr_ack *taa;
1316 
1317 	/* Make it large enough for worst case */
1318 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1319 	    2 * sizeof (sin6_t), 1);
1320 	if (ackmp == NULL) {
1321 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
1322 		return;
1323 	}
1324 	taa = (struct T_addr_ack *)ackmp->b_rptr;
1325 
1326 	bzero(taa, sizeof (struct T_addr_ack));
1327 	ackmp->b_wptr = (uchar_t *)&taa[1];
1328 
1329 	taa->PRIM_type = T_ADDR_ACK;
1330 	ackmp->b_datap->db_type = M_PCPROTO;
1331 	rw_enter(&icmp->icmp_rwlock, RW_READER);
1332 	/*
1333 	 * Note: Following code assumes 32 bit alignment of basic
1334 	 * data structures like sin_t and struct T_addr_ack.
1335 	 */
1336 	if (icmp->icmp_state != TS_UNBND) {
1337 		/*
1338 		 * Fill in local address
1339 		 */
1340 		taa->LOCADDR_offset = sizeof (*taa);
1341 		if (icmp->icmp_family == AF_INET) {
1342 			sin_t	*sin;
1343 
1344 			taa->LOCADDR_length = sizeof (sin_t);
1345 			sin = (sin_t *)&taa[1];
1346 			/* Fill zeroes and then intialize non-zero fields */
1347 			*sin = sin_null;
1348 			sin->sin_family = AF_INET;
1349 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) &&
1350 			    !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
1351 				IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src,
1352 				    sin->sin_addr.s_addr);
1353 			} else {
1354 				/*
1355 				 * INADDR_ANY
1356 				 * icmp_v6src is not set, we might be bound to
1357 				 * broadcast/multicast. Use icmp_bound_v6src as
1358 				 * local address instead (that could
1359 				 * also still be INADDR_ANY)
1360 				 */
1361 				IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src,
1362 				    sin->sin_addr.s_addr);
1363 			}
1364 			ackmp->b_wptr = (uchar_t *)&sin[1];
1365 		} else {
1366 			sin6_t	*sin6;
1367 
1368 			ASSERT(icmp->icmp_family == AF_INET6);
1369 			taa->LOCADDR_length = sizeof (sin6_t);
1370 			sin6 = (sin6_t *)&taa[1];
1371 			/* Fill zeroes and then intialize non-zero fields */
1372 			*sin6 = sin6_null;
1373 			sin6->sin6_family = AF_INET6;
1374 			if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
1375 				sin6->sin6_addr = icmp->icmp_v6src;
1376 			} else {
1377 				/*
1378 				 * UNSPECIFIED
1379 				 * icmp_v6src is not set, we might be bound to
1380 				 * broadcast/multicast. Use icmp_bound_v6src as
1381 				 * local address instead (that could
1382 				 * also still be UNSPECIFIED)
1383 				 */
1384 				sin6->sin6_addr = icmp->icmp_bound_v6src;
1385 			}
1386 			ackmp->b_wptr = (uchar_t *)&sin6[1];
1387 		}
1388 	}
1389 	rw_exit(&icmp->icmp_rwlock);
1390 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1391 	qreply(q, ackmp);
1392 }
1393 
1394 static void
1395 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp)
1396 {
1397 	*tap = icmp_g_t_info_ack;
1398 
1399 	if (icmp->icmp_family == AF_INET6)
1400 		tap->ADDR_size = sizeof (sin6_t);
1401 	else
1402 		tap->ADDR_size = sizeof (sin_t);
1403 	tap->CURRENT_state = icmp->icmp_state;
1404 	tap->OPT_size = icmp_max_optsize;
1405 }
1406 
1407 static void
1408 icmp_do_capability_ack(icmp_t *icmp, struct T_capability_ack *tcap,
1409     t_uscalar_t cap_bits1)
1410 {
1411 	tcap->CAP_bits1 = 0;
1412 
1413 	if (cap_bits1 & TC1_INFO) {
1414 		icmp_copy_info(&tcap->INFO_ack, icmp);
1415 		tcap->CAP_bits1 |= TC1_INFO;
1416 	}
1417 }
1418 
1419 /*
1420  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
1421  * icmp_wput.  Much of the T_CAPABILITY_ACK information is copied from
1422  * icmp_g_t_info_ack.  The current state of the stream is copied from
1423  * icmp_state.
1424  */
1425 static void
1426 icmp_capability_req(queue_t *q, mblk_t *mp)
1427 {
1428 	icmp_t			*icmp = Q_TO_ICMP(q);
1429 	t_uscalar_t		cap_bits1;
1430 	struct T_capability_ack	*tcap;
1431 
1432 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1433 
1434 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1435 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
1436 	if (!mp)
1437 		return;
1438 
1439 	tcap = (struct T_capability_ack *)mp->b_rptr;
1440 
1441 	icmp_do_capability_ack(icmp, tcap, cap_bits1);
1442 
1443 	qreply(q, mp);
1444 }
1445 
1446 /*
1447  * This routine responds to T_INFO_REQ messages.  It is called by icmp_wput.
1448  * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack.
1449  * The current state of the stream is copied from icmp_state.
1450  */
1451 static void
1452 icmp_info_req(queue_t *q, mblk_t *mp)
1453 {
1454 	icmp_t	*icmp = Q_TO_ICMP(q);
1455 
1456 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1457 	    T_INFO_ACK);
1458 	if (!mp)
1459 		return;
1460 	icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp);
1461 	qreply(q, mp);
1462 }
1463 
1464 /* For /dev/icmp aka AF_INET open */
1465 static int
1466 icmp_tpi_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1467     int family)
1468 {
1469 	conn_t *connp;
1470 	dev_t	conn_dev;
1471 	icmp_stack_t *is;
1472 	int	error;
1473 
1474 	conn_dev = NULL;
1475 
1476 	/* If the stream is already open, return immediately. */
1477 	if (q->q_ptr != NULL)
1478 		return (0);
1479 
1480 	if (sflag == MODOPEN)
1481 		return (EINVAL);
1482 
1483 	/*
1484 	 * Since ICMP is not used so heavily, allocating from the small
1485 	 * arena should be sufficient.
1486 	 */
1487 	if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
1488 		return (EBUSY);
1489 	}
1490 
1491 	if (flag & SO_FALLBACK) {
1492 		/*
1493 		 * Non streams socket needs a stream to fallback to
1494 		 */
1495 		RD(q)->q_ptr = (void *)conn_dev;
1496 		WR(q)->q_qinfo = &icmp_fallback_sock_winit;
1497 		WR(q)->q_ptr = (void *)ip_minor_arena_sa;
1498 		qprocson(q);
1499 		return (0);
1500 	}
1501 
1502 	connp = icmp_open(family, credp, &error, KM_SLEEP);
1503 	if (connp == NULL) {
1504 		ASSERT(error != NULL);
1505 		inet_minor_free(ip_minor_arena_sa, connp->conn_dev);
1506 		return (error);
1507 	}
1508 
1509 	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1510 	connp->conn_dev = conn_dev;
1511 	connp->conn_minor_arena = ip_minor_arena_sa;
1512 
1513 	is = connp->conn_icmp->icmp_is;
1514 
1515 	/*
1516 	 * Initialize the icmp_t structure for this stream.
1517 	 */
1518 	q->q_ptr = connp;
1519 	WR(q)->q_ptr = connp;
1520 	connp->conn_rq = q;
1521 	connp->conn_wq = WR(q);
1522 
1523 	if (connp->conn_icmp->icmp_family == AF_INET6) {
1524 		/* Build initial header template for transmit */
1525 		rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER);
1526 		if ((error = icmp_build_hdrs(connp->conn_icmp)) != 0) {
1527 			rw_exit(&connp->conn_icmp->icmp_rwlock);
1528 			inet_minor_free(ip_minor_arena_sa, connp->conn_dev);
1529 			ipcl_conn_destroy(connp);
1530 			return (error);
1531 		}
1532 		rw_exit(&connp->conn_icmp->icmp_rwlock);
1533 	}
1534 
1535 
1536 	q->q_hiwat = is->is_recv_hiwat;
1537 	WR(q)->q_hiwat = is->is_xmit_hiwat;
1538 	WR(q)->q_lowat = is->is_xmit_lowat;
1539 
1540 	qprocson(q);
1541 
1542 	/* Set the Stream head write offset. */
1543 	(void) proto_set_tx_wroff(q, connp,
1544 	    connp->conn_icmp->icmp_max_hdr_len + is->is_wroff_extra);
1545 	(void) proto_set_rx_hiwat(connp->conn_rq, connp, q->q_hiwat);
1546 
1547 	mutex_enter(&connp->conn_lock);
1548 	connp->conn_state_flags &= ~CONN_INCIPIENT;
1549 	mutex_exit(&connp->conn_lock);
1550 
1551 	return (0);
1552 }
1553 
1554 /* For /dev/icmp4 aka AF_INET open */
1555 static int
1556 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1557 {
1558 	return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET));
1559 }
1560 
1561 /* For /dev/icmp6 aka AF_INET6 open */
1562 static int
1563 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1564 {
1565 	return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET6));
1566 }
1567 
1568 /*
1569  * This is the open routine for icmp.  It allocates a icmp_t structure for
1570  * the stream and, on the first open of the module, creates an ND table.
1571  */
1572 /* ARGSUSED */
1573 static conn_t *
1574 icmp_open(int family, cred_t *credp, int *err, int flags)
1575 {
1576 	icmp_t	*icmp;
1577 	conn_t *connp;
1578 	zoneid_t zoneid;
1579 	netstack_t *ns;
1580 	icmp_stack_t *is;
1581 	boolean_t isv6 = B_FALSE;
1582 
1583 	*err = secpolicy_net_icmpaccess(credp);
1584 	if (*err != 0)
1585 		return (NULL);
1586 
1587 	if (family == AF_INET6)
1588 		isv6 = B_TRUE;
1589 	ns = netstack_find_by_cred(credp);
1590 	ASSERT(ns != NULL);
1591 	is = ns->netstack_icmp;
1592 	ASSERT(is != NULL);
1593 
1594 	/*
1595 	 * For exclusive stacks we set the zoneid to zero
1596 	 * to make ICMP operate as if in the global zone.
1597 	 */
1598 	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
1599 		zoneid = GLOBAL_ZONEID;
1600 	else
1601 		zoneid = crgetzoneid(credp);
1602 
1603 	ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
1604 
1605 	connp = ipcl_conn_create(IPCL_RAWIPCONN, flags, ns);
1606 	icmp = connp->conn_icmp;
1607 	icmp->icmp_v6dst = sin6_null;
1608 
1609 	/*
1610 	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
1611 	 * done by netstack_find_by_cred()
1612 	 */
1613 	netstack_rele(ns);
1614 
1615 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
1616 	ASSERT(connp->conn_ulp == IPPROTO_ICMP);
1617 	ASSERT(connp->conn_icmp == icmp);
1618 	ASSERT(icmp->icmp_connp == connp);
1619 
1620 	/* Set the initial state of the stream and the privilege status. */
1621 	icmp->icmp_state = TS_UNBND;
1622 	if (isv6) {
1623 		icmp->icmp_ipversion = IPV6_VERSION;
1624 		icmp->icmp_family = AF_INET6;
1625 		connp->conn_ulp = IPPROTO_ICMPV6;
1626 		/* May be changed by a SO_PROTOTYPE socket option. */
1627 		icmp->icmp_proto = IPPROTO_ICMPV6;
1628 		icmp->icmp_checksum_off = 2;	/* Offset for icmp6_cksum */
1629 		icmp->icmp_max_hdr_len = IPV6_HDR_LEN;
1630 		icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit;
1631 		connp->conn_af_isv6 = B_TRUE;
1632 		connp->conn_flags |= IPCL_ISV6;
1633 	} else {
1634 		icmp->icmp_ipversion = IPV4_VERSION;
1635 		icmp->icmp_family = AF_INET;
1636 		/* May be changed by a SO_PROTOTYPE socket option. */
1637 		icmp->icmp_proto = IPPROTO_ICMP;
1638 		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH;
1639 		icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl;
1640 		connp->conn_af_isv6 = B_FALSE;
1641 		connp->conn_flags &= ~IPCL_ISV6;
1642 	}
1643 	icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1644 	icmp->icmp_pending_op = -1;
1645 	connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1646 	connp->conn_zoneid = zoneid;
1647 
1648 	/*
1649 	 * If the caller has the process-wide flag set, then default to MAC
1650 	 * exempt mode.  This allows read-down to unlabeled hosts.
1651 	 */
1652 	if (getpflags(NET_MAC_AWARE, credp) != 0)
1653 		connp->conn_mac_exempt = B_TRUE;
1654 
1655 	connp->conn_ulp_labeled = is_system_labeled();
1656 
1657 	icmp->icmp_is = is;
1658 
1659 	connp->conn_recv = icmp_input;
1660 	crhold(credp);
1661 	connp->conn_cred = credp;
1662 
1663 	rw_exit(&icmp->icmp_rwlock);
1664 
1665 	connp->conn_flow_cntrld = B_FALSE;
1666 	return (connp);
1667 }
1668 
1669 /*
1670  * Which ICMP options OK to set through T_UNITDATA_REQ...
1671  */
1672 /* ARGSUSED */
1673 static boolean_t
1674 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1675 {
1676 	return (B_TRUE);
1677 }
1678 
1679 /*
1680  * This routine gets default values of certain options whose default
1681  * values are maintained by protcol specific code
1682  */
1683 /* ARGSUSED */
1684 int
1685 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
1686 {
1687 	icmp_t *icmp = Q_TO_ICMP(q);
1688 	icmp_stack_t *is = icmp->icmp_is;
1689 	int *i1 = (int *)ptr;
1690 
1691 	switch (level) {
1692 	case IPPROTO_IP:
1693 		switch (name) {
1694 		case IP_MULTICAST_TTL:
1695 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1696 			return (sizeof (uchar_t));
1697 		case IP_MULTICAST_LOOP:
1698 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1699 			return (sizeof (uchar_t));
1700 		}
1701 		break;
1702 	case IPPROTO_IPV6:
1703 		switch (name) {
1704 		case IPV6_MULTICAST_HOPS:
1705 			*i1 = IP_DEFAULT_MULTICAST_TTL;
1706 			return (sizeof (int));
1707 		case IPV6_MULTICAST_LOOP:
1708 			*i1 = IP_DEFAULT_MULTICAST_LOOP;
1709 			return (sizeof (int));
1710 		case IPV6_UNICAST_HOPS:
1711 			*i1 = is->is_ipv6_hoplimit;
1712 			return (sizeof (int));
1713 		}
1714 		break;
1715 	case IPPROTO_ICMPV6:
1716 		switch (name) {
1717 		case ICMP6_FILTER:
1718 			/* Make it look like "pass all" */
1719 			ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
1720 			return (sizeof (icmp6_filter_t));
1721 		}
1722 		break;
1723 	}
1724 	return (-1);
1725 }
1726 
1727 /*
1728  * This routine retrieves the current status of socket options.
1729  * It returns the size of the option retrieved.
1730  */
1731 int
1732 icmp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
1733 {
1734 	icmp_t		*icmp = connp->conn_icmp;
1735 	icmp_stack_t	*is = icmp->icmp_is;
1736 	int		*i1 = (int *)ptr;
1737 	ip6_pkt_t	*ipp = &icmp->icmp_sticky_ipp;
1738 	int		ret = 0;
1739 
1740 	ASSERT(RW_READ_HELD(&icmp->icmp_rwlock));
1741 	switch (level) {
1742 	case SOL_SOCKET:
1743 		switch (name) {
1744 		case SO_DEBUG:
1745 			*i1 = icmp->icmp_debug;
1746 			break;
1747 		case SO_TYPE:
1748 			*i1 = SOCK_RAW;
1749 			break;
1750 		case SO_PROTOTYPE:
1751 			*i1 = icmp->icmp_proto;
1752 			break;
1753 		case SO_REUSEADDR:
1754 			*i1 = icmp->icmp_reuseaddr;
1755 			break;
1756 
1757 		/*
1758 		 * The following three items are available here,
1759 		 * but are only meaningful to IP.
1760 		 */
1761 		case SO_DONTROUTE:
1762 			*i1 = icmp->icmp_dontroute;
1763 			break;
1764 		case SO_USELOOPBACK:
1765 			*i1 = icmp->icmp_useloopback;
1766 			break;
1767 		case SO_BROADCAST:
1768 			*i1 = icmp->icmp_broadcast;
1769 			break;
1770 
1771 		case SO_SNDBUF:
1772 			ASSERT(icmp->icmp_xmit_hiwat <= INT_MAX);
1773 			*i1 = icmp->icmp_xmit_hiwat;
1774 			break;
1775 		case SO_RCVBUF:
1776 			ASSERT(icmp->icmp_recv_hiwat <= INT_MAX);
1777 			*i1 = icmp->icmp_recv_hiwat;
1778 			break;
1779 		case SO_DGRAM_ERRIND:
1780 			*i1 = icmp->icmp_dgram_errind;
1781 			break;
1782 		case SO_TIMESTAMP:
1783 			*i1 = icmp->icmp_timestamp;
1784 			break;
1785 		case SO_MAC_EXEMPT:
1786 			*i1 = connp->conn_mac_exempt;
1787 			break;
1788 		case SO_DOMAIN:
1789 			*i1 = icmp->icmp_family;
1790 			break;
1791 
1792 		/*
1793 		 * Following four not meaningful for icmp
1794 		 * Action is same as "default" to which we fallthrough
1795 		 * so we keep them in comments.
1796 		 * case SO_LINGER:
1797 		 * case SO_KEEPALIVE:
1798 		 * case SO_OOBINLINE:
1799 		 * case SO_ALLZONES:
1800 		 */
1801 		default:
1802 			ret = -1;
1803 			goto done;
1804 		}
1805 		break;
1806 	case IPPROTO_IP:
1807 		/*
1808 		 * Only allow IPv4 option processing on IPv4 sockets.
1809 		 */
1810 		if (icmp->icmp_family != AF_INET) {
1811 			ret = -1;
1812 			goto done;
1813 		}
1814 
1815 		switch (name) {
1816 		case IP_OPTIONS:
1817 		case T_IP_OPTIONS:
1818 			/* Options are passed up with each packet */
1819 			ret = 0;
1820 			goto done;
1821 		case IP_HDRINCL:
1822 			*i1 = (int)icmp->icmp_hdrincl;
1823 			break;
1824 		case IP_TOS:
1825 		case T_IP_TOS:
1826 			*i1 = (int)icmp->icmp_type_of_service;
1827 			break;
1828 		case IP_TTL:
1829 			*i1 = (int)icmp->icmp_ttl;
1830 			break;
1831 		case IP_MULTICAST_IF:
1832 			/* 0 address if not set */
1833 			*(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr;
1834 			ret = sizeof (ipaddr_t);
1835 			goto done;
1836 		case IP_MULTICAST_TTL:
1837 			*(uchar_t *)ptr = icmp->icmp_multicast_ttl;
1838 			ret = sizeof (uchar_t);
1839 			goto done;
1840 		case IP_MULTICAST_LOOP:
1841 			*ptr = connp->conn_multicast_loop;
1842 			ret = sizeof (uint8_t);
1843 			goto done;
1844 		case IP_BOUND_IF:
1845 			/* Zero if not set */
1846 			*i1 = icmp->icmp_bound_if;
1847 			break;	/* goto sizeof (int) option return */
1848 		case IP_UNSPEC_SRC:
1849 			*ptr = icmp->icmp_unspec_source;
1850 			break;	/* goto sizeof (int) option return */
1851 		case IP_RECVIF:
1852 			*ptr = icmp->icmp_recvif;
1853 			break;	/* goto sizeof (int) option return */
1854 		case IP_BROADCAST_TTL:
1855 			*(uchar_t *)ptr = connp->conn_broadcast_ttl;
1856 			return (sizeof (uchar_t));
1857 		case IP_RECVPKTINFO:
1858 			/*
1859 			 * This also handles IP_PKTINFO.
1860 			 * IP_PKTINFO and IP_RECVPKTINFO have the same value.
1861 			 * Differentiation is based on the size of the argument
1862 			 * passed in.
1863 			 * This option is handled in IP which will return an
1864 			 * error for IP_PKTINFO as it's not supported as a
1865 			 * sticky option.
1866 			 */
1867 			ret = -EINVAL;
1868 			goto done;
1869 		/*
1870 		 * Cannot "get" the value of following options
1871 		 * at this level. Action is same as "default" to
1872 		 * which we fallthrough so we keep them in comments.
1873 		 *
1874 		 * case IP_ADD_MEMBERSHIP:
1875 		 * case IP_DROP_MEMBERSHIP:
1876 		 * case IP_BLOCK_SOURCE:
1877 		 * case IP_UNBLOCK_SOURCE:
1878 		 * case IP_ADD_SOURCE_MEMBERSHIP:
1879 		 * case IP_DROP_SOURCE_MEMBERSHIP:
1880 		 * case MCAST_JOIN_GROUP:
1881 		 * case MCAST_LEAVE_GROUP:
1882 		 * case MCAST_BLOCK_SOURCE:
1883 		 * case MCAST_UNBLOCK_SOURCE:
1884 		 * case MCAST_JOIN_SOURCE_GROUP:
1885 		 * case MCAST_LEAVE_SOURCE_GROUP:
1886 		 * case MRT_INIT:
1887 		 * case MRT_DONE:
1888 		 * case MRT_ADD_VIF:
1889 		 * case MRT_DEL_VIF:
1890 		 * case MRT_ADD_MFC:
1891 		 * case MRT_DEL_MFC:
1892 		 * case MRT_VERSION:
1893 		 * case MRT_ASSERT:
1894 		 * case IP_SEC_OPT:
1895 		 * case IP_DONTFAILOVER_IF:
1896 		 * case IP_NEXTHOP:
1897 		 */
1898 		default:
1899 			ret = -1;
1900 			goto done;
1901 		}
1902 		break;
1903 	case IPPROTO_IPV6:
1904 		/*
1905 		 * Only allow IPv6 option processing on native IPv6 sockets.
1906 		 */
1907 		if (icmp->icmp_family != AF_INET6) {
1908 			ret = -1;
1909 			goto done;
1910 		}
1911 		switch (name) {
1912 		case IPV6_UNICAST_HOPS:
1913 			*i1 = (unsigned int)icmp->icmp_ttl;
1914 			break;
1915 		case IPV6_MULTICAST_IF:
1916 			/* 0 index if not set */
1917 			*i1 = icmp->icmp_multicast_if_index;
1918 			break;
1919 		case IPV6_MULTICAST_HOPS:
1920 			*i1 = icmp->icmp_multicast_ttl;
1921 			break;
1922 		case IPV6_MULTICAST_LOOP:
1923 			*i1 = connp->conn_multicast_loop;
1924 			break;
1925 		case IPV6_BOUND_IF:
1926 			/* Zero if not set */
1927 			*i1 = icmp->icmp_bound_if;
1928 			break;
1929 		case IPV6_UNSPEC_SRC:
1930 			*i1 = icmp->icmp_unspec_source;
1931 			break;
1932 		case IPV6_CHECKSUM:
1933 			/*
1934 			 * Return offset or -1 if no checksum offset.
1935 			 * Does not apply to IPPROTO_ICMPV6
1936 			 */
1937 			if (icmp->icmp_proto == IPPROTO_ICMPV6) {
1938 				ret = -1;
1939 				goto done;
1940 			}
1941 
1942 			if (icmp->icmp_raw_checksum) {
1943 				*i1 = icmp->icmp_checksum_off;
1944 			} else {
1945 				*i1 = -1;
1946 			}
1947 			break;
1948 		case IPV6_JOIN_GROUP:
1949 		case IPV6_LEAVE_GROUP:
1950 		case MCAST_JOIN_GROUP:
1951 		case MCAST_LEAVE_GROUP:
1952 		case MCAST_BLOCK_SOURCE:
1953 		case MCAST_UNBLOCK_SOURCE:
1954 		case MCAST_JOIN_SOURCE_GROUP:
1955 		case MCAST_LEAVE_SOURCE_GROUP:
1956 			/* cannot "get" the value for these */
1957 			ret = -1;
1958 			goto done;
1959 		case IPV6_RECVPKTINFO:
1960 			*i1 = icmp->icmp_ip_recvpktinfo;
1961 			break;
1962 		case IPV6_RECVTCLASS:
1963 			*i1 = icmp->icmp_ipv6_recvtclass;
1964 			break;
1965 		case IPV6_RECVPATHMTU:
1966 			*i1 = icmp->icmp_ipv6_recvpathmtu;
1967 			break;
1968 		case IPV6_V6ONLY:
1969 			*i1 = 1;
1970 			break;
1971 		case IPV6_RECVHOPLIMIT:
1972 			*i1 = icmp->icmp_ipv6_recvhoplimit;
1973 			break;
1974 		case IPV6_RECVHOPOPTS:
1975 			*i1 = icmp->icmp_ipv6_recvhopopts;
1976 			break;
1977 		case IPV6_RECVDSTOPTS:
1978 			*i1 = icmp->icmp_ipv6_recvdstopts;
1979 			break;
1980 		case _OLD_IPV6_RECVDSTOPTS:
1981 			*i1 = icmp->icmp_old_ipv6_recvdstopts;
1982 			break;
1983 		case IPV6_RECVRTHDRDSTOPTS:
1984 			*i1 = icmp->icmp_ipv6_recvrtdstopts;
1985 			break;
1986 		case IPV6_RECVRTHDR:
1987 			*i1 = icmp->icmp_ipv6_recvrthdr;
1988 			break;
1989 		case IPV6_PKTINFO: {
1990 			/* XXX assumes that caller has room for max size! */
1991 			struct in6_pktinfo *pkti;
1992 
1993 			pkti = (struct in6_pktinfo *)ptr;
1994 			if (ipp->ipp_fields & IPPF_IFINDEX)
1995 				pkti->ipi6_ifindex = ipp->ipp_ifindex;
1996 			else
1997 				pkti->ipi6_ifindex = 0;
1998 			if (ipp->ipp_fields & IPPF_ADDR)
1999 				pkti->ipi6_addr = ipp->ipp_addr;
2000 			else
2001 				pkti->ipi6_addr = ipv6_all_zeros;
2002 			ret = sizeof (struct in6_pktinfo);
2003 			goto done;
2004 		}
2005 		case IPV6_NEXTHOP: {
2006 			sin6_t *sin6 = (sin6_t *)ptr;
2007 
2008 			if (!(ipp->ipp_fields & IPPF_NEXTHOP))
2009 				return (0);
2010 			*sin6 = sin6_null;
2011 			sin6->sin6_family = AF_INET6;
2012 			sin6->sin6_addr = ipp->ipp_nexthop;
2013 			ret = (sizeof (sin6_t));
2014 			goto done;
2015 		}
2016 		case IPV6_HOPOPTS:
2017 			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
2018 				return (0);
2019 			if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6)
2020 				return (0);
2021 			bcopy((char *)ipp->ipp_hopopts +
2022 			    icmp->icmp_label_len_v6, ptr,
2023 			    ipp->ipp_hopoptslen - icmp->icmp_label_len_v6);
2024 			if (icmp->icmp_label_len_v6 > 0) {
2025 				ptr[0] = ((char *)ipp->ipp_hopopts)[0];
2026 				ptr[1] = (ipp->ipp_hopoptslen -
2027 				    icmp->icmp_label_len_v6 + 7) / 8 - 1;
2028 			}
2029 			ret = (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6);
2030 			goto done;
2031 		case IPV6_RTHDRDSTOPTS:
2032 			if (!(ipp->ipp_fields & IPPF_RTDSTOPTS))
2033 				return (0);
2034 			bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
2035 			ret = ipp->ipp_rtdstoptslen;
2036 			goto done;
2037 		case IPV6_RTHDR:
2038 			if (!(ipp->ipp_fields & IPPF_RTHDR))
2039 				return (0);
2040 			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
2041 			ret = ipp->ipp_rthdrlen;
2042 			goto done;
2043 		case IPV6_DSTOPTS:
2044 			if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
2045 				ret = 0;
2046 				goto done;
2047 			}
2048 			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
2049 			ret = ipp->ipp_dstoptslen;
2050 			goto done;
2051 		case IPV6_PATHMTU:
2052 			if (!(ipp->ipp_fields & IPPF_PATHMTU)) {
2053 				ret = 0;
2054 			} else {
2055 				ret = ip_fill_mtuinfo(
2056 				    &icmp->icmp_v6dst.sin6_addr, 0,
2057 				    (struct ip6_mtuinfo *)ptr,
2058 				    is->is_netstack);
2059 			}
2060 			goto done;
2061 		case IPV6_TCLASS:
2062 			if (ipp->ipp_fields & IPPF_TCLASS)
2063 				*i1 = ipp->ipp_tclass;
2064 			else
2065 				*i1 = IPV6_FLOW_TCLASS(
2066 				    IPV6_DEFAULT_VERS_AND_FLOW);
2067 			break;
2068 		default:
2069 			ret = -1;
2070 			goto done;
2071 		}
2072 		break;
2073 	case IPPROTO_ICMPV6:
2074 		/*
2075 		 * Only allow IPv6 option processing on native IPv6 sockets.
2076 		 */
2077 		if (icmp->icmp_family != AF_INET6) {
2078 			ret = -1;
2079 		}
2080 
2081 		if (icmp->icmp_proto != IPPROTO_ICMPV6) {
2082 			ret = -1;
2083 		}
2084 
2085 		switch (name) {
2086 		case ICMP6_FILTER:
2087 			if (icmp->icmp_filter == NULL) {
2088 				/* Make it look like "pass all" */
2089 				ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
2090 			} else {
2091 				(void) bcopy(icmp->icmp_filter, ptr,
2092 				    sizeof (icmp6_filter_t));
2093 			}
2094 			ret = sizeof (icmp6_filter_t);
2095 			goto done;
2096 		default:
2097 			ret = -1;
2098 			goto done;
2099 		}
2100 	default:
2101 		ret = -1;
2102 		goto done;
2103 	}
2104 	ret = sizeof (int);
2105 done:
2106 	return (ret);
2107 }
2108 
2109 /*
2110  * This routine retrieves the current status of socket options.
2111  * It returns the size of the option retrieved.
2112  */
2113 int
2114 icmp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
2115 {
2116 	conn_t  *connp = Q_TO_CONN(q);
2117 	icmp_t	*icmp = connp->conn_icmp;
2118 	int 	err;
2119 
2120 	rw_enter(&icmp->icmp_rwlock, RW_READER);
2121 	err = icmp_opt_get(connp, level, name, ptr);
2122 	rw_exit(&icmp->icmp_rwlock);
2123 	return (err);
2124 }
2125 
2126 int
2127 icmp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen,
2128     uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr,
2129     void *thisdg_attrs, boolean_t checkonly)
2130 {
2131 
2132 	int	*i1 = (int *)invalp;
2133 	boolean_t onoff = (*i1 == 0) ? 0 : 1;
2134 	icmp_t *icmp = connp->conn_icmp;
2135 	icmp_stack_t *is = icmp->icmp_is;
2136 	int	error;
2137 
2138 	ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock));
2139 	/*
2140 	 * For fixed length options, no sanity check
2141 	 * of passed in length is done. It is assumed *_optcom_req()
2142 	 * routines do the right thing.
2143 	 */
2144 	switch (level) {
2145 	case SOL_SOCKET:
2146 		switch (name) {
2147 		case SO_DEBUG:
2148 			if (!checkonly)
2149 				icmp->icmp_debug = onoff;
2150 			break;
2151 		case SO_PROTOTYPE:
2152 			if ((*i1 & 0xFF) != IPPROTO_ICMP &&
2153 			    (*i1 & 0xFF) != IPPROTO_ICMPV6 &&
2154 			    secpolicy_net_rawaccess(cr) != 0) {
2155 				*outlenp = 0;
2156 				return (EACCES);
2157 			}
2158 			/* Can't use IPPROTO_RAW with IPv6 */
2159 			if ((*i1 & 0xFF) == IPPROTO_RAW &&
2160 			    icmp->icmp_family == AF_INET6) {
2161 				*outlenp = 0;
2162 				return (EPROTONOSUPPORT);
2163 			}
2164 			if (checkonly) {
2165 				/* T_CHECK case */
2166 				*(int *)outvalp = (*i1 & 0xFF);
2167 				break;
2168 			}
2169 			icmp->icmp_proto = *i1 & 0xFF;
2170 			if ((icmp->icmp_proto == IPPROTO_RAW ||
2171 			    icmp->icmp_proto == IPPROTO_IGMP) &&
2172 			    icmp->icmp_family == AF_INET)
2173 				icmp->icmp_hdrincl = 1;
2174 			else
2175 				icmp->icmp_hdrincl = 0;
2176 
2177 			if (icmp->icmp_family == AF_INET6 &&
2178 			    icmp->icmp_proto == IPPROTO_ICMPV6) {
2179 				/* Set offset for icmp6_cksum */
2180 				icmp->icmp_raw_checksum = 0;
2181 				icmp->icmp_checksum_off = 2;
2182 			}
2183 			if (icmp->icmp_proto == IPPROTO_UDP ||
2184 			    icmp->icmp_proto == IPPROTO_TCP ||
2185 			    icmp->icmp_proto == IPPROTO_SCTP) {
2186 				icmp->icmp_no_tp_cksum = 1;
2187 				icmp->icmp_sticky_ipp.ipp_fields |=
2188 				    IPPF_NO_CKSUM;
2189 			} else {
2190 				icmp->icmp_no_tp_cksum = 0;
2191 				icmp->icmp_sticky_ipp.ipp_fields &=
2192 				    ~IPPF_NO_CKSUM;
2193 			}
2194 
2195 			if (icmp->icmp_filter != NULL &&
2196 			    icmp->icmp_proto != IPPROTO_ICMPV6) {
2197 				kmem_free(icmp->icmp_filter,
2198 				    sizeof (icmp6_filter_t));
2199 				icmp->icmp_filter = NULL;
2200 			}
2201 
2202 			/* Rebuild the header template */
2203 			error = icmp_build_hdrs(icmp);
2204 			if (error != 0) {
2205 				*outlenp = 0;
2206 				return (error);
2207 			}
2208 
2209 			/*
2210 			 * For SCTP, we don't use icmp_bind_proto() for
2211 			 * raw socket binding.  Note that we do not need
2212 			 * to set *outlenp.
2213 			 * FIXME: how does SCTP work?
2214 			 */
2215 			if (icmp->icmp_proto == IPPROTO_SCTP)
2216 				return (0);
2217 
2218 			*outlenp = sizeof (int);
2219 			*(int *)outvalp = *i1 & 0xFF;
2220 
2221 			/* Drop lock across the bind operation */
2222 			rw_exit(&icmp->icmp_rwlock);
2223 			(void) icmp_bind_proto(connp);
2224 			rw_enter(&icmp->icmp_rwlock, RW_WRITER);
2225 			return (0);
2226 		case SO_REUSEADDR:
2227 			if (!checkonly) {
2228 				icmp->icmp_reuseaddr = onoff;
2229 				PASS_OPT_TO_IP(connp);
2230 			}
2231 			break;
2232 
2233 		/*
2234 		 * The following three items are available here,
2235 		 * but are only meaningful to IP.
2236 		 */
2237 		case SO_DONTROUTE:
2238 			if (!checkonly) {
2239 				icmp->icmp_dontroute = onoff;
2240 				PASS_OPT_TO_IP(connp);
2241 			}
2242 			break;
2243 		case SO_USELOOPBACK:
2244 			if (!checkonly) {
2245 				icmp->icmp_useloopback = onoff;
2246 				PASS_OPT_TO_IP(connp);
2247 			}
2248 			break;
2249 		case SO_BROADCAST:
2250 			if (!checkonly) {
2251 				icmp->icmp_broadcast = onoff;
2252 				PASS_OPT_TO_IP(connp);
2253 			}
2254 			break;
2255 
2256 		case SO_SNDBUF:
2257 			if (*i1 > is->is_max_buf) {
2258 				*outlenp = 0;
2259 				return (ENOBUFS);
2260 			}
2261 			if (!checkonly) {
2262 				if (!IPCL_IS_NONSTR(connp)) {
2263 					connp->conn_wq->q_hiwat = *i1;
2264 				}
2265 				icmp->icmp_xmit_hiwat = *i1;
2266 			}
2267 			break;
2268 		case SO_RCVBUF:
2269 			if (*i1 > is->is_max_buf) {
2270 				*outlenp = 0;
2271 				return (ENOBUFS);
2272 			}
2273 			if (!checkonly) {
2274 				icmp->icmp_recv_hiwat = *i1;
2275 				rw_exit(&icmp->icmp_rwlock);
2276 				(void) proto_set_rx_hiwat(connp->conn_rq, connp,
2277 				    *i1);
2278 				rw_enter(&icmp->icmp_rwlock, RW_WRITER);
2279 			}
2280 			break;
2281 		case SO_DGRAM_ERRIND:
2282 			if (!checkonly)
2283 				icmp->icmp_dgram_errind = onoff;
2284 			break;
2285 		case SO_ALLZONES:
2286 			/*
2287 			 * "soft" error (negative)
2288 			 * option not handled at this level
2289 			 * Note: Do not modify *outlenp
2290 			 */
2291 			return (-EINVAL);
2292 		case SO_TIMESTAMP:
2293 			if (!checkonly) {
2294 				icmp->icmp_timestamp = onoff;
2295 			}
2296 			break;
2297 		case SO_MAC_EXEMPT:
2298 			/*
2299 			 * "soft" error (negative)
2300 			 * option not handled at this level
2301 			 * Note: Do not modify *outlenp
2302 			 */
2303 			return (-EINVAL);
2304 		/*
2305 		 * Following three not meaningful for icmp
2306 		 * Action is same as "default" so we keep them
2307 		 * in comments.
2308 		 * case SO_LINGER:
2309 		 * case SO_KEEPALIVE:
2310 		 * case SO_OOBINLINE:
2311 		 */
2312 		default:
2313 			*outlenp = 0;
2314 			return (EINVAL);
2315 		}
2316 		break;
2317 	case IPPROTO_IP:
2318 		/*
2319 		 * Only allow IPv4 option processing on IPv4 sockets.
2320 		 */
2321 		if (icmp->icmp_family != AF_INET) {
2322 			*outlenp = 0;
2323 			return (ENOPROTOOPT);
2324 		}
2325 		switch (name) {
2326 		case IP_OPTIONS:
2327 		case T_IP_OPTIONS:
2328 			/* Save options for use by IP. */
2329 			if ((inlen & 0x3) ||
2330 			    inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) {
2331 				*outlenp = 0;
2332 				return (EINVAL);
2333 			}
2334 			if (checkonly)
2335 				break;
2336 
2337 			if (!tsol_option_set(&icmp->icmp_ip_snd_options,
2338 			    &icmp->icmp_ip_snd_options_len,
2339 			    icmp->icmp_label_len, invalp, inlen)) {
2340 				*outlenp = 0;
2341 				return (ENOMEM);
2342 			}
2343 
2344 			icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
2345 			    icmp->icmp_ip_snd_options_len;
2346 			rw_exit(&icmp->icmp_rwlock);
2347 			(void) proto_set_tx_wroff(connp->conn_rq == NULL ? NULL:
2348 			    RD(connp->conn_rq), connp,
2349 			    icmp->icmp_max_hdr_len + is->is_wroff_extra);
2350 			rw_enter(&icmp->icmp_rwlock, RW_WRITER);
2351 			break;
2352 		case IP_HDRINCL:
2353 			if (!checkonly)
2354 				icmp->icmp_hdrincl = onoff;
2355 			break;
2356 		case IP_TOS:
2357 		case T_IP_TOS:
2358 			if (!checkonly) {
2359 				icmp->icmp_type_of_service = (uint8_t)*i1;
2360 			}
2361 			break;
2362 		case IP_TTL:
2363 			if (!checkonly) {
2364 				icmp->icmp_ttl = (uint8_t)*i1;
2365 			}
2366 			break;
2367 		case IP_MULTICAST_IF:
2368 			/*
2369 			 * TODO should check OPTMGMT reply and undo this if
2370 			 * there is an error.
2371 			 */
2372 			if (!checkonly) {
2373 				icmp->icmp_multicast_if_addr = *i1;
2374 				PASS_OPT_TO_IP(connp);
2375 			}
2376 			break;
2377 		case IP_MULTICAST_TTL:
2378 			if (!checkonly)
2379 				icmp->icmp_multicast_ttl = *invalp;
2380 			break;
2381 		case IP_MULTICAST_LOOP:
2382 			if (!checkonly) {
2383 				connp->conn_multicast_loop =
2384 				    (*invalp == 0) ? 0 : 1;
2385 				PASS_OPT_TO_IP(connp);
2386 			}
2387 			break;
2388 		case IP_BOUND_IF:
2389 			if (!checkonly) {
2390 				icmp->icmp_bound_if = *i1;
2391 				PASS_OPT_TO_IP(connp);
2392 			}
2393 			break;
2394 		case IP_UNSPEC_SRC:
2395 			if (!checkonly) {
2396 				icmp->icmp_unspec_source = onoff;
2397 				PASS_OPT_TO_IP(connp);
2398 			}
2399 			break;
2400 		case IP_BROADCAST_TTL:
2401 			if (!checkonly)
2402 				connp->conn_broadcast_ttl = *invalp;
2403 			break;
2404 		case IP_RECVIF:
2405 			if (!checkonly) {
2406 				icmp->icmp_recvif = onoff;
2407 			}
2408 			/*
2409 			 * pass to ip
2410 			 */
2411 			return (-EINVAL);
2412 		case IP_PKTINFO: {
2413 			/*
2414 			 * This also handles IP_RECVPKTINFO.
2415 			 * IP_PKTINFO and IP_RECVPKTINFO have the same value.
2416 			 * Differentiation is based on the size of the argument
2417 			 * passed in.
2418 			 */
2419 			struct in_pktinfo *pktinfop;
2420 			ip4_pkt_t *attr_pktinfop;
2421 
2422 			if (checkonly)
2423 				break;
2424 
2425 			if (inlen == sizeof (int)) {
2426 				/*
2427 				 * This is IP_RECVPKTINFO option.
2428 				 * Keep a local copy of wether this option is
2429 				 * set or not and pass it down to IP for
2430 				 * processing.
2431 				 */
2432 				icmp->icmp_ip_recvpktinfo = onoff;
2433 				return (-EINVAL);
2434 			}
2435 
2436 
2437 			if (inlen != sizeof (struct in_pktinfo)) {
2438 				return (EINVAL);
2439 			}
2440 
2441 			if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs)
2442 			    == NULL) {
2443 				/*
2444 				 * sticky option is not supported
2445 				 */
2446 				return (EINVAL);
2447 			}
2448 
2449 			pktinfop = (struct in_pktinfo *)invalp;
2450 
2451 			/*
2452 			 * Atleast one of the values should be specified
2453 			 */
2454 			if (pktinfop->ipi_ifindex == 0 &&
2455 			    pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) {
2456 				return (EINVAL);
2457 			}
2458 
2459 			attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr;
2460 			attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex;
2461 		}
2462 			break;
2463 		case IP_ADD_MEMBERSHIP:
2464 		case IP_DROP_MEMBERSHIP:
2465 		case IP_BLOCK_SOURCE:
2466 		case IP_UNBLOCK_SOURCE:
2467 		case IP_ADD_SOURCE_MEMBERSHIP:
2468 		case IP_DROP_SOURCE_MEMBERSHIP:
2469 		case MCAST_JOIN_GROUP:
2470 		case MCAST_LEAVE_GROUP:
2471 		case MCAST_BLOCK_SOURCE:
2472 		case MCAST_UNBLOCK_SOURCE:
2473 		case MCAST_JOIN_SOURCE_GROUP:
2474 		case MCAST_LEAVE_SOURCE_GROUP:
2475 		case MRT_INIT:
2476 		case MRT_DONE:
2477 		case MRT_ADD_VIF:
2478 		case MRT_DEL_VIF:
2479 		case MRT_ADD_MFC:
2480 		case MRT_DEL_MFC:
2481 		case MRT_VERSION:
2482 		case MRT_ASSERT:
2483 		case IP_SEC_OPT:
2484 		case IP_DONTFAILOVER_IF:
2485 		case IP_NEXTHOP:
2486 			/*
2487 			 * "soft" error (negative)
2488 			 * option not handled at this level
2489 			 * Note: Do not modify *outlenp
2490 			 */
2491 			return (-EINVAL);
2492 		default:
2493 			*outlenp = 0;
2494 			return (EINVAL);
2495 		}
2496 		break;
2497 	case IPPROTO_IPV6: {
2498 		ip6_pkt_t		*ipp;
2499 		boolean_t		sticky;
2500 
2501 		if (icmp->icmp_family != AF_INET6) {
2502 			*outlenp = 0;
2503 			return (ENOPROTOOPT);
2504 		}
2505 		/*
2506 		 * Deal with both sticky options and ancillary data
2507 		 */
2508 		if (thisdg_attrs == NULL) {
2509 			/* sticky options, or none */
2510 			ipp = &icmp->icmp_sticky_ipp;
2511 			sticky = B_TRUE;
2512 		} else {
2513 			/* ancillary data */
2514 			ipp = (ip6_pkt_t *)thisdg_attrs;
2515 			sticky = B_FALSE;
2516 		}
2517 
2518 		switch (name) {
2519 		case IPV6_MULTICAST_IF:
2520 			if (!checkonly) {
2521 				icmp->icmp_multicast_if_index = *i1;
2522 				PASS_OPT_TO_IP(connp);
2523 			}
2524 			break;
2525 		case IPV6_UNICAST_HOPS:
2526 			/* -1 means use default */
2527 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
2528 				*outlenp = 0;
2529 				return (EINVAL);
2530 			}
2531 			if (!checkonly) {
2532 				if (*i1 == -1) {
2533 					icmp->icmp_ttl = ipp->ipp_unicast_hops =
2534 					    is->is_ipv6_hoplimit;
2535 					ipp->ipp_fields &= ~IPPF_UNICAST_HOPS;
2536 					/* Pass modified value to IP. */
2537 					*i1 = ipp->ipp_hoplimit;
2538 				} else {
2539 					icmp->icmp_ttl = ipp->ipp_unicast_hops =
2540 					    (uint8_t)*i1;
2541 					ipp->ipp_fields |= IPPF_UNICAST_HOPS;
2542 				}
2543 				/* Rebuild the header template */
2544 				error = icmp_build_hdrs(icmp);
2545 				if (error != 0) {
2546 					*outlenp = 0;
2547 					return (error);
2548 				}
2549 			}
2550 			break;
2551 		case IPV6_MULTICAST_HOPS:
2552 			/* -1 means use default */
2553 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
2554 				*outlenp = 0;
2555 				return (EINVAL);
2556 			}
2557 			if (!checkonly) {
2558 				if (*i1 == -1) {
2559 					icmp->icmp_multicast_ttl =
2560 					    ipp->ipp_multicast_hops =
2561 					    IP_DEFAULT_MULTICAST_TTL;
2562 					ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS;
2563 					/* Pass modified value to IP. */
2564 					*i1 = icmp->icmp_multicast_ttl;
2565 				} else {
2566 					icmp->icmp_multicast_ttl =
2567 					    ipp->ipp_multicast_hops =
2568 					    (uint8_t)*i1;
2569 					ipp->ipp_fields |= IPPF_MULTICAST_HOPS;
2570 				}
2571 			}
2572 			break;
2573 		case IPV6_MULTICAST_LOOP:
2574 			if (*i1 != 0 && *i1 != 1) {
2575 				*outlenp = 0;
2576 				return (EINVAL);
2577 			}
2578 			if (!checkonly) {
2579 				connp->conn_multicast_loop = *i1;
2580 				PASS_OPT_TO_IP(connp);
2581 			}
2582 			break;
2583 		case IPV6_CHECKSUM:
2584 			/*
2585 			 * Integer offset into the user data of where the
2586 			 * checksum is located.
2587 			 * Offset of -1 disables option.
2588 			 * Does not apply to IPPROTO_ICMPV6.
2589 			 */
2590 			if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) {
2591 				*outlenp = 0;
2592 				return (EINVAL);
2593 			}
2594 			if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) {
2595 				/* Negative or not 16 bit aligned offset */
2596 				*outlenp = 0;
2597 				return (EINVAL);
2598 			}
2599 			if (checkonly)
2600 				break;
2601 
2602 			if (*i1 == -1) {
2603 				icmp->icmp_raw_checksum = 0;
2604 				ipp->ipp_fields &= ~IPPF_RAW_CKSUM;
2605 			} else {
2606 				icmp->icmp_raw_checksum = 1;
2607 				icmp->icmp_checksum_off = *i1;
2608 				ipp->ipp_fields |= IPPF_RAW_CKSUM;
2609 			}
2610 			/* Rebuild the header template */
2611 			error = icmp_build_hdrs(icmp);
2612 			if (error != 0) {
2613 				*outlenp = 0;
2614 				return (error);
2615 			}
2616 			break;
2617 		case IPV6_JOIN_GROUP:
2618 		case IPV6_LEAVE_GROUP:
2619 		case MCAST_JOIN_GROUP:
2620 		case MCAST_LEAVE_GROUP:
2621 		case MCAST_BLOCK_SOURCE:
2622 		case MCAST_UNBLOCK_SOURCE:
2623 		case MCAST_JOIN_SOURCE_GROUP:
2624 		case MCAST_LEAVE_SOURCE_GROUP:
2625 			/*
2626 			 * "soft" error (negative)
2627 			 * option not handled at this level
2628 			 * Note: Do not modify *outlenp
2629 			 */
2630 			return (-EINVAL);
2631 		case IPV6_BOUND_IF:
2632 			if (!checkonly) {
2633 				icmp->icmp_bound_if = *i1;
2634 				PASS_OPT_TO_IP(connp);
2635 			}
2636 			break;
2637 		case IPV6_UNSPEC_SRC:
2638 			if (!checkonly) {
2639 				icmp->icmp_unspec_source = onoff;
2640 				PASS_OPT_TO_IP(connp);
2641 			}
2642 			break;
2643 		case IPV6_RECVTCLASS:
2644 			if (!checkonly) {
2645 				icmp->icmp_ipv6_recvtclass = onoff;
2646 				PASS_OPT_TO_IP(connp);
2647 			}
2648 			break;
2649 		/*
2650 		 * Set boolean switches for ancillary data delivery
2651 		 */
2652 		case IPV6_RECVPKTINFO:
2653 			if (!checkonly) {
2654 				icmp->icmp_ip_recvpktinfo = onoff;
2655 				PASS_OPT_TO_IP(connp);
2656 			}
2657 			break;
2658 		case IPV6_RECVPATHMTU:
2659 			if (!checkonly) {
2660 				icmp->icmp_ipv6_recvpathmtu = onoff;
2661 				PASS_OPT_TO_IP(connp);
2662 			}
2663 			break;
2664 		case IPV6_RECVHOPLIMIT:
2665 			if (!checkonly) {
2666 				icmp->icmp_ipv6_recvhoplimit = onoff;
2667 				PASS_OPT_TO_IP(connp);
2668 			}
2669 			break;
2670 		case IPV6_RECVHOPOPTS:
2671 			if (!checkonly) {
2672 				icmp->icmp_ipv6_recvhopopts = onoff;
2673 				PASS_OPT_TO_IP(connp);
2674 			}
2675 			break;
2676 		case IPV6_RECVDSTOPTS:
2677 			if (!checkonly) {
2678 				icmp->icmp_ipv6_recvdstopts = onoff;
2679 				PASS_OPT_TO_IP(connp);
2680 			}
2681 			break;
2682 		case _OLD_IPV6_RECVDSTOPTS:
2683 			if (!checkonly)
2684 				icmp->icmp_old_ipv6_recvdstopts = onoff;
2685 			break;
2686 		case IPV6_RECVRTHDRDSTOPTS:
2687 			if (!checkonly) {
2688 				icmp->icmp_ipv6_recvrtdstopts = onoff;
2689 				PASS_OPT_TO_IP(connp);
2690 			}
2691 			break;
2692 		case IPV6_RECVRTHDR:
2693 			if (!checkonly) {
2694 				icmp->icmp_ipv6_recvrthdr = onoff;
2695 				PASS_OPT_TO_IP(connp);
2696 			}
2697 			break;
2698 		/*
2699 		 * Set sticky options or ancillary data.
2700 		 * If sticky options, (re)build any extension headers
2701 		 * that might be needed as a result.
2702 		 */
2703 		case IPV6_PKTINFO:
2704 			/*
2705 			 * The source address and ifindex are verified
2706 			 * in ip_opt_set(). For ancillary data the
2707 			 * source address is checked in ip_wput_v6.
2708 			 */
2709 			if (inlen != 0 && inlen !=
2710 			    sizeof (struct in6_pktinfo)) {
2711 				return (EINVAL);
2712 			}
2713 			if (checkonly)
2714 				break;
2715 
2716 			if (inlen == 0) {
2717 				ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR);
2718 				ipp->ipp_sticky_ignored |=
2719 				    (IPPF_IFINDEX|IPPF_ADDR);
2720 			} else {
2721 				struct in6_pktinfo *pkti;
2722 
2723 				pkti = (struct in6_pktinfo *)invalp;
2724 				ipp->ipp_ifindex = pkti->ipi6_ifindex;
2725 				ipp->ipp_addr = pkti->ipi6_addr;
2726 				if (ipp->ipp_ifindex != 0)
2727 					ipp->ipp_fields |= IPPF_IFINDEX;
2728 				else
2729 					ipp->ipp_fields &= ~IPPF_IFINDEX;
2730 				if (!IN6_IS_ADDR_UNSPECIFIED(
2731 				    &ipp->ipp_addr))
2732 					ipp->ipp_fields |= IPPF_ADDR;
2733 				else
2734 					ipp->ipp_fields &= ~IPPF_ADDR;
2735 			}
2736 			if (sticky) {
2737 				error = icmp_build_hdrs(icmp);
2738 				if (error != 0)
2739 					return (error);
2740 				PASS_OPT_TO_IP(connp);
2741 			}
2742 			break;
2743 		case IPV6_HOPLIMIT:
2744 			/* This option can only be used as ancillary data. */
2745 			if (sticky)
2746 				return (EINVAL);
2747 			if (inlen != 0 && inlen != sizeof (int))
2748 				return (EINVAL);
2749 			if (checkonly)
2750 				break;
2751 
2752 			if (inlen == 0) {
2753 				ipp->ipp_fields &= ~IPPF_HOPLIMIT;
2754 				ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT;
2755 			} else {
2756 				if (*i1 > 255 || *i1 < -1)
2757 					return (EINVAL);
2758 				if (*i1 == -1)
2759 					ipp->ipp_hoplimit =
2760 					    is->is_ipv6_hoplimit;
2761 				else
2762 					ipp->ipp_hoplimit = *i1;
2763 				ipp->ipp_fields |= IPPF_HOPLIMIT;
2764 			}
2765 			break;
2766 		case IPV6_TCLASS:
2767 			/*
2768 			 * IPV6_RECVTCLASS accepts -1 as use kernel default
2769 			 * and [0, 255] as the actualy traffic class.
2770 			 */
2771 			if (inlen != 0 && inlen != sizeof (int)) {
2772 				return (EINVAL);
2773 			}
2774 			if (checkonly)
2775 				break;
2776 
2777 			if (inlen == 0) {
2778 				ipp->ipp_fields &= ~IPPF_TCLASS;
2779 				ipp->ipp_sticky_ignored |= IPPF_TCLASS;
2780 			} else {
2781 				if (*i1 >= 256 || *i1 < -1)
2782 					return (EINVAL);
2783 				if (*i1 == -1) {
2784 					ipp->ipp_tclass =
2785 					    IPV6_FLOW_TCLASS(
2786 					    IPV6_DEFAULT_VERS_AND_FLOW);
2787 				} else {
2788 					ipp->ipp_tclass = *i1;
2789 				}
2790 				ipp->ipp_fields |= IPPF_TCLASS;
2791 			}
2792 			if (sticky) {
2793 				error = icmp_build_hdrs(icmp);
2794 				if (error != 0)
2795 					return (error);
2796 			}
2797 			break;
2798 		case IPV6_NEXTHOP:
2799 			/*
2800 			 * IP will verify that the nexthop is reachable
2801 			 * and fail for sticky options.
2802 			 */
2803 			if (inlen != 0 && inlen != sizeof (sin6_t)) {
2804 				return (EINVAL);
2805 			}
2806 			if (checkonly)
2807 				break;
2808 
2809 			if (inlen == 0) {
2810 				ipp->ipp_fields &= ~IPPF_NEXTHOP;
2811 				ipp->ipp_sticky_ignored |= IPPF_NEXTHOP;
2812 			} else {
2813 				sin6_t *sin6 = (sin6_t *)invalp;
2814 
2815 				if (sin6->sin6_family != AF_INET6) {
2816 					return (EAFNOSUPPORT);
2817 				}
2818 				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
2819 					return (EADDRNOTAVAIL);
2820 				}
2821 				ipp->ipp_nexthop = sin6->sin6_addr;
2822 				if (!IN6_IS_ADDR_UNSPECIFIED(
2823 				    &ipp->ipp_nexthop))
2824 					ipp->ipp_fields |= IPPF_NEXTHOP;
2825 				else
2826 					ipp->ipp_fields &= ~IPPF_NEXTHOP;
2827 			}
2828 			if (sticky) {
2829 				error = icmp_build_hdrs(icmp);
2830 				if (error != 0)
2831 					return (error);
2832 				PASS_OPT_TO_IP(connp);
2833 			}
2834 			break;
2835 		case IPV6_HOPOPTS: {
2836 			ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
2837 			/*
2838 			 * Sanity checks - minimum size, size a multiple of
2839 			 * eight bytes, and matching size passed in.
2840 			 */
2841 			if (inlen != 0 &&
2842 			    inlen != (8 * (hopts->ip6h_len + 1))) {
2843 				return (EINVAL);
2844 			}
2845 
2846 			if (checkonly)
2847 				break;
2848 			error = optcom_pkt_set(invalp, inlen, sticky,
2849 			    (uchar_t **)&ipp->ipp_hopopts,
2850 			    &ipp->ipp_hopoptslen,
2851 			    sticky ? icmp->icmp_label_len_v6 : 0);
2852 			if (error != 0)
2853 				return (error);
2854 			if (ipp->ipp_hopoptslen == 0) {
2855 				ipp->ipp_fields &= ~IPPF_HOPOPTS;
2856 				ipp->ipp_sticky_ignored |= IPPF_HOPOPTS;
2857 			} else {
2858 				ipp->ipp_fields |= IPPF_HOPOPTS;
2859 			}
2860 			if (sticky) {
2861 				error = icmp_build_hdrs(icmp);
2862 				if (error != 0)
2863 					return (error);
2864 			}
2865 			break;
2866 		}
2867 		case IPV6_RTHDRDSTOPTS: {
2868 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
2869 
2870 			/*
2871 			 * Sanity checks - minimum size, size a multiple of
2872 			 * eight bytes, and matching size passed in.
2873 			 */
2874 			if (inlen != 0 &&
2875 			    inlen != (8 * (dopts->ip6d_len + 1)))
2876 				return (EINVAL);
2877 
2878 			if (checkonly)
2879 				break;
2880 
2881 			if (inlen == 0) {
2882 				if (sticky &&
2883 				    (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) {
2884 					kmem_free(ipp->ipp_rtdstopts,
2885 					    ipp->ipp_rtdstoptslen);
2886 					ipp->ipp_rtdstopts = NULL;
2887 					ipp->ipp_rtdstoptslen = 0;
2888 				}
2889 				ipp->ipp_fields &= ~IPPF_RTDSTOPTS;
2890 				ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS;
2891 			} else {
2892 				error = optcom_pkt_set(invalp, inlen, sticky,
2893 				    (uchar_t **)&ipp->ipp_rtdstopts,
2894 				    &ipp->ipp_rtdstoptslen, 0);
2895 				if (error != 0)
2896 					return (error);
2897 				ipp->ipp_fields |= IPPF_RTDSTOPTS;
2898 			}
2899 			if (sticky) {
2900 				error = icmp_build_hdrs(icmp);
2901 				if (error != 0)
2902 					return (error);
2903 			}
2904 			break;
2905 		}
2906 		case IPV6_DSTOPTS: {
2907 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
2908 
2909 			/*
2910 			 * Sanity checks - minimum size, size a multiple of
2911 			 * eight bytes, and matching size passed in.
2912 			 */
2913 			if (inlen != 0 &&
2914 			    inlen != (8 * (dopts->ip6d_len + 1)))
2915 				return (EINVAL);
2916 
2917 			if (checkonly)
2918 				break;
2919 
2920 			if (inlen == 0) {
2921 				if (sticky &&
2922 				    (ipp->ipp_fields & IPPF_DSTOPTS) != 0) {
2923 					kmem_free(ipp->ipp_dstopts,
2924 					    ipp->ipp_dstoptslen);
2925 					ipp->ipp_dstopts = NULL;
2926 					ipp->ipp_dstoptslen = 0;
2927 				}
2928 				ipp->ipp_fields &= ~IPPF_DSTOPTS;
2929 				ipp->ipp_sticky_ignored |= IPPF_DSTOPTS;
2930 			} else {
2931 				error = optcom_pkt_set(invalp, inlen, sticky,
2932 				    (uchar_t **)&ipp->ipp_dstopts,
2933 				    &ipp->ipp_dstoptslen, 0);
2934 				if (error != 0)
2935 					return (error);
2936 				ipp->ipp_fields |= IPPF_DSTOPTS;
2937 			}
2938 			if (sticky) {
2939 				error = icmp_build_hdrs(icmp);
2940 				if (error != 0)
2941 					return (error);
2942 			}
2943 			break;
2944 		}
2945 		case IPV6_RTHDR: {
2946 			ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp;
2947 
2948 			/*
2949 			 * Sanity checks - minimum size, size a multiple of
2950 			 * eight bytes, and matching size passed in.
2951 			 */
2952 			if (inlen != 0 &&
2953 			    inlen != (8 * (rt->ip6r_len + 1)))
2954 				return (EINVAL);
2955 
2956 			if (checkonly)
2957 				break;
2958 
2959 			if (inlen == 0) {
2960 				if (sticky &&
2961 				    (ipp->ipp_fields & IPPF_RTHDR) != 0) {
2962 					kmem_free(ipp->ipp_rthdr,
2963 					    ipp->ipp_rthdrlen);
2964 					ipp->ipp_rthdr = NULL;
2965 					ipp->ipp_rthdrlen = 0;
2966 				}
2967 				ipp->ipp_fields &= ~IPPF_RTHDR;
2968 				ipp->ipp_sticky_ignored |= IPPF_RTHDR;
2969 			} else {
2970 				error = optcom_pkt_set(invalp, inlen, sticky,
2971 				    (uchar_t **)&ipp->ipp_rthdr,
2972 				    &ipp->ipp_rthdrlen, 0);
2973 				if (error != 0)
2974 					return (error);
2975 				ipp->ipp_fields |= IPPF_RTHDR;
2976 			}
2977 			if (sticky) {
2978 				error = icmp_build_hdrs(icmp);
2979 				if (error != 0)
2980 					return (error);
2981 			}
2982 			break;
2983 		}
2984 
2985 		case IPV6_DONTFRAG:
2986 			if (checkonly)
2987 				break;
2988 
2989 			if (onoff) {
2990 				ipp->ipp_fields |= IPPF_DONTFRAG;
2991 			} else {
2992 				ipp->ipp_fields &= ~IPPF_DONTFRAG;
2993 			}
2994 			break;
2995 
2996 		case IPV6_USE_MIN_MTU:
2997 			if (inlen != sizeof (int))
2998 				return (EINVAL);
2999 
3000 			if (*i1 < -1 || *i1 > 1)
3001 				return (EINVAL);
3002 
3003 			if (checkonly)
3004 				break;
3005 
3006 			ipp->ipp_fields |= IPPF_USE_MIN_MTU;
3007 			ipp->ipp_use_min_mtu = *i1;
3008 			break;
3009 
3010 		/*
3011 		 * This option can't be set.  Its only returned via
3012 		 * getsockopt() or ancillary data.
3013 		 */
3014 		case IPV6_PATHMTU:
3015 			return (EINVAL);
3016 
3017 		case IPV6_BOUND_PIF:
3018 		case IPV6_SEC_OPT:
3019 		case IPV6_DONTFAILOVER_IF:
3020 		case IPV6_SRC_PREFERENCES:
3021 		case IPV6_V6ONLY:
3022 			/* Handled at IP level */
3023 			return (-EINVAL);
3024 		default:
3025 			*outlenp = 0;
3026 			return (EINVAL);
3027 		}
3028 		break;
3029 	}		/* end IPPROTO_IPV6 */
3030 
3031 	case IPPROTO_ICMPV6:
3032 		/*
3033 		 * Only allow IPv6 option processing on IPv6 sockets.
3034 		 */
3035 		if (icmp->icmp_family != AF_INET6) {
3036 			*outlenp = 0;
3037 			return (ENOPROTOOPT);
3038 		}
3039 		if (icmp->icmp_proto != IPPROTO_ICMPV6) {
3040 			*outlenp = 0;
3041 			return (ENOPROTOOPT);
3042 		}
3043 		switch (name) {
3044 		case ICMP6_FILTER:
3045 			if (!checkonly) {
3046 				if ((inlen != 0) &&
3047 				    (inlen != sizeof (icmp6_filter_t)))
3048 					return (EINVAL);
3049 
3050 				if (inlen == 0) {
3051 					if (icmp->icmp_filter != NULL) {
3052 						kmem_free(icmp->icmp_filter,
3053 						    sizeof (icmp6_filter_t));
3054 						icmp->icmp_filter = NULL;
3055 					}
3056 				} else {
3057 					if (icmp->icmp_filter == NULL) {
3058 						icmp->icmp_filter = kmem_alloc(
3059 						    sizeof (icmp6_filter_t),
3060 						    KM_NOSLEEP);
3061 						if (icmp->icmp_filter == NULL) {
3062 							*outlenp = 0;
3063 							return (ENOBUFS);
3064 						}
3065 					}
3066 					(void) bcopy(invalp, icmp->icmp_filter,
3067 					    inlen);
3068 				}
3069 			}
3070 			break;
3071 
3072 		default:
3073 			*outlenp = 0;
3074 			return (EINVAL);
3075 		}
3076 		break;
3077 	default:
3078 		*outlenp = 0;
3079 		return (EINVAL);
3080 	}
3081 	/*
3082 	 * Common case of OK return with outval same as inval.
3083 	 */
3084 	if (invalp != outvalp) {
3085 		/* don't trust bcopy for identical src/dst */
3086 		(void) bcopy(invalp, outvalp, inlen);
3087 	}
3088 	*outlenp = inlen;
3089 	return (0);
3090 }
3091 
3092 /* This routine sets socket options. */
3093 /* ARGSUSED */
3094 int
3095 icmp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
3096     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
3097     void *thisdg_attrs, cred_t *cr)
3098 {
3099 	boolean_t checkonly;
3100 	int	error;
3101 
3102 	error = 0;
3103 	switch (optset_context) {
3104 	case SETFN_OPTCOM_CHECKONLY:
3105 		checkonly = B_TRUE;
3106 		/*
3107 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
3108 		 * inlen != 0 implies value supplied and
3109 		 * 	we have to "pretend" to set it.
3110 		 * inlen == 0 implies that there is no
3111 		 * 	value part in T_CHECK request and just validation
3112 		 * done elsewhere should be enough, we just return here.
3113 		 */
3114 		if (inlen == 0) {
3115 			*outlenp = 0;
3116 			error = 0;
3117 			goto done;
3118 		}
3119 		break;
3120 	case SETFN_OPTCOM_NEGOTIATE:
3121 		checkonly = B_FALSE;
3122 		break;
3123 	case SETFN_UD_NEGOTIATE:
3124 	case SETFN_CONN_NEGOTIATE:
3125 		checkonly = B_FALSE;
3126 		/*
3127 		 * Negotiating local and "association-related" options
3128 		 * through T_UNITDATA_REQ.
3129 		 *
3130 		 * Following routine can filter out ones we do not
3131 		 * want to be "set" this way.
3132 		 */
3133 		if (!icmp_opt_allow_udr_set(level, name)) {
3134 			*outlenp = 0;
3135 			error = EINVAL;
3136 			goto done;
3137 		}
3138 		break;
3139 	default:
3140 		/*
3141 		 * We should never get here
3142 		 */
3143 		*outlenp = 0;
3144 		error = EINVAL;
3145 		goto done;
3146 	}
3147 
3148 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
3149 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
3150 	error = icmp_do_opt_set(connp, level, name, inlen, invalp, outlenp,
3151 	    outvalp, cr, thisdg_attrs, checkonly);
3152 
3153 done:
3154 	return (error);
3155 }
3156 
3157 /* This routine sets socket options. */
3158 /* ARGSUSED */
3159 int
3160 icmp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
3161     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
3162     void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
3163 {
3164 	conn_t	*connp =  Q_TO_CONN(q);
3165 	icmp_t	*icmp;
3166 	int error;
3167 
3168 	icmp = connp->conn_icmp;
3169 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
3170 	error = icmp_opt_set(connp, optset_context, level, name, inlen, invalp,
3171 	    outlenp, outvalp, thisdg_attrs, cr);
3172 	rw_exit(&icmp->icmp_rwlock);
3173 	return (error);
3174 }
3175 
3176 /*
3177  * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl,
3178  * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum.
3179  * The headers include ip6i_t (if needed), ip6_t, and any sticky extension
3180  * headers.
3181  * Returns failure if can't allocate memory.
3182  */
3183 static int
3184 icmp_build_hdrs(icmp_t *icmp)
3185 {
3186 	icmp_stack_t *is = icmp->icmp_is;
3187 	uchar_t	*hdrs;
3188 	uint_t	hdrs_len;
3189 	ip6_t	*ip6h;
3190 	ip6i_t	*ip6i;
3191 	ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp;
3192 
3193 	ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock));
3194 	hdrs_len = ip_total_hdrs_len_v6(ipp);
3195 	ASSERT(hdrs_len != 0);
3196 	if (hdrs_len != icmp->icmp_sticky_hdrs_len) {
3197 		/* Need to reallocate */
3198 		if (hdrs_len != 0) {
3199 			hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
3200 			if (hdrs == NULL)
3201 				return (ENOMEM);
3202 		} else {
3203 			hdrs = NULL;
3204 		}
3205 		if (icmp->icmp_sticky_hdrs_len != 0) {
3206 			kmem_free(icmp->icmp_sticky_hdrs,
3207 			    icmp->icmp_sticky_hdrs_len);
3208 		}
3209 		icmp->icmp_sticky_hdrs = hdrs;
3210 		icmp->icmp_sticky_hdrs_len = hdrs_len;
3211 	}
3212 	ip_build_hdrs_v6(icmp->icmp_sticky_hdrs,
3213 	    icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto);
3214 
3215 	/* Set header fields not in ipp */
3216 	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
3217 		ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs;
3218 		ip6h = (ip6_t *)&ip6i[1];
3219 
3220 		if (ipp->ipp_fields & IPPF_RAW_CKSUM) {
3221 			ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM;
3222 			ip6i->ip6i_checksum_off = icmp->icmp_checksum_off;
3223 		}
3224 		if (ipp->ipp_fields & IPPF_NO_CKSUM) {
3225 			ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM;
3226 		}
3227 	} else {
3228 		ip6h = (ip6_t *)icmp->icmp_sticky_hdrs;
3229 	}
3230 
3231 	if (!(ipp->ipp_fields & IPPF_ADDR))
3232 		ip6h->ip6_src = icmp->icmp_v6src;
3233 
3234 	/* Try to get everything in a single mblk */
3235 	if (hdrs_len > icmp->icmp_max_hdr_len) {
3236 		icmp->icmp_max_hdr_len = hdrs_len;
3237 		rw_exit(&icmp->icmp_rwlock);
3238 		(void) proto_set_tx_wroff(icmp->icmp_connp->conn_rq,
3239 		    icmp->icmp_connp,
3240 		    icmp->icmp_max_hdr_len + is->is_wroff_extra);
3241 		rw_enter(&icmp->icmp_rwlock, RW_WRITER);
3242 	}
3243 	return (0);
3244 }
3245 
3246 /*
3247  * This routine retrieves the value of an ND variable in a icmpparam_t
3248  * structure.  It is called through nd_getset when a user reads the
3249  * variable.
3250  */
3251 /* ARGSUSED */
3252 static int
3253 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
3254 {
3255 	icmpparam_t	*icmppa = (icmpparam_t *)cp;
3256 
3257 	(void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value);
3258 	return (0);
3259 }
3260 
3261 /*
3262  * Walk through the param array specified registering each element with the
3263  * named dispatch (ND) handler.
3264  */
3265 static boolean_t
3266 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt)
3267 {
3268 	for (; cnt-- > 0; icmppa++) {
3269 		if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) {
3270 			if (!nd_load(ndp, icmppa->icmp_param_name,
3271 			    icmp_param_get, icmp_param_set,
3272 			    (caddr_t)icmppa)) {
3273 				nd_free(ndp);
3274 				return (B_FALSE);
3275 			}
3276 		}
3277 	}
3278 	if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL,
3279 	    NULL)) {
3280 		nd_free(ndp);
3281 		return (B_FALSE);
3282 	}
3283 	return (B_TRUE);
3284 }
3285 
3286 /* This routine sets an ND variable in a icmpparam_t structure. */
3287 /* ARGSUSED */
3288 static int
3289 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
3290 {
3291 	long		new_value;
3292 	icmpparam_t	*icmppa = (icmpparam_t *)cp;
3293 
3294 	/*
3295 	 * Fail the request if the new value does not lie within the
3296 	 * required bounds.
3297 	 */
3298 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
3299 	    new_value < icmppa->icmp_param_min ||
3300 	    new_value > icmppa->icmp_param_max) {
3301 		return (EINVAL);
3302 	}
3303 	/* Set the new value */
3304 	icmppa->icmp_param_value = new_value;
3305 	return (0);
3306 }
3307 static void
3308 icmp_queue_fallback(icmp_t *icmp, mblk_t *mp)
3309 {
3310 	ASSERT(MUTEX_HELD(&icmp->icmp_recv_lock));
3311 	if (IPCL_IS_NONSTR(icmp->icmp_connp)) {
3312 		/*
3313 		 * fallback has started but messages have not been moved yet
3314 		 */
3315 		if (icmp->icmp_fallback_queue_head == NULL) {
3316 			ASSERT(icmp->icmp_fallback_queue_tail == NULL);
3317 			icmp->icmp_fallback_queue_head = mp;
3318 			icmp->icmp_fallback_queue_tail = mp;
3319 		} else {
3320 			ASSERT(icmp->icmp_fallback_queue_tail != NULL);
3321 			icmp->icmp_fallback_queue_tail->b_next = mp;
3322 			icmp->icmp_fallback_queue_tail = mp;
3323 		}
3324 		mutex_exit(&icmp->icmp_recv_lock);
3325 	} else {
3326 		/*
3327 		 * no more fallbacks possible, ok to drop lock.
3328 		 */
3329 		mutex_exit(&icmp->icmp_recv_lock);
3330 		putnext(icmp->icmp_connp->conn_rq, mp);
3331 	}
3332 }
3333 
3334 /*ARGSUSED2*/
3335 static void
3336 icmp_input(void *arg1, mblk_t *mp, void *arg2)
3337 {
3338 	conn_t *connp = (conn_t *)arg1;
3339 	struct T_unitdata_ind	*tudi;
3340 	uchar_t			*rptr;
3341 	icmp_t			*icmp;
3342 	icmp_stack_t		*is;
3343 	sin_t			*sin;
3344 	sin6_t			*sin6;
3345 	ip6_t			*ip6h;
3346 	ip6i_t			*ip6i;
3347 	mblk_t			*mp1;
3348 	int			hdr_len;
3349 	ipha_t			*ipha;
3350 	int			udi_size;	/* Size of T_unitdata_ind */
3351 	uint_t			ipvers;
3352 	ip6_pkt_t		ipp;
3353 	uint8_t			nexthdr;
3354 	ip_pktinfo_t		*pinfo = NULL;
3355 	mblk_t			*options_mp = NULL;
3356 	uint_t			icmp_opt = 0;
3357 	boolean_t		icmp_ipv6_recvhoplimit = B_FALSE;
3358 	uint_t			hopstrip;
3359 	int			error;
3360 
3361 	ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
3362 
3363 	icmp = connp->conn_icmp;
3364 	is = icmp->icmp_is;
3365 	rptr = mp->b_rptr;
3366 	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL);
3367 	ASSERT(OK_32PTR(rptr));
3368 
3369 	/*
3370 	 * IP should have prepended the options data in an M_CTL
3371 	 * Check M_CTL "type" to make sure are not here bcos of
3372 	 * a valid ICMP message
3373 	 */
3374 	if (DB_TYPE(mp) == M_CTL) {
3375 		/*
3376 		 * FIXME: does IP still do this?
3377 		 * IP sends up the IPSEC_IN message for handling IPSEC
3378 		 * policy at the TCP level. We don't need it here.
3379 		 */
3380 		if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) {
3381 			mp1 = mp->b_cont;
3382 			freeb(mp);
3383 			mp = mp1;
3384 			rptr = mp->b_rptr;
3385 		} else if (MBLKL(mp) == sizeof (ip_pktinfo_t) &&
3386 		    ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type ==
3387 		    IN_PKTINFO) {
3388 			/*
3389 			 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information
3390 			 * has been prepended to the packet by IP. We need to
3391 			 * extract the mblk and adjust the rptr
3392 			 */
3393 			pinfo = (ip_pktinfo_t *)mp->b_rptr;
3394 			options_mp = mp;
3395 			mp = mp->b_cont;
3396 			rptr = mp->b_rptr;
3397 		} else {
3398 			/*
3399 			 * ICMP messages.
3400 			 */
3401 			icmp_icmp_error(connp, mp);
3402 			return;
3403 		}
3404 	}
3405 
3406 	/*
3407 	 * Discard message if it is misaligned or smaller than the IP header.
3408 	 */
3409 	if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) {
3410 		freemsg(mp);
3411 		if (options_mp != NULL)
3412 			freeb(options_mp);
3413 		BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
3414 		return;
3415 	}
3416 	ipvers = IPH_HDR_VERSION((ipha_t *)rptr);
3417 
3418 	/* Handle M_DATA messages containing IP packets messages */
3419 	if (ipvers == IPV4_VERSION) {
3420 		/*
3421 		 * Special case where IP attaches
3422 		 * the IRE needs to be handled so that we don't send up
3423 		 * IRE to the user land.
3424 		 */
3425 		ipha = (ipha_t *)rptr;
3426 		hdr_len = IPH_HDR_LENGTH(ipha);
3427 
3428 		if (ipha->ipha_protocol == IPPROTO_TCP) {
3429 			tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len];
3430 
3431 			if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) ==
3432 			    TH_SYN) && mp->b_cont != NULL) {
3433 				mp1 = mp->b_cont;
3434 				if (mp1->b_datap->db_type == IRE_DB_TYPE) {
3435 					freeb(mp1);
3436 					mp->b_cont = NULL;
3437 				}
3438 			}
3439 		}
3440 		if (is->is_bsd_compat) {
3441 			ushort_t len;
3442 			len = ntohs(ipha->ipha_length);
3443 
3444 			if (mp->b_datap->db_ref > 1) {
3445 				/*
3446 				 * Allocate a new IP header so that we can
3447 				 * modify ipha_length.
3448 				 */
3449 				mblk_t	*mp1;
3450 
3451 				mp1 = allocb(hdr_len, BPRI_MED);
3452 				if (!mp1) {
3453 					freemsg(mp);
3454 					if (options_mp != NULL)
3455 						freeb(options_mp);
3456 					BUMP_MIB(&is->is_rawip_mib,
3457 					    rawipInErrors);
3458 					return;
3459 				}
3460 				bcopy(rptr, mp1->b_rptr, hdr_len);
3461 				mp->b_rptr = rptr + hdr_len;
3462 				rptr = mp1->b_rptr;
3463 				ipha = (ipha_t *)rptr;
3464 				mp1->b_cont = mp;
3465 				mp1->b_wptr = rptr + hdr_len;
3466 				mp = mp1;
3467 			}
3468 			len -= hdr_len;
3469 			ipha->ipha_length = htons(len);
3470 		}
3471 	}
3472 
3473 	/*
3474 	 * This is the inbound data path.  Packets are passed upstream as
3475 	 * T_UNITDATA_IND messages with full IP headers still attached.
3476 	 */
3477 	if (icmp->icmp_family == AF_INET) {
3478 		ASSERT(ipvers == IPV4_VERSION);
3479 		udi_size =  sizeof (struct T_unitdata_ind) + sizeof (sin_t);
3480 		if (icmp->icmp_recvif && (pinfo != NULL) &&
3481 		    (pinfo->ip_pkt_flags & IPF_RECVIF)) {
3482 			udi_size += sizeof (struct T_opthdr) +
3483 			    sizeof (uint_t);
3484 		}
3485 
3486 		if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) &&
3487 		    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
3488 			udi_size += sizeof (struct T_opthdr) +
3489 			    sizeof (struct in_pktinfo);
3490 		}
3491 
3492 		/*
3493 		 * If SO_TIMESTAMP is set allocate the appropriate sized
3494 		 * buffer. Since gethrestime() expects a pointer aligned
3495 		 * argument, we allocate space necessary for extra
3496 		 * alignment (even though it might not be used).
3497 		 */
3498 		if (icmp->icmp_timestamp) {
3499 			udi_size += sizeof (struct T_opthdr) +
3500 			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
3501 		}
3502 		mp1 = allocb(udi_size, BPRI_MED);
3503 		if (mp1 == NULL) {
3504 			freemsg(mp);
3505 			if (options_mp != NULL)
3506 				freeb(options_mp);
3507 			BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
3508 			return;
3509 		}
3510 		mp1->b_cont = mp;
3511 		mp = mp1;
3512 		tudi = (struct T_unitdata_ind *)mp->b_rptr;
3513 		mp->b_datap->db_type = M_PROTO;
3514 		mp->b_wptr = (uchar_t *)tudi + udi_size;
3515 		tudi->PRIM_type = T_UNITDATA_IND;
3516 		tudi->SRC_length = sizeof (sin_t);
3517 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
3518 		sin = (sin_t *)&tudi[1];
3519 		*sin = sin_null;
3520 		sin->sin_family = AF_INET;
3521 		sin->sin_addr.s_addr = ipha->ipha_src;
3522 		tudi->OPT_offset =  sizeof (struct T_unitdata_ind) +
3523 		    sizeof (sin_t);
3524 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
3525 		tudi->OPT_length = udi_size;
3526 
3527 		/*
3528 		 * Add options if IP_RECVIF is set
3529 		 */
3530 		if (udi_size != 0) {
3531 			char *dstopt;
3532 
3533 			dstopt = (char *)&sin[1];
3534 			if (icmp->icmp_recvif && (pinfo != NULL) &&
3535 			    (pinfo->ip_pkt_flags & IPF_RECVIF)) {
3536 
3537 				struct T_opthdr *toh;
3538 				uint_t		*dstptr;
3539 
3540 				toh = (struct T_opthdr *)dstopt;
3541 				toh->level = IPPROTO_IP;
3542 				toh->name = IP_RECVIF;
3543 				toh->len = sizeof (struct T_opthdr) +
3544 				    sizeof (uint_t);
3545 				toh->status = 0;
3546 				dstopt += sizeof (struct T_opthdr);
3547 				dstptr = (uint_t *)dstopt;
3548 				*dstptr = pinfo->ip_pkt_ifindex;
3549 				dstopt += sizeof (uint_t);
3550 				udi_size -= toh->len;
3551 			}
3552 			if (icmp->icmp_timestamp) {
3553 				struct	T_opthdr *toh;
3554 
3555 				toh = (struct T_opthdr *)dstopt;
3556 				toh->level = SOL_SOCKET;
3557 				toh->name = SCM_TIMESTAMP;
3558 				toh->len = sizeof (struct T_opthdr) +
3559 				    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
3560 				toh->status = 0;
3561 				dstopt += sizeof (struct T_opthdr);
3562 				/* Align for gethrestime() */
3563 				dstopt = (char *)P2ROUNDUP((intptr_t)dstopt,
3564 				    sizeof (intptr_t));
3565 				gethrestime((timestruc_t *)dstopt);
3566 				dstopt = (char *)toh + toh->len;
3567 				udi_size -= toh->len;
3568 			}
3569 			if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) &&
3570 			    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
3571 				struct	T_opthdr *toh;
3572 				struct	in_pktinfo *pktinfop;
3573 
3574 				toh = (struct T_opthdr *)dstopt;
3575 				toh->level = IPPROTO_IP;
3576 				toh->name = IP_PKTINFO;
3577 				toh->len = sizeof (struct T_opthdr) +
3578 				    sizeof (in_pktinfo_t);
3579 				toh->status = 0;
3580 				dstopt += sizeof (struct T_opthdr);
3581 				pktinfop = (struct in_pktinfo *)dstopt;
3582 				pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex;
3583 				pktinfop->ipi_spec_dst =
3584 				    pinfo->ip_pkt_match_addr;
3585 
3586 				pktinfop->ipi_addr.s_addr = ipha->ipha_dst;
3587 
3588 				dstopt += sizeof (struct in_pktinfo);
3589 				udi_size -= toh->len;
3590 			}
3591 
3592 			/* Consumed all of allocated space */
3593 			ASSERT(udi_size == 0);
3594 		}
3595 
3596 		if (options_mp != NULL)
3597 			freeb(options_mp);
3598 
3599 		BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams);
3600 		goto deliver;
3601 	}
3602 
3603 	/*
3604 	 * We don't need options_mp in the IPv6 path.
3605 	 */
3606 	if (options_mp != NULL) {
3607 		freeb(options_mp);
3608 		options_mp = NULL;
3609 	}
3610 
3611 	/*
3612 	 * Discard message if it is smaller than the IPv6 header
3613 	 * or if the header is malformed.
3614 	 */
3615 	if ((mp->b_wptr - rptr) < sizeof (ip6_t) ||
3616 	    IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION ||
3617 	    icmp->icmp_family != AF_INET6) {
3618 		freemsg(mp);
3619 		BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
3620 		return;
3621 	}
3622 
3623 	/* Initialize */
3624 	ipp.ipp_fields = 0;
3625 	hopstrip = 0;
3626 
3627 	ip6h = (ip6_t *)rptr;
3628 	/*
3629 	 * Call on ip_find_hdr_v6 which gets the total hdr len
3630 	 * as well as individual lenghts of ext hdrs (and ptrs to
3631 	 * them).
3632 	 */
3633 	if (ip6h->ip6_nxt != icmp->icmp_proto) {
3634 		/* Look for ifindex information */
3635 		if (ip6h->ip6_nxt == IPPROTO_RAW) {
3636 			ip6i = (ip6i_t *)ip6h;
3637 			if (ip6i->ip6i_flags & IP6I_IFINDEX) {
3638 				ASSERT(ip6i->ip6i_ifindex != 0);
3639 				ipp.ipp_fields |= IPPF_IFINDEX;
3640 				ipp.ipp_ifindex = ip6i->ip6i_ifindex;
3641 			}
3642 			rptr = (uchar_t *)&ip6i[1];
3643 			mp->b_rptr = rptr;
3644 			if (rptr == mp->b_wptr) {
3645 				mp1 = mp->b_cont;
3646 				freeb(mp);
3647 				mp = mp1;
3648 				rptr = mp->b_rptr;
3649 			}
3650 			ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN);
3651 			ip6h = (ip6_t *)rptr;
3652 		}
3653 		hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr);
3654 
3655 		/*
3656 		 * We need to lie a bit to the user because users inside
3657 		 * labeled compartments should not see their own labels.  We
3658 		 * assume that in all other respects IP has checked the label,
3659 		 * and that the label is always first among the options.  (If
3660 		 * it's not first, then this code won't see it, and the option
3661 		 * will be passed along to the user.)
3662 		 *
3663 		 * If we had multilevel ICMP sockets, then the following code
3664 		 * should be skipped for them to allow the user to see the
3665 		 * label.
3666 		 *
3667 		 * Alignment restrictions in the definition of IP options
3668 		 * (namely, the requirement that the 4-octet DOI goes on a
3669 		 * 4-octet boundary) mean that we know exactly where the option
3670 		 * should start, but we're lenient for other hosts.
3671 		 *
3672 		 * Note that there are no multilevel ICMP or raw IP sockets
3673 		 * yet, thus nobody ever sees the IP6OPT_LS option.
3674 		 */
3675 		if ((ipp.ipp_fields & IPPF_HOPOPTS) &&
3676 		    ipp.ipp_hopoptslen > 5 && is_system_labeled()) {
3677 			const uchar_t *ucp =
3678 			    (const uchar_t *)ipp.ipp_hopopts + 2;
3679 			int remlen = ipp.ipp_hopoptslen - 2;
3680 
3681 			while (remlen > 0) {
3682 				if (*ucp == IP6OPT_PAD1) {
3683 					remlen--;
3684 					ucp++;
3685 				} else if (*ucp == IP6OPT_PADN) {
3686 					remlen -= ucp[1] + 2;
3687 					ucp += ucp[1] + 2;
3688 				} else if (*ucp == ip6opt_ls) {
3689 					hopstrip = (ucp -
3690 					    (const uchar_t *)ipp.ipp_hopopts) +
3691 					    ucp[1] + 2;
3692 					hopstrip = (hopstrip + 7) & ~7;
3693 					break;
3694 				} else {
3695 					/* label option must be first */
3696 					break;
3697 				}
3698 			}
3699 		}
3700 	} else {
3701 		hdr_len = IPV6_HDR_LEN;
3702 		ip6i = NULL;
3703 		nexthdr = ip6h->ip6_nxt;
3704 	}
3705 	/*
3706 	 * One special case where IP attaches the IRE needs to
3707 	 * be handled so that we don't send up IRE to the user land.
3708 	 */
3709 	if (nexthdr == IPPROTO_TCP) {
3710 		tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len];
3711 
3712 		if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) &&
3713 		    mp->b_cont != NULL) {
3714 			mp1 = mp->b_cont;
3715 			if (mp1->b_datap->db_type == IRE_DB_TYPE) {
3716 				freeb(mp1);
3717 				mp->b_cont = NULL;
3718 			}
3719 		}
3720 	}
3721 	/*
3722 	 * Check a filter for ICMPv6 types if needed.
3723 	 * Verify raw checksums if needed.
3724 	 */
3725 	if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) {
3726 		if (icmp->icmp_filter != NULL) {
3727 			int type;
3728 
3729 			/* Assumes that IP has done the pullupmsg */
3730 			type = mp->b_rptr[hdr_len];
3731 
3732 			ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr);
3733 			if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) {
3734 				freemsg(mp);
3735 				return;
3736 			}
3737 		} else {
3738 			/* Checksum */
3739 			uint16_t	*up;
3740 			uint32_t	sum;
3741 			int		remlen;
3742 
3743 			up = (uint16_t *)&ip6h->ip6_src;
3744 
3745 			remlen = msgdsize(mp) - hdr_len;
3746 			sum = htons(icmp->icmp_proto + remlen)
3747 			    + up[0] + up[1] + up[2] + up[3]
3748 			    + up[4] + up[5] + up[6] + up[7]
3749 			    + up[8] + up[9] + up[10] + up[11]
3750 			    + up[12] + up[13] + up[14] + up[15];
3751 			sum = (sum & 0xffff) + (sum >> 16);
3752 			sum = IP_CSUM(mp, hdr_len, sum);
3753 			if (sum != 0) {
3754 				/* IPv6 RAW checksum failed */
3755 				ip0dbg(("icmp_rput: RAW checksum "
3756 				    "failed %x\n", sum));
3757 				freemsg(mp);
3758 				BUMP_MIB(&is->is_rawip_mib,
3759 				    rawipInCksumErrs);
3760 				return;
3761 			}
3762 		}
3763 	}
3764 	/* Skip all the IPv6 headers per API */
3765 	mp->b_rptr += hdr_len;
3766 
3767 	udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
3768 
3769 	/*
3770 	 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to
3771 	 * maintain state information, instead of relying on icmp_t
3772 	 * structure, since there arent any locks protecting these members
3773 	 * and there is a window where there might be a race between a
3774 	 * thread setting options on the write side and a thread reading
3775 	 * these options on the read size.
3776 	 */
3777 	if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS|
3778 	    IPPF_RTHDR|IPPF_IFINDEX)) {
3779 		if (icmp->icmp_ipv6_recvhopopts &&
3780 		    (ipp.ipp_fields & IPPF_HOPOPTS) &&
3781 		    ipp.ipp_hopoptslen > hopstrip) {
3782 			udi_size += sizeof (struct T_opthdr) +
3783 			    ipp.ipp_hopoptslen - hopstrip;
3784 			icmp_opt |= IPPF_HOPOPTS;
3785 		}
3786 		if ((icmp->icmp_ipv6_recvdstopts ||
3787 		    icmp->icmp_old_ipv6_recvdstopts) &&
3788 		    (ipp.ipp_fields & IPPF_DSTOPTS)) {
3789 			udi_size += sizeof (struct T_opthdr) +
3790 			    ipp.ipp_dstoptslen;
3791 			icmp_opt |= IPPF_DSTOPTS;
3792 		}
3793 		if (((icmp->icmp_ipv6_recvdstopts &&
3794 		    icmp->icmp_ipv6_recvrthdr &&
3795 		    (ipp.ipp_fields & IPPF_RTHDR)) ||
3796 		    icmp->icmp_ipv6_recvrtdstopts) &&
3797 		    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
3798 			udi_size += sizeof (struct T_opthdr) +
3799 			    ipp.ipp_rtdstoptslen;
3800 			icmp_opt |= IPPF_RTDSTOPTS;
3801 		}
3802 		if (icmp->icmp_ipv6_recvrthdr &&
3803 		    (ipp.ipp_fields & IPPF_RTHDR)) {
3804 			udi_size += sizeof (struct T_opthdr) +
3805 			    ipp.ipp_rthdrlen;
3806 			icmp_opt |= IPPF_RTHDR;
3807 		}
3808 		if (icmp->icmp_ip_recvpktinfo &&
3809 		    (ipp.ipp_fields & IPPF_IFINDEX)) {
3810 			udi_size += sizeof (struct T_opthdr) +
3811 			    sizeof (struct in6_pktinfo);
3812 			icmp_opt |= IPPF_IFINDEX;
3813 		}
3814 	}
3815 	if (icmp->icmp_ipv6_recvhoplimit) {
3816 		udi_size += sizeof (struct T_opthdr) + sizeof (int);
3817 		icmp_ipv6_recvhoplimit = B_TRUE;
3818 	}
3819 
3820 	if (icmp->icmp_ipv6_recvtclass)
3821 		udi_size += sizeof (struct T_opthdr) + sizeof (int);
3822 
3823 	/*
3824 	 * If SO_TIMESTAMP is set allocate the appropriate sized
3825 	 * buffer. Since gethrestime() expects a pointer aligned
3826 	 * argument, we allocate space necessary for extra
3827 	 * alignment (even though it might not be used).
3828 	 */
3829 	if (icmp->icmp_timestamp) {
3830 		udi_size += sizeof (struct T_opthdr) +
3831 		    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
3832 	}
3833 
3834 	mp1 = allocb(udi_size, BPRI_MED);
3835 	if (mp1 == NULL) {
3836 		freemsg(mp);
3837 		BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
3838 		return;
3839 	}
3840 	mp1->b_cont = mp;
3841 	mp = mp1;
3842 	mp->b_datap->db_type = M_PROTO;
3843 	tudi = (struct T_unitdata_ind *)mp->b_rptr;
3844 	mp->b_wptr = (uchar_t *)tudi + udi_size;
3845 	tudi->PRIM_type = T_UNITDATA_IND;
3846 	tudi->SRC_length = sizeof (sin6_t);
3847 	tudi->SRC_offset = sizeof (struct T_unitdata_ind);
3848 	tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
3849 	udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
3850 	tudi->OPT_length = udi_size;
3851 	sin6 = (sin6_t *)&tudi[1];
3852 	sin6->sin6_port = 0;
3853 	sin6->sin6_family = AF_INET6;
3854 
3855 	sin6->sin6_addr = ip6h->ip6_src;
3856 	/* No sin6_flowinfo per API */
3857 	sin6->sin6_flowinfo = 0;
3858 	/* For link-scope source pass up scope id */
3859 	if ((ipp.ipp_fields & IPPF_IFINDEX) &&
3860 	    IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
3861 		sin6->sin6_scope_id = ipp.ipp_ifindex;
3862 	else
3863 		sin6->sin6_scope_id = 0;
3864 
3865 	sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst,
3866 	    icmp->icmp_zoneid, is->is_netstack);
3867 
3868 	if (udi_size != 0) {
3869 		uchar_t *dstopt;
3870 
3871 		dstopt = (uchar_t *)&sin6[1];
3872 		if (icmp_opt & IPPF_IFINDEX) {
3873 			struct T_opthdr *toh;
3874 			struct in6_pktinfo *pkti;
3875 
3876 			toh = (struct T_opthdr *)dstopt;
3877 			toh->level = IPPROTO_IPV6;
3878 			toh->name = IPV6_PKTINFO;
3879 			toh->len = sizeof (struct T_opthdr) +
3880 			    sizeof (*pkti);
3881 			toh->status = 0;
3882 			dstopt += sizeof (struct T_opthdr);
3883 			pkti = (struct in6_pktinfo *)dstopt;
3884 			pkti->ipi6_addr = ip6h->ip6_dst;
3885 			pkti->ipi6_ifindex = ipp.ipp_ifindex;
3886 			dstopt += sizeof (*pkti);
3887 			udi_size -= toh->len;
3888 		}
3889 		if (icmp_ipv6_recvhoplimit) {
3890 			struct T_opthdr *toh;
3891 
3892 			toh = (struct T_opthdr *)dstopt;
3893 			toh->level = IPPROTO_IPV6;
3894 			toh->name = IPV6_HOPLIMIT;
3895 			toh->len = sizeof (struct T_opthdr) +
3896 			    sizeof (uint_t);
3897 			toh->status = 0;
3898 			dstopt += sizeof (struct T_opthdr);
3899 			*(uint_t *)dstopt = ip6h->ip6_hops;
3900 			dstopt += sizeof (uint_t);
3901 			udi_size -= toh->len;
3902 		}
3903 		if (icmp->icmp_ipv6_recvtclass) {
3904 			struct T_opthdr *toh;
3905 
3906 			toh = (struct T_opthdr *)dstopt;
3907 			toh->level = IPPROTO_IPV6;
3908 			toh->name = IPV6_TCLASS;
3909 			toh->len = sizeof (struct T_opthdr) +
3910 			    sizeof (uint_t);
3911 			toh->status = 0;
3912 			dstopt += sizeof (struct T_opthdr);
3913 			*(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
3914 			dstopt += sizeof (uint_t);
3915 			udi_size -= toh->len;
3916 		}
3917 		if (icmp->icmp_timestamp) {
3918 			struct  T_opthdr *toh;
3919 
3920 			toh = (struct T_opthdr *)dstopt;
3921 			toh->level = SOL_SOCKET;
3922 			toh->name = SCM_TIMESTAMP;
3923 			toh->len = sizeof (struct T_opthdr) +
3924 			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
3925 			toh->status = 0;
3926 			dstopt += sizeof (struct T_opthdr);
3927 			/* Align for gethrestime() */
3928 			dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt,
3929 			    sizeof (intptr_t));
3930 			gethrestime((timestruc_t *)dstopt);
3931 			dstopt = (uchar_t *)toh + toh->len;
3932 			udi_size -= toh->len;
3933 		}
3934 
3935 		if (icmp_opt & IPPF_HOPOPTS) {
3936 			struct T_opthdr *toh;
3937 
3938 			toh = (struct T_opthdr *)dstopt;
3939 			toh->level = IPPROTO_IPV6;
3940 			toh->name = IPV6_HOPOPTS;
3941 			toh->len = sizeof (struct T_opthdr) +
3942 			    ipp.ipp_hopoptslen - hopstrip;
3943 			toh->status = 0;
3944 			dstopt += sizeof (struct T_opthdr);
3945 			bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt,
3946 			    ipp.ipp_hopoptslen - hopstrip);
3947 			if (hopstrip > 0) {
3948 				/* copy next header value and fake length */
3949 				dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0];
3950 				dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] -
3951 				    hopstrip / 8;
3952 			}
3953 			dstopt += ipp.ipp_hopoptslen - hopstrip;
3954 			udi_size -= toh->len;
3955 		}
3956 		if (icmp_opt & IPPF_RTDSTOPTS) {
3957 			struct T_opthdr *toh;
3958 
3959 			toh = (struct T_opthdr *)dstopt;
3960 			toh->level = IPPROTO_IPV6;
3961 			toh->name = IPV6_DSTOPTS;
3962 			toh->len = sizeof (struct T_opthdr) +
3963 			    ipp.ipp_rtdstoptslen;
3964 			toh->status = 0;
3965 			dstopt += sizeof (struct T_opthdr);
3966 			bcopy(ipp.ipp_rtdstopts, dstopt,
3967 			    ipp.ipp_rtdstoptslen);
3968 			dstopt += ipp.ipp_rtdstoptslen;
3969 			udi_size -= toh->len;
3970 		}
3971 		if (icmp_opt & IPPF_RTHDR) {
3972 			struct T_opthdr *toh;
3973 
3974 			toh = (struct T_opthdr *)dstopt;
3975 			toh->level = IPPROTO_IPV6;
3976 			toh->name = IPV6_RTHDR;
3977 			toh->len = sizeof (struct T_opthdr) +
3978 			    ipp.ipp_rthdrlen;
3979 			toh->status = 0;
3980 			dstopt += sizeof (struct T_opthdr);
3981 			bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen);
3982 			dstopt += ipp.ipp_rthdrlen;
3983 			udi_size -= toh->len;
3984 		}
3985 		if (icmp_opt & IPPF_DSTOPTS) {
3986 			struct T_opthdr *toh;
3987 
3988 			toh = (struct T_opthdr *)dstopt;
3989 			toh->level = IPPROTO_IPV6;
3990 			toh->name = IPV6_DSTOPTS;
3991 			toh->len = sizeof (struct T_opthdr) +
3992 			    ipp.ipp_dstoptslen;
3993 			toh->status = 0;
3994 			dstopt += sizeof (struct T_opthdr);
3995 			bcopy(ipp.ipp_dstopts, dstopt,
3996 			    ipp.ipp_dstoptslen);
3997 			dstopt += ipp.ipp_dstoptslen;
3998 			udi_size -= toh->len;
3999 		}
4000 		/* Consumed all of allocated space */
4001 		ASSERT(udi_size == 0);
4002 	}
4003 	BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams);
4004 
4005 deliver:
4006 	if (IPCL_IS_NONSTR(connp)) {
4007 		if ((*connp->conn_upcalls->su_recv)
4008 		    (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error,
4009 		    NULL) < 0) {
4010 			mutex_enter(&icmp->icmp_recv_lock);
4011 			if (error == ENOSPC) {
4012 				/*
4013 				 * let's confirm while holding the lock
4014 				 */
4015 				if ((*connp->conn_upcalls->su_recv)
4016 				    (connp->conn_upper_handle, NULL, 0, 0,
4017 				    &error, NULL) < 0) {
4018 					if (error == ENOSPC) {
4019 						connp->conn_flow_cntrld =
4020 						    B_TRUE;
4021 					} else {
4022 						ASSERT(error == EOPNOTSUPP);
4023 					}
4024 				}
4025 				mutex_exit(&icmp->icmp_recv_lock);
4026 			} else {
4027 				ASSERT(error == EOPNOTSUPP);
4028 				icmp_queue_fallback(icmp, mp);
4029 			}
4030 		}
4031 	} else {
4032 		putnext(connp->conn_rq, mp);
4033 	}
4034 	ASSERT(MUTEX_NOT_HELD(&icmp->icmp_recv_lock));
4035 }
4036 
4037 /*
4038  * return SNMP stuff in buffer in mpdata
4039  */
4040 mblk_t *
4041 icmp_snmp_get(queue_t *q, mblk_t *mpctl)
4042 {
4043 	mblk_t			*mpdata;
4044 	struct opthdr		*optp;
4045 	conn_t			*connp = Q_TO_CONN(q);
4046 	icmp_stack_t		*is = connp->conn_netstack->netstack_icmp;
4047 	mblk_t			*mp2ctl;
4048 
4049 	/*
4050 	 * make a copy of the original message
4051 	 */
4052 	mp2ctl = copymsg(mpctl);
4053 
4054 	if (mpctl == NULL ||
4055 	    (mpdata = mpctl->b_cont) == NULL) {
4056 		freemsg(mpctl);
4057 		freemsg(mp2ctl);
4058 		return (0);
4059 	}
4060 
4061 	/* fixed length structure for IPv4 and IPv6 counters */
4062 	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
4063 	optp->level = EXPER_RAWIP;
4064 	optp->name = 0;
4065 	(void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib,
4066 	    sizeof (is->is_rawip_mib));
4067 	optp->len = msgdsize(mpdata);
4068 	qreply(q, mpctl);
4069 
4070 	return (mp2ctl);
4071 }
4072 
4073 /*
4074  * Return 0 if invalid set request, 1 otherwise, including non-rawip requests.
4075  * TODO:  If this ever actually tries to set anything, it needs to be
4076  * to do the appropriate locking.
4077  */
4078 /* ARGSUSED */
4079 int
4080 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
4081     uchar_t *ptr, int len)
4082 {
4083 	switch (level) {
4084 	case EXPER_RAWIP:
4085 		return (0);
4086 	default:
4087 		return (1);
4088 	}
4089 }
4090 
4091 /* Report for ndd "icmp_status" */
4092 /* ARGSUSED */
4093 static int
4094 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
4095 {
4096 	conn_t  *connp;
4097 	ip_stack_t *ipst;
4098 	char	laddrbuf[INET6_ADDRSTRLEN];
4099 	char	faddrbuf[INET6_ADDRSTRLEN];
4100 	int	i;
4101 
4102 	(void) mi_mpprintf(mp,
4103 	    "RAWIP    " MI_COL_HDRPAD_STR
4104 	/*   01234567[89ABCDEF] */
4105 	    "  src addr        dest addr       state");
4106 	/*   xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */
4107 
4108 	connp = Q_TO_CONN(q);
4109 	ipst = connp->conn_netstack->netstack_ip;
4110 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
4111 		connf_t *connfp;
4112 		char	*state;
4113 
4114 		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
4115 		connp = NULL;
4116 
4117 		while ((connp = ipcl_get_next_conn(connfp, connp,
4118 		    IPCL_RAWIPCONN)) != NULL) {
4119 			icmp_t  *icmp;
4120 
4121 			mutex_enter(&(connp)->conn_lock);
4122 			icmp = connp->conn_icmp;
4123 
4124 			if (icmp->icmp_state == TS_UNBND)
4125 				state = "UNBOUND";
4126 			else if (icmp->icmp_state == TS_IDLE)
4127 				state = "IDLE";
4128 			else if (icmp->icmp_state == TS_DATA_XFER)
4129 				state = "CONNECTED";
4130 			else
4131 				state = "UnkState";
4132 
4133 			(void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s",
4134 			    (void *)icmp,
4135 			    inet_ntop(AF_INET6, &icmp->icmp_v6dst.sin6_addr,
4136 			    faddrbuf,
4137 			    sizeof (faddrbuf)),
4138 			    inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf,
4139 			    sizeof (laddrbuf)),
4140 			    state);
4141 			mutex_exit(&(connp)->conn_lock);
4142 		}
4143 	}
4144 	return (0);
4145 }
4146 
4147 /*
4148  * This routine creates a T_UDERROR_IND message and passes it upstream.
4149  * The address and options are copied from the T_UNITDATA_REQ message
4150  * passed in mp.  This message is freed.
4151  */
4152 static void
4153 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
4154 {
4155 	mblk_t	*mp1;
4156 	uchar_t	*rptr = mp->b_rptr;
4157 	struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr;
4158 
4159 	mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset],
4160 	    tudr->DEST_length, (char *)&rptr[tudr->OPT_offset],
4161 	    tudr->OPT_length, err);
4162 	if (mp1)
4163 		qreply(q, mp1);
4164 	freemsg(mp);
4165 }
4166 
4167 
4168 static int
4169 rawip_do_unbind(conn_t *connp)
4170 {
4171 	icmp_t *icmp = connp->conn_icmp;
4172 
4173 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
4174 	/* If a bind has not been done, we can't unbind. */
4175 	if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) {
4176 		rw_exit(&icmp->icmp_rwlock);
4177 		return (-TOUTSTATE);
4178 	}
4179 	icmp->icmp_pending_op = T_UNBIND_REQ;
4180 	rw_exit(&icmp->icmp_rwlock);
4181 
4182 	/*
4183 	 * Call ip to unbind
4184 	 */
4185 
4186 	ip_unbind(connp);
4187 
4188 	/*
4189 	 * Once we're unbound from IP, the pending operation may be cleared
4190 	 * here.
4191 	 */
4192 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
4193 	V6_SET_ZERO(icmp->icmp_v6src);
4194 	V6_SET_ZERO(icmp->icmp_bound_v6src);
4195 	icmp->icmp_pending_op = -1;
4196 	icmp->icmp_state = TS_UNBND;
4197 	if (icmp->icmp_family == AF_INET6)
4198 		(void) icmp_build_hdrs(icmp);
4199 	rw_exit(&icmp->icmp_rwlock);
4200 	return (0);
4201 }
4202 
4203 /*
4204  * This routine is called by icmp_wput to handle T_UNBIND_REQ messages.
4205  * After some error checking, the message is passed downstream to ip.
4206  */
4207 static void
4208 icmp_tpi_unbind(queue_t *q, mblk_t *mp)
4209 {
4210 	conn_t	*connp = Q_TO_CONN(q);
4211 	int	error;
4212 
4213 	ASSERT(mp->b_cont == NULL);
4214 	error = rawip_do_unbind(connp);
4215 	if (error) {
4216 		if (error < 0) {
4217 			icmp_err_ack(q, mp, -error, 0);
4218 		} else {
4219 			icmp_err_ack(q, mp, 0, error);
4220 		}
4221 		return;
4222 	}
4223 
4224 	/*
4225 	 * Convert mp into a T_OK_ACK
4226 	 */
4227 
4228 	mp = mi_tpi_ok_ack_alloc(mp);
4229 
4230 	/*
4231 	 * should not happen in practice... T_OK_ACK is smaller than the
4232 	 * original message.
4233 	 */
4234 	ASSERT(mp != NULL);
4235 	ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
4236 	qreply(q, mp);
4237 }
4238 
4239 
4240 /*
4241  * Process IPv4 packets that already include an IP header.
4242  * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and
4243  * IPPROTO_IGMP).
4244  */
4245 static int
4246 icmp_wput_hdrincl(queue_t *q, conn_t *connp, mblk_t *mp, icmp_t *icmp,
4247     ip4_pkt_t *pktinfop)
4248 {
4249 	icmp_stack_t *is = icmp->icmp_is;
4250 	ipha_t	*ipha;
4251 	int	ip_hdr_length;
4252 	int	tp_hdr_len;
4253 	mblk_t	*mp1;
4254 	uint_t	pkt_len;
4255 	ip_opt_info_t optinfo;
4256 
4257 	optinfo.ip_opt_flags = 0;
4258 	optinfo.ip_opt_ill_index = 0;
4259 	ipha = (ipha_t *)mp->b_rptr;
4260 	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len;
4261 	if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) {
4262 		if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) {
4263 			ASSERT(icmp != NULL);
4264 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4265 			freemsg(mp);
4266 			return (0);
4267 		}
4268 		ipha = (ipha_t *)mp->b_rptr;
4269 	}
4270 	ipha->ipha_version_and_hdr_length =
4271 	    (IP_VERSION<<4) | (ip_hdr_length>>2);
4272 
4273 	/*
4274 	 * For the socket of SOCK_RAW type, the checksum is provided in the
4275 	 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to
4276 	 * tell IP that the application has sent a complete IP header and not
4277 	 * to compute the transport checksum nor change the DF flag.
4278 	 */
4279 	ipha->ipha_ident = IP_HDR_INCLUDED;
4280 	ipha->ipha_hdr_checksum = 0;
4281 	ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF);
4282 	/* Insert options if any */
4283 	if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) {
4284 		/*
4285 		 * Put the IP header plus any transport header that is
4286 		 * checksumed by ip_wput into the first mblk. (ip_wput assumes
4287 		 * that at least the checksum field is in the first mblk.)
4288 		 */
4289 		switch (ipha->ipha_protocol) {
4290 		case IPPROTO_UDP:
4291 			tp_hdr_len = 8;
4292 			break;
4293 		case IPPROTO_TCP:
4294 			tp_hdr_len = 20;
4295 			break;
4296 		default:
4297 			tp_hdr_len = 0;
4298 			break;
4299 		}
4300 		/*
4301 		 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus
4302 		 * tp_hdr_len bytes will be in a single mblk.
4303 		 */
4304 		if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH +
4305 		    tp_hdr_len)) {
4306 			if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH +
4307 			    tp_hdr_len)) {
4308 				BUMP_MIB(&is->is_rawip_mib,
4309 				    rawipOutErrors);
4310 				freemsg(mp);
4311 				return (0);
4312 			}
4313 			ipha = (ipha_t *)mp->b_rptr;
4314 		}
4315 
4316 		/*
4317 		 * if the length is larger then the max allowed IP packet,
4318 		 * then send an error and abort the processing.
4319 		 */
4320 		pkt_len = ntohs(ipha->ipha_length)
4321 		    + icmp->icmp_ip_snd_options_len;
4322 		if (pkt_len > IP_MAXPACKET) {
4323 			return (EMSGSIZE);
4324 		}
4325 		if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra +
4326 		    tp_hdr_len, BPRI_LO))) {
4327 			return (ENOMEM);
4328 		}
4329 		mp1->b_rptr += is->is_wroff_extra;
4330 		mp1->b_wptr = mp1->b_rptr + ip_hdr_length;
4331 
4332 		ipha->ipha_length = htons((uint16_t)pkt_len);
4333 		bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH);
4334 
4335 		/* Copy transport header if any */
4336 		bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len);
4337 		mp1->b_wptr += tp_hdr_len;
4338 
4339 		/* Add options */
4340 		ipha = (ipha_t *)mp1->b_rptr;
4341 		bcopy(icmp->icmp_ip_snd_options, &ipha[1],
4342 		    icmp->icmp_ip_snd_options_len);
4343 
4344 		/* Drop IP header and transport header from original */
4345 		(void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len);
4346 
4347 		mp1->b_cont = mp;
4348 		mp = mp1;
4349 		/*
4350 		 * Massage source route putting first source
4351 		 * route in ipha_dst.
4352 		 */
4353 		(void) ip_massage_options(ipha, is->is_netstack);
4354 	}
4355 
4356 	if (pktinfop != NULL) {
4357 		/*
4358 		 * Over write the source address provided in the header
4359 		 */
4360 		if (pktinfop->ip4_addr != INADDR_ANY) {
4361 			ipha->ipha_src = pktinfop->ip4_addr;
4362 			optinfo.ip_opt_flags = IP_VERIFY_SRC;
4363 		}
4364 
4365 		if (pktinfop->ip4_ill_index != 0) {
4366 			optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
4367 		}
4368 	}
4369 
4370 	mblk_setcred(mp, connp->conn_cred);
4371 	ip_output_options(connp, mp, q, IP_WPUT, &optinfo);
4372 	return (0);
4373 }
4374 
4375 static int
4376 icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst)
4377 {
4378 	int err;
4379 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
4380 	icmp_stack_t		*is = icmp->icmp_is;
4381 	conn_t	*connp = icmp->icmp_connp;
4382 
4383 	err = tsol_compute_label(DB_CREDDEF(mp, connp->conn_cred), dst,
4384 	    opt_storage, connp->conn_mac_exempt,
4385 	    is->is_netstack->netstack_ip);
4386 	if (err == 0) {
4387 		err = tsol_update_options(&icmp->icmp_ip_snd_options,
4388 		    &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len,
4389 		    opt_storage);
4390 	}
4391 	if (err != 0) {
4392 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4393 		DTRACE_PROBE4(
4394 		    tx__ip__log__drop__updatelabel__icmp,
4395 		    char *, "icmp(1) failed to update options(2) on mp(3)",
4396 		    icmp_t *, icmp, char *, opt_storage, mblk_t *, mp);
4397 		return (err);
4398 	}
4399 	IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst);
4400 	return (0);
4401 }
4402 
4403 /*
4404  * This routine handles all messages passed downstream.  It either
4405  * consumes the message or passes it downstream; it never queues a
4406  * a message.
4407  */
4408 static void
4409 icmp_wput(queue_t *q, mblk_t *mp)
4410 {
4411 	uchar_t	*rptr = mp->b_rptr;
4412 	ipha_t	*ipha;
4413 	mblk_t	*mp1;
4414 #define	tudr ((struct T_unitdata_req *)rptr)
4415 	size_t	ip_len;
4416 	conn_t	*connp = Q_TO_CONN(q);
4417 	icmp_t	*icmp = connp->conn_icmp;
4418 	icmp_stack_t *is = icmp->icmp_is;
4419 	sin6_t	*sin6;
4420 	sin_t	*sin;
4421 	ipaddr_t	v4dst;
4422 	ip4_pkt_t	pktinfo;
4423 	ip4_pkt_t	*pktinfop = &pktinfo;
4424 	ip6_pkt_t	ipp_s;  /* For ancillary data options */
4425 	ip6_pkt_t	*ipp = &ipp_s;
4426 	int error;
4427 
4428 	ipp->ipp_fields = 0;
4429 	ipp->ipp_sticky_ignored = 0;
4430 
4431 	switch (mp->b_datap->db_type) {
4432 	case M_DATA:
4433 		if (icmp->icmp_hdrincl) {
4434 			ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
4435 			ipha = (ipha_t *)mp->b_rptr;
4436 			if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) {
4437 				if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) {
4438 					BUMP_MIB(&is->is_rawip_mib,
4439 					    rawipOutErrors);
4440 					freemsg(mp);
4441 					return;
4442 				}
4443 				ipha = (ipha_t *)mp->b_rptr;
4444 			}
4445 			/*
4446 			 * If this connection was used for v6 (inconceivable!)
4447 			 * or if we have a new destination, then it's time to
4448 			 * figure a new label.
4449 			 */
4450 			if (is_system_labeled() &&
4451 			    (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) ||
4452 			    V4_PART_OF_V6(icmp->icmp_v6lastdst) !=
4453 			    ipha->ipha_dst)) {
4454 				error = icmp_update_label(icmp, mp,
4455 				    ipha->ipha_dst);
4456 				if (error != 0) {
4457 					icmp_ud_err(q, mp, error);
4458 					return;
4459 				}
4460 			}
4461 			error = icmp_wput_hdrincl(q, connp, mp, icmp, NULL);
4462 			if (error != 0)
4463 				icmp_ud_err(q, mp, error);
4464 			return;
4465 		}
4466 		freemsg(mp);
4467 		return;
4468 	case M_PROTO:
4469 	case M_PCPROTO:
4470 		ip_len = mp->b_wptr - rptr;
4471 		if (ip_len >= sizeof (struct T_unitdata_req)) {
4472 			/* Expedite valid T_UNITDATA_REQ to below the switch */
4473 			if (((union T_primitives *)rptr)->type
4474 			    == T_UNITDATA_REQ)
4475 				break;
4476 		}
4477 		/* FALLTHRU */
4478 	default:
4479 		icmp_wput_other(q, mp);
4480 		return;
4481 	}
4482 
4483 	/* Handle T_UNITDATA_REQ messages here. */
4484 
4485 	mp1 = mp->b_cont;
4486 	if (mp1 == NULL) {
4487 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4488 		icmp_ud_err(q, mp, EPROTO);
4489 		return;
4490 	}
4491 
4492 	if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) {
4493 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4494 		icmp_ud_err(q, mp, EADDRNOTAVAIL);
4495 		return;
4496 	}
4497 
4498 	switch (icmp->icmp_family) {
4499 	case AF_INET6:
4500 		sin6 = (sin6_t *)&rptr[tudr->DEST_offset];
4501 		if (!OK_32PTR((char *)sin6) ||
4502 		    tudr->DEST_length != sizeof (sin6_t) ||
4503 		    sin6->sin6_family != AF_INET6) {
4504 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4505 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
4506 			return;
4507 		}
4508 
4509 		/* No support for mapped addresses on raw sockets */
4510 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
4511 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4512 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
4513 			return;
4514 		}
4515 
4516 		/*
4517 		 * Destination is a native IPv6 address.
4518 		 * Send out an IPv6 format packet.
4519 		 */
4520 		if (tudr->OPT_length != 0) {
4521 			int error;
4522 
4523 			error = 0;
4524 			if (icmp_unitdata_opt_process(q, mp, &error,
4525 			    (void *)ipp) < 0) {
4526 				/* failure */
4527 				BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4528 				icmp_ud_err(q, mp, error);
4529 				return;
4530 			}
4531 			ASSERT(error == 0);
4532 		}
4533 
4534 		error = raw_ip_send_data_v6(q, connp, mp1, sin6, ipp);
4535 		goto done;
4536 
4537 	case AF_INET:
4538 		sin = (sin_t *)&rptr[tudr->DEST_offset];
4539 		if (!OK_32PTR((char *)sin) ||
4540 		    tudr->DEST_length != sizeof (sin_t) ||
4541 		    sin->sin_family != AF_INET) {
4542 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4543 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
4544 			return;
4545 		}
4546 		/* Extract and ipaddr */
4547 		v4dst = sin->sin_addr.s_addr;
4548 		break;
4549 
4550 	default:
4551 		ASSERT(0);
4552 	}
4553 
4554 	pktinfop->ip4_ill_index = 0;
4555 	pktinfop->ip4_addr = INADDR_ANY;
4556 
4557 	/*
4558 	 * If options passed in, feed it for verification and handling
4559 	 */
4560 	if (tudr->OPT_length != 0) {
4561 		int error;
4562 
4563 		error = 0;
4564 		if (icmp_unitdata_opt_process(q, mp, &error,
4565 		    (void *)pktinfop) < 0) {
4566 			/* failure */
4567 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4568 			icmp_ud_err(q, mp, error);
4569 			return;
4570 		}
4571 		ASSERT(error == 0);
4572 		/*
4573 		 * Note: Success in processing options.
4574 		 * mp option buffer represented by
4575 		 * OPT_length/offset now potentially modified
4576 		 * and contain option setting results
4577 		 */
4578 	}
4579 
4580 	error = raw_ip_send_data_v4(q, connp, mp1, v4dst, pktinfop);
4581 done:
4582 	if (error != 0) {
4583 		icmp_ud_err(q, mp, error);
4584 		return;
4585 	} else {
4586 		mp->b_cont = NULL;
4587 		freeb(mp);
4588 	}
4589 }
4590 
4591 
4592 /* ARGSUSED */
4593 static void
4594 icmp_wput_fallback(queue_t *q, mblk_t *mp)
4595 {
4596 #ifdef DEBUG
4597 	cmn_err(CE_CONT, "icmp_wput_fallback: Message during fallback \n");
4598 #endif
4599 	freemsg(mp);
4600 }
4601 
4602 static int
4603 raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp, ipaddr_t v4dst,
4604     ip4_pkt_t *pktinfop)
4605 {
4606 	ipha_t	*ipha;
4607 	size_t	ip_len;
4608 	icmp_t	*icmp = connp->conn_icmp;
4609 	icmp_stack_t *is = icmp->icmp_is;
4610 	int	ip_hdr_length;
4611 	ip_opt_info_t	optinfo;
4612 
4613 	optinfo.ip_opt_flags = 0;
4614 	optinfo.ip_opt_ill_index = 0;
4615 
4616 	if (icmp->icmp_state == TS_UNBND) {
4617 		/* If a port has not been bound to the stream, fail. */
4618 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4619 		return (EPROTO);
4620 	}
4621 
4622 	if (v4dst == INADDR_ANY)
4623 		v4dst = htonl(INADDR_LOOPBACK);
4624 
4625 	/* Check if our saved options are valid; update if not */
4626 	if (is_system_labeled() &&
4627 	    (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) ||
4628 	    V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst)) {
4629 		int error = icmp_update_label(icmp, mp, v4dst);
4630 
4631 		if (error != 0)
4632 			return (error);
4633 	}
4634 
4635 	/* Protocol 255 contains full IP headers */
4636 	if (icmp->icmp_hdrincl)
4637 		return (icmp_wput_hdrincl(q, connp, mp, icmp, pktinfop));
4638 
4639 	/* Add an IP header */
4640 	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len;
4641 	ipha = (ipha_t *)&mp->b_rptr[-ip_hdr_length];
4642 	if ((uchar_t *)ipha < mp->b_datap->db_base ||
4643 	    mp->b_datap->db_ref != 1 ||
4644 	    !OK_32PTR(ipha)) {
4645 		mblk_t	*mp1;
4646 		if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra,
4647 		    BPRI_LO))) {
4648 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4649 			return (ENOMEM);
4650 		}
4651 		mp1->b_cont = mp;
4652 		ipha = (ipha_t *)mp1->b_datap->db_lim;
4653 		mp1->b_wptr = (uchar_t *)ipha;
4654 		ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length);
4655 		mp = mp1;
4656 	}
4657 #ifdef	_BIG_ENDIAN
4658 	/* Set version, header length, and tos */
4659 	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
4660 	    ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) |
4661 	    icmp->icmp_type_of_service);
4662 	/* Set ttl and protocol */
4663 	*(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto;
4664 #else
4665 	/* Set version, header length, and tos */
4666 	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
4667 	    ((icmp->icmp_type_of_service << 8) |
4668 	    ((IP_VERSION << 4) | (ip_hdr_length>>2)));
4669 	/* Set ttl and protocol */
4670 	*(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl;
4671 #endif
4672 	if (pktinfop->ip4_addr != INADDR_ANY) {
4673 		ipha->ipha_src = pktinfop->ip4_addr;
4674 		optinfo.ip_opt_flags = IP_VERIFY_SRC;
4675 	} else {
4676 
4677 		/*
4678 		 * Copy our address into the packet.  If this is zero,
4679 		 * ip will fill in the real source address.
4680 		 */
4681 		IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src);
4682 	}
4683 
4684 	ipha->ipha_fragment_offset_and_flags = 0;
4685 
4686 	if (pktinfop->ip4_ill_index != 0) {
4687 		optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
4688 	}
4689 
4690 
4691 	/*
4692 	 * For the socket of SOCK_RAW type, the checksum is provided in the
4693 	 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to
4694 	 * tell IP that the application has sent a complete IP header and not
4695 	 * to compute the transport checksum nor change the DF flag.
4696 	 */
4697 	ipha->ipha_ident = IP_HDR_INCLUDED;
4698 
4699 	/* Finish common formatting of the packet. */
4700 	mp->b_rptr = (uchar_t *)ipha;
4701 
4702 	ip_len = mp->b_wptr - (uchar_t *)ipha;
4703 	if (mp->b_cont != NULL)
4704 		ip_len += msgdsize(mp->b_cont);
4705 
4706 	/*
4707 	 * Set the length into the IP header.
4708 	 * If the length is greater than the maximum allowed by IP,
4709 	 * then free the message and return. Do not try and send it
4710 	 * as this can cause problems in layers below.
4711 	 */
4712 	if (ip_len > IP_MAXPACKET) {
4713 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4714 		return (EMSGSIZE);
4715 	}
4716 	ipha->ipha_length = htons((uint16_t)ip_len);
4717 	/*
4718 	 * Copy in the destination address request
4719 	 */
4720 	ipha->ipha_dst = v4dst;
4721 
4722 	/*
4723 	 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic.
4724 	 */
4725 	if (CLASSD(v4dst))
4726 		ipha->ipha_ttl = icmp->icmp_multicast_ttl;
4727 
4728 	/* Copy in options if any */
4729 	if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) {
4730 		bcopy(icmp->icmp_ip_snd_options,
4731 		    &ipha[1], icmp->icmp_ip_snd_options_len);
4732 		/*
4733 		 * Massage source route putting first source route in ipha_dst.
4734 		 * Ignore the destination in the T_unitdata_req.
4735 		 */
4736 		(void) ip_massage_options(ipha, is->is_netstack);
4737 	}
4738 
4739 	BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams);
4740 	mblk_setcred(mp, connp->conn_cred);
4741 	ip_output_options(connp, mp, q, IP_WPUT, &optinfo);
4742 	return (0);
4743 }
4744 
4745 static int
4746 icmp_update_label_v6(icmp_t *icmp, mblk_t *mp, in6_addr_t *dst)
4747 {
4748 	int err;
4749 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
4750 	icmp_stack_t		*is = icmp->icmp_is;
4751 	conn_t	*connp = icmp->icmp_connp;
4752 
4753 	err = tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), dst,
4754 	    opt_storage, connp->conn_mac_exempt,
4755 	    is->is_netstack->netstack_ip);
4756 	if (err == 0) {
4757 		err = tsol_update_sticky(&icmp->icmp_sticky_ipp,
4758 		    &icmp->icmp_label_len_v6, opt_storage);
4759 	}
4760 	if (err != 0) {
4761 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4762 		DTRACE_PROBE4(
4763 		    tx__ip__log__drop__updatelabel__icmp6,
4764 		    char *, "icmp(1) failed to update options(2) on mp(3)",
4765 		    icmp_t *, icmp, char *, opt_storage, mblk_t *, mp);
4766 		return (err);
4767 	}
4768 
4769 	icmp->icmp_v6lastdst = *dst;
4770 	return (0);
4771 }
4772 
4773 /*
4774  * raw_ip_send_data_v6():
4775  * Assumes that icmp_wput did some sanity checking on the destination
4776  * address, but that the label may not yet be correct.
4777  */
4778 static int
4779 raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp, sin6_t *sin6,
4780     ip6_pkt_t *ipp)
4781 {
4782 	ip6_t			*ip6h;
4783 	ip6i_t			*ip6i;	/* mp->b_rptr even if no ip6i_t */
4784 	int			ip_hdr_len = IPV6_HDR_LEN;
4785 	size_t			ip_len;
4786 	icmp_t			*icmp = connp->conn_icmp;
4787 	icmp_stack_t		*is = icmp->icmp_is;
4788 	ip6_pkt_t		*tipp;
4789 	uint32_t		csum = 0;
4790 	uint_t			ignore = 0;
4791 	uint_t			option_exists = 0, is_sticky = 0;
4792 	uint8_t			*cp;
4793 	uint8_t			*nxthdr_ptr;
4794 	in6_addr_t		ip6_dst;
4795 
4796 	/*
4797 	 * If the local address is a mapped address return
4798 	 * an error.
4799 	 * It would be possible to send an IPv6 packet but the
4800 	 * response would never make it back to the application
4801 	 * since it is bound to a mapped address.
4802 	 */
4803 	if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) {
4804 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4805 		return (EADDRNOTAVAIL);
4806 	}
4807 
4808 	ignore = ipp->ipp_sticky_ignored;
4809 	if (sin6->sin6_scope_id != 0 &&
4810 	    IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
4811 		/*
4812 		 * IPPF_SCOPE_ID is special.  It's neither a sticky
4813 		 * option nor ancillary data.  It needs to be
4814 		 * explicitly set in options_exists.
4815 		 */
4816 		option_exists |= IPPF_SCOPE_ID;
4817 	}
4818 
4819 	/*
4820 	 * Compute the destination address
4821 	 */
4822 	ip6_dst = sin6->sin6_addr;
4823 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
4824 		ip6_dst = ipv6_loopback;
4825 
4826 	/*
4827 	 * If we're not going to the same destination as last time, then
4828 	 * recompute the label required.  This is done in a separate routine to
4829 	 * avoid blowing up our stack here.
4830 	 */
4831 	if (is_system_labeled() &&
4832 	    !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst)) {
4833 		int error = 0;
4834 
4835 		error = icmp_update_label_v6(icmp, mp, &ip6_dst);
4836 		if (error != 0)
4837 			return (error);
4838 	}
4839 
4840 	/*
4841 	 * If there's a security label here, then we ignore any options the
4842 	 * user may try to set.  We keep the peer's label as a hidden sticky
4843 	 * option.
4844 	 */
4845 	if (icmp->icmp_label_len_v6 > 0) {
4846 		ignore &= ~IPPF_HOPOPTS;
4847 		ipp->ipp_fields &= ~IPPF_HOPOPTS;
4848 	}
4849 
4850 	if ((icmp->icmp_sticky_ipp.ipp_fields == 0) &&
4851 	    (ipp->ipp_fields == 0)) {
4852 		/* No sticky options nor ancillary data. */
4853 		goto no_options;
4854 	}
4855 
4856 	/*
4857 	 * Go through the options figuring out where each is going to
4858 	 * come from and build two masks.  The first mask indicates if
4859 	 * the option exists at all.  The second mask indicates if the
4860 	 * option is sticky or ancillary.
4861 	 */
4862 	if (!(ignore & IPPF_HOPOPTS)) {
4863 		if (ipp->ipp_fields & IPPF_HOPOPTS) {
4864 			option_exists |= IPPF_HOPOPTS;
4865 			ip_hdr_len += ipp->ipp_hopoptslen;
4866 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) {
4867 			option_exists |= IPPF_HOPOPTS;
4868 			is_sticky |= IPPF_HOPOPTS;
4869 			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen;
4870 		}
4871 	}
4872 
4873 	if (!(ignore & IPPF_RTHDR)) {
4874 		if (ipp->ipp_fields & IPPF_RTHDR) {
4875 			option_exists |= IPPF_RTHDR;
4876 			ip_hdr_len += ipp->ipp_rthdrlen;
4877 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) {
4878 			option_exists |= IPPF_RTHDR;
4879 			is_sticky |= IPPF_RTHDR;
4880 			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen;
4881 		}
4882 	}
4883 
4884 	if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) {
4885 		/*
4886 		 * Need to have a router header to use these.
4887 		 */
4888 		if (ipp->ipp_fields & IPPF_RTDSTOPTS) {
4889 			option_exists |= IPPF_RTDSTOPTS;
4890 			ip_hdr_len += ipp->ipp_rtdstoptslen;
4891 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) {
4892 			option_exists |= IPPF_RTDSTOPTS;
4893 			is_sticky |= IPPF_RTDSTOPTS;
4894 			ip_hdr_len +=
4895 			    icmp->icmp_sticky_ipp.ipp_rtdstoptslen;
4896 		}
4897 	}
4898 
4899 	if (!(ignore & IPPF_DSTOPTS)) {
4900 		if (ipp->ipp_fields & IPPF_DSTOPTS) {
4901 			option_exists |= IPPF_DSTOPTS;
4902 			ip_hdr_len += ipp->ipp_dstoptslen;
4903 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) {
4904 			option_exists |= IPPF_DSTOPTS;
4905 			is_sticky |= IPPF_DSTOPTS;
4906 			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen;
4907 		}
4908 	}
4909 
4910 	if (!(ignore & IPPF_IFINDEX)) {
4911 		if (ipp->ipp_fields & IPPF_IFINDEX) {
4912 			option_exists |= IPPF_IFINDEX;
4913 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) {
4914 			option_exists |= IPPF_IFINDEX;
4915 			is_sticky |= IPPF_IFINDEX;
4916 		}
4917 	}
4918 
4919 	if (!(ignore & IPPF_ADDR)) {
4920 		if (ipp->ipp_fields & IPPF_ADDR) {
4921 			option_exists |= IPPF_ADDR;
4922 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) {
4923 			option_exists |= IPPF_ADDR;
4924 			is_sticky |= IPPF_ADDR;
4925 		}
4926 	}
4927 
4928 	if (!(ignore & IPPF_DONTFRAG)) {
4929 		if (ipp->ipp_fields & IPPF_DONTFRAG) {
4930 			option_exists |= IPPF_DONTFRAG;
4931 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) {
4932 			option_exists |= IPPF_DONTFRAG;
4933 			is_sticky |= IPPF_DONTFRAG;
4934 		}
4935 	}
4936 
4937 	if (!(ignore & IPPF_USE_MIN_MTU)) {
4938 		if (ipp->ipp_fields & IPPF_USE_MIN_MTU) {
4939 			option_exists |= IPPF_USE_MIN_MTU;
4940 		} else if (icmp->icmp_sticky_ipp.ipp_fields &
4941 		    IPPF_USE_MIN_MTU) {
4942 			option_exists |= IPPF_USE_MIN_MTU;
4943 			is_sticky |= IPPF_USE_MIN_MTU;
4944 		}
4945 	}
4946 
4947 	if (!(ignore & IPPF_NEXTHOP)) {
4948 		if (ipp->ipp_fields & IPPF_NEXTHOP) {
4949 			option_exists |= IPPF_NEXTHOP;
4950 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) {
4951 			option_exists |= IPPF_NEXTHOP;
4952 			is_sticky |= IPPF_NEXTHOP;
4953 		}
4954 	}
4955 
4956 	if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT))
4957 		option_exists |= IPPF_HOPLIMIT;
4958 	/* IPV6_HOPLIMIT can never be sticky */
4959 	ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT));
4960 
4961 	if (!(ignore & IPPF_UNICAST_HOPS) &&
4962 	    (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) {
4963 		option_exists |= IPPF_UNICAST_HOPS;
4964 		is_sticky |= IPPF_UNICAST_HOPS;
4965 	}
4966 
4967 	if (!(ignore & IPPF_MULTICAST_HOPS) &&
4968 	    (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) {
4969 		option_exists |= IPPF_MULTICAST_HOPS;
4970 		is_sticky |= IPPF_MULTICAST_HOPS;
4971 	}
4972 
4973 	if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) {
4974 		/* This is a sticky socket option only */
4975 		option_exists |= IPPF_NO_CKSUM;
4976 		is_sticky |= IPPF_NO_CKSUM;
4977 	}
4978 
4979 	if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) {
4980 		/* This is a sticky socket option only */
4981 		option_exists |= IPPF_RAW_CKSUM;
4982 		is_sticky |= IPPF_RAW_CKSUM;
4983 	}
4984 
4985 	if (!(ignore & IPPF_TCLASS)) {
4986 		if (ipp->ipp_fields & IPPF_TCLASS) {
4987 			option_exists |= IPPF_TCLASS;
4988 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) {
4989 			option_exists |= IPPF_TCLASS;
4990 			is_sticky |= IPPF_TCLASS;
4991 		}
4992 	}
4993 
4994 no_options:
4995 
4996 	/*
4997 	 * If any options carried in the ip6i_t were specified, we
4998 	 * need to account for the ip6i_t in the data we'll be sending
4999 	 * down.
5000 	 */
5001 	if (option_exists & IPPF_HAS_IP6I)
5002 		ip_hdr_len += sizeof (ip6i_t);
5003 
5004 	/* check/fix buffer config, setup pointers into it */
5005 	ip6h = (ip6_t *)&mp->b_rptr[-ip_hdr_len];
5006 	if ((mp->b_datap->db_ref != 1) ||
5007 	    ((unsigned char *)ip6h < mp->b_datap->db_base) ||
5008 	    !OK_32PTR(ip6h)) {
5009 		mblk_t	*mp1;
5010 
5011 		/* Try to get everything in a single mblk next time */
5012 		if (ip_hdr_len > icmp->icmp_max_hdr_len) {
5013 			icmp->icmp_max_hdr_len = ip_hdr_len;
5014 
5015 			(void) proto_set_tx_wroff(q == NULL ? NULL:RD(q), connp,
5016 			    icmp->icmp_max_hdr_len + is->is_wroff_extra);
5017 		}
5018 		mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO);
5019 		if (!mp1) {
5020 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
5021 			return (ENOMEM);
5022 		}
5023 		mp1->b_cont = mp;
5024 		mp1->b_wptr = mp1->b_datap->db_lim;
5025 		ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len);
5026 		mp = mp1;
5027 	}
5028 	mp->b_rptr = (unsigned char *)ip6h;
5029 	ip6i = (ip6i_t *)ip6h;
5030 
5031 #define	ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &icmp->icmp_sticky_ipp : ipp)
5032 	if (option_exists & IPPF_HAS_IP6I) {
5033 		ip6h = (ip6_t *)&ip6i[1];
5034 		ip6i->ip6i_flags = 0;
5035 		ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
5036 
		/* sin6_scope_id takes precedence over IPPF_IFINDEX */
5038 		if (option_exists & IPPF_SCOPE_ID) {
5039 			ip6i->ip6i_flags |= IP6I_IFINDEX;
5040 			ip6i->ip6i_ifindex = sin6->sin6_scope_id;
5041 		} else if (option_exists & IPPF_IFINDEX) {
5042 			tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX);
5043 			ASSERT(tipp->ipp_ifindex != 0);
5044 			ip6i->ip6i_flags |= IP6I_IFINDEX;
5045 			ip6i->ip6i_ifindex = tipp->ipp_ifindex;
5046 		}
5047 
5048 		if (option_exists & IPPF_RAW_CKSUM) {
5049 			ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM;
5050 			ip6i->ip6i_checksum_off = icmp->icmp_checksum_off;
5051 		}
5052 
5053 		if (option_exists & IPPF_NO_CKSUM) {
5054 			ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM;
5055 		}
5056 
5057 		if (option_exists & IPPF_ADDR) {
5058 			/*
5059 			 * Enable per-packet source address verification if
5060 			 * IPV6_PKTINFO specified the source address.
5061 			 * ip6_src is set in the transport's _wput function.
5062 			 */
5063 			ip6i->ip6i_flags |= IP6I_VERIFY_SRC;
5064 		}
5065 
5066 		if (option_exists & IPPF_DONTFRAG) {
5067 			ip6i->ip6i_flags |= IP6I_DONTFRAG;
5068 		}
5069 
5070 		if (option_exists & IPPF_USE_MIN_MTU) {
5071 			ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU(
5072 			    ip6i->ip6i_flags, ipp->ipp_use_min_mtu);
5073 		}
5074 
5075 		if (option_exists & IPPF_NEXTHOP) {
5076 			tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP);
5077 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop));
5078 			ip6i->ip6i_flags |= IP6I_NEXTHOP;
5079 			ip6i->ip6i_nexthop = tipp->ipp_nexthop;
5080 		}
5081 
5082 		/*
5083 		 * tell IP this is an ip6i_t private header
5084 		 */
5085 		ip6i->ip6i_nxt = IPPROTO_RAW;
5086 	}
5087 
5088 	/* Initialize IPv6 header */
5089 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
5090 	bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src));
5091 
5092 	/* Set the hoplimit of the outgoing packet. */
5093 	if (option_exists & IPPF_HOPLIMIT) {
5094 		/* IPV6_HOPLIMIT ancillary data overrides all other settings. */
5095 		ip6h->ip6_hops = ipp->ipp_hoplimit;
5096 		ip6i->ip6i_flags |= IP6I_HOPLIMIT;
5097 	} else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
5098 		ip6h->ip6_hops = icmp->icmp_multicast_ttl;
5099 		if (option_exists & IPPF_MULTICAST_HOPS)
5100 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
5101 	} else {
5102 		ip6h->ip6_hops = icmp->icmp_ttl;
5103 		if (option_exists & IPPF_UNICAST_HOPS)
5104 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
5105 	}
5106 
5107 	if (option_exists & IPPF_ADDR) {
5108 		tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR);
5109 		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr));
5110 		ip6h->ip6_src = tipp->ipp_addr;
5111 	} else {
5112 		/*
5113 		 * The source address was not set using IPV6_PKTINFO.
5114 		 * First look at the bound source.
5115 		 * If unspecified fallback to __sin6_src_id.
5116 		 */
5117 		ip6h->ip6_src = icmp->icmp_v6src;
5118 		if (sin6->__sin6_src_id != 0 &&
5119 		    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
5120 			ip_srcid_find_id(sin6->__sin6_src_id,
5121 			    &ip6h->ip6_src, icmp->icmp_zoneid,
5122 			    is->is_netstack);
5123 		}
5124 	}
5125 
5126 	nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
5127 	cp = (uint8_t *)&ip6h[1];
5128 
5129 	/*
5130 	 * Here's where we have to start stringing together
5131 	 * any extension headers in the right order:
5132 	 * Hop-by-hop, destination, routing, and final destination opts.
5133 	 */
5134 	if (option_exists & IPPF_HOPOPTS) {
5135 		/* Hop-by-hop options */
5136 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
5137 		tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS);
5138 
5139 		*nxthdr_ptr = IPPROTO_HOPOPTS;
5140 		nxthdr_ptr = &hbh->ip6h_nxt;
5141 
5142 		bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen);
5143 		cp += tipp->ipp_hopoptslen;
5144 	}
5145 	/*
5146 	 * En-route destination options
5147 	 * Only do them if there's a routing header as well
5148 	 */
5149 	if (option_exists & IPPF_RTDSTOPTS) {
5150 		ip6_dest_t *dst = (ip6_dest_t *)cp;
5151 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS);
5152 
5153 		*nxthdr_ptr = IPPROTO_DSTOPTS;
5154 		nxthdr_ptr = &dst->ip6d_nxt;
5155 
5156 		bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen);
5157 		cp += tipp->ipp_rtdstoptslen;
5158 	}
5159 	/*
5160 	 * Routing header next
5161 	 */
5162 	if (option_exists & IPPF_RTHDR) {
5163 		ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
5164 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR);
5165 
5166 		*nxthdr_ptr = IPPROTO_ROUTING;
5167 		nxthdr_ptr = &rt->ip6r_nxt;
5168 
5169 		bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen);
5170 		cp += tipp->ipp_rthdrlen;
5171 	}
5172 	/*
5173 	 * Do ultimate destination options
5174 	 */
5175 	if (option_exists & IPPF_DSTOPTS) {
5176 		ip6_dest_t *dest = (ip6_dest_t *)cp;
5177 		tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS);
5178 
5179 		*nxthdr_ptr = IPPROTO_DSTOPTS;
5180 		nxthdr_ptr = &dest->ip6d_nxt;
5181 
5182 		bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen);
5183 		cp += tipp->ipp_dstoptslen;
5184 	}
5185 
5186 	/*
5187 	 * Now set the last header pointer to the proto passed in
5188 	 */
5189 	ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len);
5190 	*nxthdr_ptr = icmp->icmp_proto;
5191 
5192 	/*
5193 	 * Copy in the destination address
5194 	 */
5195 	ip6h->ip6_dst = ip6_dst;
5196 
5197 	ip6h->ip6_vcf =
5198 	    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
5199 	    (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
5200 
5201 	if (option_exists & IPPF_TCLASS) {
5202 		tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS);
5203 		ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
5204 		    tipp->ipp_tclass);
5205 	}
5206 	if (option_exists & IPPF_RTHDR) {
5207 		ip6_rthdr_t	*rth;
5208 
5209 		/*
5210 		 * Perform any processing needed for source routing.
5211 		 * We know that all extension headers will be in the same mblk
5212 		 * as the IPv6 header.
5213 		 */
5214 		rth = ip_find_rthdr_v6(ip6h, mp->b_wptr);
5215 		if (rth != NULL && rth->ip6r_segleft != 0) {
5216 			if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) {
5217 				/*
5218 				 * Drop packet - only support Type 0 routing.
5219 				 * Notify the application as well.
5220 				 */
5221 				BUMP_MIB(&is->is_rawip_mib,
5222 				    rawipOutErrors);
5223 				return (EPROTO);
5224 			}
5225 			/*
5226 			 * rth->ip6r_len is twice the number of
5227 			 * addresses in the header
5228 			 */
5229 			if (rth->ip6r_len & 0x1) {
5230 				BUMP_MIB(&is->is_rawip_mib,
5231 				    rawipOutErrors);
5232 				return (EPROTO);
5233 			}
5234 			/*
5235 			 * Shuffle the routing header and ip6_dst
5236 			 * addresses, and get the checksum difference
5237 			 * between the first hop (in ip6_dst) and
5238 			 * the destination (in the last routing hdr entry).
5239 			 */
5240 			csum = ip_massage_options_v6(ip6h, rth,
5241 			    is->is_netstack);
5242 			/*
5243 			 * Verify that the first hop isn't a mapped address.
5244 			 * Routers along the path need to do this verification
5245 			 * for subsequent hops.
5246 			 */
5247 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
5248 				BUMP_MIB(&is->is_rawip_mib,
5249 				    rawipOutErrors);
5250 				return (EADDRNOTAVAIL);
5251 			}
5252 		}
5253 	}
5254 
5255 	ip_len = mp->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN;
5256 	if (mp->b_cont != NULL)
5257 		ip_len += msgdsize(mp->b_cont);
5258 
5259 	/*
5260 	 * Set the length into the IP header.
5261 	 * If the length is greater than the maximum allowed by IP,
5262 	 * then free the message and return. Do not try and send it
5263 	 * as this can cause problems in layers below.
5264 	 */
5265 	if (ip_len > IP_MAXPACKET) {
5266 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
5267 		return (EMSGSIZE);
5268 	}
5269 	if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) {
5270 		uint_t	cksum_off;	/* From ip6i == mp->b_rptr */
5271 		uint16_t *cksum_ptr;
5272 		uint_t	ext_hdrs_len;
5273 
5274 		/* ICMPv6 must have an offset matching icmp6_cksum offset */
5275 		ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 ||
5276 		    icmp->icmp_checksum_off == 2);
5277 
5278 		/*
5279 		 * We make it easy for IP to include our pseudo header
5280 		 * by putting our length in uh_checksum, modified (if
5281 		 * we have a routing header) by the checksum difference
5282 		 * between the ultimate destination and first hop addresses.
5283 		 * Note: ICMPv6 must always checksum the packet.
5284 		 */
5285 		cksum_off = ip_hdr_len + icmp->icmp_checksum_off;
5286 		if (cksum_off + sizeof (uint16_t) > mp->b_wptr - mp->b_rptr) {
5287 			if (!pullupmsg(mp, cksum_off + sizeof (uint16_t))) {
5288 				BUMP_MIB(&is->is_rawip_mib,
5289 				    rawipOutErrors);
5290 				freemsg(mp);
5291 				return (0);
5292 			}
5293 			ip6i = (ip6i_t *)mp->b_rptr;
5294 			if (ip6i->ip6i_nxt == IPPROTO_RAW)
5295 				ip6h = (ip6_t *)&ip6i[1];
5296 			else
5297 				ip6h = (ip6_t *)ip6i;
5298 		}
5299 		/* Add payload length to checksum */
5300 		ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN -
5301 		    (int)((uchar_t *)ip6h - (uchar_t *)ip6i);
5302 		csum += htons(ip_len - ext_hdrs_len);
5303 
5304 		cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off);
5305 		csum = (csum & 0xFFFF) + (csum >> 16);
5306 		*cksum_ptr = (uint16_t)csum;
5307 	}
5308 
5309 #ifdef _LITTLE_ENDIAN
5310 	ip_len = htons(ip_len);
5311 #endif
5312 	ip6h->ip6_plen = (uint16_t)ip_len;
5313 
5314 	/* We're done. Pass the packet to IP */
5315 	BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams);
5316 	ip_output_v6(icmp->icmp_connp, mp, q, IP_WPUT);
5317 	return (0);
5318 }
5319 
/*
 * Write-side handler for messages that are not plain data.
 *
 * The message is dispatched on its STREAMS type:
 *   M_PROTO/M_PCPROTO - dispatched on the TPI primitive type; most
 *			 primitives are handed to a dedicated handler and
 *			 the function returns.
 *   M_IOCTL	       - TI_GETPEERNAME/TI_GETMYNAME start a copyin of the
 *			 user's strbuf, ND_GET/ND_SET go through nd_getset(),
 *			 and _SIOCSOCKFALLBACK is replied to directly.
 *   M_IOCDATA	       - handed to icmp_wput_iocdata().
 *
 * Any message that is not consumed by one of the cases above falls out of
 * the switch and is passed to ip_wput().
 */
static void
icmp_wput_other(queue_t *q, mblk_t *mp)
{
	uchar_t	*rptr = mp->b_rptr;
	struct iocblk *iocp;
	/* NOTE(review): tudr is not referenced anywhere in this function. */
#define	tudr ((struct T_unitdata_req *)rptr)
	conn_t	*connp = Q_TO_CONN(q);
	icmp_t	*icmp = connp->conn_icmp;
	icmp_stack_t *is = icmp->icmp_is;
	cred_t *cr;

	/* Credentials from the message if present, else the conn's. */
	cr = DB_CREDDEF(mp, connp->conn_cred);

	switch (mp->b_datap->db_type) {
	case M_PROTO:
	case M_PCPROTO:
		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
			/*
			 * If the message does not contain a PRIM_type,
			 * throw it away.
			 */
			freemsg(mp);
			return;
		}
		switch (((union T_primitives *)rptr)->type) {
		case T_ADDR_REQ:
			icmp_addr_req(q, mp);
			return;
		case O_T_BIND_REQ:
		case T_BIND_REQ:
			icmp_tpi_bind(q, mp);
			return;
		case T_CONN_REQ:
			icmp_tpi_connect(q, mp);
			return;
		case T_CAPABILITY_REQ:
			icmp_capability_req(q, mp);
			return;
		case T_INFO_REQ:
			icmp_info_req(q, mp);
			return;
		case T_UNITDATA_REQ:
			/*
			 * If a T_UNITDATA_REQ gets here, the address must
			 * be bad.  Valid T_UNITDATA_REQs are found above
			 * and break to below this switch.
			 */
			icmp_ud_err(q, mp, EADDRNOTAVAIL);
			return;
		case T_UNBIND_REQ:
			icmp_tpi_unbind(q, mp);
			return;

		case T_SVR4_OPTMGMT_REQ:
			/*
			 * Give the SNMP code first shot at the request; only
			 * if it declines is it treated as an option request.
			 */
			if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get,
			    cr)) {
				/* Only IP can return anything meaningful */
				(void) svr4_optcom_req(q, mp, cr,
				    &icmp_opt_obj, B_TRUE);
			}
			return;

		case T_OPTMGMT_REQ:
			/* Only IP can return anything meaningful */
			(void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE);
			return;

		case T_DISCON_REQ:
			icmp_tpi_disconnect(q, mp);
			return;

		/* The following TPI message is not supported by icmp. */
		case O_T_CONN_RES:
		case T_CONN_RES:
			icmp_err_ack(q, mp, TNOTSUPPORT, 0);
			return;

		/* The following 3 TPI requests are illegal for icmp. */
		case T_DATA_REQ:
		case T_EXDATA_REQ:
		case T_ORDREL_REQ:
			/* Drop the request and send M_ERROR(EPROTO) upstream. */
			freemsg(mp);
			(void) putctl1(RD(q), M_ERROR, EPROTO);
			return;
		default:
			/* Unrecognized primitive: let ip_wput() see it. */
			break;
		}
		break;
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		switch (iocp->ioc_cmd) {
		case TI_GETPEERNAME:
			if (icmp->icmp_state != TS_DATA_XFER) {
				/*
				 * If a default destination address has not
				 * been associated with the stream, then we
				 * don't know the peer's name.
				 */
				iocp->ioc_error = ENOTCONN;
		/* NOTE(review): no goto in this function targets err_ret. */
		err_ret:;
				iocp->ioc_count = 0;
				mp->b_datap->db_type = M_IOCACK;
				qreply(q, mp);
				return;
			}
			/* FALLTHRU */
		case TI_GETMYNAME:
			/*
			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
			 * need to copyin the user's strbuf structure.
			 * Processing will continue in the M_IOCDATA case
			 * below.
			 */
			mi_copyin(q, mp, NULL,
			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
			return;
		case ND_SET:
			/* nd_getset performs the necessary error checking */
		case ND_GET:
			/*
			 * Reply here only when nd_getset() returns non-zero;
			 * otherwise the message goes on to ip_wput().
			 */
			if (nd_getset(q, is->is_nd, mp)) {
				qreply(q, mp);
				return;
			}
			break;
		case _SIOCSOCKFALLBACK:
			/*
			 * The socket is falling back to being a
			 * streams socket. Nothing to do.
			 */
			iocp->ioc_count = 0;
			iocp->ioc_rval = 0;
			qreply(q, mp);
			return;
		default:
			break;
		}
		break;
	case M_IOCDATA:
		icmp_wput_iocdata(q, mp);
		return;
	default:
		break;
	}
	/* Not consumed above: hand the message to IP. */
	ip_wput(q, mp);
}
5465 
5466 /*
5467  * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA
5468  * messages.
5469  */
5470 static void
5471 icmp_wput_iocdata(queue_t *q, mblk_t *mp)
5472 {
5473 	mblk_t	*mp1;
5474 	STRUCT_HANDLE(strbuf, sb);
5475 	icmp_t	*icmp;
5476 	uint_t	addrlen;
5477 	uint_t	error;
5478 
5479 	/* Make sure it is one of ours. */
5480 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
5481 	case TI_GETMYNAME:
5482 	case TI_GETPEERNAME:
5483 		break;
5484 	default:
5485 		icmp = Q_TO_ICMP(q);
5486 		ip_output(icmp->icmp_connp, mp, q, IP_WPUT);
5487 		return;
5488 	}
5489 	switch (mi_copy_state(q, mp, &mp1)) {
5490 	case -1:
5491 		return;
5492 	case MI_COPY_CASE(MI_COPY_IN, 1):
5493 		break;
5494 	case MI_COPY_CASE(MI_COPY_OUT, 1):
5495 		/*
5496 		 * The address has been copied out, so now
5497 		 * copyout the strbuf.
5498 		 */
5499 		mi_copyout(q, mp);
5500 		return;
5501 	case MI_COPY_CASE(MI_COPY_OUT, 2):
5502 		/*
5503 		 * The address and strbuf have been copied out.
5504 		 * We're done, so just acknowledge the original
5505 		 * M_IOCTL.
5506 		 */
5507 		mi_copy_done(q, mp, 0);
5508 		return;
5509 	default:
5510 		/*
5511 		 * Something strange has happened, so acknowledge
5512 		 * the original M_IOCTL with an EPROTO error.
5513 		 */
5514 		mi_copy_done(q, mp, EPROTO);
5515 		return;
5516 	}
5517 	/*
5518 	 * Now we have the strbuf structure for TI_GETMYNAME
5519 	 * and TI_GETPEERNAME.  Next we copyout the requested
5520 	 * address and then we'll copyout the strbuf.
5521 	 */
5522 	STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag,
5523 	    (void *)mp1->b_rptr);
5524 	icmp = Q_TO_ICMP(q);
5525 	if (icmp->icmp_family == AF_INET)
5526 		addrlen = sizeof (sin_t);
5527 	else
5528 		addrlen = sizeof (sin6_t);
5529 
5530 	if (STRUCT_FGET(sb, maxlen) < addrlen) {
5531 		mi_copy_done(q, mp, EINVAL);
5532 		return;
5533 	}
5534 
5535 	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
5536 
5537 	if (mp1 == NULL)
5538 		return;
5539 
5540 	rw_enter(&icmp->icmp_rwlock, RW_READER);
5541 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
5542 	case TI_GETMYNAME:
5543 		error = rawip_do_getsockname(icmp, (void *)mp1->b_rptr,
5544 		    &addrlen);
5545 		break;
5546 	case TI_GETPEERNAME:
5547 		error = rawip_do_getpeername(icmp, (void *)mp1->b_rptr,
5548 		    &addrlen);
5549 		break;
5550 	}
5551 	rw_exit(&icmp->icmp_rwlock);
5552 
5553 	if (error != 0) {
5554 		mi_copy_done(q, mp, error);
5555 	} else {
5556 		mp1->b_wptr += addrlen;
5557 		STRUCT_FSET(sb, len, addrlen);
5558 
5559 		/* Copy out the address */
5560 		mi_copyout(q, mp);
5561 	}
5562 }
5563 
5564 static int
5565 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
5566     void *thisdg_attrs)
5567 {
5568 	conn_t	*connp = Q_TO_CONN(q);
5569 	struct T_unitdata_req *udreqp;
5570 	int is_absreq_failure;
5571 	cred_t *cr;
5572 
5573 	udreqp = (struct T_unitdata_req *)mp->b_rptr;
5574 	*errorp = 0;
5575 
5576 	cr = DB_CREDDEF(mp, connp->conn_cred);
5577 
5578 	*errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length,
5579 	    udreqp->OPT_offset, cr, &icmp_opt_obj,
5580 	    thisdg_attrs, &is_absreq_failure);
5581 
5582 	if (*errorp != 0) {
5583 		/*
5584 		 * Note: No special action needed in this
5585 		 * module for "is_absreq_failure"
5586 		 */
5587 		return (-1);		/* failure */
5588 	}
5589 	ASSERT(is_absreq_failure == 0);
5590 	return (0);	/* success */
5591 }
5592 
5593 void
5594 icmp_ddi_g_init(void)
5595 {
5596 	icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr,
5597 	    icmp_opt_obj.odb_opt_arr_cnt);
5598 
5599 	/*
5600 	 * We want to be informed each time a stack is created or
5601 	 * destroyed in the kernel, so we can maintain the
5602 	 * set of icmp_stack_t's.
5603 	 */
5604 	netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini);
5605 }
5606 
5607 void
5608 icmp_ddi_g_destroy(void)
5609 {
5610 	netstack_unregister(NS_ICMP);
5611 }
5612 
5613 #define	INET_NAME	"ip"
5614 
5615 /*
5616  * Initialize the ICMP stack instance.
5617  */
5618 static void *
5619 rawip_stack_init(netstackid_t stackid, netstack_t *ns)
5620 {
5621 	icmp_stack_t	*is;
5622 	icmpparam_t	*pa;
5623 	int		error = 0;
5624 	major_t		major;
5625 
5626 	is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP);
5627 	is->is_netstack = ns;
5628 
5629 	pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP);
5630 	is->is_param_arr = pa;
5631 	bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr));
5632 
5633 	(void) icmp_param_register(&is->is_nd,
5634 	    is->is_param_arr, A_CNT(icmp_param_arr));
5635 	is->is_ksp = rawip_kstat_init(stackid);
5636 
5637 	major = mod_name_to_major(INET_NAME);
5638 	error = ldi_ident_from_major(major, &is->is_ldi_ident);
5639 	ASSERT(error == 0);
5640 	return (is);
5641 }
5642 
5643 /*
5644  * Free the ICMP stack instance.
5645  */
5646 static void
5647 rawip_stack_fini(netstackid_t stackid, void *arg)
5648 {
5649 	icmp_stack_t *is = (icmp_stack_t *)arg;
5650 
5651 	nd_free(&is->is_nd);
5652 	kmem_free(is->is_param_arr, sizeof (icmp_param_arr));
5653 	is->is_param_arr = NULL;
5654 
5655 	rawip_kstat_fini(stackid, is->is_ksp);
5656 	is->is_ksp = NULL;
5657 	ldi_ident_release(is->is_ldi_ident);
5658 	kmem_free(is, sizeof (*is));
5659 }
5660 
5661 static void *
5662 rawip_kstat_init(netstackid_t stackid) {
5663 	kstat_t	*ksp;
5664 
5665 	rawip_named_kstat_t template = {
5666 		{ "inDatagrams",	KSTAT_DATA_UINT32, 0 },
5667 		{ "inCksumErrs",	KSTAT_DATA_UINT32, 0 },
5668 		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
5669 		{ "outDatagrams",	KSTAT_DATA_UINT32, 0 },
5670 		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
5671 	};
5672 
5673 	ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2",
5674 					KSTAT_TYPE_NAMED,
5675 					NUM_OF_FIELDS(rawip_named_kstat_t),
5676 					0, stackid);
5677 	if (ksp == NULL || ksp->ks_data == NULL)
5678 		return (NULL);
5679 
5680 	bcopy(&template, ksp->ks_data, sizeof (template));
5681 	ksp->ks_update = rawip_kstat_update;
5682 	ksp->ks_private = (void *)(uintptr_t)stackid;
5683 
5684 	kstat_install(ksp);
5685 	return (ksp);
5686 }
5687 
5688 static void
5689 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp)
5690 {
5691 	if (ksp != NULL) {
5692 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
5693 		kstat_delete_netstack(ksp, stackid);
5694 	}
5695 }
5696 
5697 static int
5698 rawip_kstat_update(kstat_t *ksp, int rw)
5699 {
5700 	rawip_named_kstat_t *rawipkp;
5701 	netstackid_t	stackid = (netstackid_t)(uintptr_t)ksp->ks_private;
5702 	netstack_t	*ns;
5703 	icmp_stack_t	*is;
5704 
5705 	if ((ksp == NULL) || (ksp->ks_data == NULL))
5706 		return (EIO);
5707 
5708 	if (rw == KSTAT_WRITE)
5709 		return (EACCES);
5710 
5711 	rawipkp = (rawip_named_kstat_t *)ksp->ks_data;
5712 
5713 	ns = netstack_find_by_stackid(stackid);
5714 	if (ns == NULL)
5715 		return (-1);
5716 	is = ns->netstack_icmp;
5717 	if (is == NULL) {
5718 		netstack_rele(ns);
5719 		return (-1);
5720 	}
5721 	rawipkp->inDatagrams.value.ui32 =  is->is_rawip_mib.rawipInDatagrams;
5722 	rawipkp->inCksumErrs.value.ui32 =  is->is_rawip_mib.rawipInCksumErrs;
5723 	rawipkp->inErrors.value.ui32 =	   is->is_rawip_mib.rawipInErrors;
5724 	rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams;
5725 	rawipkp->outErrors.value.ui32 =	   is->is_rawip_mib.rawipOutErrors;
5726 	netstack_rele(ns);
5727 	return (0);
5728 }
5729 
5730 /* ARGSUSED */
5731 int
5732 rawip_accept(sock_lower_handle_t lproto_handle,
5733     sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
5734     cred_t *cr)
5735 {
5736 	return (EOPNOTSUPP);
5737 }
5738 
5739 /* ARGSUSED */
5740 int
5741 rawip_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
5742     socklen_t len, cred_t *cr)
5743 {
5744 	conn_t  *connp = (conn_t *)proto_handle;
5745 	int error;
5746 
5747 	/* Binding to a NULL address really means unbind */
5748 	if (sa == NULL)
5749 		error = rawip_do_unbind(connp);
5750 	else
5751 		error = rawip_do_bind(connp, sa, len);
5752 
5753 	if (error < 0) {
5754 		if (error == -TOUTSTATE)
5755 			error = EINVAL;
5756 		else
5757 			error = proto_tlitosyserr(-error);
5758 	}
5759 	return (error);
5760 }
5761 
5762 static int
5763 rawip_implicit_bind(conn_t *connp)
5764 {
5765 	sin6_t sin6addr;
5766 	sin_t *sin;
5767 	sin6_t *sin6;
5768 	socklen_t len;
5769 	int error;
5770 
5771 	if (connp->conn_icmp->icmp_family == AF_INET) {
5772 		len = sizeof (struct sockaddr_in);
5773 		sin = (sin_t *)&sin6addr;
5774 		*sin = sin_null;
5775 		sin->sin_family = AF_INET;
5776 		sin->sin_addr.s_addr = INADDR_ANY;
5777 	} else {
5778 		ASSERT(connp->conn_icmp->icmp_family == AF_INET6);
5779 		len = sizeof (sin6_t);
5780 		sin6 = (sin6_t *)&sin6addr;
5781 		*sin6 = sin6_null;
5782 		sin6->sin6_family = AF_INET6;
5783 		V6_SET_ZERO(sin6->sin6_addr);
5784 	}
5785 
5786 	error = rawip_do_bind(connp, (struct sockaddr *)&sin6addr, len);
5787 
5788 	return ((error < 0) ? proto_tlitosyserr(-error) : error);
5789 }
5790 
5791 static int
5792 rawip_unbind(conn_t *connp)
5793 {
5794 	int error;
5795 
5796 	error = rawip_do_unbind(connp);
5797 	if (error < 0) {
5798 		error = proto_tlitosyserr(-error);
5799 	}
5800 	return (error);
5801 }
5802 
5803 /* ARGSUSED */
5804 int
5805 rawip_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
5806 {
5807 	return (EOPNOTSUPP);
5808 }
5809 
5810 /* ARGSUSED */
5811 int
5812 rawip_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
5813     socklen_t len, sock_connid_t *id, cred_t *cr)
5814 {
5815 	conn_t	*connp = (conn_t *)proto_handle;
5816 	icmp_t *icmp = connp->conn_icmp;
5817 	int	error;
5818 	boolean_t did_bind = B_FALSE;
5819 
5820 	if (sa == NULL) {
5821 		/*
5822 		 * Disconnect
5823 		 * Make sure we are connected
5824 		 */
5825 		if (icmp->icmp_state != TS_DATA_XFER)
5826 			return (EINVAL);
5827 
5828 		error = icmp_disconnect(connp);
5829 		return (error);
5830 	}
5831 
5832 	error = proto_verify_ip_addr(icmp->icmp_family, sa, len);
5833 	if (error != 0)
5834 		return (error);
5835 
5836 	/* do an implicit bind if necessary */
5837 	if (icmp->icmp_state == TS_UNBND) {
5838 		error = rawip_implicit_bind(connp);
5839 		/*
5840 		 * We could be racing with an actual bind, in which case
5841 		 * we would see EPROTO. We cross our fingers and try
5842 		 * to connect.
5843 		 */
5844 		if (!(error == 0 || error == EPROTO))
5845 			return (error);
5846 		did_bind = B_TRUE;
5847 	}
5848 
5849 	/*
5850 	 * set SO_DGRAM_ERRIND
5851 	 */
5852 	icmp->icmp_dgram_errind = B_TRUE;
5853 
5854 	error = rawip_do_connect(connp, sa, len);
5855 
5856 	if (error != 0 && did_bind) {
5857 		int unbind_err;
5858 
5859 		unbind_err = rawip_unbind(connp);
5860 		ASSERT(unbind_err == 0);
5861 	}
5862 
5863 	if (error == 0) {
5864 		*id = 0;
5865 		(*connp->conn_upcalls->su_connected)
5866 		    (connp->conn_upper_handle, 0, NULL, -1);
5867 	} else if (error < 0) {
5868 		error = proto_tlitosyserr(-error);
5869 	}
5870 	return (error);
5871 }
5872 
/*
 * Socket downcall: convert this endpoint into a STREAMS-based one on
 * queue pair q.
 *
 * In order, this function: wires the conn to the queue pair, sends an
 * M_SETOPTS upstream, closes the IP helper stream, gathers capability,
 * address and option state, hands that state to quiesced_cb, pushes any
 * messages held on the icmp fallback queue upstream, and finally clears
 * IPCL_NONSTR.
 */
/* ARGSUSED */
void
rawip_fallback(sock_lower_handle_t proto_handle, queue_t *q,
    boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb)
{
	conn_t  *connp = (conn_t *)proto_handle;
	icmp_t	*icmp;
	struct T_capability_ack tca;
	struct sockaddr_in6 laddr, faddr;
	socklen_t laddrlen, faddrlen;
	short opts;
	struct stroptions *stropt;
	mblk_t *stropt_mp;
	int error;

	icmp = connp->conn_icmp;

	/* Allocate the M_SETOPTS message up front. */
	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);

	/*
	 * Set up the fallback stream that was allocated.  The values found
	 * in the two q_ptr fields are saved in the conn before both q_ptrs
	 * are redirected to the conn.
	 */
	connp->conn_dev = (dev_t)RD(q)->q_ptr;
	connp->conn_minor_arena = WR(q)->q_ptr;

	RD(q)->q_ptr = WR(q)->q_ptr = connp;

	WR(q)->q_qinfo = &icmpwinit;

	connp->conn_rq = RD(q);
	connp->conn_wq = WR(q);

	/* Notify stream head about options before sending up data */
	stropt_mp->b_datap->db_type = M_SETOPTS;
	stropt_mp->b_wptr += sizeof (*stropt);
	stropt = (struct stroptions *)stropt_mp->b_rptr;
	stropt->so_flags = SO_WROFF | SO_HIWAT;
	stropt->so_wroff =
	    (ushort_t)(icmp->icmp_max_hdr_len + icmp->icmp_is->is_wroff_extra);
	stropt->so_hiwat = icmp->icmp_recv_hiwat;
	putnext(RD(q), stropt_mp);

	/*
	 * Free the helper stream.
	 */
	ip_close_helper_stream(connp);

	/*
	 * Collect the information needed to sync with the sonode
	 */
	icmp_do_capability_ack(icmp, &tca, TC1_INFO);

	laddrlen = faddrlen = sizeof (sin6_t);
	(void) rawip_getsockname((sock_lower_handle_t)connp,
	    (struct sockaddr *)&laddr, &laddrlen, NULL);
	error = rawip_getpeername((sock_lower_handle_t)connp,
	    (struct sockaddr *)&faddr, &faddrlen, NULL);
	/* No peer address available: report a zero-length one. */
	if (error != 0)
		faddrlen = 0;
	opts = 0;
	if (icmp->icmp_dgram_errind)
		opts |= SO_DGRAM_ERRIND;
	if (icmp->icmp_dontroute)
		opts |= SO_DONTROUTE;

	/*
	 * Once we grab the drain lock, no data will be sent up
	 * to the socket. So we notify the socket that the endpoint
	 * is quiescent and it's therefore safe to move data from
	 * the socket to the stream head.
	 */
	(*quiesced_cb)(connp->conn_upper_handle, q, &tca,
	    (struct sockaddr *)&laddr, laddrlen,
	    (struct sockaddr *)&faddr, faddrlen, opts);

	/*
	 * Push up any packets that were queued in icmp_t.  The receive
	 * lock is dropped around each putnext() and re-taken before the
	 * list head is examined again.
	 */

	mutex_enter(&icmp->icmp_recv_lock);
	while (icmp->icmp_fallback_queue_head != NULL) {
		mblk_t	*mp;

		mp = icmp->icmp_fallback_queue_head;
		icmp->icmp_fallback_queue_head = mp->b_next;
		mp->b_next = NULL;
		mutex_exit(&icmp->icmp_recv_lock);
		putnext(RD(q), mp);
		mutex_enter(&icmp->icmp_recv_lock);
	}
	/* The head is NULL here, so this clears the tail as well. */
	icmp->icmp_fallback_queue_tail = icmp->icmp_fallback_queue_head;
	/*
	 * No longer a streams-less socket
	 */
	connp->conn_flags &= ~IPCL_NONSTR;
	mutex_exit(&icmp->icmp_recv_lock);
	ASSERT(icmp->icmp_fallback_queue_head == NULL &&
	    icmp->icmp_fallback_queue_tail == NULL);

	ASSERT(connp->conn_ref >= 1);
}
5974 
5975 /* ARGSUSED */
5976 sock_lower_handle_t
5977 rawip_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
5978     uint_t *smodep, int *errorp, int flags, cred_t *credp)
5979 {
5980 	conn_t *connp;
5981 
5982 	if (type != SOCK_RAW || (family != AF_INET && family != AF_INET6)) {
5983 		*errorp = EPROTONOSUPPORT;
5984 		return (NULL);
5985 	}
5986 
5987 	connp = icmp_open(family, credp, errorp, flags);
5988 	if (connp != NULL) {
5989 		icmp_stack_t *is;
5990 
5991 		is = connp->conn_icmp->icmp_is;
5992 		connp->conn_flags |= IPCL_NONSTR;
5993 
5994 		if (connp->conn_icmp->icmp_family == AF_INET6) {
5995 			/* Build initial header template for transmit */
5996 			rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER);
5997 			if ((*errorp =
5998 			    icmp_build_hdrs(connp->conn_icmp)) != 0) {
5999 				rw_exit(&connp->conn_icmp->icmp_rwlock);
6000 				ipcl_conn_destroy(connp);
6001 				return (NULL);
6002 			}
6003 			rw_exit(&connp->conn_icmp->icmp_rwlock);
6004 		}
6005 
6006 		connp->conn_icmp->icmp_recv_hiwat = is->is_recv_hiwat;
6007 		connp->conn_icmp->icmp_xmit_hiwat = is->is_xmit_hiwat;
6008 
6009 		if ((*errorp = ip_create_helper_stream(connp,
6010 		    is->is_ldi_ident)) != 0) {
6011 			cmn_err(CE_CONT, "create of IP helper stream failed\n");
6012 			(void) rawip_do_close(connp);
6013 			return (NULL);
6014 		}
6015 
6016 		mutex_enter(&connp->conn_lock);
6017 		connp->conn_state_flags &= ~CONN_INCIPIENT;
6018 		mutex_exit(&connp->conn_lock);
6019 		*sock_downcalls = &sock_rawip_downcalls;
6020 		*smodep = SM_ATOMIC;
6021 	} else {
6022 		ASSERT(*errorp != 0);
6023 	}
6024 
6025 	return ((sock_lower_handle_t)connp);
6026 }
6027 
6028 /* ARGSUSED */
6029 void
6030 rawip_activate(sock_lower_handle_t proto_handle,
6031     sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, int flags,
6032     cred_t *cr)
6033 {
6034 	conn_t 			*connp = (conn_t *)proto_handle;
6035 	icmp_stack_t 		*is = connp->conn_icmp->icmp_is;
6036 	struct sock_proto_props sopp;
6037 
6038 	connp->conn_upcalls = sock_upcalls;
6039 	connp->conn_upper_handle = sock_handle;
6040 
6041 	sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
6042 	    SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
6043 	sopp.sopp_wroff = connp->conn_icmp->icmp_max_hdr_len +
6044 	    is->is_wroff_extra;
6045 	sopp.sopp_rxhiwat = is->is_recv_hiwat;
6046 	sopp.sopp_rxlowat = icmp_mod_info.mi_lowat;
6047 	sopp.sopp_maxblk = INFPSZ;
6048 	sopp.sopp_maxpsz = IP_MAXPACKET;
6049 	sopp.sopp_minpsz = (icmp_mod_info.mi_minpsz == 1) ? 0 :
6050 	    icmp_mod_info.mi_minpsz;
6051 
6052 	(*connp->conn_upcalls->su_set_proto_props)
6053 	    (connp->conn_upper_handle, &sopp);
6054 }
6055 
6056 static int
6057 rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp)
6058 {
6059 	sin_t	*sin = (sin_t *)sa;
6060 	sin6_t	*sin6 = (sin6_t *)sa;
6061 
6062 	ASSERT(icmp != NULL);
6063 	ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock));
6064 
6065 	switch (icmp->icmp_family) {
6066 	case AF_INET:
6067 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
6068 		if (*salenp < sizeof (sin_t))
6069 			return (EINVAL);
6070 
6071 		*salenp = sizeof (sin_t);
6072 		*sin = sin_null;
6073 		sin->sin_family = AF_INET;
6074 		if (icmp->icmp_state == TS_UNBND) {
6075 			break;
6076 		}
6077 
6078 		if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) &&
6079 		    !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
6080 			sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_v6src);
6081 		} else {
6082 			/*
6083 			 * INADDR_ANY
6084 			 * icmp_v6src is not set, we might be bound to
6085 			 * broadcast/multicast. Use icmp_bound_v6src as
6086 			 * local address instead (that could
6087 			 * also still be INADDR_ANY)
6088 			 */
6089 			sin->sin_addr.s_addr =
6090 			    V4_PART_OF_V6(icmp->icmp_bound_v6src);
6091 		}
6092 		break;
6093 	case AF_INET6:
6094 
6095 		if (*salenp < sizeof (sin6_t))
6096 			return (EINVAL);
6097 
6098 		*salenp = sizeof (sin6_t);
6099 		*sin6 = sin6_null;
6100 		sin6->sin6_family = AF_INET6;
6101 		if (icmp->icmp_state == TS_UNBND) {
6102 			break;
6103 		}
6104 		if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
6105 			sin6->sin6_addr = icmp->icmp_v6src;
6106 		} else {
6107 			/*
6108 			 * UNSPECIFIED
6109 			 * icmp_v6src is not set, we might be bound to
6110 			 * broadcast/multicast. Use icmp_bound_v6src as
6111 			 * local address instead (that could
6112 			 * also still be UNSPECIFIED)
6113 			 */
6114 
6115 			sin6->sin6_addr = icmp->icmp_bound_v6src;
6116 		}
6117 		break;
6118 	}
6119 	return (0);
6120 }
6121 
6122 static int
6123 rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp)
6124 {
6125 	sin_t   *sin = (sin_t *)sa;
6126 	sin6_t  *sin6 = (sin6_t *)sa;
6127 
6128 	ASSERT(icmp != NULL);
6129 	ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock));
6130 
6131 	if (icmp->icmp_state != TS_DATA_XFER)
6132 		return (ENOTCONN);
6133 
6134 	sa->sa_family = icmp->icmp_family;
6135 	switch (icmp->icmp_family) {
6136 	case AF_INET:
6137 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
6138 
6139 		if (*salenp < sizeof (sin_t))
6140 			return (EINVAL);
6141 
6142 		*salenp = sizeof (sin_t);
6143 		*sin = sin_null;
6144 		sin->sin_family = AF_INET;
6145 		sin->sin_addr.s_addr =
6146 		    V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr);
6147 		break;
6148 	case AF_INET6:
6149 		if (*salenp < sizeof (sin6_t))
6150 			return (EINVAL);
6151 
6152 		*salenp = sizeof (sin6_t);
6153 		*sin6 = sin6_null;
6154 		*sin6 = icmp->icmp_v6dst;
6155 		break;
6156 	}
6157 	return (0);
6158 }
6159 
6160 /* ARGSUSED */
6161 int
6162 rawip_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6163     socklen_t *salenp, cred_t *cr)
6164 {
6165 	conn_t  *connp = (conn_t *)proto_handle;
6166 	icmp_t  *icmp = connp->conn_icmp;
6167 	int	error;
6168 
6169 	ASSERT(icmp != NULL);
6170 
6171 	rw_enter(&icmp->icmp_rwlock, RW_READER);
6172 
6173 	error = rawip_do_getpeername(icmp, sa, salenp);
6174 
6175 	rw_exit(&icmp->icmp_rwlock);
6176 
6177 	return (error);
6178 }
6179 
6180 /* ARGSUSED */
6181 int
6182 rawip_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6183     socklen_t *salenp, cred_t *cr)
6184 {
6185 	conn_t  *connp = (conn_t *)proto_handle;
6186 	icmp_t	*icmp = connp->conn_icmp;
6187 	int	error;
6188 
6189 	ASSERT(icmp != NULL);
6190 	rw_enter(&icmp->icmp_rwlock, RW_READER);
6191 
6192 	error = rawip_do_getsockname(icmp, sa, salenp);
6193 
6194 	rw_exit(&icmp->icmp_rwlock);
6195 
6196 	return (error);
6197 }
6198 
6199 int
6200 rawip_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6201     const void *optvalp, socklen_t optlen, cred_t *cr)
6202 {
6203 	conn_t	*connp = (conn_t *)proto_handle;
6204 	icmp_t *icmp = connp->conn_icmp;
6205 	int error;
6206 
6207 	error = proto_opt_check(level, option_name, optlen, NULL,
6208 	    icmp_opt_obj.odb_opt_des_arr,
6209 	    icmp_opt_obj.odb_opt_arr_cnt,
6210 	    icmp_opt_obj.odb_topmost_tpiprovider,
6211 	    B_TRUE, B_FALSE, cr);
6212 
6213 	if (error != 0) {
6214 		/*
6215 		 * option not recognized
6216 		 */
6217 		if (error < 0) {
6218 			error = proto_tlitosyserr(-error);
6219 		}
6220 		return (error);
6221 	}
6222 
6223 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
6224 	error = icmp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level,
6225 	    option_name, optlen, (uchar_t *)optvalp, (uint_t *)&optlen,
6226 	    (uchar_t *)optvalp, NULL, cr);
6227 	rw_exit(&icmp->icmp_rwlock);
6228 
6229 	if (error < 0) {
6230 		/*
6231 		 * Pass on to ip
6232 		 */
6233 		error = ip_set_options(connp, level, option_name, optvalp,
6234 		    optlen, cr);
6235 	}
6236 
6237 	ASSERT(error >= 0);
6238 
6239 	return (error);
6240 }
6241 
6242 int
6243 rawip_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6244     void *optvalp, socklen_t *optlen, cred_t *cr)
6245 {
6246 	int		error;
6247 	conn_t		*connp = (conn_t *)proto_handle;
6248 	icmp_t		*icmp = connp->conn_icmp;
6249 	t_uscalar_t	max_optbuf_len;
6250 	void		*optvalp_buf;
6251 	int		len;
6252 
6253 	error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
6254 	    icmp_opt_obj.odb_opt_des_arr,
6255 	    icmp_opt_obj.odb_opt_arr_cnt,
6256 	    icmp_opt_obj.odb_topmost_tpiprovider,
6257 	    B_FALSE, B_TRUE, cr);
6258 
6259 	if (error != 0) {
6260 		if (error < 0) {
6261 			error = proto_tlitosyserr(-error);
6262 		}
6263 		return (error);
6264 	}
6265 
6266 	optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
6267 	rw_enter(&icmp->icmp_rwlock, RW_READER);
6268 	len = icmp_opt_get(connp, level, option_name, optvalp_buf);
6269 	rw_exit(&icmp->icmp_rwlock);
6270 
6271 	if (len < 0) {
6272 		/*
6273 		 * Pass on to IP
6274 		 */
6275 		kmem_free(optvalp_buf, max_optbuf_len);
6276 		return (ip_get_options(connp, level, option_name, optvalp,
6277 		    optlen, cr));
6278 	} else {
6279 		/*
6280 		 * update optlen and copy option value
6281 		 */
6282 		t_uscalar_t size = MIN(len, *optlen);
6283 		bcopy(optvalp_buf, optvalp, size);
6284 		bcopy(&size, optlen, sizeof (size));
6285 
6286 		kmem_free(optvalp_buf, max_optbuf_len);
6287 		return (0);
6288 	}
6289 }
6290 
6291 /* ARGSUSED */
6292 int
6293 rawip_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
6294 {
6295 	conn_t	*connp = (conn_t *)proto_handle;
6296 	(void) rawip_do_close(connp);
6297 	return (0);
6298 }
6299 
6300 /* ARGSUSED */
6301 int
6302 rawip_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
6303 {
6304 	conn_t  *connp = (conn_t *)proto_handle;
6305 
6306 	/* shut down the send side */
6307 	if (how != SHUT_RD)
6308 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6309 		    SOCK_OPCTL_SHUT_SEND, 0);
6310 	/* shut down the recv side */
6311 	if (how != SHUT_WR)
6312 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6313 		    SOCK_OPCTL_SHUT_RECV, 0);
6314 	return (0);
6315 }
6316 
6317 void
6318 rawip_clr_flowctrl(sock_lower_handle_t proto_handle)
6319 {
6320 	conn_t  *connp = (conn_t *)proto_handle;
6321 	icmp_t	*icmp = connp->conn_icmp;
6322 
6323 	mutex_enter(&icmp->icmp_recv_lock);
6324 	connp->conn_flow_cntrld = B_FALSE;
6325 	mutex_exit(&icmp->icmp_recv_lock);
6326 }
6327 
6328 int
6329 rawip_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
6330     int mode, int32_t *rvalp, cred_t *cr)
6331 {
6332 	conn_t  	*connp = (conn_t *)proto_handle;
6333 	int		error;
6334 
6335 	switch (cmd) {
6336 	case ND_SET:
6337 	case ND_GET:
6338 	case _SIOCSOCKFALLBACK:
6339 	case TI_GETPEERNAME:
6340 	case TI_GETMYNAME:
6341 #ifdef DEBUG
6342 		cmn_err(CE_CONT, "icmp_ioctl cmd 0x%x on non streams"
6343 		    " socket", cmd);
6344 #endif
6345 		error = EINVAL;
6346 		break;
6347 	default:
6348 		/*
6349 		 * Pass on to IP using helper stream
6350 		 */
6351 		error = ldi_ioctl(
6352 		    connp->conn_helper_info->ip_helper_stream_handle,
6353 		    cmd, arg, mode, cr, rvalp);
6354 		break;
6355 	}
6356 	return (error);
6357 }
6358 
6359 /* ARGSUSED */
6360 int
6361 rawip_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
6362     cred_t *cr)
6363 {
6364 	conn_t *connp = (conn_t *)proto_handle;
6365 	icmp_t	*icmp = connp->conn_icmp;
6366 	icmp_stack_t *is = icmp->icmp_is;
6367 	int error = 0;
6368 	boolean_t bypass_dgram_errind = B_FALSE;
6369 
6370 	ASSERT(DB_TYPE(mp) == M_DATA);
6371 
6372 	if (is_system_labeled())
6373 		msg_setcredpid(mp, cr, curproc->p_pid);
6374 
6375 	/* do an implicit bind if necessary */
6376 	if (icmp->icmp_state == TS_UNBND) {
6377 		error = rawip_implicit_bind(connp);
6378 		/*
6379 		 * We could be racing with an actual bind, in which case
6380 		 * we would see EPROTO. We cross our fingers and try
6381 		 * to connect.
6382 		 */
6383 		if (!(error == 0 || error == EPROTO)) {
6384 			freemsg(mp);
6385 			return (error);
6386 		}
6387 	}
6388 
6389 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
6390 
6391 	if (msg->msg_name != NULL && icmp->icmp_state == TS_DATA_XFER) {
6392 		error = EISCONN;
6393 		goto done_lock;
6394 	}
6395 
6396 	switch (icmp->icmp_family) {
6397 	case AF_INET6: {
6398 		sin6_t	*sin6;
6399 		ip6_pkt_t	ipp_s;	/* For ancillary data options */
6400 		ip6_pkt_t	*ipp = &ipp_s;
6401 
6402 		sin6 = (sin6_t *)msg->msg_name;
6403 		if (sin6 != NULL) {
6404 			error = proto_verify_ip_addr(icmp->icmp_family,
6405 			    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
6406 			if (error != 0) {
6407 				bypass_dgram_errind = B_TRUE;
6408 				goto done_lock;
6409 			}
6410 			if (icmp->icmp_delayed_error != 0) {
6411 				sin6_t  *sin1 = (sin6_t *)msg->msg_name;
6412 				sin6_t  *sin2 = (sin6_t *)
6413 				    &icmp->icmp_delayed_addr;
6414 
6415 				error = icmp->icmp_delayed_error;
6416 				icmp->icmp_delayed_error = 0;
6417 
6418 				/* Compare IP address and port */
6419 
6420 				if (sin1->sin6_port == sin2->sin6_port &&
6421 				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
6422 				    &sin2->sin6_addr)) {
6423 					goto done_lock;
6424 				}
6425 			}
6426 		} else {
6427 			/*
6428 			 * Use connected address
6429 			 */
6430 			if (icmp->icmp_state != TS_DATA_XFER) {
6431 				BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
6432 				error = EDESTADDRREQ;
6433 				bypass_dgram_errind = B_TRUE;
6434 				goto done_lock;
6435 			}
6436 			sin6 = &icmp->icmp_v6dst;
6437 		}
6438 
6439 		/* No support for mapped addresses on raw sockets */
6440 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
6441 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
6442 			error = EADDRNOTAVAIL;
6443 			goto done_lock;
6444 		}
6445 
6446 		ipp->ipp_fields = 0;
6447 		ipp->ipp_sticky_ignored = 0;
6448 
6449 		/*
6450 		 * If options passed in, feed it for verification and handling
6451 		 */
6452 		if (msg->msg_controllen != 0) {
6453 			error = process_auxiliary_options(connp,
6454 			    msg->msg_control, msg->msg_controllen,
6455 			    ipp, &icmp_opt_obj, icmp_opt_set);
6456 			if (error != 0) {
6457 				goto done_lock;
6458 			}
6459 		}
6460 
6461 		rw_exit(&icmp->icmp_rwlock);
6462 
6463 		/*
6464 		 * Destination is a native IPv6 address.
6465 		 * Send out an IPv6 format packet.
6466 		 */
6467 
6468 		error = raw_ip_send_data_v6(connp->conn_wq, connp, mp, sin6,
6469 		    ipp);
6470 	}
6471 		break;
6472 	case AF_INET: {
6473 		sin_t	*sin;
6474 		ip4_pkt_t pktinfo;
6475 		ip4_pkt_t *pktinfop = &pktinfo;
6476 		ipaddr_t	v4dst;
6477 
6478 		sin = (sin_t *)msg->msg_name;
6479 		if (sin != NULL) {
6480 			error = proto_verify_ip_addr(icmp->icmp_family,
6481 			    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
6482 			if (error != 0) {
6483 				bypass_dgram_errind = B_TRUE;
6484 				goto done_lock;
6485 			}
6486 			v4dst = sin->sin_addr.s_addr;
6487 			if (icmp->icmp_delayed_error != 0) {
6488 				sin_t *sin1 = (sin_t *)msg->msg_name;
6489 				sin_t *sin2 = (sin_t *)&icmp->icmp_delayed_addr;
6490 
6491 				error = icmp->icmp_delayed_error;
6492 				icmp->icmp_delayed_error = 0;
6493 
6494 				/* Compare IP address and port */
6495 				if (sin1->sin_port == sin2->sin_port &&
6496 				    sin1->sin_addr.s_addr ==
6497 				    sin2->sin_addr.s_addr) {
6498 					goto done_lock;
6499 				}
6500 
6501 			}
6502 		} else {
6503 			/*
6504 			 * Use connected address
6505 			 */
6506 			if (icmp->icmp_state != TS_DATA_XFER) {
6507 				BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
6508 				error = EDESTADDRREQ;
6509 				bypass_dgram_errind = B_TRUE;
6510 				goto done_lock;
6511 			}
6512 			v4dst = V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr);
6513 		}
6514 
6515 
6516 		pktinfop->ip4_ill_index = 0;
6517 		pktinfop->ip4_addr = INADDR_ANY;
6518 
6519 		/*
6520 		 * If options passed in, feed it for verification and handling
6521 		 */
6522 		if (msg->msg_controllen != 0) {
6523 			error = process_auxiliary_options(connp,
6524 			    msg->msg_control, msg->msg_controllen,
6525 			    pktinfop, &icmp_opt_obj, icmp_opt_set);
6526 			if (error != 0) {
6527 				goto done_lock;
6528 			}
6529 		}
6530 		rw_exit(&icmp->icmp_rwlock);
6531 
6532 		error = raw_ip_send_data_v4(connp->conn_wq, connp, mp,
6533 		    v4dst, pktinfop);
6534 		break;
6535 	}
6536 
6537 	default:
6538 		ASSERT(0);
6539 	}
6540 
6541 	goto done;
6542 
6543 done_lock:
6544 	rw_exit(&icmp->icmp_rwlock);
6545 	if (error != 0) {
6546 		ASSERT(mp != NULL);
6547 		freemsg(mp);
6548 	}
6549 done:
6550 	if (bypass_dgram_errind)
6551 		return (error);
6552 	return (icmp->icmp_dgram_errind ? error : 0);
6553 }
6554 
/*
 * Downcall vector for raw IP sockets, filled with the rawip_* entry
 * points.  The initializer is positional, so each entry must stay in the
 * position of the sock_downcalls_t member it is meant to fill.
 */
sock_downcalls_t sock_rawip_downcalls = {
	rawip_activate,
	rawip_accept,
	rawip_bind,
	rawip_listen,
	rawip_connect,
	rawip_getpeername,
	rawip_getsockname,
	rawip_getsockopt,
	rawip_setsockopt,
	rawip_send,
	/*
	 * NOTE(review): the next three members are left NULL; presumably
	 * they are optional entry points raw IP does not provide - confirm
	 * against the sock_downcalls_t definition.
	 */
	NULL,
	NULL,
	NULL,
	rawip_shutdown,
	rawip_clr_flowctrl,
	rawip_ioctl,
	rawip_close
};
6574