xref: /titanic_44/usr/src/uts/common/inet/ip/icmp.c (revision 2d40c3b296fd82c4f1f14694b16f9b39d9fa0e4a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/stropts.h>
30 #include <sys/strlog.h>
31 #include <sys/strsun.h>
32 #define	_SUN_TPI_VERSION 2
33 #include <sys/tihdr.h>
34 #include <sys/timod.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/strsubr.h>
38 #include <sys/cmn_err.h>
39 #include <sys/debug.h>
40 #include <sys/kmem.h>
41 #include <sys/policy.h>
42 #include <sys/priv.h>
43 #include <sys/zone.h>
44 #include <sys/time.h>
45 
46 #include <sys/sockio.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/isa_defs.h>
50 #include <sys/suntpi.h>
51 #include <sys/xti_inet.h>
52 #include <sys/netstack.h>
53 
54 #include <net/route.h>
55 #include <net/if.h>
56 
57 #include <netinet/in.h>
58 #include <netinet/ip6.h>
59 #include <netinet/icmp6.h>
60 #include <inet/common.h>
61 #include <inet/ip.h>
62 #include <inet/ip6.h>
63 #include <inet/proto_set.h>
64 #include <inet/nd.h>
65 #include <inet/optcom.h>
66 #include <inet/snmpcom.h>
67 #include <inet/kstatcom.h>
68 #include <inet/rawip_impl.h>
69 
70 #include <netinet/ip_mroute.h>
71 #include <inet/tcp.h>
72 #include <net/pfkeyv2.h>
73 #include <inet/ipsec_info.h>
74 #include <inet/ipclassifier.h>
75 
76 #include <sys/tsol/label.h>
77 #include <sys/tsol/tnet.h>
78 
79 #include <inet/ip_ire.h>
80 #include <inet/ip_if.h>
81 
82 #include <inet/ip_impl.h>
83 #include <sys/disp.h>
84 
85 /*
86  * Synchronization notes:
87  *
88  * RAWIP is MT and uses the usual kernel synchronization primitives. There is
89  * one lock, icmp_rwlock. We also use conn_lock when updating things
90  * which affect the IP classifier lookup.
91  * The lock order is icmp_rwlock -> conn_lock.
92  *
93  * The icmp_rwlock:
94  * This protects most of the other fields in the icmp_t. The exact list of
95  * fields which are protected by each of the above locks is documented in
96  * the icmp_t structure definition.
97  *
98  * Plumbing notes:
99  * ICMP is always a device driver. For compatibility with mibopen() code
100  * it is possible to I_PUSH "icmp", but that results in pushing a passthrough
101  * dummy module.
102  */
103 
104 static void	icmp_addr_req(queue_t *q, mblk_t *mp);
105 static void	icmp_tpi_bind(queue_t *q, mblk_t *mp);
106 static int	icmp_bind_proto(conn_t *connp);
107 static int	icmp_build_hdrs(icmp_t *icmp);
108 static void	icmp_capability_req(queue_t *q, mblk_t *mp);
109 static int	icmp_close(queue_t *q, int flags);
110 static void	icmp_tpi_connect(queue_t *q, mblk_t *mp);
111 static void	icmp_tpi_disconnect(queue_t *q, mblk_t *mp);
112 static void	icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
113 		    int sys_error);
114 static void	icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
115 		    t_scalar_t t_error, int sys_error);
116 static void	icmp_icmp_error(conn_t *connp, mblk_t *mp);
117 static void	icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp);
118 static void	icmp_info_req(queue_t *q, mblk_t *mp);
119 static void	icmp_input(void *, mblk_t *, void *);
120 static conn_t 	*icmp_open(int family, cred_t *credp, int *err, int flags);
121 static int	icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
122 		    cred_t *credp);
123 static int	icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
124 		    cred_t *credp);
125 static int	icmp_unitdata_opt_process(queue_t *q, mblk_t *mp,
126 		    int *errorp, void *thisdg_attrs);
127 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
128 int		icmp_opt_set(conn_t *connp, uint_t optset_context,
129 		    int level, int name, uint_t inlen,
130 		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
131 		    void *thisdg_attrs, cred_t *cr);
132 int		icmp_opt_get(conn_t *connp, int level, int name,
133 		    uchar_t *ptr);
134 static int	icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
135 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt);
136 static int	icmp_param_set(queue_t *q, mblk_t *mp, char *value,
137 		    caddr_t cp, cred_t *cr);
138 static int	icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
139 		    uchar_t *ptr, int len);
140 static void	icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
141 static void	icmp_tpi_unbind(queue_t *q, mblk_t *mp);
142 static int	icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst);
143 static void	icmp_wput(queue_t *q, mblk_t *mp);
144 static void	icmp_wput_fallback(queue_t *q, mblk_t *mp);
145 static int	raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp,
146 		    sin6_t *sin6, ip6_pkt_t *ipp);
147 static int	raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp,
148 		    ipaddr_t v4dst, ip4_pkt_t *pktinfop);
149 static void	icmp_wput_other(queue_t *q, mblk_t *mp);
150 static void	icmp_wput_iocdata(queue_t *q, mblk_t *mp);
151 static void	icmp_wput_restricted(queue_t *q, mblk_t *mp);
152 static void	icmp_ulp_recv(conn_t *, mblk_t *);
153 
154 static void	*rawip_stack_init(netstackid_t stackid, netstack_t *ns);
155 static void	rawip_stack_fini(netstackid_t stackid, void *arg);
156 
157 static void	*rawip_kstat_init(netstackid_t stackid);
158 static void	rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp);
159 static int	rawip_kstat_update(kstat_t *kp, int rw);
160 static void	rawip_stack_shutdown(netstackid_t stackid, void *arg);
161 static int	rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa,
162 		    uint_t *salenp);
163 static int	rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa,
164 		    uint_t *salenp);
165 
166 int		rawip_getsockname(sock_lower_handle_t, struct sockaddr *,
167 		    socklen_t *, cred_t *);
168 int		rawip_getpeername(sock_lower_handle_t, struct sockaddr *,
169 		    socklen_t *, cred_t *);
170 
171 static struct module_info icmp_mod_info =  {
172 	5707, "icmp", 1, INFPSZ, 512, 128
173 };
174 
175 /*
176  * Entry points for ICMP as a device.
177  * We have separate open functions for the /dev/icmp and /dev/icmp6 devices.
178  */
179 static struct qinit icmprinitv4 = {
180 	NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info
181 };
182 
183 static struct qinit icmprinitv6 = {
184 	NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info
185 };
186 
187 static struct qinit icmpwinit = {
188 	(pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &icmp_mod_info
189 };
190 
191 /* ICMP entry point during fallback */
192 static struct qinit icmp_fallback_sock_winit = {
193 	(pfi_t)icmp_wput_fallback, NULL, NULL, NULL, NULL, &icmp_mod_info
194 };
195 
196 /* For AF_INET aka /dev/icmp */
197 struct streamtab icmpinfov4 = {
198 	&icmprinitv4, &icmpwinit
199 };
200 
201 /* For AF_INET6 aka /dev/icmp6 */
202 struct streamtab icmpinfov6 = {
203 	&icmprinitv6, &icmpwinit
204 };
205 
206 static sin_t	sin_null;	/* Zero address for quick clears */
207 static sin6_t	sin6_null;	/* Zero address for quick clears */
208 
209 /* Default structure copied into T_INFO_ACK messages */
210 static struct T_info_ack icmp_g_t_info_ack = {
211 	T_INFO_ACK,
212 	IP_MAXPACKET,	 /* TSDU_size.  icmp allows maximum size messages. */
213 	T_INVALID,	/* ETSDU_size.  icmp does not support expedited data. */
214 	T_INVALID,	/* CDATA_size. icmp does not support connect data. */
215 	T_INVALID,	/* DDATA_size. icmp does not support disconnect data. */
216 	0,		/* ADDR_size - filled in later. */
217 	0,		/* OPT_size - not initialized here */
218 	IP_MAXPACKET,	/* TIDU_size.  icmp allows maximum size messages. */
219 	T_CLTS,		/* SERV_type.  icmp supports connection-less. */
220 	TS_UNBND,	/* CURRENT_state.  This is set from icmp_state. */
221 	(XPG4_1|SENDZERO) /* PROVIDER_flag */
222 };
223 
224 /*
225  * Table of ND variables supported by icmp.  These are loaded into is_nd
226  * when the stack instance is created.
227  * All of these are alterable, within the min/max values given, at run time.
228  */
229 static icmpparam_t	icmp_param_arr[] = {
230 	/* min	max	value	name */
231 	{ 0,	128,	32,	"icmp_wroff_extra" },
232 	{ 1,	255,	255,	"icmp_ipv4_ttl" },
233 	{ 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS,	"icmp_ipv6_hoplimit"},
234 	{ 0,	1,	1,	"icmp_bsd_compat" },
235 	{ 4096,	65536,	8192,	"icmp_xmit_hiwat"},
236 	{ 0,	65536,	1024,	"icmp_xmit_lowat"},
237 	{ 4096,	65536,	8192,	"icmp_recv_hiwat"},
238 	{ 65536, 1024*1024*1024, 256*1024,	"icmp_max_buf"},
239 };
240 #define	is_wroff_extra			is_param_arr[0].icmp_param_value
241 #define	is_ipv4_ttl			is_param_arr[1].icmp_param_value
242 #define	is_ipv6_hoplimit		is_param_arr[2].icmp_param_value
243 #define	is_bsd_compat			is_param_arr[3].icmp_param_value
244 #define	is_xmit_hiwat			is_param_arr[4].icmp_param_value
245 #define	is_xmit_lowat			is_param_arr[5].icmp_param_value
246 #define	is_recv_hiwat			is_param_arr[6].icmp_param_value
247 #define	is_max_buf			is_param_arr[7].icmp_param_value
248 
249 static int rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len);
250 static int rawip_do_connect(conn_t *connp, const struct sockaddr *sa,
251     socklen_t len, cred_t *cr);
252 static void rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error);
253 
254 /*
255  * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
256  * passed to icmp_wput.
257  * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP
258  * protocol type placed in the message following the address. A T_BIND_ACK
259  * message is returned by ip_bind_v4/v6.
260  */
261 static void
262 icmp_tpi_bind(queue_t *q, mblk_t *mp)
263 {
264 	int	error;
265 	struct sockaddr *sa;
266 	struct T_bind_req *tbr;
267 	socklen_t	len;
268 	sin_t	*sin;
269 	sin6_t	*sin6;
270 	icmp_t		*icmp;
271 	conn_t	*connp = Q_TO_CONN(q);
272 	mblk_t *mp1;
273 	cred_t *cr;
274 
275 	/*
276 	 * All Solaris components should pass a db_credp
277 	 * for this TPI message, hence we ASSERT.
278 	 * But in case there is some other M_PROTO that looks
279 	 * like a TPI message sent by some other kernel
280 	 * component, we check and return an error.
281 	 */
282 	cr = msg_getcred(mp, NULL);
283 	ASSERT(cr != NULL);
284 	if (cr == NULL) {
285 		icmp_err_ack(q, mp, TSYSERR, EINVAL);
286 		return;
287 	}
288 
289 	icmp = connp->conn_icmp;
290 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
291 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
292 		    "icmp_bind: bad req, len %u",
293 		    (uint_t)(mp->b_wptr - mp->b_rptr));
294 		icmp_err_ack(q, mp, TPROTO, 0);
295 		return;
296 	}
297 
298 	if (icmp->icmp_state != TS_UNBND) {
299 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
300 		    "icmp_bind: bad state, %d", icmp->icmp_state);
301 		icmp_err_ack(q, mp, TOUTSTATE, 0);
302 		return;
303 	}
304 
305 	/*
306 	 * Reallocate the message to make sure we have enough room for an
307 	 * address and the protocol type.
308 	 */
309 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
310 	if (!mp1) {
311 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
312 		return;
313 	}
314 	mp = mp1;
315 
316 	/* Reset the message type in preparation for shipping it back. */
317 	DB_TYPE(mp) = M_PCPROTO;
318 	tbr = (struct T_bind_req *)mp->b_rptr;
319 	len = tbr->ADDR_length;
320 	switch (len) {
321 	case 0:	/* request for a generic port */
322 		tbr->ADDR_offset = sizeof (struct T_bind_req);
323 		if (icmp->icmp_family == AF_INET) {
324 			tbr->ADDR_length = sizeof (sin_t);
325 			sin = (sin_t *)&tbr[1];
326 			*sin = sin_null;
327 			sin->sin_family = AF_INET;
328 			mp->b_wptr = (uchar_t *)&sin[1];
329 			sa = (struct sockaddr *)sin;
330 			len = sizeof (sin_t);
331 		} else {
332 			ASSERT(icmp->icmp_family == AF_INET6);
333 			tbr->ADDR_length = sizeof (sin6_t);
334 			sin6 = (sin6_t *)&tbr[1];
335 			*sin6 = sin6_null;
336 			sin6->sin6_family = AF_INET6;
337 			mp->b_wptr = (uchar_t *)&sin6[1];
338 			sa = (struct sockaddr *)sin6;
339 			len = sizeof (sin6_t);
340 		}
341 		break;
342 
343 	case sizeof (sin_t):	/* Complete IPv4 address */
344 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
345 		    sizeof (sin_t));
346 		break;
347 
348 	case sizeof (sin6_t):	/* Complete IPv6 address */
349 		sa = (struct sockaddr *)mi_offset_param(mp,
350 		    tbr->ADDR_offset, sizeof (sin6_t));
351 		break;
352 
353 	default:
354 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
355 		    "icmp_bind: bad ADDR_length %d", tbr->ADDR_length);
356 		icmp_err_ack(q, mp, TBADADDR, 0);
357 		return;
358 	}
359 
360 	error = rawip_do_bind(connp, sa, len);
361 done:
362 	ASSERT(mp->b_cont == NULL);
363 	if (error != 0) {
364 		if (error > 0) {
365 			icmp_err_ack(q, mp, TSYSERR, error);
366 		} else {
367 			icmp_err_ack(q, mp, -error, 0);
368 		}
369 	} else {
370 		tbr->PRIM_type = T_BIND_ACK;
371 		qreply(q, mp);
372 	}
373 }
374 
375 static int
376 rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len)
377 {
378 	sin_t		*sin;
379 	sin6_t		*sin6;
380 	icmp_t		*icmp;
381 	int		error = 0;
382 	mblk_t		*ire_mp;
383 
384 
385 	icmp = connp->conn_icmp;
386 
387 	if (sa == NULL || !OK_32PTR((char *)sa)) {
388 		return (EINVAL);
389 	}
390 
391 	/*
392 	 * The state must be TS_UNBND. TPI mandates that users must send
393 	 * TPI primitives only 1 at a time and wait for the response before
394 	 * sending the next primitive.
395 	 */
396 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
397 	if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) {
398 		error = -TOUTSTATE;
399 		goto done;
400 	}
401 
402 	ASSERT(len != 0);
403 	switch (len) {
404 	case sizeof (sin_t):    /* Complete IPv4 address */
405 		sin = (sin_t *)sa;
406 		if (sin->sin_family != AF_INET ||
407 		    icmp->icmp_family != AF_INET) {
408 			/* TSYSERR, EAFNOSUPPORT */
409 			error = EAFNOSUPPORT;
410 			goto done;
411 		}
412 		break;
413 	case sizeof (sin6_t): /* Complete IPv6 address */
414 		sin6 = (sin6_t *)sa;
415 		if (sin6->sin6_family != AF_INET6 ||
416 		    icmp->icmp_family != AF_INET6) {
417 			/* TSYSERR, EAFNOSUPPORT */
418 			error = EAFNOSUPPORT;
419 			goto done;
420 		}
421 		/* No support for mapped addresses on raw sockets */
422 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
423 			/* TSYSERR, EADDRNOTAVAIL */
424 			error = EADDRNOTAVAIL;
425 			goto done;
426 		}
427 		break;
428 
429 	default:
430 		/* TBADADDR */
431 		error = EADDRNOTAVAIL;
432 		goto done;
433 	}
434 
435 	icmp->icmp_pending_op = T_BIND_REQ;
436 	icmp->icmp_state = TS_IDLE;
437 
438 	/*
439 	 * Copy the source address into our icmp structure.  This address
440 	 * may still be zero; if so, ip will fill in the correct address
441 	 * each time an outbound packet is passed to it.
442 	 * If we are binding to a broadcast or multicast address then
443 	 * rawip_post_ip_bind_connect will clear the source address.
444 	 */
445 
446 	if (icmp->icmp_family == AF_INET) {
447 		ASSERT(sin != NULL);
448 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
449 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr,
450 		    &icmp->icmp_v6src);
451 		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
452 		    icmp->icmp_ip_snd_options_len;
453 		icmp->icmp_bound_v6src = icmp->icmp_v6src;
454 	} else {
455 		int error;
456 
457 		ASSERT(sin6 != NULL);
458 		ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
459 		icmp->icmp_v6src = sin6->sin6_addr;
460 		icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len;
461 		icmp->icmp_bound_v6src = icmp->icmp_v6src;
462 
463 		/* Rebuild the header template */
464 		error = icmp_build_hdrs(icmp);
465 		if (error != 0) {
466 			icmp->icmp_pending_op = -1;
467 			/*
468 			 * TSYSERR
469 			 */
470 			goto done;
471 		}
472 	}
473 
474 	ire_mp = NULL;
475 	if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) {
476 		/*
477 		 * request an IRE if src not 0 (INADDR_ANY)
478 		 */
479 		ire_mp = allocb(sizeof (ire_t), BPRI_HI);
480 		if (ire_mp == NULL) {
481 			icmp->icmp_pending_op = -1;
482 			error = ENOMEM;
483 			goto done;
484 		}
485 		DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE;
486 	}
487 done:
488 	rw_exit(&icmp->icmp_rwlock);
489 	if (error != 0)
490 		return (error);
491 
492 	if (icmp->icmp_family == AF_INET6) {
493 		error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto,
494 		    &sin6->sin6_addr, sin6->sin6_port, B_TRUE);
495 	} else {
496 		error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto,
497 		    sin->sin_addr.s_addr, sin->sin_port, B_TRUE);
498 	}
499 	rawip_post_ip_bind_connect(icmp, ire_mp, error);
500 	return (error);
501 }
502 
503 static void
504 rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error)
505 {
506 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
507 	if (icmp->icmp_state == TS_UNBND) {
508 		/*
509 		 * not yet bound - bind sent by icmp_bind_proto.
510 		 */
511 		rw_exit(&icmp->icmp_rwlock);
512 		return;
513 	}
514 	ASSERT(icmp->icmp_pending_op != -1);
515 	icmp->icmp_pending_op = -1;
516 
517 	if (error != 0) {
518 		if (icmp->icmp_state == TS_DATA_XFER) {
519 			/* Connect failed */
520 			/* Revert back to the bound source */
521 			icmp->icmp_v6src = icmp->icmp_bound_v6src;
522 			icmp->icmp_state = TS_IDLE;
523 			if (icmp->icmp_family == AF_INET6)
524 				(void) icmp_build_hdrs(icmp);
525 		} else {
526 			V6_SET_ZERO(icmp->icmp_v6src);
527 			V6_SET_ZERO(icmp->icmp_bound_v6src);
528 			icmp->icmp_state = TS_UNBND;
529 			if (icmp->icmp_family == AF_INET6)
530 				(void) icmp_build_hdrs(icmp);
531 		}
532 	} else {
533 		if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) {
534 			ire_t *ire;
535 
536 			ire = (ire_t *)ire_mp->b_rptr;
537 			/*
538 			 * If a broadcast/multicast address was bound set
539 			 * the source address to 0.
540 			 * This ensures no datagrams with broadcast address
541 			 * as source address are emitted (which would violate
542 			 * RFC1122 - Hosts requirements)
543 			 * Note: we get IRE_BROADCAST for IPv6
544 			 * to "mark" a multicast local address.
545 			 */
546 
547 
548 			if (ire->ire_type == IRE_BROADCAST &&
549 			    icmp->icmp_state != TS_DATA_XFER) {
550 				/*
551 				 * This was just a local bind to a
552 				 * MC/broadcast addr
553 				 */
554 				V6_SET_ZERO(icmp->icmp_v6src);
555 				if (icmp->icmp_family == AF_INET6)
556 					(void) icmp_build_hdrs(icmp);
557 			}
558 		}
559 
560 	}
561 	rw_exit(&icmp->icmp_rwlock);
562 	if (ire_mp != NULL)
563 		freeb(ire_mp);
564 }
565 
566 /*
567  * Send message to IP to just bind to the protocol.
568  */
569 static int
570 icmp_bind_proto(conn_t *connp)
571 {
572 	icmp_t	*icmp;
573 	int	error;
574 
575 	icmp = connp->conn_icmp;
576 
577 	if (icmp->icmp_family == AF_INET6)
578 		error = ip_proto_bind_laddr_v6(connp, NULL, icmp->icmp_proto,
579 		    &sin6_null.sin6_addr, 0, B_TRUE);
580 	else
581 		error = ip_proto_bind_laddr_v4(connp, NULL, icmp->icmp_proto,
582 		    sin_null.sin_addr.s_addr, 0, B_TRUE);
583 
584 	rawip_post_ip_bind_connect(icmp, NULL, error);
585 	return (error);
586 }
587 
588 static void
589 icmp_tpi_connect(queue_t *q, mblk_t *mp)
590 {
591 	conn_t	*connp = Q_TO_CONN(q);
592 	struct T_conn_req	*tcr;
593 	icmp_t	*icmp;
594 	struct sockaddr *sa;
595 	socklen_t len;
596 	int error;
597 	cred_t *cr;
598 
599 	/*
600 	 * All Solaris components should pass a db_credp
601 	 * for this TPI message, hence we ASSERT.
602 	 * But in case there is some other M_PROTO that looks
603 	 * like a TPI message sent by some other kernel
604 	 * component, we check and return an error.
605 	 */
606 	cr = msg_getcred(mp, NULL);
607 	ASSERT(cr != NULL);
608 	if (cr == NULL) {
609 		icmp_err_ack(q, mp, TSYSERR, EINVAL);
610 		return;
611 	}
612 
613 	icmp = connp->conn_icmp;
614 	tcr = (struct T_conn_req *)mp->b_rptr;
615 	/* Sanity checks */
616 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
617 		icmp_err_ack(q, mp, TPROTO, 0);
618 		return;
619 	}
620 
621 	if (tcr->OPT_length != 0) {
622 		icmp_err_ack(q, mp, TBADOPT, 0);
623 		return;
624 	}
625 
626 	len = tcr->DEST_length;
627 
628 	switch (len) {
629 	default:
630 		icmp_err_ack(q, mp, TBADADDR, 0);
631 		return;
632 	case sizeof (sin_t):
633 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
634 		    sizeof (sin_t));
635 		break;
636 	case sizeof (sin6_t):
637 		sa = (struct sockaddr *)mi_offset_param(mp,
638 		    tcr->DEST_offset, sizeof (sin6_t));
639 		break;
640 	}
641 
642 	error = proto_verify_ip_addr(icmp->icmp_family, sa, len);
643 	if (error != 0) {
644 		icmp_err_ack(q, mp, TSYSERR, error);
645 		return;
646 	}
647 
648 	error = rawip_do_connect(connp, sa, len, cr);
649 	if (error != 0) {
650 		if (error < 0) {
651 			icmp_err_ack(q, mp, -error, 0);
652 		} else {
653 			icmp_err_ack(q, mp, 0, error);
654 		}
655 	} else {
656 		mblk_t *mp1;
657 
658 		/*
659 		 * We have to send a connection confirmation to
660 		 * keep TLI happy.
661 		 */
662 		if (icmp->icmp_family == AF_INET) {
663 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
664 			    sizeof (sin_t), NULL, 0);
665 		} else {
666 			ASSERT(icmp->icmp_family == AF_INET6);
667 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
668 			    sizeof (sin6_t), NULL, 0);
669 		}
670 		if (mp1 == NULL) {
671 			icmp_err_ack(q, mp, TSYSERR, ENOMEM);
672 			return;
673 		}
674 
675 		/*
676 		 * Send ok_ack for T_CONN_REQ
677 		 */
678 		mp = mi_tpi_ok_ack_alloc(mp);
679 		if (mp == NULL) {
680 			/* Unable to reuse the T_CONN_REQ for the ack. */
681 			icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
682 			return;
683 		}
684 		putnext(connp->conn_rq, mp);
685 		putnext(connp->conn_rq, mp1);
686 	}
687 }
688 
689 static int
690 rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
691     cred_t *cr)
692 {
693 	icmp_t	*icmp;
694 	sin_t	*sin;
695 	sin6_t	*sin6;
696 	mblk_t  *ire_mp;
697 	int	error;
698 	ipaddr_t	v4dst;
699 	in6_addr_t	v6dst;
700 
701 	icmp = connp->conn_icmp;
702 
703 	if (sa == NULL || !OK_32PTR((char *)sa)) {
704 		return (EINVAL);
705 	}
706 
707 	ire_mp = allocb(sizeof (ire_t), BPRI_HI);
708 	if (ire_mp == NULL)
709 		return (ENOMEM);
710 	DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE;
711 
712 
713 	ASSERT(sa != NULL && len != 0);
714 
715 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
716 	if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) {
717 		rw_exit(&icmp->icmp_rwlock);
718 		freeb(ire_mp);
719 		return (-TOUTSTATE);
720 	}
721 
722 	switch (len) {
723 	case sizeof (sin_t):
724 		sin = (sin_t *)sa;
725 
726 		ASSERT(icmp->icmp_family == AF_INET);
727 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
728 
729 		v4dst = sin->sin_addr.s_addr;
730 		/*
731 		 * Interpret a zero destination to mean loopback.
732 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
733 		 * generate the T_CONN_CON.
734 		 */
735 		if (v4dst == INADDR_ANY) {
736 			v4dst = htonl(INADDR_LOOPBACK);
737 		}
738 
739 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
740 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
741 		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
742 		    icmp->icmp_ip_snd_options_len;
743 		icmp->icmp_v6dst.sin6_addr = v6dst;
744 		icmp->icmp_v6dst.sin6_family = AF_INET6;
745 		icmp->icmp_v6dst.sin6_flowinfo = 0;
746 		icmp->icmp_v6dst.sin6_port = 0;
747 
748 		/*
749 		 * If the destination address is multicast and
750 		 * an outgoing multicast interface has been set,
751 		 * use the address of that interface as our
752 		 * source address if no source address has been set.
753 		 */
754 		if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY &&
755 		    CLASSD(v4dst) &&
756 		    icmp->icmp_multicast_if_addr != INADDR_ANY) {
757 			IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr,
758 			    &icmp->icmp_v6src);
759 		}
760 		break;
761 	case sizeof (sin6_t):
762 		sin6 = (sin6_t *)sa;
763 
764 		/* No support for mapped addresses on raw sockets */
765 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
766 			rw_exit(&icmp->icmp_rwlock);
767 			freeb(ire_mp);
768 			return (EADDRNOTAVAIL);
769 		}
770 
771 		ASSERT(icmp->icmp_ipversion == IPV6_VERSION);
772 		ASSERT(icmp->icmp_family == AF_INET6);
773 
774 		icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len;
775 
776 		icmp->icmp_v6dst = *sin6;
777 		icmp->icmp_v6dst.sin6_port = 0;
778 
779 		/*
780 		 * Interpret a zero destination to mean loopback.
781 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
782 		 * generate the T_CONN_CON.
783 		 */
784 		if (IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6dst.sin6_addr)) {
785 			icmp->icmp_v6dst.sin6_addr = ipv6_loopback;
786 		}
787 		/*
788 		 * If the destination address is multicast and
789 		 * an outgoing multicast interface has been set,
790 		 * then the ip bind logic will pick the correct source
791 		 * address (i.e. matching the outgoing multicast interface).
792 		 */
793 		break;
794 	}
795 
796 	icmp->icmp_pending_op = T_CONN_REQ;
797 
798 	if (icmp->icmp_state == TS_DATA_XFER) {
799 		/* Already connected - clear out state */
800 		icmp->icmp_v6src = icmp->icmp_bound_v6src;
801 		icmp->icmp_state = TS_IDLE;
802 	}
803 
804 	icmp->icmp_state = TS_DATA_XFER;
805 	rw_exit(&icmp->icmp_rwlock);
806 
807 	if (icmp->icmp_family == AF_INET6) {
808 		error = ip_proto_bind_connected_v6(connp, &ire_mp,
809 		    icmp->icmp_proto, &icmp->icmp_v6src, 0,
810 		    &icmp->icmp_v6dst.sin6_addr,
811 		    NULL, sin6->sin6_port, B_TRUE, B_TRUE, cr);
812 	} else {
813 		error = ip_proto_bind_connected_v4(connp, &ire_mp,
814 		    icmp->icmp_proto, &V4_PART_OF_V6(icmp->icmp_v6src), 0,
815 		    V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr), sin->sin_port,
816 		    B_TRUE, B_TRUE, cr);
817 	}
818 	rawip_post_ip_bind_connect(icmp, ire_mp, error);
819 	return (error);
820 }
821 
822 static void
823 icmp_close_free(conn_t *connp)
824 {
825 	icmp_t *icmp = connp->conn_icmp;
826 
827 	/* If there are any options associated with the stream, free them. */
828 	if (icmp->icmp_ip_snd_options != NULL) {
829 		mi_free((char *)icmp->icmp_ip_snd_options);
830 		icmp->icmp_ip_snd_options = NULL;
831 		icmp->icmp_ip_snd_options_len = 0;
832 	}
833 
834 	if (icmp->icmp_filter != NULL) {
835 		kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t));
836 		icmp->icmp_filter = NULL;
837 	}
838 
839 	/* Free memory associated with sticky options */
840 	if (icmp->icmp_sticky_hdrs_len != 0) {
841 		kmem_free(icmp->icmp_sticky_hdrs,
842 		    icmp->icmp_sticky_hdrs_len);
843 		icmp->icmp_sticky_hdrs = NULL;
844 		icmp->icmp_sticky_hdrs_len = 0;
845 	}
846 	ip6_pkt_free(&icmp->icmp_sticky_ipp);
847 
848 	/*
849 	 * Clear any fields which the kmem_cache constructor clears.
850 	 * Only icmp_connp needs to be preserved.
851 	 * TBD: We should make this more efficient to avoid clearing
852 	 * everything.
853 	 */
854 	ASSERT(icmp->icmp_connp == connp);
855 	bzero(icmp, sizeof (icmp_t));
856 	icmp->icmp_connp = connp;
857 }
858 
859 static int
860 rawip_do_close(conn_t *connp)
861 {
862 	ASSERT(connp != NULL && IPCL_IS_RAWIP(connp));
863 
864 	ip_quiesce_conn(connp);
865 
866 	if (!IPCL_IS_NONSTR(connp)) {
867 		qprocsoff(connp->conn_rq);
868 	}
869 
870 	ASSERT(connp->conn_icmp->icmp_fallback_queue_head == NULL &&
871 	    connp->conn_icmp->icmp_fallback_queue_tail == NULL);
872 	icmp_close_free(connp);
873 
874 	/*
875 	 * Now we are truly single threaded on this stream, and can
876 	 * delete the things hanging off the connp, and finally the connp.
877 	 * We removed this connp from the fanout list, it cannot be
878 	 * accessed thru the fanouts, and we already waited for the
879 	 * conn_ref to drop to 0. We are already in close, so
880 	 * there cannot be any other thread from the top. qprocsoff
881 	 * has completed, and service has completed or won't run in
882 	 * future.
883 	 */
884 	ASSERT(connp->conn_ref == 1);
885 
886 	if (!IPCL_IS_NONSTR(connp)) {
887 		inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
888 	} else {
889 		ip_free_helper_stream(connp);
890 	}
891 
892 	connp->conn_ref--;
893 	ipcl_conn_destroy(connp);
894 
895 	return (0);
896 }
897 
898 static int
899 icmp_close(queue_t *q, int flags)
900 {
901 	conn_t  *connp;
902 
903 	if (flags & SO_FALLBACK) {
904 		/*
905 		 * stream is being closed while in fallback
906 		 * simply free the resources that were allocated
907 		 */
908 		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
909 		qprocsoff(q);
910 		goto done;
911 	}
912 
913 	connp = Q_TO_CONN(q);
914 	(void) rawip_do_close(connp);
915 done:
916 	q->q_ptr = WR(q)->q_ptr = NULL;
917 	return (0);
918 }
919 
920 /*
921  * This routine handles each T_DISCON_REQ message passed to icmp
922  * as an indicating that ICMP is no longer connected. This results
923  * in sending a T_BIND_REQ to IP to restore the binding to just
924  * the local address.
925  *
926  * The disconnect completes in rawip_post_ip_bind_connect.
927  */
928 static int
929 icmp_do_disconnect(conn_t *connp)
930 {
931 	icmp_t	*icmp;
932 	mblk_t	*ire_mp;
933 	int error;
934 
935 	icmp = connp->conn_icmp;
936 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
937 	if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) {
938 		rw_exit(&icmp->icmp_rwlock);
939 		return (-TOUTSTATE);
940 	}
941 	icmp->icmp_pending_op = T_DISCON_REQ;
942 	icmp->icmp_v6src = icmp->icmp_bound_v6src;
943 	icmp->icmp_state = TS_IDLE;
944 
945 
946 	if (icmp->icmp_family == AF_INET6) {
947 		/* Rebuild the header template */
948 		error = icmp_build_hdrs(icmp);
949 		if (error != 0) {
950 			icmp->icmp_pending_op = -1;
951 			rw_exit(&icmp->icmp_rwlock);
952 			return (error);
953 		}
954 	}
955 
956 	rw_exit(&icmp->icmp_rwlock);
957 	ire_mp = allocb(sizeof (ire_t), BPRI_HI);
958 	if (ire_mp == NULL) {
959 		return (ENOMEM);
960 	}
961 
962 	if (icmp->icmp_family == AF_INET6) {
963 		error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto,
964 		    &icmp->icmp_bound_v6src, 0, B_TRUE);
965 	} else {
966 
967 		error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto,
968 		    V4_PART_OF_V6(icmp->icmp_bound_v6src), 0, B_TRUE);
969 	}
970 
971 	rawip_post_ip_bind_connect(icmp, ire_mp, error);
972 
973 	return (error);
974 }
975 
976 static void
977 icmp_tpi_disconnect(queue_t *q, mblk_t *mp)
978 {
979 	conn_t	*connp = Q_TO_CONN(q);
980 	int	error;
981 
982 	/*
983 	 * Allocate the largest primitive we need to send back
984 	 * T_error_ack is > than T_ok_ack
985 	 */
986 	mp = reallocb(mp, sizeof (struct T_error_ack), 1);
987 	if (mp == NULL) {
988 		/* Unable to reuse the T_DISCON_REQ for the ack. */
989 		icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
990 		return;
991 	}
992 
993 	error = icmp_do_disconnect(connp);
994 
995 	if (error != 0) {
996 		if (error > 0) {
997 			icmp_err_ack(q, mp, 0, error);
998 		} else {
999 			icmp_err_ack(q, mp, -error, 0);
1000 		}
1001 	} else {
1002 		mp = mi_tpi_ok_ack_alloc(mp);
1003 		ASSERT(mp != NULL);
1004 		qreply(q, mp);
1005 	}
1006 
1007 }
1008 
1009 static int
1010 icmp_disconnect(conn_t *connp)
1011 {
1012 	int	error;
1013 	icmp_t	*icmp = connp->conn_icmp;
1014 
1015 	icmp->icmp_dgram_errind = B_FALSE;
1016 
1017 	error = icmp_do_disconnect(connp);
1018 
1019 	if (error < 0)
1020 		error = proto_tlitosyserr(-error);
1021 	return (error);
1022 }
1023 
1024 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
1025 static void
1026 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
1027 {
1028 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
1029 		qreply(q, mp);
1030 }
1031 
1032 /* Shorthand to generate and send TPI error acks to our client */
1033 static void
1034 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
1035     t_scalar_t t_error, int sys_error)
1036 {
1037 	struct T_error_ack	*teackp;
1038 
1039 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
1040 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
1041 		teackp = (struct T_error_ack *)mp->b_rptr;
1042 		teackp->ERROR_prim = primitive;
1043 		teackp->TLI_error = t_error;
1044 		teackp->UNIX_error = sys_error;
1045 		qreply(q, mp);
1046 	}
1047 }
1048 
1049 /*
1050  * icmp_icmp_error is called by icmp_input to process ICMP
1051  * messages passed up by IP.
1052  * Generates the appropriate permanent (non-transient) errors.
1053  * Assumes that IP has pulled up everything up to and including
1054  * the ICMP header.
1055  */
1056 static void
1057 icmp_icmp_error(conn_t *connp, mblk_t *mp)
1058 {
1059 	icmph_t *icmph;
1060 	ipha_t	*ipha;
1061 	int	iph_hdr_length;
1062 	sin_t	sin;
1063 	mblk_t	*mp1;
1064 	int	error = 0;
1065 	icmp_t	*icmp = connp->conn_icmp;
1066 
1067 	ipha = (ipha_t *)mp->b_rptr;
1068 
1069 	ASSERT(OK_32PTR(mp->b_rptr));
1070 
1071 	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
1072 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
1073 		icmp_icmp_error_ipv6(connp, mp);
1074 		return;
1075 	}
1076 
1077 	/*
1078 	 * icmp does not support v4 mapped addresses
1079 	 * so we can never be here for a V6 socket
1080 	 * i.e. icmp_family == AF_INET6
1081 	 */
1082 	ASSERT((IPH_HDR_VERSION(ipha) == IPV4_VERSION) &&
1083 	    (icmp->icmp_family == AF_INET));
1084 
1085 	ASSERT(icmp->icmp_family == AF_INET);
1086 
1087 	/* Skip past the outer IP and ICMP headers */
1088 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
1089 	icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]);
1090 	ipha = (ipha_t *)&icmph[1];
1091 	iph_hdr_length = IPH_HDR_LENGTH(ipha);
1092 
1093 	switch (icmph->icmph_type) {
1094 	case ICMP_DEST_UNREACHABLE:
1095 		switch (icmph->icmph_code) {
1096 		case ICMP_FRAGMENTATION_NEEDED:
1097 			/*
1098 			 * IP has already adjusted the path MTU.
1099 			 */
1100 			break;
1101 		case ICMP_PORT_UNREACHABLE:
1102 		case ICMP_PROTOCOL_UNREACHABLE:
1103 			error = ECONNREFUSED;
1104 			break;
1105 		default:
1106 			/* Transient errors */
1107 			break;
1108 		}
1109 		break;
1110 	default:
1111 		/* Transient errors */
1112 		break;
1113 	}
1114 	if (error == 0) {
1115 		freemsg(mp);
1116 		return;
1117 	}
1118 
1119 	/*
1120 	 * Deliver T_UDERROR_IND when the application has asked for it.
1121 	 * The socket layer enables this automatically when connected.
1122 	 */
1123 	if (!icmp->icmp_dgram_errind) {
1124 		freemsg(mp);
1125 		return;
1126 	}
1127 
1128 	sin = sin_null;
1129 	sin.sin_family = AF_INET;
1130 	sin.sin_addr.s_addr = ipha->ipha_dst;
1131 
1132 	if (IPCL_IS_NONSTR(connp)) {
1133 		rw_enter(&icmp->icmp_rwlock, RW_WRITER);
1134 		if (icmp->icmp_state == TS_DATA_XFER) {
1135 			if (sin.sin_addr.s_addr ==
1136 			    V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr)) {
1137 				rw_exit(&icmp->icmp_rwlock);
1138 				(*connp->conn_upcalls->su_set_error)
1139 				    (connp->conn_upper_handle, error);
1140 				goto done;
1141 			}
1142 		} else {
1143 			icmp->icmp_delayed_error = error;
1144 			*((sin_t *)&icmp->icmp_delayed_addr) = sin;
1145 		}
1146 		rw_exit(&icmp->icmp_rwlock);
1147 	} else {
1148 		mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL,
1149 		    0, error);
1150 		if (mp1 != NULL)
1151 			putnext(connp->conn_rq, mp1);
1152 	}
1153 done:
1154 	ASSERT(!RW_ISWRITER(&icmp->icmp_rwlock));
1155 	freemsg(mp);
1156 }
1157 
1158 /*
1159  * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6
1160  * for IPv6 packets.
1161  * Send permanent (non-transient) errors upstream.
1162  * Assumes that IP has pulled up all the extension headers as well
1163  * as the ICMPv6 header.
1164  */
1165 static void
1166 icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
1167 {
1168 	icmp6_t		*icmp6;
1169 	ip6_t		*ip6h, *outer_ip6h;
1170 	uint16_t	iph_hdr_length;
1171 	uint8_t		*nexthdrp;
1172 	sin6_t		sin6;
1173 	mblk_t		*mp1;
1174 	int		error = 0;
1175 	icmp_t		*icmp = connp->conn_icmp;
1176 
1177 	outer_ip6h = (ip6_t *)mp->b_rptr;
1178 	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
1179 		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
1180 	else
1181 		iph_hdr_length = IPV6_HDR_LEN;
1182 
1183 	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
1184 	ip6h = (ip6_t *)&icmp6[1];
1185 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
1186 		freemsg(mp);
1187 		return;
1188 	}
1189 
1190 	switch (icmp6->icmp6_type) {
1191 	case ICMP6_DST_UNREACH:
1192 		switch (icmp6->icmp6_code) {
1193 		case ICMP6_DST_UNREACH_NOPORT:
1194 			error = ECONNREFUSED;
1195 			break;
1196 		case ICMP6_DST_UNREACH_ADMIN:
1197 		case ICMP6_DST_UNREACH_NOROUTE:
1198 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
1199 		case ICMP6_DST_UNREACH_ADDR:
1200 			/* Transient errors */
1201 			break;
1202 		default:
1203 			break;
1204 		}
1205 		break;
1206 	case ICMP6_PACKET_TOO_BIG: {
1207 		struct T_unitdata_ind	*tudi;
1208 		struct T_opthdr		*toh;
1209 		size_t			udi_size;
1210 		mblk_t			*newmp;
1211 		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
1212 		    sizeof (struct ip6_mtuinfo);
1213 		sin6_t			*sin6;
1214 		struct ip6_mtuinfo	*mtuinfo;
1215 
1216 		/*
1217 		 * If the application has requested to receive path mtu
1218 		 * information, send up an empty message containing an
1219 		 * IPV6_PATHMTU ancillary data item.
1220 		 */
1221 		if (!icmp->icmp_ipv6_recvpathmtu)
1222 			break;
1223 
1224 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
1225 		    opt_length;
1226 		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
1227 			BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors);
1228 			break;
1229 		}
1230 
1231 		/*
1232 		 * newmp->b_cont is left to NULL on purpose.  This is an
1233 		 * empty message containing only ancillary data.
1234 		 */
1235 		newmp->b_datap->db_type = M_PROTO;
1236 		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
1237 		newmp->b_wptr = (uchar_t *)tudi + udi_size;
1238 		tudi->PRIM_type = T_UNITDATA_IND;
1239 		tudi->SRC_length = sizeof (sin6_t);
1240 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1241 		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
1242 		tudi->OPT_length = opt_length;
1243 
1244 		sin6 = (sin6_t *)&tudi[1];
1245 		bzero(sin6, sizeof (sin6_t));
1246 		sin6->sin6_family = AF_INET6;
1247 		sin6->sin6_addr = icmp->icmp_v6dst.sin6_addr;
1248 
1249 		toh = (struct T_opthdr *)&sin6[1];
1250 		toh->level = IPPROTO_IPV6;
1251 		toh->name = IPV6_PATHMTU;
1252 		toh->len = opt_length;
1253 		toh->status = 0;
1254 
1255 		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
1256 		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
1257 		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
1258 		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
1259 		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
1260 		/*
1261 		 * We've consumed everything we need from the original
1262 		 * message.  Free it, then send our empty message.
1263 		 */
1264 		freemsg(mp);
1265 		icmp_ulp_recv(connp, newmp);
1266 
1267 		return;
1268 	}
1269 	case ICMP6_TIME_EXCEEDED:
1270 		/* Transient errors */
1271 		break;
1272 	case ICMP6_PARAM_PROB:
1273 		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
1274 		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
1275 		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
1276 		    (uchar_t *)nexthdrp) {
1277 			error = ECONNREFUSED;
1278 			break;
1279 		}
1280 		break;
1281 	}
1282 	if (error == 0) {
1283 		freemsg(mp);
1284 		return;
1285 	}
1286 
1287 	/*
1288 	 * Deliver T_UDERROR_IND when the application has asked for it.
1289 	 * The socket layer enables this automatically when connected.
1290 	 */
1291 	if (!icmp->icmp_dgram_errind) {
1292 		freemsg(mp);
1293 		return;
1294 	}
1295 
1296 	sin6 = sin6_null;
1297 	sin6.sin6_family = AF_INET6;
1298 	sin6.sin6_addr = ip6h->ip6_dst;
1299 	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
1300 
1301 	if (IPCL_IS_NONSTR(connp)) {
1302 		rw_enter(&icmp->icmp_rwlock, RW_WRITER);
1303 		if (icmp->icmp_state == TS_DATA_XFER) {
1304 			if (IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
1305 			    &icmp->icmp_v6dst.sin6_addr)) {
1306 				rw_exit(&icmp->icmp_rwlock);
1307 				(*connp->conn_upcalls->su_set_error)
1308 				    (connp->conn_upper_handle, error);
1309 				goto done;
1310 			}
1311 		} else {
1312 			icmp->icmp_delayed_error = error;
1313 			*((sin6_t *)&icmp->icmp_delayed_addr) = sin6;
1314 		}
1315 		rw_exit(&icmp->icmp_rwlock);
1316 	} else {
1317 		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
1318 		    NULL, 0, error);
1319 		if (mp1 != NULL)
1320 			putnext(connp->conn_rq, mp1);
1321 	}
1322 done:
1323 	ASSERT(!RW_ISWRITER(&icmp->icmp_rwlock));
1324 	freemsg(mp);
1325 }
1326 
1327 /*
1328  * This routine responds to T_ADDR_REQ messages.  It is called by icmp_wput.
1329  * The local address is filled in if endpoint is bound. The remote address
1330  * is filled in if remote address has been precified ("connected endpoint")
1331  * (The concept of connected CLTS sockets is alien to published TPI
1332  *  but we support it anyway).
1333  */
1334 static void
1335 icmp_addr_req(queue_t *q, mblk_t *mp)
1336 {
1337 	icmp_t	*icmp = Q_TO_ICMP(q);
1338 	mblk_t	*ackmp;
1339 	struct T_addr_ack *taa;
1340 
1341 	/* Make it large enough for worst case */
1342 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
1343 	    2 * sizeof (sin6_t), 1);
1344 	if (ackmp == NULL) {
1345 		icmp_err_ack(q, mp, TSYSERR, ENOMEM);
1346 		return;
1347 	}
1348 	taa = (struct T_addr_ack *)ackmp->b_rptr;
1349 
1350 	bzero(taa, sizeof (struct T_addr_ack));
1351 	ackmp->b_wptr = (uchar_t *)&taa[1];
1352 
1353 	taa->PRIM_type = T_ADDR_ACK;
1354 	ackmp->b_datap->db_type = M_PCPROTO;
1355 	rw_enter(&icmp->icmp_rwlock, RW_READER);
1356 	/*
1357 	 * Note: Following code assumes 32 bit alignment of basic
1358 	 * data structures like sin_t and struct T_addr_ack.
1359 	 */
1360 	if (icmp->icmp_state != TS_UNBND) {
1361 		/*
1362 		 * Fill in local address
1363 		 */
1364 		taa->LOCADDR_offset = sizeof (*taa);
1365 		if (icmp->icmp_family == AF_INET) {
1366 			sin_t	*sin;
1367 
1368 			taa->LOCADDR_length = sizeof (sin_t);
1369 			sin = (sin_t *)&taa[1];
1370 			/* Fill zeroes and then intialize non-zero fields */
1371 			*sin = sin_null;
1372 			sin->sin_family = AF_INET;
1373 			if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) &&
1374 			    !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
1375 				IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src,
1376 				    sin->sin_addr.s_addr);
1377 			} else {
1378 				/*
1379 				 * INADDR_ANY
1380 				 * icmp_v6src is not set, we might be bound to
1381 				 * broadcast/multicast. Use icmp_bound_v6src as
1382 				 * local address instead (that could
1383 				 * also still be INADDR_ANY)
1384 				 */
1385 				IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src,
1386 				    sin->sin_addr.s_addr);
1387 			}
1388 			ackmp->b_wptr = (uchar_t *)&sin[1];
1389 		} else {
1390 			sin6_t	*sin6;
1391 
1392 			ASSERT(icmp->icmp_family == AF_INET6);
1393 			taa->LOCADDR_length = sizeof (sin6_t);
1394 			sin6 = (sin6_t *)&taa[1];
1395 			/* Fill zeroes and then intialize non-zero fields */
1396 			*sin6 = sin6_null;
1397 			sin6->sin6_family = AF_INET6;
1398 			if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
1399 				sin6->sin6_addr = icmp->icmp_v6src;
1400 			} else {
1401 				/*
1402 				 * UNSPECIFIED
1403 				 * icmp_v6src is not set, we might be bound to
1404 				 * broadcast/multicast. Use icmp_bound_v6src as
1405 				 * local address instead (that could
1406 				 * also still be UNSPECIFIED)
1407 				 */
1408 				sin6->sin6_addr = icmp->icmp_bound_v6src;
1409 			}
1410 			ackmp->b_wptr = (uchar_t *)&sin6[1];
1411 		}
1412 	}
1413 	rw_exit(&icmp->icmp_rwlock);
1414 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
1415 	qreply(q, ackmp);
1416 }
1417 
1418 static void
1419 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp)
1420 {
1421 	*tap = icmp_g_t_info_ack;
1422 
1423 	if (icmp->icmp_family == AF_INET6)
1424 		tap->ADDR_size = sizeof (sin6_t);
1425 	else
1426 		tap->ADDR_size = sizeof (sin_t);
1427 	tap->CURRENT_state = icmp->icmp_state;
1428 	tap->OPT_size = icmp_max_optsize;
1429 }
1430 
1431 static void
1432 icmp_do_capability_ack(icmp_t *icmp, struct T_capability_ack *tcap,
1433     t_uscalar_t cap_bits1)
1434 {
1435 	tcap->CAP_bits1 = 0;
1436 
1437 	if (cap_bits1 & TC1_INFO) {
1438 		icmp_copy_info(&tcap->INFO_ack, icmp);
1439 		tcap->CAP_bits1 |= TC1_INFO;
1440 	}
1441 }
1442 
1443 /*
1444  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
1445  * icmp_wput.  Much of the T_CAPABILITY_ACK information is copied from
1446  * icmp_g_t_info_ack.  The current state of the stream is copied from
1447  * icmp_state.
1448  */
1449 static void
1450 icmp_capability_req(queue_t *q, mblk_t *mp)
1451 {
1452 	icmp_t			*icmp = Q_TO_ICMP(q);
1453 	t_uscalar_t		cap_bits1;
1454 	struct T_capability_ack	*tcap;
1455 
1456 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
1457 
1458 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
1459 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
1460 	if (!mp)
1461 		return;
1462 
1463 	tcap = (struct T_capability_ack *)mp->b_rptr;
1464 
1465 	icmp_do_capability_ack(icmp, tcap, cap_bits1);
1466 
1467 	qreply(q, mp);
1468 }
1469 
1470 /*
1471  * This routine responds to T_INFO_REQ messages.  It is called by icmp_wput.
1472  * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack.
1473  * The current state of the stream is copied from icmp_state.
1474  */
1475 static void
1476 icmp_info_req(queue_t *q, mblk_t *mp)
1477 {
1478 	icmp_t	*icmp = Q_TO_ICMP(q);
1479 
1480 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
1481 	    T_INFO_ACK);
1482 	if (!mp)
1483 		return;
1484 	icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp);
1485 	qreply(q, mp);
1486 }
1487 
1488 /* For /dev/icmp aka AF_INET open */
1489 static int
1490 icmp_tpi_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
1491     int family)
1492 {
1493 	conn_t *connp;
1494 	dev_t	conn_dev;
1495 	icmp_stack_t *is;
1496 	int	error;
1497 
1498 	conn_dev = NULL;
1499 
1500 	/* If the stream is already open, return immediately. */
1501 	if (q->q_ptr != NULL)
1502 		return (0);
1503 
1504 	if (sflag == MODOPEN)
1505 		return (EINVAL);
1506 
1507 	/*
1508 	 * Since ICMP is not used so heavily, allocating from the small
1509 	 * arena should be sufficient.
1510 	 */
1511 	if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
1512 		return (EBUSY);
1513 	}
1514 
1515 	if (flag & SO_FALLBACK) {
1516 		/*
1517 		 * Non streams socket needs a stream to fallback to
1518 		 */
1519 		RD(q)->q_ptr = (void *)conn_dev;
1520 		WR(q)->q_qinfo = &icmp_fallback_sock_winit;
1521 		WR(q)->q_ptr = (void *)ip_minor_arena_sa;
1522 		qprocson(q);
1523 		return (0);
1524 	}
1525 
1526 	connp = icmp_open(family, credp, &error, KM_SLEEP);
1527 	if (connp == NULL) {
1528 		ASSERT(error != NULL);
1529 		inet_minor_free(ip_minor_arena_sa, connp->conn_dev);
1530 		return (error);
1531 	}
1532 
1533 	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
1534 	connp->conn_dev = conn_dev;
1535 	connp->conn_minor_arena = ip_minor_arena_sa;
1536 
1537 	is = connp->conn_icmp->icmp_is;
1538 
1539 	/*
1540 	 * Initialize the icmp_t structure for this stream.
1541 	 */
1542 	q->q_ptr = connp;
1543 	WR(q)->q_ptr = connp;
1544 	connp->conn_rq = q;
1545 	connp->conn_wq = WR(q);
1546 
1547 	if (connp->conn_icmp->icmp_family == AF_INET6) {
1548 		/* Build initial header template for transmit */
1549 		rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER);
1550 		if ((error = icmp_build_hdrs(connp->conn_icmp)) != 0) {
1551 			rw_exit(&connp->conn_icmp->icmp_rwlock);
1552 			inet_minor_free(ip_minor_arena_sa, connp->conn_dev);
1553 			ipcl_conn_destroy(connp);
1554 			return (error);
1555 		}
1556 		rw_exit(&connp->conn_icmp->icmp_rwlock);
1557 	}
1558 
1559 
1560 	q->q_hiwat = is->is_recv_hiwat;
1561 	WR(q)->q_hiwat = is->is_xmit_hiwat;
1562 	WR(q)->q_lowat = is->is_xmit_lowat;
1563 
1564 	qprocson(q);
1565 
1566 	/* Set the Stream head write offset. */
1567 	(void) proto_set_tx_wroff(q, connp,
1568 	    connp->conn_icmp->icmp_max_hdr_len + is->is_wroff_extra);
1569 	(void) proto_set_rx_hiwat(connp->conn_rq, connp, q->q_hiwat);
1570 
1571 	mutex_enter(&connp->conn_lock);
1572 	connp->conn_state_flags &= ~CONN_INCIPIENT;
1573 	mutex_exit(&connp->conn_lock);
1574 
1575 	return (0);
1576 }
1577 
1578 /* For /dev/icmp4 aka AF_INET open */
1579 static int
1580 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1581 {
1582 	return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET));
1583 }
1584 
1585 /* For /dev/icmp6 aka AF_INET6 open */
1586 static int
1587 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
1588 {
1589 	return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET6));
1590 }
1591 
1592 /*
1593  * This is the open routine for icmp.  It allocates a icmp_t structure for
1594  * the stream and, on the first open of the module, creates an ND table.
1595  */
1596 /* ARGSUSED */
1597 static conn_t *
1598 icmp_open(int family, cred_t *credp, int *err, int flags)
1599 {
1600 	icmp_t	*icmp;
1601 	conn_t *connp;
1602 	zoneid_t zoneid;
1603 	netstack_t *ns;
1604 	icmp_stack_t *is;
1605 	boolean_t isv6 = B_FALSE;
1606 
1607 	*err = secpolicy_net_icmpaccess(credp);
1608 	if (*err != 0)
1609 		return (NULL);
1610 
1611 	if (family == AF_INET6)
1612 		isv6 = B_TRUE;
1613 	ns = netstack_find_by_cred(credp);
1614 	ASSERT(ns != NULL);
1615 	is = ns->netstack_icmp;
1616 	ASSERT(is != NULL);
1617 
1618 	/*
1619 	 * For exclusive stacks we set the zoneid to zero
1620 	 * to make ICMP operate as if in the global zone.
1621 	 */
1622 	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
1623 		zoneid = GLOBAL_ZONEID;
1624 	else
1625 		zoneid = crgetzoneid(credp);
1626 
1627 	ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
1628 
1629 	connp = ipcl_conn_create(IPCL_RAWIPCONN, flags, ns);
1630 	icmp = connp->conn_icmp;
1631 	icmp->icmp_v6dst = sin6_null;
1632 
1633 	/*
1634 	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
1635 	 * done by netstack_find_by_cred()
1636 	 */
1637 	netstack_rele(ns);
1638 
1639 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
1640 	ASSERT(connp->conn_ulp == IPPROTO_ICMP);
1641 	ASSERT(connp->conn_icmp == icmp);
1642 	ASSERT(icmp->icmp_connp == connp);
1643 
1644 	/* Set the initial state of the stream and the privilege status. */
1645 	icmp->icmp_state = TS_UNBND;
1646 	if (isv6) {
1647 		icmp->icmp_ipversion = IPV6_VERSION;
1648 		icmp->icmp_family = AF_INET6;
1649 		connp->conn_ulp = IPPROTO_ICMPV6;
1650 		/* May be changed by a SO_PROTOTYPE socket option. */
1651 		icmp->icmp_proto = IPPROTO_ICMPV6;
1652 		icmp->icmp_checksum_off = 2;	/* Offset for icmp6_cksum */
1653 		icmp->icmp_max_hdr_len = IPV6_HDR_LEN;
1654 		icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit;
1655 		connp->conn_af_isv6 = B_TRUE;
1656 		connp->conn_flags |= IPCL_ISV6;
1657 	} else {
1658 		icmp->icmp_ipversion = IPV4_VERSION;
1659 		icmp->icmp_family = AF_INET;
1660 		/* May be changed by a SO_PROTOTYPE socket option. */
1661 		icmp->icmp_proto = IPPROTO_ICMP;
1662 		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH;
1663 		icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl;
1664 		connp->conn_af_isv6 = B_FALSE;
1665 		connp->conn_flags &= ~IPCL_ISV6;
1666 	}
1667 	icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1668 	icmp->icmp_pending_op = -1;
1669 	connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1670 	connp->conn_zoneid = zoneid;
1671 
1672 	/*
1673 	 * If the caller has the process-wide flag set, then default to MAC
1674 	 * exempt mode.  This allows read-down to unlabeled hosts.
1675 	 */
1676 	if (getpflags(NET_MAC_AWARE, credp) != 0)
1677 		connp->conn_mac_exempt = B_TRUE;
1678 
1679 	connp->conn_ulp_labeled = is_system_labeled();
1680 
1681 	icmp->icmp_is = is;
1682 
1683 	connp->conn_recv = icmp_input;
1684 	crhold(credp);
1685 	connp->conn_cred = credp;
1686 
1687 	rw_exit(&icmp->icmp_rwlock);
1688 
1689 	connp->conn_flow_cntrld = B_FALSE;
1690 	return (connp);
1691 }
1692 
1693 /*
1694  * Which ICMP options OK to set through T_UNITDATA_REQ...
1695  */
1696 /* ARGSUSED */
1697 static boolean_t
1698 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
1699 {
1700 	return (B_TRUE);
1701 }
1702 
1703 /*
1704  * This routine gets default values of certain options whose default
1705  * values are maintained by protcol specific code
1706  */
1707 /* ARGSUSED */
1708 int
1709 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
1710 {
1711 	icmp_t *icmp = Q_TO_ICMP(q);
1712 	icmp_stack_t *is = icmp->icmp_is;
1713 	int *i1 = (int *)ptr;
1714 
1715 	switch (level) {
1716 	case IPPROTO_IP:
1717 		switch (name) {
1718 		case IP_MULTICAST_TTL:
1719 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
1720 			return (sizeof (uchar_t));
1721 		case IP_MULTICAST_LOOP:
1722 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
1723 			return (sizeof (uchar_t));
1724 		}
1725 		break;
1726 	case IPPROTO_IPV6:
1727 		switch (name) {
1728 		case IPV6_MULTICAST_HOPS:
1729 			*i1 = IP_DEFAULT_MULTICAST_TTL;
1730 			return (sizeof (int));
1731 		case IPV6_MULTICAST_LOOP:
1732 			*i1 = IP_DEFAULT_MULTICAST_LOOP;
1733 			return (sizeof (int));
1734 		case IPV6_UNICAST_HOPS:
1735 			*i1 = is->is_ipv6_hoplimit;
1736 			return (sizeof (int));
1737 		}
1738 		break;
1739 	case IPPROTO_ICMPV6:
1740 		switch (name) {
1741 		case ICMP6_FILTER:
1742 			/* Make it look like "pass all" */
1743 			ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
1744 			return (sizeof (icmp6_filter_t));
1745 		}
1746 		break;
1747 	}
1748 	return (-1);
1749 }
1750 
1751 /*
1752  * This routine retrieves the current status of socket options.
1753  * It returns the size of the option retrieved.
1754  */
1755 int
1756 icmp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
1757 {
1758 	icmp_t		*icmp = connp->conn_icmp;
1759 	icmp_stack_t	*is = icmp->icmp_is;
1760 	int		*i1 = (int *)ptr;
1761 	ip6_pkt_t	*ipp = &icmp->icmp_sticky_ipp;
1762 	int		ret = 0;
1763 
1764 	ASSERT(RW_READ_HELD(&icmp->icmp_rwlock));
1765 	switch (level) {
1766 	case SOL_SOCKET:
1767 		switch (name) {
1768 		case SO_DEBUG:
1769 			*i1 = icmp->icmp_debug;
1770 			break;
1771 		case SO_TYPE:
1772 			*i1 = SOCK_RAW;
1773 			break;
1774 		case SO_PROTOTYPE:
1775 			*i1 = icmp->icmp_proto;
1776 			break;
1777 		case SO_REUSEADDR:
1778 			*i1 = icmp->icmp_reuseaddr;
1779 			break;
1780 
1781 		/*
1782 		 * The following three items are available here,
1783 		 * but are only meaningful to IP.
1784 		 */
1785 		case SO_DONTROUTE:
1786 			*i1 = icmp->icmp_dontroute;
1787 			break;
1788 		case SO_USELOOPBACK:
1789 			*i1 = icmp->icmp_useloopback;
1790 			break;
1791 		case SO_BROADCAST:
1792 			*i1 = icmp->icmp_broadcast;
1793 			break;
1794 
1795 		case SO_SNDBUF:
1796 			ASSERT(icmp->icmp_xmit_hiwat <= INT_MAX);
1797 			*i1 = icmp->icmp_xmit_hiwat;
1798 			break;
1799 		case SO_RCVBUF:
1800 			ASSERT(icmp->icmp_recv_hiwat <= INT_MAX);
1801 			*i1 = icmp->icmp_recv_hiwat;
1802 			break;
1803 		case SO_DGRAM_ERRIND:
1804 			*i1 = icmp->icmp_dgram_errind;
1805 			break;
1806 		case SO_TIMESTAMP:
1807 			*i1 = icmp->icmp_timestamp;
1808 			break;
1809 		case SO_MAC_EXEMPT:
1810 			*i1 = connp->conn_mac_exempt;
1811 			break;
1812 		case SO_DOMAIN:
1813 			*i1 = icmp->icmp_family;
1814 			break;
1815 
1816 		/*
1817 		 * Following four not meaningful for icmp
1818 		 * Action is same as "default" to which we fallthrough
1819 		 * so we keep them in comments.
1820 		 * case SO_LINGER:
1821 		 * case SO_KEEPALIVE:
1822 		 * case SO_OOBINLINE:
1823 		 * case SO_ALLZONES:
1824 		 */
1825 		default:
1826 			ret = -1;
1827 			goto done;
1828 		}
1829 		break;
1830 	case IPPROTO_IP:
1831 		/*
1832 		 * Only allow IPv4 option processing on IPv4 sockets.
1833 		 */
1834 		if (icmp->icmp_family != AF_INET) {
1835 			ret = -1;
1836 			goto done;
1837 		}
1838 
1839 		switch (name) {
1840 		case IP_OPTIONS:
1841 		case T_IP_OPTIONS:
1842 			/* Options are passed up with each packet */
1843 			ret = 0;
1844 			goto done;
1845 		case IP_HDRINCL:
1846 			*i1 = (int)icmp->icmp_hdrincl;
1847 			break;
1848 		case IP_TOS:
1849 		case T_IP_TOS:
1850 			*i1 = (int)icmp->icmp_type_of_service;
1851 			break;
1852 		case IP_TTL:
1853 			*i1 = (int)icmp->icmp_ttl;
1854 			break;
1855 		case IP_MULTICAST_IF:
1856 			/* 0 address if not set */
1857 			*(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr;
1858 			ret = sizeof (ipaddr_t);
1859 			goto done;
1860 		case IP_MULTICAST_TTL:
1861 			*(uchar_t *)ptr = icmp->icmp_multicast_ttl;
1862 			ret = sizeof (uchar_t);
1863 			goto done;
1864 		case IP_MULTICAST_LOOP:
1865 			*ptr = connp->conn_multicast_loop;
1866 			ret = sizeof (uint8_t);
1867 			goto done;
1868 		case IP_BOUND_IF:
1869 			/* Zero if not set */
1870 			*i1 = icmp->icmp_bound_if;
1871 			break;	/* goto sizeof (int) option return */
1872 		case IP_UNSPEC_SRC:
1873 			*ptr = icmp->icmp_unspec_source;
1874 			break;	/* goto sizeof (int) option return */
1875 		case IP_RECVIF:
1876 			*ptr = icmp->icmp_recvif;
1877 			break;	/* goto sizeof (int) option return */
1878 		case IP_BROADCAST_TTL:
1879 			*(uchar_t *)ptr = connp->conn_broadcast_ttl;
1880 			return (sizeof (uchar_t));
1881 		case IP_RECVPKTINFO:
1882 			/*
1883 			 * This also handles IP_PKTINFO.
1884 			 * IP_PKTINFO and IP_RECVPKTINFO have the same value.
1885 			 * Differentiation is based on the size of the argument
1886 			 * passed in.
1887 			 * This option is handled in IP which will return an
1888 			 * error for IP_PKTINFO as it's not supported as a
1889 			 * sticky option.
1890 			 */
1891 			ret = -EINVAL;
1892 			goto done;
1893 		/*
1894 		 * Cannot "get" the value of following options
1895 		 * at this level. Action is same as "default" to
1896 		 * which we fallthrough so we keep them in comments.
1897 		 *
1898 		 * case IP_ADD_MEMBERSHIP:
1899 		 * case IP_DROP_MEMBERSHIP:
1900 		 * case IP_BLOCK_SOURCE:
1901 		 * case IP_UNBLOCK_SOURCE:
1902 		 * case IP_ADD_SOURCE_MEMBERSHIP:
1903 		 * case IP_DROP_SOURCE_MEMBERSHIP:
1904 		 * case MCAST_JOIN_GROUP:
1905 		 * case MCAST_LEAVE_GROUP:
1906 		 * case MCAST_BLOCK_SOURCE:
1907 		 * case MCAST_UNBLOCK_SOURCE:
1908 		 * case MCAST_JOIN_SOURCE_GROUP:
1909 		 * case MCAST_LEAVE_SOURCE_GROUP:
1910 		 * case MRT_INIT:
1911 		 * case MRT_DONE:
1912 		 * case MRT_ADD_VIF:
1913 		 * case MRT_DEL_VIF:
1914 		 * case MRT_ADD_MFC:
1915 		 * case MRT_DEL_MFC:
1916 		 * case MRT_VERSION:
1917 		 * case MRT_ASSERT:
1918 		 * case IP_SEC_OPT:
1919 		 * case IP_NEXTHOP:
1920 		 */
1921 		default:
1922 			ret = -1;
1923 			goto done;
1924 		}
1925 		break;
1926 	case IPPROTO_IPV6:
1927 		/*
1928 		 * Only allow IPv6 option processing on native IPv6 sockets.
1929 		 */
1930 		if (icmp->icmp_family != AF_INET6) {
1931 			ret = -1;
1932 			goto done;
1933 		}
1934 		switch (name) {
1935 		case IPV6_UNICAST_HOPS:
1936 			*i1 = (unsigned int)icmp->icmp_ttl;
1937 			break;
1938 		case IPV6_MULTICAST_IF:
1939 			/* 0 index if not set */
1940 			*i1 = icmp->icmp_multicast_if_index;
1941 			break;
1942 		case IPV6_MULTICAST_HOPS:
1943 			*i1 = icmp->icmp_multicast_ttl;
1944 			break;
1945 		case IPV6_MULTICAST_LOOP:
1946 			*i1 = connp->conn_multicast_loop;
1947 			break;
1948 		case IPV6_BOUND_IF:
1949 			/* Zero if not set */
1950 			*i1 = icmp->icmp_bound_if;
1951 			break;
1952 		case IPV6_UNSPEC_SRC:
1953 			*i1 = icmp->icmp_unspec_source;
1954 			break;
1955 		case IPV6_CHECKSUM:
1956 			/*
1957 			 * Return offset or -1 if no checksum offset.
1958 			 * Does not apply to IPPROTO_ICMPV6
1959 			 */
1960 			if (icmp->icmp_proto == IPPROTO_ICMPV6) {
1961 				ret = -1;
1962 				goto done;
1963 			}
1964 
1965 			if (icmp->icmp_raw_checksum) {
1966 				*i1 = icmp->icmp_checksum_off;
1967 			} else {
1968 				*i1 = -1;
1969 			}
1970 			break;
1971 		case IPV6_JOIN_GROUP:
1972 		case IPV6_LEAVE_GROUP:
1973 		case MCAST_JOIN_GROUP:
1974 		case MCAST_LEAVE_GROUP:
1975 		case MCAST_BLOCK_SOURCE:
1976 		case MCAST_UNBLOCK_SOURCE:
1977 		case MCAST_JOIN_SOURCE_GROUP:
1978 		case MCAST_LEAVE_SOURCE_GROUP:
1979 			/* cannot "get" the value for these */
1980 			ret = -1;
1981 			goto done;
1982 		case IPV6_RECVPKTINFO:
1983 			*i1 = icmp->icmp_ip_recvpktinfo;
1984 			break;
1985 		case IPV6_RECVTCLASS:
1986 			*i1 = icmp->icmp_ipv6_recvtclass;
1987 			break;
1988 		case IPV6_RECVPATHMTU:
1989 			*i1 = icmp->icmp_ipv6_recvpathmtu;
1990 			break;
1991 		case IPV6_V6ONLY:
1992 			*i1 = 1;
1993 			break;
1994 		case IPV6_RECVHOPLIMIT:
1995 			*i1 = icmp->icmp_ipv6_recvhoplimit;
1996 			break;
1997 		case IPV6_RECVHOPOPTS:
1998 			*i1 = icmp->icmp_ipv6_recvhopopts;
1999 			break;
2000 		case IPV6_RECVDSTOPTS:
2001 			*i1 = icmp->icmp_ipv6_recvdstopts;
2002 			break;
2003 		case _OLD_IPV6_RECVDSTOPTS:
2004 			*i1 = icmp->icmp_old_ipv6_recvdstopts;
2005 			break;
2006 		case IPV6_RECVRTHDRDSTOPTS:
2007 			*i1 = icmp->icmp_ipv6_recvrtdstopts;
2008 			break;
2009 		case IPV6_RECVRTHDR:
2010 			*i1 = icmp->icmp_ipv6_recvrthdr;
2011 			break;
2012 		case IPV6_PKTINFO: {
2013 			/* XXX assumes that caller has room for max size! */
2014 			struct in6_pktinfo *pkti;
2015 
2016 			pkti = (struct in6_pktinfo *)ptr;
2017 			if (ipp->ipp_fields & IPPF_IFINDEX)
2018 				pkti->ipi6_ifindex = ipp->ipp_ifindex;
2019 			else
2020 				pkti->ipi6_ifindex = 0;
2021 			if (ipp->ipp_fields & IPPF_ADDR)
2022 				pkti->ipi6_addr = ipp->ipp_addr;
2023 			else
2024 				pkti->ipi6_addr = ipv6_all_zeros;
2025 			ret = sizeof (struct in6_pktinfo);
2026 			goto done;
2027 		}
2028 		case IPV6_NEXTHOP: {
2029 			sin6_t *sin6 = (sin6_t *)ptr;
2030 
2031 			if (!(ipp->ipp_fields & IPPF_NEXTHOP))
2032 				return (0);
2033 			*sin6 = sin6_null;
2034 			sin6->sin6_family = AF_INET6;
2035 			sin6->sin6_addr = ipp->ipp_nexthop;
2036 			ret = (sizeof (sin6_t));
2037 			goto done;
2038 		}
2039 		case IPV6_HOPOPTS:
2040 			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
2041 				return (0);
2042 			if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6)
2043 				return (0);
2044 			bcopy((char *)ipp->ipp_hopopts +
2045 			    icmp->icmp_label_len_v6, ptr,
2046 			    ipp->ipp_hopoptslen - icmp->icmp_label_len_v6);
2047 			if (icmp->icmp_label_len_v6 > 0) {
2048 				ptr[0] = ((char *)ipp->ipp_hopopts)[0];
2049 				ptr[1] = (ipp->ipp_hopoptslen -
2050 				    icmp->icmp_label_len_v6 + 7) / 8 - 1;
2051 			}
2052 			ret = (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6);
2053 			goto done;
2054 		case IPV6_RTHDRDSTOPTS:
2055 			if (!(ipp->ipp_fields & IPPF_RTDSTOPTS))
2056 				return (0);
2057 			bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
2058 			ret = ipp->ipp_rtdstoptslen;
2059 			goto done;
2060 		case IPV6_RTHDR:
2061 			if (!(ipp->ipp_fields & IPPF_RTHDR))
2062 				return (0);
2063 			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
2064 			ret = ipp->ipp_rthdrlen;
2065 			goto done;
2066 		case IPV6_DSTOPTS:
2067 			if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
2068 				ret = 0;
2069 				goto done;
2070 			}
2071 			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
2072 			ret = ipp->ipp_dstoptslen;
2073 			goto done;
2074 		case IPV6_PATHMTU:
2075 			if (!(ipp->ipp_fields & IPPF_PATHMTU)) {
2076 				ret = 0;
2077 			} else {
2078 				ret = ip_fill_mtuinfo(
2079 				    &icmp->icmp_v6dst.sin6_addr, 0,
2080 				    (struct ip6_mtuinfo *)ptr,
2081 				    is->is_netstack);
2082 			}
2083 			goto done;
2084 		case IPV6_TCLASS:
2085 			if (ipp->ipp_fields & IPPF_TCLASS)
2086 				*i1 = ipp->ipp_tclass;
2087 			else
2088 				*i1 = IPV6_FLOW_TCLASS(
2089 				    IPV6_DEFAULT_VERS_AND_FLOW);
2090 			break;
2091 		default:
2092 			ret = -1;
2093 			goto done;
2094 		}
2095 		break;
2096 	case IPPROTO_ICMPV6:
2097 		/*
2098 		 * Only allow IPv6 option processing on native IPv6 sockets.
2099 		 */
2100 		if (icmp->icmp_family != AF_INET6) {
2101 			ret = -1;
2102 		}
2103 
2104 		if (icmp->icmp_proto != IPPROTO_ICMPV6) {
2105 			ret = -1;
2106 		}
2107 
2108 		switch (name) {
2109 		case ICMP6_FILTER:
2110 			if (icmp->icmp_filter == NULL) {
2111 				/* Make it look like "pass all" */
2112 				ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr);
2113 			} else {
2114 				(void) bcopy(icmp->icmp_filter, ptr,
2115 				    sizeof (icmp6_filter_t));
2116 			}
2117 			ret = sizeof (icmp6_filter_t);
2118 			goto done;
2119 		default:
2120 			ret = -1;
2121 			goto done;
2122 		}
2123 	default:
2124 		ret = -1;
2125 		goto done;
2126 	}
2127 	ret = sizeof (int);
2128 done:
2129 	return (ret);
2130 }
2131 
2132 /*
2133  * This routine retrieves the current status of socket options.
2134  * It returns the size of the option retrieved.
2135  */
2136 int
2137 icmp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
2138 {
2139 	conn_t  *connp = Q_TO_CONN(q);
2140 	icmp_t	*icmp = connp->conn_icmp;
2141 	int 	err;
2142 
2143 	rw_enter(&icmp->icmp_rwlock, RW_READER);
2144 	err = icmp_opt_get(connp, level, name, ptr);
2145 	rw_exit(&icmp->icmp_rwlock);
2146 	return (err);
2147 }
2148 
2149 int
2150 icmp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen,
2151     uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr,
2152     void *thisdg_attrs, boolean_t checkonly)
2153 {
2154 
2155 	int	*i1 = (int *)invalp;
2156 	boolean_t onoff = (*i1 == 0) ? 0 : 1;
2157 	icmp_t *icmp = connp->conn_icmp;
2158 	icmp_stack_t *is = icmp->icmp_is;
2159 	int	error;
2160 
2161 	ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock));
2162 	/*
2163 	 * For fixed length options, no sanity check
2164 	 * of passed in length is done. It is assumed *_optcom_req()
2165 	 * routines do the right thing.
2166 	 */
2167 	switch (level) {
2168 	case SOL_SOCKET:
2169 		switch (name) {
2170 		case SO_DEBUG:
2171 			if (!checkonly)
2172 				icmp->icmp_debug = onoff;
2173 			break;
2174 		case SO_PROTOTYPE:
2175 			if ((*i1 & 0xFF) != IPPROTO_ICMP &&
2176 			    (*i1 & 0xFF) != IPPROTO_ICMPV6 &&
2177 			    secpolicy_net_rawaccess(cr) != 0) {
2178 				*outlenp = 0;
2179 				return (EACCES);
2180 			}
2181 			/* Can't use IPPROTO_RAW with IPv6 */
2182 			if ((*i1 & 0xFF) == IPPROTO_RAW &&
2183 			    icmp->icmp_family == AF_INET6) {
2184 				*outlenp = 0;
2185 				return (EPROTONOSUPPORT);
2186 			}
2187 			if (checkonly) {
2188 				/* T_CHECK case */
2189 				*(int *)outvalp = (*i1 & 0xFF);
2190 				break;
2191 			}
2192 			icmp->icmp_proto = *i1 & 0xFF;
2193 			if ((icmp->icmp_proto == IPPROTO_RAW ||
2194 			    icmp->icmp_proto == IPPROTO_IGMP) &&
2195 			    icmp->icmp_family == AF_INET)
2196 				icmp->icmp_hdrincl = 1;
2197 			else
2198 				icmp->icmp_hdrincl = 0;
2199 
2200 			if (icmp->icmp_family == AF_INET6 &&
2201 			    icmp->icmp_proto == IPPROTO_ICMPV6) {
2202 				/* Set offset for icmp6_cksum */
2203 				icmp->icmp_raw_checksum = 0;
2204 				icmp->icmp_checksum_off = 2;
2205 			}
2206 			if (icmp->icmp_proto == IPPROTO_UDP ||
2207 			    icmp->icmp_proto == IPPROTO_TCP ||
2208 			    icmp->icmp_proto == IPPROTO_SCTP) {
2209 				icmp->icmp_no_tp_cksum = 1;
2210 				icmp->icmp_sticky_ipp.ipp_fields |=
2211 				    IPPF_NO_CKSUM;
2212 			} else {
2213 				icmp->icmp_no_tp_cksum = 0;
2214 				icmp->icmp_sticky_ipp.ipp_fields &=
2215 				    ~IPPF_NO_CKSUM;
2216 			}
2217 
2218 			if (icmp->icmp_filter != NULL &&
2219 			    icmp->icmp_proto != IPPROTO_ICMPV6) {
2220 				kmem_free(icmp->icmp_filter,
2221 				    sizeof (icmp6_filter_t));
2222 				icmp->icmp_filter = NULL;
2223 			}
2224 
2225 			/* Rebuild the header template */
2226 			error = icmp_build_hdrs(icmp);
2227 			if (error != 0) {
2228 				*outlenp = 0;
2229 				return (error);
2230 			}
2231 
2232 			/*
2233 			 * For SCTP, we don't use icmp_bind_proto() for
2234 			 * raw socket binding.  Note that we do not need
2235 			 * to set *outlenp.
2236 			 * FIXME: how does SCTP work?
2237 			 */
2238 			if (icmp->icmp_proto == IPPROTO_SCTP)
2239 				return (0);
2240 
2241 			*outlenp = sizeof (int);
2242 			*(int *)outvalp = *i1 & 0xFF;
2243 
2244 			/* Drop lock across the bind operation */
2245 			rw_exit(&icmp->icmp_rwlock);
2246 			(void) icmp_bind_proto(connp);
2247 			rw_enter(&icmp->icmp_rwlock, RW_WRITER);
2248 			return (0);
2249 		case SO_REUSEADDR:
2250 			if (!checkonly) {
2251 				icmp->icmp_reuseaddr = onoff;
2252 				PASS_OPT_TO_IP(connp);
2253 			}
2254 			break;
2255 
2256 		/*
2257 		 * The following three items are available here,
2258 		 * but are only meaningful to IP.
2259 		 */
2260 		case SO_DONTROUTE:
2261 			if (!checkonly) {
2262 				icmp->icmp_dontroute = onoff;
2263 				PASS_OPT_TO_IP(connp);
2264 			}
2265 			break;
2266 		case SO_USELOOPBACK:
2267 			if (!checkonly) {
2268 				icmp->icmp_useloopback = onoff;
2269 				PASS_OPT_TO_IP(connp);
2270 			}
2271 			break;
2272 		case SO_BROADCAST:
2273 			if (!checkonly) {
2274 				icmp->icmp_broadcast = onoff;
2275 				PASS_OPT_TO_IP(connp);
2276 			}
2277 			break;
2278 
2279 		case SO_SNDBUF:
2280 			if (*i1 > is->is_max_buf) {
2281 				*outlenp = 0;
2282 				return (ENOBUFS);
2283 			}
2284 			if (!checkonly) {
2285 				if (!IPCL_IS_NONSTR(connp)) {
2286 					connp->conn_wq->q_hiwat = *i1;
2287 				}
2288 				icmp->icmp_xmit_hiwat = *i1;
2289 			}
2290 			break;
2291 		case SO_RCVBUF:
2292 			if (*i1 > is->is_max_buf) {
2293 				*outlenp = 0;
2294 				return (ENOBUFS);
2295 			}
2296 			if (!checkonly) {
2297 				icmp->icmp_recv_hiwat = *i1;
2298 				rw_exit(&icmp->icmp_rwlock);
2299 				(void) proto_set_rx_hiwat(connp->conn_rq, connp,
2300 				    *i1);
2301 				rw_enter(&icmp->icmp_rwlock, RW_WRITER);
2302 			}
2303 			break;
2304 		case SO_DGRAM_ERRIND:
2305 			if (!checkonly)
2306 				icmp->icmp_dgram_errind = onoff;
2307 			break;
2308 		case SO_ALLZONES:
2309 			/*
2310 			 * "soft" error (negative)
2311 			 * option not handled at this level
2312 			 * Note: Do not modify *outlenp
2313 			 */
2314 			return (-EINVAL);
2315 		case SO_TIMESTAMP:
2316 			if (!checkonly) {
2317 				icmp->icmp_timestamp = onoff;
2318 			}
2319 			break;
2320 		case SO_MAC_EXEMPT:
2321 			/*
2322 			 * "soft" error (negative)
2323 			 * option not handled at this level
2324 			 * Note: Do not modify *outlenp
2325 			 */
2326 			return (-EINVAL);
2327 		case SO_RCVTIMEO:
2328 		case SO_SNDTIMEO:
2329 			/*
2330 			 * Pass these two options in order for third part
2331 			 * protocol usage. Here just return directly.
2332 			 */
2333 			return (0);
2334 		/*
2335 		 * Following three not meaningful for icmp
2336 		 * Action is same as "default" so we keep them
2337 		 * in comments.
2338 		 * case SO_LINGER:
2339 		 * case SO_KEEPALIVE:
2340 		 * case SO_OOBINLINE:
2341 		 */
2342 		default:
2343 			*outlenp = 0;
2344 			return (EINVAL);
2345 		}
2346 		break;
2347 	case IPPROTO_IP:
2348 		/*
2349 		 * Only allow IPv4 option processing on IPv4 sockets.
2350 		 */
2351 		if (icmp->icmp_family != AF_INET) {
2352 			*outlenp = 0;
2353 			return (ENOPROTOOPT);
2354 		}
2355 		switch (name) {
2356 		case IP_OPTIONS:
2357 		case T_IP_OPTIONS:
2358 			/* Save options for use by IP. */
2359 			if ((inlen & 0x3) ||
2360 			    inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) {
2361 				*outlenp = 0;
2362 				return (EINVAL);
2363 			}
2364 			if (checkonly)
2365 				break;
2366 
2367 			if (!tsol_option_set(&icmp->icmp_ip_snd_options,
2368 			    &icmp->icmp_ip_snd_options_len,
2369 			    icmp->icmp_label_len, invalp, inlen)) {
2370 				*outlenp = 0;
2371 				return (ENOMEM);
2372 			}
2373 
2374 			icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
2375 			    icmp->icmp_ip_snd_options_len;
2376 			rw_exit(&icmp->icmp_rwlock);
2377 			(void) proto_set_tx_wroff(connp->conn_rq == NULL ? NULL:
2378 			    RD(connp->conn_rq), connp,
2379 			    icmp->icmp_max_hdr_len + is->is_wroff_extra);
2380 			rw_enter(&icmp->icmp_rwlock, RW_WRITER);
2381 			break;
2382 		case IP_HDRINCL:
2383 			if (!checkonly)
2384 				icmp->icmp_hdrincl = onoff;
2385 			break;
2386 		case IP_TOS:
2387 		case T_IP_TOS:
2388 			if (!checkonly) {
2389 				icmp->icmp_type_of_service = (uint8_t)*i1;
2390 			}
2391 			break;
2392 		case IP_TTL:
2393 			if (!checkonly) {
2394 				icmp->icmp_ttl = (uint8_t)*i1;
2395 			}
2396 			break;
2397 		case IP_MULTICAST_IF:
2398 			/*
2399 			 * TODO should check OPTMGMT reply and undo this if
2400 			 * there is an error.
2401 			 */
2402 			if (!checkonly) {
2403 				icmp->icmp_multicast_if_addr = *i1;
2404 				PASS_OPT_TO_IP(connp);
2405 			}
2406 			break;
2407 		case IP_MULTICAST_TTL:
2408 			if (!checkonly)
2409 				icmp->icmp_multicast_ttl = *invalp;
2410 			break;
2411 		case IP_MULTICAST_LOOP:
2412 			if (!checkonly) {
2413 				connp->conn_multicast_loop =
2414 				    (*invalp == 0) ? 0 : 1;
2415 				PASS_OPT_TO_IP(connp);
2416 			}
2417 			break;
2418 		case IP_BOUND_IF:
2419 			if (!checkonly) {
2420 				icmp->icmp_bound_if = *i1;
2421 				PASS_OPT_TO_IP(connp);
2422 			}
2423 			break;
2424 		case IP_UNSPEC_SRC:
2425 			if (!checkonly) {
2426 				icmp->icmp_unspec_source = onoff;
2427 				PASS_OPT_TO_IP(connp);
2428 			}
2429 			break;
2430 		case IP_BROADCAST_TTL:
2431 			if (!checkonly)
2432 				connp->conn_broadcast_ttl = *invalp;
2433 			break;
2434 		case IP_RECVIF:
2435 			if (!checkonly) {
2436 				icmp->icmp_recvif = onoff;
2437 			}
2438 			/*
2439 			 * pass to ip
2440 			 */
2441 			return (-EINVAL);
2442 		case IP_PKTINFO: {
2443 			/*
2444 			 * This also handles IP_RECVPKTINFO.
2445 			 * IP_PKTINFO and IP_RECVPKTINFO have the same value.
2446 			 * Differentiation is based on the size of the argument
2447 			 * passed in.
2448 			 */
2449 			struct in_pktinfo *pktinfop;
2450 			ip4_pkt_t *attr_pktinfop;
2451 
2452 			if (checkonly)
2453 				break;
2454 
2455 			if (inlen == sizeof (int)) {
2456 				/*
2457 				 * This is IP_RECVPKTINFO option.
2458 				 * Keep a local copy of wether this option is
2459 				 * set or not and pass it down to IP for
2460 				 * processing.
2461 				 */
2462 				icmp->icmp_ip_recvpktinfo = onoff;
2463 				return (-EINVAL);
2464 			}
2465 
2466 
2467 			if (inlen != sizeof (struct in_pktinfo)) {
2468 				return (EINVAL);
2469 			}
2470 
2471 			if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs)
2472 			    == NULL) {
2473 				/*
2474 				 * sticky option is not supported
2475 				 */
2476 				return (EINVAL);
2477 			}
2478 
2479 			pktinfop = (struct in_pktinfo *)invalp;
2480 
2481 			/*
2482 			 * Atleast one of the values should be specified
2483 			 */
2484 			if (pktinfop->ipi_ifindex == 0 &&
2485 			    pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) {
2486 				return (EINVAL);
2487 			}
2488 
2489 			attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr;
2490 			attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex;
2491 		}
2492 			break;
2493 		case IP_ADD_MEMBERSHIP:
2494 		case IP_DROP_MEMBERSHIP:
2495 		case IP_BLOCK_SOURCE:
2496 		case IP_UNBLOCK_SOURCE:
2497 		case IP_ADD_SOURCE_MEMBERSHIP:
2498 		case IP_DROP_SOURCE_MEMBERSHIP:
2499 		case MCAST_JOIN_GROUP:
2500 		case MCAST_LEAVE_GROUP:
2501 		case MCAST_BLOCK_SOURCE:
2502 		case MCAST_UNBLOCK_SOURCE:
2503 		case MCAST_JOIN_SOURCE_GROUP:
2504 		case MCAST_LEAVE_SOURCE_GROUP:
2505 		case MRT_INIT:
2506 		case MRT_DONE:
2507 		case MRT_ADD_VIF:
2508 		case MRT_DEL_VIF:
2509 		case MRT_ADD_MFC:
2510 		case MRT_DEL_MFC:
2511 		case MRT_VERSION:
2512 		case MRT_ASSERT:
2513 		case IP_SEC_OPT:
2514 		case IP_NEXTHOP:
2515 			/*
2516 			 * "soft" error (negative)
2517 			 * option not handled at this level
2518 			 * Note: Do not modify *outlenp
2519 			 */
2520 			return (-EINVAL);
2521 		default:
2522 			*outlenp = 0;
2523 			return (EINVAL);
2524 		}
2525 		break;
2526 	case IPPROTO_IPV6: {
2527 		ip6_pkt_t		*ipp;
2528 		boolean_t		sticky;
2529 
2530 		if (icmp->icmp_family != AF_INET6) {
2531 			*outlenp = 0;
2532 			return (ENOPROTOOPT);
2533 		}
2534 		/*
2535 		 * Deal with both sticky options and ancillary data
2536 		 */
2537 		if (thisdg_attrs == NULL) {
2538 			/* sticky options, or none */
2539 			ipp = &icmp->icmp_sticky_ipp;
2540 			sticky = B_TRUE;
2541 		} else {
2542 			/* ancillary data */
2543 			ipp = (ip6_pkt_t *)thisdg_attrs;
2544 			sticky = B_FALSE;
2545 		}
2546 
2547 		switch (name) {
2548 		case IPV6_MULTICAST_IF:
2549 			if (!checkonly) {
2550 				icmp->icmp_multicast_if_index = *i1;
2551 				PASS_OPT_TO_IP(connp);
2552 			}
2553 			break;
2554 		case IPV6_UNICAST_HOPS:
2555 			/* -1 means use default */
2556 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
2557 				*outlenp = 0;
2558 				return (EINVAL);
2559 			}
2560 			if (!checkonly) {
2561 				if (*i1 == -1) {
2562 					icmp->icmp_ttl = ipp->ipp_unicast_hops =
2563 					    is->is_ipv6_hoplimit;
2564 					ipp->ipp_fields &= ~IPPF_UNICAST_HOPS;
2565 					/* Pass modified value to IP. */
2566 					*i1 = ipp->ipp_hoplimit;
2567 				} else {
2568 					icmp->icmp_ttl = ipp->ipp_unicast_hops =
2569 					    (uint8_t)*i1;
2570 					ipp->ipp_fields |= IPPF_UNICAST_HOPS;
2571 				}
2572 				/* Rebuild the header template */
2573 				error = icmp_build_hdrs(icmp);
2574 				if (error != 0) {
2575 					*outlenp = 0;
2576 					return (error);
2577 				}
2578 			}
2579 			break;
2580 		case IPV6_MULTICAST_HOPS:
2581 			/* -1 means use default */
2582 			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
2583 				*outlenp = 0;
2584 				return (EINVAL);
2585 			}
2586 			if (!checkonly) {
2587 				if (*i1 == -1) {
2588 					icmp->icmp_multicast_ttl =
2589 					    ipp->ipp_multicast_hops =
2590 					    IP_DEFAULT_MULTICAST_TTL;
2591 					ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS;
2592 					/* Pass modified value to IP. */
2593 					*i1 = icmp->icmp_multicast_ttl;
2594 				} else {
2595 					icmp->icmp_multicast_ttl =
2596 					    ipp->ipp_multicast_hops =
2597 					    (uint8_t)*i1;
2598 					ipp->ipp_fields |= IPPF_MULTICAST_HOPS;
2599 				}
2600 			}
2601 			break;
2602 		case IPV6_MULTICAST_LOOP:
2603 			if (*i1 != 0 && *i1 != 1) {
2604 				*outlenp = 0;
2605 				return (EINVAL);
2606 			}
2607 			if (!checkonly) {
2608 				connp->conn_multicast_loop = *i1;
2609 				PASS_OPT_TO_IP(connp);
2610 			}
2611 			break;
2612 		case IPV6_CHECKSUM:
2613 			/*
2614 			 * Integer offset into the user data of where the
2615 			 * checksum is located.
2616 			 * Offset of -1 disables option.
2617 			 * Does not apply to IPPROTO_ICMPV6.
2618 			 */
2619 			if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) {
2620 				*outlenp = 0;
2621 				return (EINVAL);
2622 			}
2623 			if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) {
2624 				/* Negative or not 16 bit aligned offset */
2625 				*outlenp = 0;
2626 				return (EINVAL);
2627 			}
2628 			if (checkonly)
2629 				break;
2630 
2631 			if (*i1 == -1) {
2632 				icmp->icmp_raw_checksum = 0;
2633 				ipp->ipp_fields &= ~IPPF_RAW_CKSUM;
2634 			} else {
2635 				icmp->icmp_raw_checksum = 1;
2636 				icmp->icmp_checksum_off = *i1;
2637 				ipp->ipp_fields |= IPPF_RAW_CKSUM;
2638 			}
2639 			/* Rebuild the header template */
2640 			error = icmp_build_hdrs(icmp);
2641 			if (error != 0) {
2642 				*outlenp = 0;
2643 				return (error);
2644 			}
2645 			break;
2646 		case IPV6_JOIN_GROUP:
2647 		case IPV6_LEAVE_GROUP:
2648 		case MCAST_JOIN_GROUP:
2649 		case MCAST_LEAVE_GROUP:
2650 		case MCAST_BLOCK_SOURCE:
2651 		case MCAST_UNBLOCK_SOURCE:
2652 		case MCAST_JOIN_SOURCE_GROUP:
2653 		case MCAST_LEAVE_SOURCE_GROUP:
2654 			/*
2655 			 * "soft" error (negative)
2656 			 * option not handled at this level
2657 			 * Note: Do not modify *outlenp
2658 			 */
2659 			return (-EINVAL);
2660 		case IPV6_BOUND_IF:
2661 			if (!checkonly) {
2662 				icmp->icmp_bound_if = *i1;
2663 				PASS_OPT_TO_IP(connp);
2664 			}
2665 			break;
2666 		case IPV6_UNSPEC_SRC:
2667 			if (!checkonly) {
2668 				icmp->icmp_unspec_source = onoff;
2669 				PASS_OPT_TO_IP(connp);
2670 			}
2671 			break;
2672 		case IPV6_RECVTCLASS:
2673 			if (!checkonly) {
2674 				icmp->icmp_ipv6_recvtclass = onoff;
2675 				PASS_OPT_TO_IP(connp);
2676 			}
2677 			break;
2678 		/*
2679 		 * Set boolean switches for ancillary data delivery
2680 		 */
2681 		case IPV6_RECVPKTINFO:
2682 			if (!checkonly) {
2683 				icmp->icmp_ip_recvpktinfo = onoff;
2684 				PASS_OPT_TO_IP(connp);
2685 			}
2686 			break;
2687 		case IPV6_RECVPATHMTU:
2688 			if (!checkonly) {
2689 				icmp->icmp_ipv6_recvpathmtu = onoff;
2690 				PASS_OPT_TO_IP(connp);
2691 			}
2692 			break;
2693 		case IPV6_RECVHOPLIMIT:
2694 			if (!checkonly) {
2695 				icmp->icmp_ipv6_recvhoplimit = onoff;
2696 				PASS_OPT_TO_IP(connp);
2697 			}
2698 			break;
2699 		case IPV6_RECVHOPOPTS:
2700 			if (!checkonly) {
2701 				icmp->icmp_ipv6_recvhopopts = onoff;
2702 				PASS_OPT_TO_IP(connp);
2703 			}
2704 			break;
2705 		case IPV6_RECVDSTOPTS:
2706 			if (!checkonly) {
2707 				icmp->icmp_ipv6_recvdstopts = onoff;
2708 				PASS_OPT_TO_IP(connp);
2709 			}
2710 			break;
2711 		case _OLD_IPV6_RECVDSTOPTS:
2712 			if (!checkonly)
2713 				icmp->icmp_old_ipv6_recvdstopts = onoff;
2714 			break;
2715 		case IPV6_RECVRTHDRDSTOPTS:
2716 			if (!checkonly) {
2717 				icmp->icmp_ipv6_recvrtdstopts = onoff;
2718 				PASS_OPT_TO_IP(connp);
2719 			}
2720 			break;
2721 		case IPV6_RECVRTHDR:
2722 			if (!checkonly) {
2723 				icmp->icmp_ipv6_recvrthdr = onoff;
2724 				PASS_OPT_TO_IP(connp);
2725 			}
2726 			break;
2727 		/*
2728 		 * Set sticky options or ancillary data.
2729 		 * If sticky options, (re)build any extension headers
2730 		 * that might be needed as a result.
2731 		 */
2732 		case IPV6_PKTINFO:
2733 			/*
2734 			 * The source address and ifindex are verified
2735 			 * in ip_opt_set(). For ancillary data the
2736 			 * source address is checked in ip_wput_v6.
2737 			 */
2738 			if (inlen != 0 && inlen !=
2739 			    sizeof (struct in6_pktinfo)) {
2740 				return (EINVAL);
2741 			}
2742 			if (checkonly)
2743 				break;
2744 
2745 			if (inlen == 0) {
2746 				ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR);
2747 				ipp->ipp_sticky_ignored |=
2748 				    (IPPF_IFINDEX|IPPF_ADDR);
2749 			} else {
2750 				struct in6_pktinfo *pkti;
2751 
2752 				pkti = (struct in6_pktinfo *)invalp;
2753 				ipp->ipp_ifindex = pkti->ipi6_ifindex;
2754 				ipp->ipp_addr = pkti->ipi6_addr;
2755 				if (ipp->ipp_ifindex != 0)
2756 					ipp->ipp_fields |= IPPF_IFINDEX;
2757 				else
2758 					ipp->ipp_fields &= ~IPPF_IFINDEX;
2759 				if (!IN6_IS_ADDR_UNSPECIFIED(
2760 				    &ipp->ipp_addr))
2761 					ipp->ipp_fields |= IPPF_ADDR;
2762 				else
2763 					ipp->ipp_fields &= ~IPPF_ADDR;
2764 			}
2765 			if (sticky) {
2766 				error = icmp_build_hdrs(icmp);
2767 				if (error != 0)
2768 					return (error);
2769 				PASS_OPT_TO_IP(connp);
2770 			}
2771 			break;
2772 		case IPV6_HOPLIMIT:
2773 			/* This option can only be used as ancillary data. */
2774 			if (sticky)
2775 				return (EINVAL);
2776 			if (inlen != 0 && inlen != sizeof (int))
2777 				return (EINVAL);
2778 			if (checkonly)
2779 				break;
2780 
2781 			if (inlen == 0) {
2782 				ipp->ipp_fields &= ~IPPF_HOPLIMIT;
2783 				ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT;
2784 			} else {
2785 				if (*i1 > 255 || *i1 < -1)
2786 					return (EINVAL);
2787 				if (*i1 == -1)
2788 					ipp->ipp_hoplimit =
2789 					    is->is_ipv6_hoplimit;
2790 				else
2791 					ipp->ipp_hoplimit = *i1;
2792 				ipp->ipp_fields |= IPPF_HOPLIMIT;
2793 			}
2794 			break;
2795 		case IPV6_TCLASS:
2796 			/*
2797 			 * IPV6_RECVTCLASS accepts -1 as use kernel default
2798 			 * and [0, 255] as the actualy traffic class.
2799 			 */
2800 			if (inlen != 0 && inlen != sizeof (int)) {
2801 				return (EINVAL);
2802 			}
2803 			if (checkonly)
2804 				break;
2805 
2806 			if (inlen == 0) {
2807 				ipp->ipp_fields &= ~IPPF_TCLASS;
2808 				ipp->ipp_sticky_ignored |= IPPF_TCLASS;
2809 			} else {
2810 				if (*i1 >= 256 || *i1 < -1)
2811 					return (EINVAL);
2812 				if (*i1 == -1) {
2813 					ipp->ipp_tclass =
2814 					    IPV6_FLOW_TCLASS(
2815 					    IPV6_DEFAULT_VERS_AND_FLOW);
2816 				} else {
2817 					ipp->ipp_tclass = *i1;
2818 				}
2819 				ipp->ipp_fields |= IPPF_TCLASS;
2820 			}
2821 			if (sticky) {
2822 				error = icmp_build_hdrs(icmp);
2823 				if (error != 0)
2824 					return (error);
2825 			}
2826 			break;
2827 		case IPV6_NEXTHOP:
2828 			/*
2829 			 * IP will verify that the nexthop is reachable
2830 			 * and fail for sticky options.
2831 			 */
2832 			if (inlen != 0 && inlen != sizeof (sin6_t)) {
2833 				return (EINVAL);
2834 			}
2835 			if (checkonly)
2836 				break;
2837 
2838 			if (inlen == 0) {
2839 				ipp->ipp_fields &= ~IPPF_NEXTHOP;
2840 				ipp->ipp_sticky_ignored |= IPPF_NEXTHOP;
2841 			} else {
2842 				sin6_t *sin6 = (sin6_t *)invalp;
2843 
2844 				if (sin6->sin6_family != AF_INET6) {
2845 					return (EAFNOSUPPORT);
2846 				}
2847 				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
2848 					return (EADDRNOTAVAIL);
2849 				}
2850 				ipp->ipp_nexthop = sin6->sin6_addr;
2851 				if (!IN6_IS_ADDR_UNSPECIFIED(
2852 				    &ipp->ipp_nexthop))
2853 					ipp->ipp_fields |= IPPF_NEXTHOP;
2854 				else
2855 					ipp->ipp_fields &= ~IPPF_NEXTHOP;
2856 			}
2857 			if (sticky) {
2858 				error = icmp_build_hdrs(icmp);
2859 				if (error != 0)
2860 					return (error);
2861 				PASS_OPT_TO_IP(connp);
2862 			}
2863 			break;
2864 		case IPV6_HOPOPTS: {
2865 			ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
2866 			/*
2867 			 * Sanity checks - minimum size, size a multiple of
2868 			 * eight bytes, and matching size passed in.
2869 			 */
2870 			if (inlen != 0 &&
2871 			    inlen != (8 * (hopts->ip6h_len + 1))) {
2872 				return (EINVAL);
2873 			}
2874 
2875 			if (checkonly)
2876 				break;
2877 			error = optcom_pkt_set(invalp, inlen, sticky,
2878 			    (uchar_t **)&ipp->ipp_hopopts,
2879 			    &ipp->ipp_hopoptslen,
2880 			    sticky ? icmp->icmp_label_len_v6 : 0);
2881 			if (error != 0)
2882 				return (error);
2883 			if (ipp->ipp_hopoptslen == 0) {
2884 				ipp->ipp_fields &= ~IPPF_HOPOPTS;
2885 				ipp->ipp_sticky_ignored |= IPPF_HOPOPTS;
2886 			} else {
2887 				ipp->ipp_fields |= IPPF_HOPOPTS;
2888 			}
2889 			if (sticky) {
2890 				error = icmp_build_hdrs(icmp);
2891 				if (error != 0)
2892 					return (error);
2893 			}
2894 			break;
2895 		}
2896 		case IPV6_RTHDRDSTOPTS: {
2897 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
2898 
2899 			/*
2900 			 * Sanity checks - minimum size, size a multiple of
2901 			 * eight bytes, and matching size passed in.
2902 			 */
2903 			if (inlen != 0 &&
2904 			    inlen != (8 * (dopts->ip6d_len + 1)))
2905 				return (EINVAL);
2906 
2907 			if (checkonly)
2908 				break;
2909 
2910 			if (inlen == 0) {
2911 				if (sticky &&
2912 				    (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) {
2913 					kmem_free(ipp->ipp_rtdstopts,
2914 					    ipp->ipp_rtdstoptslen);
2915 					ipp->ipp_rtdstopts = NULL;
2916 					ipp->ipp_rtdstoptslen = 0;
2917 				}
2918 				ipp->ipp_fields &= ~IPPF_RTDSTOPTS;
2919 				ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS;
2920 			} else {
2921 				error = optcom_pkt_set(invalp, inlen, sticky,
2922 				    (uchar_t **)&ipp->ipp_rtdstopts,
2923 				    &ipp->ipp_rtdstoptslen, 0);
2924 				if (error != 0)
2925 					return (error);
2926 				ipp->ipp_fields |= IPPF_RTDSTOPTS;
2927 			}
2928 			if (sticky) {
2929 				error = icmp_build_hdrs(icmp);
2930 				if (error != 0)
2931 					return (error);
2932 			}
2933 			break;
2934 		}
2935 		case IPV6_DSTOPTS: {
2936 			ip6_dest_t *dopts = (ip6_dest_t *)invalp;
2937 
2938 			/*
2939 			 * Sanity checks - minimum size, size a multiple of
2940 			 * eight bytes, and matching size passed in.
2941 			 */
2942 			if (inlen != 0 &&
2943 			    inlen != (8 * (dopts->ip6d_len + 1)))
2944 				return (EINVAL);
2945 
2946 			if (checkonly)
2947 				break;
2948 
2949 			if (inlen == 0) {
2950 				if (sticky &&
2951 				    (ipp->ipp_fields & IPPF_DSTOPTS) != 0) {
2952 					kmem_free(ipp->ipp_dstopts,
2953 					    ipp->ipp_dstoptslen);
2954 					ipp->ipp_dstopts = NULL;
2955 					ipp->ipp_dstoptslen = 0;
2956 				}
2957 				ipp->ipp_fields &= ~IPPF_DSTOPTS;
2958 				ipp->ipp_sticky_ignored |= IPPF_DSTOPTS;
2959 			} else {
2960 				error = optcom_pkt_set(invalp, inlen, sticky,
2961 				    (uchar_t **)&ipp->ipp_dstopts,
2962 				    &ipp->ipp_dstoptslen, 0);
2963 				if (error != 0)
2964 					return (error);
2965 				ipp->ipp_fields |= IPPF_DSTOPTS;
2966 			}
2967 			if (sticky) {
2968 				error = icmp_build_hdrs(icmp);
2969 				if (error != 0)
2970 					return (error);
2971 			}
2972 			break;
2973 		}
2974 		case IPV6_RTHDR: {
2975 			ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp;
2976 
2977 			/*
2978 			 * Sanity checks - minimum size, size a multiple of
2979 			 * eight bytes, and matching size passed in.
2980 			 */
2981 			if (inlen != 0 &&
2982 			    inlen != (8 * (rt->ip6r_len + 1)))
2983 				return (EINVAL);
2984 
2985 			if (checkonly)
2986 				break;
2987 
2988 			if (inlen == 0) {
2989 				if (sticky &&
2990 				    (ipp->ipp_fields & IPPF_RTHDR) != 0) {
2991 					kmem_free(ipp->ipp_rthdr,
2992 					    ipp->ipp_rthdrlen);
2993 					ipp->ipp_rthdr = NULL;
2994 					ipp->ipp_rthdrlen = 0;
2995 				}
2996 				ipp->ipp_fields &= ~IPPF_RTHDR;
2997 				ipp->ipp_sticky_ignored |= IPPF_RTHDR;
2998 			} else {
2999 				error = optcom_pkt_set(invalp, inlen, sticky,
3000 				    (uchar_t **)&ipp->ipp_rthdr,
3001 				    &ipp->ipp_rthdrlen, 0);
3002 				if (error != 0)
3003 					return (error);
3004 				ipp->ipp_fields |= IPPF_RTHDR;
3005 			}
3006 			if (sticky) {
3007 				error = icmp_build_hdrs(icmp);
3008 				if (error != 0)
3009 					return (error);
3010 			}
3011 			break;
3012 		}
3013 
3014 		case IPV6_DONTFRAG:
3015 			if (checkonly)
3016 				break;
3017 
3018 			if (onoff) {
3019 				ipp->ipp_fields |= IPPF_DONTFRAG;
3020 			} else {
3021 				ipp->ipp_fields &= ~IPPF_DONTFRAG;
3022 			}
3023 			break;
3024 
3025 		case IPV6_USE_MIN_MTU:
3026 			if (inlen != sizeof (int))
3027 				return (EINVAL);
3028 
3029 			if (*i1 < -1 || *i1 > 1)
3030 				return (EINVAL);
3031 
3032 			if (checkonly)
3033 				break;
3034 
3035 			ipp->ipp_fields |= IPPF_USE_MIN_MTU;
3036 			ipp->ipp_use_min_mtu = *i1;
3037 			break;
3038 
3039 		/*
3040 		 * This option can't be set.  Its only returned via
3041 		 * getsockopt() or ancillary data.
3042 		 */
3043 		case IPV6_PATHMTU:
3044 			return (EINVAL);
3045 
3046 		case IPV6_SEC_OPT:
3047 		case IPV6_SRC_PREFERENCES:
3048 		case IPV6_V6ONLY:
3049 			/* Handled at IP level */
3050 			return (-EINVAL);
3051 		default:
3052 			*outlenp = 0;
3053 			return (EINVAL);
3054 		}
3055 		break;
3056 	}		/* end IPPROTO_IPV6 */
3057 
3058 	case IPPROTO_ICMPV6:
3059 		/*
3060 		 * Only allow IPv6 option processing on IPv6 sockets.
3061 		 */
3062 		if (icmp->icmp_family != AF_INET6) {
3063 			*outlenp = 0;
3064 			return (ENOPROTOOPT);
3065 		}
3066 		if (icmp->icmp_proto != IPPROTO_ICMPV6) {
3067 			*outlenp = 0;
3068 			return (ENOPROTOOPT);
3069 		}
3070 		switch (name) {
3071 		case ICMP6_FILTER:
3072 			if (!checkonly) {
3073 				if ((inlen != 0) &&
3074 				    (inlen != sizeof (icmp6_filter_t)))
3075 					return (EINVAL);
3076 
3077 				if (inlen == 0) {
3078 					if (icmp->icmp_filter != NULL) {
3079 						kmem_free(icmp->icmp_filter,
3080 						    sizeof (icmp6_filter_t));
3081 						icmp->icmp_filter = NULL;
3082 					}
3083 				} else {
3084 					if (icmp->icmp_filter == NULL) {
3085 						icmp->icmp_filter = kmem_alloc(
3086 						    sizeof (icmp6_filter_t),
3087 						    KM_NOSLEEP);
3088 						if (icmp->icmp_filter == NULL) {
3089 							*outlenp = 0;
3090 							return (ENOBUFS);
3091 						}
3092 					}
3093 					(void) bcopy(invalp, icmp->icmp_filter,
3094 					    inlen);
3095 				}
3096 			}
3097 			break;
3098 
3099 		default:
3100 			*outlenp = 0;
3101 			return (EINVAL);
3102 		}
3103 		break;
3104 	default:
3105 		*outlenp = 0;
3106 		return (EINVAL);
3107 	}
3108 	/*
3109 	 * Common case of OK return with outval same as inval.
3110 	 */
3111 	if (invalp != outvalp) {
3112 		/* don't trust bcopy for identical src/dst */
3113 		(void) bcopy(invalp, outvalp, inlen);
3114 	}
3115 	*outlenp = inlen;
3116 	return (0);
3117 }
3118 
3119 /* This routine sets socket options. */
3120 /* ARGSUSED */
3121 int
3122 icmp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
3123     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
3124     void *thisdg_attrs, cred_t *cr)
3125 {
3126 	boolean_t checkonly;
3127 	int	error;
3128 
3129 	error = 0;
3130 	switch (optset_context) {
3131 	case SETFN_OPTCOM_CHECKONLY:
3132 		checkonly = B_TRUE;
3133 		/*
3134 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
3135 		 * inlen != 0 implies value supplied and
3136 		 * 	we have to "pretend" to set it.
3137 		 * inlen == 0 implies that there is no
3138 		 * 	value part in T_CHECK request and just validation
3139 		 * done elsewhere should be enough, we just return here.
3140 		 */
3141 		if (inlen == 0) {
3142 			*outlenp = 0;
3143 			error = 0;
3144 			goto done;
3145 		}
3146 		break;
3147 	case SETFN_OPTCOM_NEGOTIATE:
3148 		checkonly = B_FALSE;
3149 		break;
3150 	case SETFN_UD_NEGOTIATE:
3151 	case SETFN_CONN_NEGOTIATE:
3152 		checkonly = B_FALSE;
3153 		/*
3154 		 * Negotiating local and "association-related" options
3155 		 * through T_UNITDATA_REQ.
3156 		 *
3157 		 * Following routine can filter out ones we do not
3158 		 * want to be "set" this way.
3159 		 */
3160 		if (!icmp_opt_allow_udr_set(level, name)) {
3161 			*outlenp = 0;
3162 			error = EINVAL;
3163 			goto done;
3164 		}
3165 		break;
3166 	default:
3167 		/*
3168 		 * We should never get here
3169 		 */
3170 		*outlenp = 0;
3171 		error = EINVAL;
3172 		goto done;
3173 	}
3174 
3175 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
3176 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
3177 	error = icmp_do_opt_set(connp, level, name, inlen, invalp, outlenp,
3178 	    outvalp, cr, thisdg_attrs, checkonly);
3179 
3180 done:
3181 	return (error);
3182 }
3183 
3184 /* This routine sets socket options. */
3185 /* ARGSUSED */
3186 int
3187 icmp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
3188     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
3189     void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
3190 {
3191 	conn_t	*connp =  Q_TO_CONN(q);
3192 	icmp_t	*icmp;
3193 	int error;
3194 
3195 	icmp = connp->conn_icmp;
3196 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
3197 	error = icmp_opt_set(connp, optset_context, level, name, inlen, invalp,
3198 	    outlenp, outvalp, thisdg_attrs, cr);
3199 	rw_exit(&icmp->icmp_rwlock);
3200 	return (error);
3201 }
3202 
3203 /*
3204  * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl,
3205  * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum.
3206  * The headers include ip6i_t (if needed), ip6_t, and any sticky extension
3207  * headers.
3208  * Returns failure if can't allocate memory.
3209  */
3210 static int
3211 icmp_build_hdrs(icmp_t *icmp)
3212 {
3213 	icmp_stack_t *is = icmp->icmp_is;
3214 	uchar_t	*hdrs;
3215 	uint_t	hdrs_len;
3216 	ip6_t	*ip6h;
3217 	ip6i_t	*ip6i;
3218 	ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp;
3219 
3220 	ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock));
3221 	hdrs_len = ip_total_hdrs_len_v6(ipp);
3222 	ASSERT(hdrs_len != 0);
3223 	if (hdrs_len != icmp->icmp_sticky_hdrs_len) {
3224 		/* Need to reallocate */
3225 		if (hdrs_len != 0) {
3226 			hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
3227 			if (hdrs == NULL)
3228 				return (ENOMEM);
3229 		} else {
3230 			hdrs = NULL;
3231 		}
3232 		if (icmp->icmp_sticky_hdrs_len != 0) {
3233 			kmem_free(icmp->icmp_sticky_hdrs,
3234 			    icmp->icmp_sticky_hdrs_len);
3235 		}
3236 		icmp->icmp_sticky_hdrs = hdrs;
3237 		icmp->icmp_sticky_hdrs_len = hdrs_len;
3238 	}
3239 	ip_build_hdrs_v6(icmp->icmp_sticky_hdrs,
3240 	    icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto);
3241 
3242 	/* Set header fields not in ipp */
3243 	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
3244 		ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs;
3245 		ip6h = (ip6_t *)&ip6i[1];
3246 
3247 		if (ipp->ipp_fields & IPPF_RAW_CKSUM) {
3248 			ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM;
3249 			ip6i->ip6i_checksum_off = icmp->icmp_checksum_off;
3250 		}
3251 		if (ipp->ipp_fields & IPPF_NO_CKSUM) {
3252 			ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM;
3253 		}
3254 	} else {
3255 		ip6h = (ip6_t *)icmp->icmp_sticky_hdrs;
3256 	}
3257 
3258 	if (!(ipp->ipp_fields & IPPF_ADDR))
3259 		ip6h->ip6_src = icmp->icmp_v6src;
3260 
3261 	/* Try to get everything in a single mblk */
3262 	if (hdrs_len > icmp->icmp_max_hdr_len) {
3263 		icmp->icmp_max_hdr_len = hdrs_len;
3264 		rw_exit(&icmp->icmp_rwlock);
3265 		(void) proto_set_tx_wroff(icmp->icmp_connp->conn_rq,
3266 		    icmp->icmp_connp,
3267 		    icmp->icmp_max_hdr_len + is->is_wroff_extra);
3268 		rw_enter(&icmp->icmp_rwlock, RW_WRITER);
3269 	}
3270 	return (0);
3271 }
3272 
3273 /*
3274  * This routine retrieves the value of an ND variable in a icmpparam_t
3275  * structure.  It is called through nd_getset when a user reads the
3276  * variable.
3277  */
3278 /* ARGSUSED */
3279 static int
3280 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
3281 {
3282 	icmpparam_t	*icmppa = (icmpparam_t *)cp;
3283 
3284 	(void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value);
3285 	return (0);
3286 }
3287 
3288 /*
3289  * Walk through the param array specified registering each element with the
3290  * named dispatch (ND) handler.
3291  */
3292 static boolean_t
3293 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt)
3294 {
3295 	for (; cnt-- > 0; icmppa++) {
3296 		if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) {
3297 			if (!nd_load(ndp, icmppa->icmp_param_name,
3298 			    icmp_param_get, icmp_param_set,
3299 			    (caddr_t)icmppa)) {
3300 				nd_free(ndp);
3301 				return (B_FALSE);
3302 			}
3303 		}
3304 	}
3305 	return (B_TRUE);
3306 }
3307 
3308 /* This routine sets an ND variable in a icmpparam_t structure. */
3309 /* ARGSUSED */
3310 static int
3311 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
3312 {
3313 	long		new_value;
3314 	icmpparam_t	*icmppa = (icmpparam_t *)cp;
3315 
3316 	/*
3317 	 * Fail the request if the new value does not lie within the
3318 	 * required bounds.
3319 	 */
3320 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
3321 	    new_value < icmppa->icmp_param_min ||
3322 	    new_value > icmppa->icmp_param_max) {
3323 		return (EINVAL);
3324 	}
3325 	/* Set the new value */
3326 	icmppa->icmp_param_value = new_value;
3327 	return (0);
3328 }
3329 
3330 static mblk_t *
3331 icmp_queue_fallback(icmp_t *icmp, mblk_t *mp)
3332 {
3333 	ASSERT(MUTEX_HELD(&icmp->icmp_recv_lock));
3334 	if (IPCL_IS_NONSTR(icmp->icmp_connp)) {
3335 		/*
3336 		 * fallback has started but messages have not been moved yet
3337 		 */
3338 		if (icmp->icmp_fallback_queue_head == NULL) {
3339 			ASSERT(icmp->icmp_fallback_queue_tail == NULL);
3340 			icmp->icmp_fallback_queue_head = mp;
3341 			icmp->icmp_fallback_queue_tail = mp;
3342 		} else {
3343 			ASSERT(icmp->icmp_fallback_queue_tail != NULL);
3344 			icmp->icmp_fallback_queue_tail->b_next = mp;
3345 			icmp->icmp_fallback_queue_tail = mp;
3346 		}
3347 		return (NULL);
3348 	} else {
3349 		/*
3350 		 * Fallback completed, let the caller putnext() the mblk.
3351 		 */
3352 		return (mp);
3353 	}
3354 }
3355 
3356 /*
3357  * Deliver data to ULP. In case we have a socket, and it's falling back to
3358  * TPI, then we'll queue the mp for later processing.
3359  */
3360 static void
3361 icmp_ulp_recv(conn_t *connp, mblk_t *mp)
3362 {
3363 
3364 	if (IPCL_IS_NONSTR(connp)) {
3365 		icmp_t *icmp = connp->conn_icmp;
3366 		int error;
3367 
3368 		if ((*connp->conn_upcalls->su_recv)
3369 		    (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error,
3370 		    NULL) < 0) {
3371 			mutex_enter(&icmp->icmp_recv_lock);
3372 			if (error == ENOSPC) {
3373 				/*
3374 				 * let's confirm while holding the lock
3375 				 */
3376 				if ((*connp->conn_upcalls->su_recv)
3377 				    (connp->conn_upper_handle, NULL, 0, 0,
3378 				    &error, NULL) < 0) {
3379 					ASSERT(error == ENOSPC);
3380 					if (error == ENOSPC) {
3381 						connp->conn_flow_cntrld =
3382 						    B_TRUE;
3383 					}
3384 				}
3385 				mutex_exit(&icmp->icmp_recv_lock);
3386 			} else {
3387 				ASSERT(error == EOPNOTSUPP);
3388 				mp = icmp_queue_fallback(icmp, mp);
3389 				mutex_exit(&icmp->icmp_recv_lock);
3390 				if (mp != NULL)
3391 					putnext(connp->conn_rq, mp);
3392 			}
3393 		}
3394 		ASSERT(MUTEX_NOT_HELD(&icmp->icmp_recv_lock));
3395 	} else {
3396 		putnext(connp->conn_rq, mp);
3397 	}
3398 }
3399 
3400 /*ARGSUSED2*/
3401 static void
3402 icmp_input(void *arg1, mblk_t *mp, void *arg2)
3403 {
3404 	conn_t *connp = (conn_t *)arg1;
3405 	struct T_unitdata_ind	*tudi;
3406 	uchar_t			*rptr;
3407 	icmp_t			*icmp;
3408 	icmp_stack_t		*is;
3409 	sin_t			*sin;
3410 	sin6_t			*sin6;
3411 	ip6_t			*ip6h;
3412 	ip6i_t			*ip6i;
3413 	mblk_t			*mp1;
3414 	int			hdr_len;
3415 	ipha_t			*ipha;
3416 	int			udi_size;	/* Size of T_unitdata_ind */
3417 	uint_t			ipvers;
3418 	ip6_pkt_t		ipp;
3419 	uint8_t			nexthdr;
3420 	ip_pktinfo_t		*pinfo = NULL;
3421 	mblk_t			*options_mp = NULL;
3422 	uint_t			icmp_opt = 0;
3423 	boolean_t		icmp_ipv6_recvhoplimit = B_FALSE;
3424 	uint_t			hopstrip;
3425 
3426 	ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
3427 
3428 	icmp = connp->conn_icmp;
3429 	is = icmp->icmp_is;
3430 	rptr = mp->b_rptr;
3431 	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL);
3432 	ASSERT(OK_32PTR(rptr));
3433 
3434 	/*
3435 	 * IP should have prepended the options data in an M_CTL
3436 	 * Check M_CTL "type" to make sure are not here bcos of
3437 	 * a valid ICMP message
3438 	 */
3439 	if (DB_TYPE(mp) == M_CTL) {
3440 		/*
3441 		 * FIXME: does IP still do this?
3442 		 * IP sends up the IPSEC_IN message for handling IPSEC
3443 		 * policy at the TCP level. We don't need it here.
3444 		 */
3445 		if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) {
3446 			mp1 = mp->b_cont;
3447 			freeb(mp);
3448 			mp = mp1;
3449 			rptr = mp->b_rptr;
3450 		} else if (MBLKL(mp) == sizeof (ip_pktinfo_t) &&
3451 		    ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type ==
3452 		    IN_PKTINFO) {
3453 			/*
3454 			 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information
3455 			 * has been prepended to the packet by IP. We need to
3456 			 * extract the mblk and adjust the rptr
3457 			 */
3458 			pinfo = (ip_pktinfo_t *)mp->b_rptr;
3459 			options_mp = mp;
3460 			mp = mp->b_cont;
3461 			rptr = mp->b_rptr;
3462 		} else {
3463 			/*
3464 			 * ICMP messages.
3465 			 */
3466 			icmp_icmp_error(connp, mp);
3467 			return;
3468 		}
3469 	}
3470 
3471 	/*
3472 	 * Discard message if it is misaligned or smaller than the IP header.
3473 	 */
3474 	if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) {
3475 		freemsg(mp);
3476 		if (options_mp != NULL)
3477 			freeb(options_mp);
3478 		BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
3479 		return;
3480 	}
3481 	ipvers = IPH_HDR_VERSION((ipha_t *)rptr);
3482 
3483 	/* Handle M_DATA messages containing IP packets messages */
3484 	if (ipvers == IPV4_VERSION) {
3485 		/*
3486 		 * Special case where IP attaches
3487 		 * the IRE needs to be handled so that we don't send up
3488 		 * IRE to the user land.
3489 		 */
3490 		ipha = (ipha_t *)rptr;
3491 		hdr_len = IPH_HDR_LENGTH(ipha);
3492 
3493 		if (ipha->ipha_protocol == IPPROTO_TCP) {
3494 			tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len];
3495 
3496 			if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) ==
3497 			    TH_SYN) && mp->b_cont != NULL) {
3498 				mp1 = mp->b_cont;
3499 				if (mp1->b_datap->db_type == IRE_DB_TYPE) {
3500 					freeb(mp1);
3501 					mp->b_cont = NULL;
3502 				}
3503 			}
3504 		}
3505 		if (is->is_bsd_compat) {
3506 			ushort_t len;
3507 			len = ntohs(ipha->ipha_length);
3508 
3509 			if (mp->b_datap->db_ref > 1) {
3510 				/*
3511 				 * Allocate a new IP header so that we can
3512 				 * modify ipha_length.
3513 				 */
3514 				mblk_t	*mp1;
3515 
3516 				mp1 = allocb(hdr_len, BPRI_MED);
3517 				if (!mp1) {
3518 					freemsg(mp);
3519 					if (options_mp != NULL)
3520 						freeb(options_mp);
3521 					BUMP_MIB(&is->is_rawip_mib,
3522 					    rawipInErrors);
3523 					return;
3524 				}
3525 				bcopy(rptr, mp1->b_rptr, hdr_len);
3526 				mp->b_rptr = rptr + hdr_len;
3527 				rptr = mp1->b_rptr;
3528 				ipha = (ipha_t *)rptr;
3529 				mp1->b_cont = mp;
3530 				mp1->b_wptr = rptr + hdr_len;
3531 				mp = mp1;
3532 			}
3533 			len -= hdr_len;
3534 			ipha->ipha_length = htons(len);
3535 		}
3536 	}
3537 
3538 	/*
3539 	 * This is the inbound data path.  Packets are passed upstream as
3540 	 * T_UNITDATA_IND messages with full IP headers still attached.
3541 	 */
3542 	if (icmp->icmp_family == AF_INET) {
3543 		ASSERT(ipvers == IPV4_VERSION);
3544 		udi_size =  sizeof (struct T_unitdata_ind) + sizeof (sin_t);
3545 		if (icmp->icmp_recvif && (pinfo != NULL) &&
3546 		    (pinfo->ip_pkt_flags & IPF_RECVIF)) {
3547 			udi_size += sizeof (struct T_opthdr) +
3548 			    sizeof (uint_t);
3549 		}
3550 
3551 		if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) &&
3552 		    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
3553 			udi_size += sizeof (struct T_opthdr) +
3554 			    sizeof (struct in_pktinfo);
3555 		}
3556 
3557 		/*
3558 		 * If SO_TIMESTAMP is set allocate the appropriate sized
3559 		 * buffer. Since gethrestime() expects a pointer aligned
3560 		 * argument, we allocate space necessary for extra
3561 		 * alignment (even though it might not be used).
3562 		 */
3563 		if (icmp->icmp_timestamp) {
3564 			udi_size += sizeof (struct T_opthdr) +
3565 			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
3566 		}
3567 		mp1 = allocb(udi_size, BPRI_MED);
3568 		if (mp1 == NULL) {
3569 			freemsg(mp);
3570 			if (options_mp != NULL)
3571 				freeb(options_mp);
3572 			BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
3573 			return;
3574 		}
3575 		mp1->b_cont = mp;
3576 		mp = mp1;
3577 		tudi = (struct T_unitdata_ind *)mp->b_rptr;
3578 		mp->b_datap->db_type = M_PROTO;
3579 		mp->b_wptr = (uchar_t *)tudi + udi_size;
3580 		tudi->PRIM_type = T_UNITDATA_IND;
3581 		tudi->SRC_length = sizeof (sin_t);
3582 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
3583 		sin = (sin_t *)&tudi[1];
3584 		*sin = sin_null;
3585 		sin->sin_family = AF_INET;
3586 		sin->sin_addr.s_addr = ipha->ipha_src;
3587 		tudi->OPT_offset =  sizeof (struct T_unitdata_ind) +
3588 		    sizeof (sin_t);
3589 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
3590 		tudi->OPT_length = udi_size;
3591 
3592 		/*
3593 		 * Add options if IP_RECVIF is set
3594 		 */
3595 		if (udi_size != 0) {
3596 			char *dstopt;
3597 
3598 			dstopt = (char *)&sin[1];
3599 			if (icmp->icmp_recvif && (pinfo != NULL) &&
3600 			    (pinfo->ip_pkt_flags & IPF_RECVIF)) {
3601 
3602 				struct T_opthdr *toh;
3603 				uint_t		*dstptr;
3604 
3605 				toh = (struct T_opthdr *)dstopt;
3606 				toh->level = IPPROTO_IP;
3607 				toh->name = IP_RECVIF;
3608 				toh->len = sizeof (struct T_opthdr) +
3609 				    sizeof (uint_t);
3610 				toh->status = 0;
3611 				dstopt += sizeof (struct T_opthdr);
3612 				dstptr = (uint_t *)dstopt;
3613 				*dstptr = pinfo->ip_pkt_ifindex;
3614 				dstopt += sizeof (uint_t);
3615 				udi_size -= toh->len;
3616 			}
3617 			if (icmp->icmp_timestamp) {
3618 				struct	T_opthdr *toh;
3619 
3620 				toh = (struct T_opthdr *)dstopt;
3621 				toh->level = SOL_SOCKET;
3622 				toh->name = SCM_TIMESTAMP;
3623 				toh->len = sizeof (struct T_opthdr) +
3624 				    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
3625 				toh->status = 0;
3626 				dstopt += sizeof (struct T_opthdr);
3627 				/* Align for gethrestime() */
3628 				dstopt = (char *)P2ROUNDUP((intptr_t)dstopt,
3629 				    sizeof (intptr_t));
3630 				gethrestime((timestruc_t *)dstopt);
3631 				dstopt = (char *)toh + toh->len;
3632 				udi_size -= toh->len;
3633 			}
3634 			if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) &&
3635 			    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
3636 				struct	T_opthdr *toh;
3637 				struct	in_pktinfo *pktinfop;
3638 
3639 				toh = (struct T_opthdr *)dstopt;
3640 				toh->level = IPPROTO_IP;
3641 				toh->name = IP_PKTINFO;
3642 				toh->len = sizeof (struct T_opthdr) +
3643 				    sizeof (in_pktinfo_t);
3644 				toh->status = 0;
3645 				dstopt += sizeof (struct T_opthdr);
3646 				pktinfop = (struct in_pktinfo *)dstopt;
3647 				pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex;
3648 				pktinfop->ipi_spec_dst =
3649 				    pinfo->ip_pkt_match_addr;
3650 
3651 				pktinfop->ipi_addr.s_addr = ipha->ipha_dst;
3652 
3653 				dstopt += sizeof (struct in_pktinfo);
3654 				udi_size -= toh->len;
3655 			}
3656 
3657 			/* Consumed all of allocated space */
3658 			ASSERT(udi_size == 0);
3659 		}
3660 
3661 		if (options_mp != NULL)
3662 			freeb(options_mp);
3663 
3664 		BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams);
3665 		goto deliver;
3666 	}
3667 
3668 	/*
3669 	 * We don't need options_mp in the IPv6 path.
3670 	 */
3671 	if (options_mp != NULL) {
3672 		freeb(options_mp);
3673 		options_mp = NULL;
3674 	}
3675 
3676 	/*
3677 	 * Discard message if it is smaller than the IPv6 header
3678 	 * or if the header is malformed.
3679 	 */
3680 	if ((mp->b_wptr - rptr) < sizeof (ip6_t) ||
3681 	    IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION ||
3682 	    icmp->icmp_family != AF_INET6) {
3683 		freemsg(mp);
3684 		BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
3685 		return;
3686 	}
3687 
3688 	/* Initialize */
3689 	ipp.ipp_fields = 0;
3690 	hopstrip = 0;
3691 
3692 	ip6h = (ip6_t *)rptr;
3693 	/*
3694 	 * Call on ip_find_hdr_v6 which gets the total hdr len
3695 	 * as well as individual lenghts of ext hdrs (and ptrs to
3696 	 * them).
3697 	 */
3698 	if (ip6h->ip6_nxt != icmp->icmp_proto) {
3699 		/* Look for ifindex information */
3700 		if (ip6h->ip6_nxt == IPPROTO_RAW) {
3701 			ip6i = (ip6i_t *)ip6h;
3702 			if (ip6i->ip6i_flags & IP6I_IFINDEX) {
3703 				ASSERT(ip6i->ip6i_ifindex != 0);
3704 				ipp.ipp_fields |= IPPF_IFINDEX;
3705 				ipp.ipp_ifindex = ip6i->ip6i_ifindex;
3706 			}
3707 			rptr = (uchar_t *)&ip6i[1];
3708 			mp->b_rptr = rptr;
3709 			if (rptr == mp->b_wptr) {
3710 				mp1 = mp->b_cont;
3711 				freeb(mp);
3712 				mp = mp1;
3713 				rptr = mp->b_rptr;
3714 			}
3715 			ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN);
3716 			ip6h = (ip6_t *)rptr;
3717 		}
3718 		hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr);
3719 
3720 		/*
3721 		 * We need to lie a bit to the user because users inside
3722 		 * labeled compartments should not see their own labels.  We
3723 		 * assume that in all other respects IP has checked the label,
3724 		 * and that the label is always first among the options.  (If
3725 		 * it's not first, then this code won't see it, and the option
3726 		 * will be passed along to the user.)
3727 		 *
3728 		 * If we had multilevel ICMP sockets, then the following code
3729 		 * should be skipped for them to allow the user to see the
3730 		 * label.
3731 		 *
3732 		 * Alignment restrictions in the definition of IP options
3733 		 * (namely, the requirement that the 4-octet DOI goes on a
3734 		 * 4-octet boundary) mean that we know exactly where the option
3735 		 * should start, but we're lenient for other hosts.
3736 		 *
3737 		 * Note that there are no multilevel ICMP or raw IP sockets
3738 		 * yet, thus nobody ever sees the IP6OPT_LS option.
3739 		 */
3740 		if ((ipp.ipp_fields & IPPF_HOPOPTS) &&
3741 		    ipp.ipp_hopoptslen > 5 && is_system_labeled()) {
3742 			const uchar_t *ucp =
3743 			    (const uchar_t *)ipp.ipp_hopopts + 2;
3744 			int remlen = ipp.ipp_hopoptslen - 2;
3745 
3746 			while (remlen > 0) {
3747 				if (*ucp == IP6OPT_PAD1) {
3748 					remlen--;
3749 					ucp++;
3750 				} else if (*ucp == IP6OPT_PADN) {
3751 					remlen -= ucp[1] + 2;
3752 					ucp += ucp[1] + 2;
3753 				} else if (*ucp == ip6opt_ls) {
3754 					hopstrip = (ucp -
3755 					    (const uchar_t *)ipp.ipp_hopopts) +
3756 					    ucp[1] + 2;
3757 					hopstrip = (hopstrip + 7) & ~7;
3758 					break;
3759 				} else {
3760 					/* label option must be first */
3761 					break;
3762 				}
3763 			}
3764 		}
3765 	} else {
3766 		hdr_len = IPV6_HDR_LEN;
3767 		ip6i = NULL;
3768 		nexthdr = ip6h->ip6_nxt;
3769 	}
3770 	/*
3771 	 * One special case where IP attaches the IRE needs to
3772 	 * be handled so that we don't send up IRE to the user land.
3773 	 */
3774 	if (nexthdr == IPPROTO_TCP) {
3775 		tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len];
3776 
3777 		if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) &&
3778 		    mp->b_cont != NULL) {
3779 			mp1 = mp->b_cont;
3780 			if (mp1->b_datap->db_type == IRE_DB_TYPE) {
3781 				freeb(mp1);
3782 				mp->b_cont = NULL;
3783 			}
3784 		}
3785 	}
3786 	/*
3787 	 * Check a filter for ICMPv6 types if needed.
3788 	 * Verify raw checksums if needed.
3789 	 */
3790 	if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) {
3791 		if (icmp->icmp_filter != NULL) {
3792 			int type;
3793 
3794 			/* Assumes that IP has done the pullupmsg */
3795 			type = mp->b_rptr[hdr_len];
3796 
3797 			ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr);
3798 			if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) {
3799 				freemsg(mp);
3800 				return;
3801 			}
3802 		} else {
3803 			/* Checksum */
3804 			uint16_t	*up;
3805 			uint32_t	sum;
3806 			int		remlen;
3807 
3808 			up = (uint16_t *)&ip6h->ip6_src;
3809 
3810 			remlen = msgdsize(mp) - hdr_len;
3811 			sum = htons(icmp->icmp_proto + remlen)
3812 			    + up[0] + up[1] + up[2] + up[3]
3813 			    + up[4] + up[5] + up[6] + up[7]
3814 			    + up[8] + up[9] + up[10] + up[11]
3815 			    + up[12] + up[13] + up[14] + up[15];
3816 			sum = (sum & 0xffff) + (sum >> 16);
3817 			sum = IP_CSUM(mp, hdr_len, sum);
3818 			if (sum != 0) {
3819 				/* IPv6 RAW checksum failed */
3820 				ip0dbg(("icmp_rput: RAW checksum "
3821 				    "failed %x\n", sum));
3822 				freemsg(mp);
3823 				BUMP_MIB(&is->is_rawip_mib,
3824 				    rawipInCksumErrs);
3825 				return;
3826 			}
3827 		}
3828 	}
3829 	/* Skip all the IPv6 headers per API */
3830 	mp->b_rptr += hdr_len;
3831 
3832 	udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
3833 
3834 	/*
3835 	 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to
3836 	 * maintain state information, instead of relying on icmp_t
3837 	 * structure, since there arent any locks protecting these members
3838 	 * and there is a window where there might be a race between a
3839 	 * thread setting options on the write side and a thread reading
3840 	 * these options on the read size.
3841 	 */
3842 	if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS|
3843 	    IPPF_RTHDR|IPPF_IFINDEX)) {
3844 		if (icmp->icmp_ipv6_recvhopopts &&
3845 		    (ipp.ipp_fields & IPPF_HOPOPTS) &&
3846 		    ipp.ipp_hopoptslen > hopstrip) {
3847 			udi_size += sizeof (struct T_opthdr) +
3848 			    ipp.ipp_hopoptslen - hopstrip;
3849 			icmp_opt |= IPPF_HOPOPTS;
3850 		}
3851 		if ((icmp->icmp_ipv6_recvdstopts ||
3852 		    icmp->icmp_old_ipv6_recvdstopts) &&
3853 		    (ipp.ipp_fields & IPPF_DSTOPTS)) {
3854 			udi_size += sizeof (struct T_opthdr) +
3855 			    ipp.ipp_dstoptslen;
3856 			icmp_opt |= IPPF_DSTOPTS;
3857 		}
3858 		if (((icmp->icmp_ipv6_recvdstopts &&
3859 		    icmp->icmp_ipv6_recvrthdr &&
3860 		    (ipp.ipp_fields & IPPF_RTHDR)) ||
3861 		    icmp->icmp_ipv6_recvrtdstopts) &&
3862 		    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
3863 			udi_size += sizeof (struct T_opthdr) +
3864 			    ipp.ipp_rtdstoptslen;
3865 			icmp_opt |= IPPF_RTDSTOPTS;
3866 		}
3867 		if (icmp->icmp_ipv6_recvrthdr &&
3868 		    (ipp.ipp_fields & IPPF_RTHDR)) {
3869 			udi_size += sizeof (struct T_opthdr) +
3870 			    ipp.ipp_rthdrlen;
3871 			icmp_opt |= IPPF_RTHDR;
3872 		}
3873 		if (icmp->icmp_ip_recvpktinfo &&
3874 		    (ipp.ipp_fields & IPPF_IFINDEX)) {
3875 			udi_size += sizeof (struct T_opthdr) +
3876 			    sizeof (struct in6_pktinfo);
3877 			icmp_opt |= IPPF_IFINDEX;
3878 		}
3879 	}
3880 	if (icmp->icmp_ipv6_recvhoplimit) {
3881 		udi_size += sizeof (struct T_opthdr) + sizeof (int);
3882 		icmp_ipv6_recvhoplimit = B_TRUE;
3883 	}
3884 
3885 	if (icmp->icmp_ipv6_recvtclass)
3886 		udi_size += sizeof (struct T_opthdr) + sizeof (int);
3887 
3888 	/*
3889 	 * If SO_TIMESTAMP is set allocate the appropriate sized
3890 	 * buffer. Since gethrestime() expects a pointer aligned
3891 	 * argument, we allocate space necessary for extra
3892 	 * alignment (even though it might not be used).
3893 	 */
3894 	if (icmp->icmp_timestamp) {
3895 		udi_size += sizeof (struct T_opthdr) +
3896 		    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
3897 	}
3898 
3899 	mp1 = allocb(udi_size, BPRI_MED);
3900 	if (mp1 == NULL) {
3901 		freemsg(mp);
3902 		BUMP_MIB(&is->is_rawip_mib, rawipInErrors);
3903 		return;
3904 	}
3905 	mp1->b_cont = mp;
3906 	mp = mp1;
3907 	mp->b_datap->db_type = M_PROTO;
3908 	tudi = (struct T_unitdata_ind *)mp->b_rptr;
3909 	mp->b_wptr = (uchar_t *)tudi + udi_size;
3910 	tudi->PRIM_type = T_UNITDATA_IND;
3911 	tudi->SRC_length = sizeof (sin6_t);
3912 	tudi->SRC_offset = sizeof (struct T_unitdata_ind);
3913 	tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
3914 	udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
3915 	tudi->OPT_length = udi_size;
3916 	sin6 = (sin6_t *)&tudi[1];
3917 	sin6->sin6_port = 0;
3918 	sin6->sin6_family = AF_INET6;
3919 
3920 	sin6->sin6_addr = ip6h->ip6_src;
3921 	/* No sin6_flowinfo per API */
3922 	sin6->sin6_flowinfo = 0;
3923 	/* For link-scope source pass up scope id */
3924 	if ((ipp.ipp_fields & IPPF_IFINDEX) &&
3925 	    IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
3926 		sin6->sin6_scope_id = ipp.ipp_ifindex;
3927 	else
3928 		sin6->sin6_scope_id = 0;
3929 
3930 	sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst,
3931 	    icmp->icmp_zoneid, is->is_netstack);
3932 
3933 	if (udi_size != 0) {
3934 		uchar_t *dstopt;
3935 
3936 		dstopt = (uchar_t *)&sin6[1];
3937 		if (icmp_opt & IPPF_IFINDEX) {
3938 			struct T_opthdr *toh;
3939 			struct in6_pktinfo *pkti;
3940 
3941 			toh = (struct T_opthdr *)dstopt;
3942 			toh->level = IPPROTO_IPV6;
3943 			toh->name = IPV6_PKTINFO;
3944 			toh->len = sizeof (struct T_opthdr) +
3945 			    sizeof (*pkti);
3946 			toh->status = 0;
3947 			dstopt += sizeof (struct T_opthdr);
3948 			pkti = (struct in6_pktinfo *)dstopt;
3949 			pkti->ipi6_addr = ip6h->ip6_dst;
3950 			pkti->ipi6_ifindex = ipp.ipp_ifindex;
3951 			dstopt += sizeof (*pkti);
3952 			udi_size -= toh->len;
3953 		}
3954 		if (icmp_ipv6_recvhoplimit) {
3955 			struct T_opthdr *toh;
3956 
3957 			toh = (struct T_opthdr *)dstopt;
3958 			toh->level = IPPROTO_IPV6;
3959 			toh->name = IPV6_HOPLIMIT;
3960 			toh->len = sizeof (struct T_opthdr) +
3961 			    sizeof (uint_t);
3962 			toh->status = 0;
3963 			dstopt += sizeof (struct T_opthdr);
3964 			*(uint_t *)dstopt = ip6h->ip6_hops;
3965 			dstopt += sizeof (uint_t);
3966 			udi_size -= toh->len;
3967 		}
3968 		if (icmp->icmp_ipv6_recvtclass) {
3969 			struct T_opthdr *toh;
3970 
3971 			toh = (struct T_opthdr *)dstopt;
3972 			toh->level = IPPROTO_IPV6;
3973 			toh->name = IPV6_TCLASS;
3974 			toh->len = sizeof (struct T_opthdr) +
3975 			    sizeof (uint_t);
3976 			toh->status = 0;
3977 			dstopt += sizeof (struct T_opthdr);
3978 			*(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
3979 			dstopt += sizeof (uint_t);
3980 			udi_size -= toh->len;
3981 		}
3982 		if (icmp->icmp_timestamp) {
3983 			struct  T_opthdr *toh;
3984 
3985 			toh = (struct T_opthdr *)dstopt;
3986 			toh->level = SOL_SOCKET;
3987 			toh->name = SCM_TIMESTAMP;
3988 			toh->len = sizeof (struct T_opthdr) +
3989 			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
3990 			toh->status = 0;
3991 			dstopt += sizeof (struct T_opthdr);
3992 			/* Align for gethrestime() */
3993 			dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt,
3994 			    sizeof (intptr_t));
3995 			gethrestime((timestruc_t *)dstopt);
3996 			dstopt = (uchar_t *)toh + toh->len;
3997 			udi_size -= toh->len;
3998 		}
3999 
4000 		if (icmp_opt & IPPF_HOPOPTS) {
4001 			struct T_opthdr *toh;
4002 
4003 			toh = (struct T_opthdr *)dstopt;
4004 			toh->level = IPPROTO_IPV6;
4005 			toh->name = IPV6_HOPOPTS;
4006 			toh->len = sizeof (struct T_opthdr) +
4007 			    ipp.ipp_hopoptslen - hopstrip;
4008 			toh->status = 0;
4009 			dstopt += sizeof (struct T_opthdr);
4010 			bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt,
4011 			    ipp.ipp_hopoptslen - hopstrip);
4012 			if (hopstrip > 0) {
4013 				/* copy next header value and fake length */
4014 				dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0];
4015 				dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] -
4016 				    hopstrip / 8;
4017 			}
4018 			dstopt += ipp.ipp_hopoptslen - hopstrip;
4019 			udi_size -= toh->len;
4020 		}
4021 		if (icmp_opt & IPPF_RTDSTOPTS) {
4022 			struct T_opthdr *toh;
4023 
4024 			toh = (struct T_opthdr *)dstopt;
4025 			toh->level = IPPROTO_IPV6;
4026 			toh->name = IPV6_DSTOPTS;
4027 			toh->len = sizeof (struct T_opthdr) +
4028 			    ipp.ipp_rtdstoptslen;
4029 			toh->status = 0;
4030 			dstopt += sizeof (struct T_opthdr);
4031 			bcopy(ipp.ipp_rtdstopts, dstopt,
4032 			    ipp.ipp_rtdstoptslen);
4033 			dstopt += ipp.ipp_rtdstoptslen;
4034 			udi_size -= toh->len;
4035 		}
4036 		if (icmp_opt & IPPF_RTHDR) {
4037 			struct T_opthdr *toh;
4038 
4039 			toh = (struct T_opthdr *)dstopt;
4040 			toh->level = IPPROTO_IPV6;
4041 			toh->name = IPV6_RTHDR;
4042 			toh->len = sizeof (struct T_opthdr) +
4043 			    ipp.ipp_rthdrlen;
4044 			toh->status = 0;
4045 			dstopt += sizeof (struct T_opthdr);
4046 			bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen);
4047 			dstopt += ipp.ipp_rthdrlen;
4048 			udi_size -= toh->len;
4049 		}
4050 		if (icmp_opt & IPPF_DSTOPTS) {
4051 			struct T_opthdr *toh;
4052 
4053 			toh = (struct T_opthdr *)dstopt;
4054 			toh->level = IPPROTO_IPV6;
4055 			toh->name = IPV6_DSTOPTS;
4056 			toh->len = sizeof (struct T_opthdr) +
4057 			    ipp.ipp_dstoptslen;
4058 			toh->status = 0;
4059 			dstopt += sizeof (struct T_opthdr);
4060 			bcopy(ipp.ipp_dstopts, dstopt,
4061 			    ipp.ipp_dstoptslen);
4062 			dstopt += ipp.ipp_dstoptslen;
4063 			udi_size -= toh->len;
4064 		}
4065 		/* Consumed all of allocated space */
4066 		ASSERT(udi_size == 0);
4067 	}
4068 	BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams);
4069 
4070 deliver:
4071 	icmp_ulp_recv(connp, mp);
4072 
4073 }
4074 
4075 /*
4076  * return SNMP stuff in buffer in mpdata
4077  */
4078 mblk_t *
4079 icmp_snmp_get(queue_t *q, mblk_t *mpctl)
4080 {
4081 	mblk_t			*mpdata;
4082 	struct opthdr		*optp;
4083 	conn_t			*connp = Q_TO_CONN(q);
4084 	icmp_stack_t		*is = connp->conn_netstack->netstack_icmp;
4085 	mblk_t			*mp2ctl;
4086 
4087 	/*
4088 	 * make a copy of the original message
4089 	 */
4090 	mp2ctl = copymsg(mpctl);
4091 
4092 	if (mpctl == NULL ||
4093 	    (mpdata = mpctl->b_cont) == NULL) {
4094 		freemsg(mpctl);
4095 		freemsg(mp2ctl);
4096 		return (0);
4097 	}
4098 
4099 	/* fixed length structure for IPv4 and IPv6 counters */
4100 	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
4101 	optp->level = EXPER_RAWIP;
4102 	optp->name = 0;
4103 	(void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib,
4104 	    sizeof (is->is_rawip_mib));
4105 	optp->len = msgdsize(mpdata);
4106 	qreply(q, mpctl);
4107 
4108 	return (mp2ctl);
4109 }
4110 
4111 /*
4112  * Return 0 if invalid set request, 1 otherwise, including non-rawip requests.
4113  * TODO:  If this ever actually tries to set anything, it needs to be
4114  * to do the appropriate locking.
4115  */
4116 /* ARGSUSED */
4117 int
4118 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
4119     uchar_t *ptr, int len)
4120 {
4121 	switch (level) {
4122 	case EXPER_RAWIP:
4123 		return (0);
4124 	default:
4125 		return (1);
4126 	}
4127 }
4128 
4129 /*
4130  * This routine creates a T_UDERROR_IND message and passes it upstream.
4131  * The address and options are copied from the T_UNITDATA_REQ message
4132  * passed in mp.  This message is freed.
4133  */
4134 static void
4135 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
4136 {
4137 	mblk_t	*mp1;
4138 	uchar_t	*rptr = mp->b_rptr;
4139 	struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr;
4140 
4141 	mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset],
4142 	    tudr->DEST_length, (char *)&rptr[tudr->OPT_offset],
4143 	    tudr->OPT_length, err);
4144 	if (mp1)
4145 		qreply(q, mp1);
4146 	freemsg(mp);
4147 }
4148 
4149 
4150 static int
4151 rawip_do_unbind(conn_t *connp)
4152 {
4153 	icmp_t *icmp = connp->conn_icmp;
4154 
4155 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
4156 	/* If a bind has not been done, we can't unbind. */
4157 	if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) {
4158 		rw_exit(&icmp->icmp_rwlock);
4159 		return (-TOUTSTATE);
4160 	}
4161 	icmp->icmp_pending_op = T_UNBIND_REQ;
4162 	rw_exit(&icmp->icmp_rwlock);
4163 
4164 	/*
4165 	 * Call ip to unbind
4166 	 */
4167 
4168 	ip_unbind(connp);
4169 
4170 	/*
4171 	 * Once we're unbound from IP, the pending operation may be cleared
4172 	 * here.
4173 	 */
4174 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
4175 	V6_SET_ZERO(icmp->icmp_v6src);
4176 	V6_SET_ZERO(icmp->icmp_bound_v6src);
4177 	icmp->icmp_pending_op = -1;
4178 	icmp->icmp_state = TS_UNBND;
4179 	if (icmp->icmp_family == AF_INET6)
4180 		(void) icmp_build_hdrs(icmp);
4181 	rw_exit(&icmp->icmp_rwlock);
4182 	return (0);
4183 }
4184 
4185 /*
4186  * This routine is called by icmp_wput to handle T_UNBIND_REQ messages.
4187  * After some error checking, the message is passed downstream to ip.
4188  */
4189 static void
4190 icmp_tpi_unbind(queue_t *q, mblk_t *mp)
4191 {
4192 	conn_t	*connp = Q_TO_CONN(q);
4193 	int	error;
4194 
4195 	ASSERT(mp->b_cont == NULL);
4196 	error = rawip_do_unbind(connp);
4197 	if (error) {
4198 		if (error < 0) {
4199 			icmp_err_ack(q, mp, -error, 0);
4200 		} else {
4201 			icmp_err_ack(q, mp, 0, error);
4202 		}
4203 		return;
4204 	}
4205 
4206 	/*
4207 	 * Convert mp into a T_OK_ACK
4208 	 */
4209 
4210 	mp = mi_tpi_ok_ack_alloc(mp);
4211 
4212 	/*
4213 	 * should not happen in practice... T_OK_ACK is smaller than the
4214 	 * original message.
4215 	 */
4216 	ASSERT(mp != NULL);
4217 	ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
4218 	qreply(q, mp);
4219 }
4220 
4221 
4222 /*
4223  * Process IPv4 packets that already include an IP header.
4224  * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and
4225  * IPPROTO_IGMP).
4226  */
4227 static int
4228 icmp_wput_hdrincl(queue_t *q, conn_t *connp, mblk_t *mp, icmp_t *icmp,
4229     ip4_pkt_t *pktinfop)
4230 {
4231 	icmp_stack_t *is = icmp->icmp_is;
4232 	ipha_t	*ipha;
4233 	int	ip_hdr_length;
4234 	int	tp_hdr_len;
4235 	mblk_t	*mp1;
4236 	uint_t	pkt_len;
4237 	ip_opt_info_t optinfo;
4238 
4239 	optinfo.ip_opt_flags = 0;
4240 	optinfo.ip_opt_ill_index = 0;
4241 	ipha = (ipha_t *)mp->b_rptr;
4242 	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len;
4243 	if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) {
4244 		if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) {
4245 			ASSERT(icmp != NULL);
4246 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4247 			freemsg(mp);
4248 			return (0);
4249 		}
4250 		ipha = (ipha_t *)mp->b_rptr;
4251 	}
4252 	ipha->ipha_version_and_hdr_length =
4253 	    (IP_VERSION<<4) | (ip_hdr_length>>2);
4254 
4255 	/*
4256 	 * For the socket of SOCK_RAW type, the checksum is provided in the
4257 	 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to
4258 	 * tell IP that the application has sent a complete IP header and not
4259 	 * to compute the transport checksum nor change the DF flag.
4260 	 */
4261 	ipha->ipha_ident = IP_HDR_INCLUDED;
4262 	ipha->ipha_hdr_checksum = 0;
4263 	ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF);
4264 	/* Insert options if any */
4265 	if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) {
4266 		/*
4267 		 * Put the IP header plus any transport header that is
4268 		 * checksumed by ip_wput into the first mblk. (ip_wput assumes
4269 		 * that at least the checksum field is in the first mblk.)
4270 		 */
4271 		switch (ipha->ipha_protocol) {
4272 		case IPPROTO_UDP:
4273 			tp_hdr_len = 8;
4274 			break;
4275 		case IPPROTO_TCP:
4276 			tp_hdr_len = 20;
4277 			break;
4278 		default:
4279 			tp_hdr_len = 0;
4280 			break;
4281 		}
4282 		/*
4283 		 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus
4284 		 * tp_hdr_len bytes will be in a single mblk.
4285 		 */
4286 		if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH +
4287 		    tp_hdr_len)) {
4288 			if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH +
4289 			    tp_hdr_len)) {
4290 				BUMP_MIB(&is->is_rawip_mib,
4291 				    rawipOutErrors);
4292 				freemsg(mp);
4293 				return (0);
4294 			}
4295 			ipha = (ipha_t *)mp->b_rptr;
4296 		}
4297 
4298 		/*
4299 		 * if the length is larger then the max allowed IP packet,
4300 		 * then send an error and abort the processing.
4301 		 */
4302 		pkt_len = ntohs(ipha->ipha_length)
4303 		    + icmp->icmp_ip_snd_options_len;
4304 		if (pkt_len > IP_MAXPACKET) {
4305 			return (EMSGSIZE);
4306 		}
4307 		if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra +
4308 		    tp_hdr_len, BPRI_LO))) {
4309 			return (ENOMEM);
4310 		}
4311 		mp1->b_rptr += is->is_wroff_extra;
4312 		mp1->b_wptr = mp1->b_rptr + ip_hdr_length;
4313 
4314 		ipha->ipha_length = htons((uint16_t)pkt_len);
4315 		bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH);
4316 
4317 		/* Copy transport header if any */
4318 		bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len);
4319 		mp1->b_wptr += tp_hdr_len;
4320 
4321 		/* Add options */
4322 		ipha = (ipha_t *)mp1->b_rptr;
4323 		bcopy(icmp->icmp_ip_snd_options, &ipha[1],
4324 		    icmp->icmp_ip_snd_options_len);
4325 
4326 		/* Drop IP header and transport header from original */
4327 		(void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len);
4328 
4329 		mp1->b_cont = mp;
4330 		mp = mp1;
4331 		/*
4332 		 * Massage source route putting first source
4333 		 * route in ipha_dst.
4334 		 */
4335 		(void) ip_massage_options(ipha, is->is_netstack);
4336 	}
4337 
4338 	if (pktinfop != NULL) {
4339 		/*
4340 		 * Over write the source address provided in the header
4341 		 */
4342 		if (pktinfop->ip4_addr != INADDR_ANY) {
4343 			ipha->ipha_src = pktinfop->ip4_addr;
4344 			optinfo.ip_opt_flags = IP_VERIFY_SRC;
4345 		}
4346 
4347 		if (pktinfop->ip4_ill_index != 0) {
4348 			optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
4349 		}
4350 	}
4351 
4352 	ip_output_options(connp, mp, q, IP_WPUT, &optinfo);
4353 	return (0);
4354 }
4355 
4356 static int
4357 icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst)
4358 {
4359 	int err;
4360 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
4361 	icmp_stack_t		*is = icmp->icmp_is;
4362 	conn_t			*connp = icmp->icmp_connp;
4363 	cred_t			*cr;
4364 
4365 	/*
4366 	 * All Solaris components should pass a db_credp
4367 	 * for this message, hence we ASSERT.
4368 	 * On production kernels we return an error to be robust against
4369 	 * random streams modules sitting on top of us.
4370 	 */
4371 	cr = msg_getcred(mp, NULL);
4372 	ASSERT(cr != NULL);
4373 	if (cr == NULL)
4374 		return (EINVAL);
4375 
4376 	err = tsol_compute_label(cr, dst,
4377 	    opt_storage, connp->conn_mac_exempt,
4378 	    is->is_netstack->netstack_ip);
4379 	if (err == 0) {
4380 		err = tsol_update_options(&icmp->icmp_ip_snd_options,
4381 		    &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len,
4382 		    opt_storage);
4383 	}
4384 	if (err != 0) {
4385 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4386 		DTRACE_PROBE4(
4387 		    tx__ip__log__drop__updatelabel__icmp,
4388 		    char *, "icmp(1) failed to update options(2) on mp(3)",
4389 		    icmp_t *, icmp, char *, opt_storage, mblk_t *, mp);
4390 		return (err);
4391 	}
4392 	IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst);
4393 	return (0);
4394 }
4395 
4396 /*
4397  * This routine handles all messages passed downstream.  It either
4398  * consumes the message or passes it downstream; it never queues a
4399  * a message.
4400  */
4401 static void
4402 icmp_wput(queue_t *q, mblk_t *mp)
4403 {
4404 	uchar_t	*rptr = mp->b_rptr;
4405 	ipha_t	*ipha;
4406 	mblk_t	*mp1;
4407 #define	tudr ((struct T_unitdata_req *)rptr)
4408 	size_t	ip_len;
4409 	conn_t	*connp = Q_TO_CONN(q);
4410 	icmp_t	*icmp = connp->conn_icmp;
4411 	icmp_stack_t *is = icmp->icmp_is;
4412 	sin6_t	*sin6;
4413 	sin_t	*sin;
4414 	ipaddr_t	v4dst;
4415 	ip4_pkt_t	pktinfo;
4416 	ip4_pkt_t	*pktinfop = &pktinfo;
4417 	ip6_pkt_t	ipp_s;  /* For ancillary data options */
4418 	ip6_pkt_t	*ipp = &ipp_s;
4419 	int error;
4420 
4421 	ipp->ipp_fields = 0;
4422 	ipp->ipp_sticky_ignored = 0;
4423 
4424 	switch (mp->b_datap->db_type) {
4425 	case M_DATA:
4426 		if (icmp->icmp_hdrincl) {
4427 			ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
4428 			ipha = (ipha_t *)mp->b_rptr;
4429 			if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) {
4430 				if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) {
4431 					BUMP_MIB(&is->is_rawip_mib,
4432 					    rawipOutErrors);
4433 					freemsg(mp);
4434 					return;
4435 				}
4436 				ipha = (ipha_t *)mp->b_rptr;
4437 			}
4438 			/*
4439 			 * If this connection was used for v6 (inconceivable!)
4440 			 * or if we have a new destination, then it's time to
4441 			 * figure a new label.
4442 			 */
4443 			if (is_system_labeled() &&
4444 			    (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) ||
4445 			    V4_PART_OF_V6(icmp->icmp_v6lastdst) !=
4446 			    ipha->ipha_dst)) {
4447 				error = icmp_update_label(icmp, mp,
4448 				    ipha->ipha_dst);
4449 				if (error != 0) {
4450 					icmp_ud_err(q, mp, error);
4451 					return;
4452 				}
4453 			}
4454 			error = icmp_wput_hdrincl(q, connp, mp, icmp, NULL);
4455 			if (error != 0)
4456 				icmp_ud_err(q, mp, error);
4457 			return;
4458 		}
4459 		freemsg(mp);
4460 		return;
4461 	case M_PROTO:
4462 	case M_PCPROTO:
4463 		ip_len = mp->b_wptr - rptr;
4464 		if (ip_len >= sizeof (struct T_unitdata_req)) {
4465 			/* Expedite valid T_UNITDATA_REQ to below the switch */
4466 			if (((union T_primitives *)rptr)->type
4467 			    == T_UNITDATA_REQ)
4468 				break;
4469 		}
4470 		/* FALLTHRU */
4471 	default:
4472 		icmp_wput_other(q, mp);
4473 		return;
4474 	}
4475 
4476 	/* Handle T_UNITDATA_REQ messages here. */
4477 
4478 	mp1 = mp->b_cont;
4479 	if (mp1 == NULL) {
4480 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4481 		icmp_ud_err(q, mp, EPROTO);
4482 		return;
4483 	}
4484 
4485 	if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) {
4486 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4487 		icmp_ud_err(q, mp, EADDRNOTAVAIL);
4488 		return;
4489 	}
4490 
4491 	switch (icmp->icmp_family) {
4492 	case AF_INET6:
4493 		sin6 = (sin6_t *)&rptr[tudr->DEST_offset];
4494 		if (!OK_32PTR((char *)sin6) ||
4495 		    tudr->DEST_length != sizeof (sin6_t) ||
4496 		    sin6->sin6_family != AF_INET6) {
4497 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4498 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
4499 			return;
4500 		}
4501 
4502 		/* No support for mapped addresses on raw sockets */
4503 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
4504 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4505 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
4506 			return;
4507 		}
4508 
4509 		/*
4510 		 * Destination is a native IPv6 address.
4511 		 * Send out an IPv6 format packet.
4512 		 */
4513 		if (tudr->OPT_length != 0) {
4514 			int error;
4515 
4516 			error = 0;
4517 			if (icmp_unitdata_opt_process(q, mp, &error,
4518 			    (void *)ipp) < 0) {
4519 				/* failure */
4520 				BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4521 				icmp_ud_err(q, mp, error);
4522 				return;
4523 			}
4524 			ASSERT(error == 0);
4525 		}
4526 
4527 		error = raw_ip_send_data_v6(q, connp, mp1, sin6, ipp);
4528 		goto done;
4529 
4530 	case AF_INET:
4531 		sin = (sin_t *)&rptr[tudr->DEST_offset];
4532 		if (!OK_32PTR((char *)sin) ||
4533 		    tudr->DEST_length != sizeof (sin_t) ||
4534 		    sin->sin_family != AF_INET) {
4535 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4536 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
4537 			return;
4538 		}
4539 		/* Extract and ipaddr */
4540 		v4dst = sin->sin_addr.s_addr;
4541 		break;
4542 
4543 	default:
4544 		ASSERT(0);
4545 	}
4546 
4547 	pktinfop->ip4_ill_index = 0;
4548 	pktinfop->ip4_addr = INADDR_ANY;
4549 
4550 	/*
4551 	 * If options passed in, feed it for verification and handling
4552 	 */
4553 	if (tudr->OPT_length != 0) {
4554 		int error;
4555 
4556 		error = 0;
4557 		if (icmp_unitdata_opt_process(q, mp, &error,
4558 		    (void *)pktinfop) < 0) {
4559 			/* failure */
4560 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4561 			icmp_ud_err(q, mp, error);
4562 			return;
4563 		}
4564 		ASSERT(error == 0);
4565 		/*
4566 		 * Note: Success in processing options.
4567 		 * mp option buffer represented by
4568 		 * OPT_length/offset now potentially modified
4569 		 * and contain option setting results
4570 		 */
4571 	}
4572 
4573 	error = raw_ip_send_data_v4(q, connp, mp1, v4dst, pktinfop);
4574 done:
4575 	if (error != 0) {
4576 		icmp_ud_err(q, mp, error);
4577 		return;
4578 	} else {
4579 		mp->b_cont = NULL;
4580 		freeb(mp);
4581 	}
4582 }
4583 
4584 
4585 /* ARGSUSED */
4586 static void
4587 icmp_wput_fallback(queue_t *q, mblk_t *mp)
4588 {
4589 #ifdef DEBUG
4590 	cmn_err(CE_CONT, "icmp_wput_fallback: Message during fallback \n");
4591 #endif
4592 	freemsg(mp);
4593 }
4594 
4595 static int
4596 raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp, ipaddr_t v4dst,
4597     ip4_pkt_t *pktinfop)
4598 {
4599 	ipha_t	*ipha;
4600 	size_t	ip_len;
4601 	icmp_t	*icmp = connp->conn_icmp;
4602 	icmp_stack_t *is = icmp->icmp_is;
4603 	int	ip_hdr_length;
4604 	ip_opt_info_t	optinfo;
4605 
4606 	optinfo.ip_opt_flags = 0;
4607 	optinfo.ip_opt_ill_index = 0;
4608 
4609 	if (icmp->icmp_state == TS_UNBND) {
4610 		/* If a port has not been bound to the stream, fail. */
4611 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4612 		return (EPROTO);
4613 	}
4614 
4615 	if (v4dst == INADDR_ANY)
4616 		v4dst = htonl(INADDR_LOOPBACK);
4617 
4618 	/* Check if our saved options are valid; update if not */
4619 	if (is_system_labeled() &&
4620 	    (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) ||
4621 	    V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst)) {
4622 		int error = icmp_update_label(icmp, mp, v4dst);
4623 
4624 		if (error != 0)
4625 			return (error);
4626 	}
4627 
4628 	/* Protocol 255 contains full IP headers */
4629 	if (icmp->icmp_hdrincl)
4630 		return (icmp_wput_hdrincl(q, connp, mp, icmp, pktinfop));
4631 
4632 	/* Add an IP header */
4633 	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len;
4634 	ipha = (ipha_t *)&mp->b_rptr[-ip_hdr_length];
4635 	if ((uchar_t *)ipha < mp->b_datap->db_base ||
4636 	    mp->b_datap->db_ref != 1 ||
4637 	    !OK_32PTR(ipha)) {
4638 		mblk_t	*mp1;
4639 		if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra,
4640 		    BPRI_LO))) {
4641 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4642 			return (ENOMEM);
4643 		}
4644 		mp1->b_cont = mp;
4645 		ipha = (ipha_t *)mp1->b_datap->db_lim;
4646 		mp1->b_wptr = (uchar_t *)ipha;
4647 		ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length);
4648 		mp = mp1;
4649 	}
4650 #ifdef	_BIG_ENDIAN
4651 	/* Set version, header length, and tos */
4652 	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
4653 	    ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) |
4654 	    icmp->icmp_type_of_service);
4655 	/* Set ttl and protocol */
4656 	*(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto;
4657 #else
4658 	/* Set version, header length, and tos */
4659 	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
4660 	    ((icmp->icmp_type_of_service << 8) |
4661 	    ((IP_VERSION << 4) | (ip_hdr_length>>2)));
4662 	/* Set ttl and protocol */
4663 	*(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl;
4664 #endif
4665 	if (pktinfop->ip4_addr != INADDR_ANY) {
4666 		ipha->ipha_src = pktinfop->ip4_addr;
4667 		optinfo.ip_opt_flags = IP_VERIFY_SRC;
4668 	} else {
4669 
4670 		/*
4671 		 * Copy our address into the packet.  If this is zero,
4672 		 * ip will fill in the real source address.
4673 		 */
4674 		IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src);
4675 	}
4676 
4677 	ipha->ipha_fragment_offset_and_flags = 0;
4678 
4679 	if (pktinfop->ip4_ill_index != 0) {
4680 		optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
4681 	}
4682 
4683 
4684 	/*
4685 	 * For the socket of SOCK_RAW type, the checksum is provided in the
4686 	 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to
4687 	 * tell IP that the application has sent a complete IP header and not
4688 	 * to compute the transport checksum nor change the DF flag.
4689 	 */
4690 	ipha->ipha_ident = IP_HDR_INCLUDED;
4691 
4692 	/* Finish common formatting of the packet. */
4693 	mp->b_rptr = (uchar_t *)ipha;
4694 
4695 	ip_len = mp->b_wptr - (uchar_t *)ipha;
4696 	if (mp->b_cont != NULL)
4697 		ip_len += msgdsize(mp->b_cont);
4698 
4699 	/*
4700 	 * Set the length into the IP header.
4701 	 * If the length is greater than the maximum allowed by IP,
4702 	 * then free the message and return. Do not try and send it
4703 	 * as this can cause problems in layers below.
4704 	 */
4705 	if (ip_len > IP_MAXPACKET) {
4706 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4707 		return (EMSGSIZE);
4708 	}
4709 	ipha->ipha_length = htons((uint16_t)ip_len);
4710 	/*
4711 	 * Copy in the destination address request
4712 	 */
4713 	ipha->ipha_dst = v4dst;
4714 
4715 	/*
4716 	 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic.
4717 	 */
4718 	if (CLASSD(v4dst))
4719 		ipha->ipha_ttl = icmp->icmp_multicast_ttl;
4720 
4721 	/* Copy in options if any */
4722 	if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) {
4723 		bcopy(icmp->icmp_ip_snd_options,
4724 		    &ipha[1], icmp->icmp_ip_snd_options_len);
4725 		/*
4726 		 * Massage source route putting first source route in ipha_dst.
4727 		 * Ignore the destination in the T_unitdata_req.
4728 		 */
4729 		(void) ip_massage_options(ipha, is->is_netstack);
4730 	}
4731 
4732 	BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams);
4733 	ip_output_options(connp, mp, q, IP_WPUT, &optinfo);
4734 	return (0);
4735 }
4736 
4737 static int
4738 icmp_update_label_v6(icmp_t *icmp, mblk_t *mp, in6_addr_t *dst)
4739 {
4740 	int err;
4741 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
4742 	icmp_stack_t		*is = icmp->icmp_is;
4743 	conn_t			*connp = icmp->icmp_connp;
4744 	cred_t			*cr;
4745 
4746 	/*
4747 	 * All Solaris components should pass a db_credp
4748 	 * for this message, hence we ASSERT.
4749 	 * On production kernels we return an error to be robust against
4750 	 * random streams modules sitting on top of us.
4751 	 */
4752 	cr = msg_getcred(mp, NULL);
4753 	ASSERT(cr != NULL);
4754 	if (cr == NULL)
4755 		return (EINVAL);
4756 
4757 	err = tsol_compute_label_v6(cr, dst,
4758 	    opt_storage, connp->conn_mac_exempt,
4759 	    is->is_netstack->netstack_ip);
4760 	if (err == 0) {
4761 		err = tsol_update_sticky(&icmp->icmp_sticky_ipp,
4762 		    &icmp->icmp_label_len_v6, opt_storage);
4763 	}
4764 	if (err != 0) {
4765 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4766 		DTRACE_PROBE4(
4767 		    tx__ip__log__drop__updatelabel__icmp6,
4768 		    char *, "icmp(1) failed to update options(2) on mp(3)",
4769 		    icmp_t *, icmp, char *, opt_storage, mblk_t *, mp);
4770 		return (err);
4771 	}
4772 
4773 	icmp->icmp_v6lastdst = *dst;
4774 	return (0);
4775 }
4776 
4777 /*
4778  * raw_ip_send_data_v6():
4779  * Assumes that icmp_wput did some sanity checking on the destination
4780  * address, but that the label may not yet be correct.
4781  */
4782 static int
4783 raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp, sin6_t *sin6,
4784     ip6_pkt_t *ipp)
4785 {
4786 	ip6_t			*ip6h;
4787 	ip6i_t			*ip6i;	/* mp->b_rptr even if no ip6i_t */
4788 	int			ip_hdr_len = IPV6_HDR_LEN;
4789 	size_t			ip_len;
4790 	icmp_t			*icmp = connp->conn_icmp;
4791 	icmp_stack_t		*is = icmp->icmp_is;
4792 	ip6_pkt_t		*tipp;
4793 	uint32_t		csum = 0;
4794 	uint_t			ignore = 0;
4795 	uint_t			option_exists = 0, is_sticky = 0;
4796 	uint8_t			*cp;
4797 	uint8_t			*nxthdr_ptr;
4798 	in6_addr_t		ip6_dst;
4799 
4800 	/*
4801 	 * If the local address is a mapped address return
4802 	 * an error.
4803 	 * It would be possible to send an IPv6 packet but the
4804 	 * response would never make it back to the application
4805 	 * since it is bound to a mapped address.
4806 	 */
4807 	if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) {
4808 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
4809 		return (EADDRNOTAVAIL);
4810 	}
4811 
4812 	ignore = ipp->ipp_sticky_ignored;
4813 	if (sin6->sin6_scope_id != 0 &&
4814 	    IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
4815 		/*
4816 		 * IPPF_SCOPE_ID is special.  It's neither a sticky
4817 		 * option nor ancillary data.  It needs to be
4818 		 * explicitly set in options_exists.
4819 		 */
4820 		option_exists |= IPPF_SCOPE_ID;
4821 	}
4822 
4823 	/*
4824 	 * Compute the destination address
4825 	 */
4826 	ip6_dst = sin6->sin6_addr;
4827 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
4828 		ip6_dst = ipv6_loopback;
4829 
4830 	/*
4831 	 * If we're not going to the same destination as last time, then
4832 	 * recompute the label required.  This is done in a separate routine to
4833 	 * avoid blowing up our stack here.
4834 	 */
4835 	if (is_system_labeled() &&
4836 	    !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst)) {
4837 		int error = 0;
4838 
4839 		error = icmp_update_label_v6(icmp, mp, &ip6_dst);
4840 		if (error != 0)
4841 			return (error);
4842 	}
4843 
4844 	/*
4845 	 * If there's a security label here, then we ignore any options the
4846 	 * user may try to set.  We keep the peer's label as a hidden sticky
4847 	 * option.
4848 	 */
4849 	if (icmp->icmp_label_len_v6 > 0) {
4850 		ignore &= ~IPPF_HOPOPTS;
4851 		ipp->ipp_fields &= ~IPPF_HOPOPTS;
4852 	}
4853 
4854 	if ((icmp->icmp_sticky_ipp.ipp_fields == 0) &&
4855 	    (ipp->ipp_fields == 0)) {
4856 		/* No sticky options nor ancillary data. */
4857 		goto no_options;
4858 	}
4859 
4860 	/*
4861 	 * Go through the options figuring out where each is going to
4862 	 * come from and build two masks.  The first mask indicates if
4863 	 * the option exists at all.  The second mask indicates if the
4864 	 * option is sticky or ancillary.
4865 	 */
4866 	if (!(ignore & IPPF_HOPOPTS)) {
4867 		if (ipp->ipp_fields & IPPF_HOPOPTS) {
4868 			option_exists |= IPPF_HOPOPTS;
4869 			ip_hdr_len += ipp->ipp_hopoptslen;
4870 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) {
4871 			option_exists |= IPPF_HOPOPTS;
4872 			is_sticky |= IPPF_HOPOPTS;
4873 			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen;
4874 		}
4875 	}
4876 
4877 	if (!(ignore & IPPF_RTHDR)) {
4878 		if (ipp->ipp_fields & IPPF_RTHDR) {
4879 			option_exists |= IPPF_RTHDR;
4880 			ip_hdr_len += ipp->ipp_rthdrlen;
4881 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) {
4882 			option_exists |= IPPF_RTHDR;
4883 			is_sticky |= IPPF_RTHDR;
4884 			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen;
4885 		}
4886 	}
4887 
4888 	if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) {
4889 		/*
4890 		 * Need to have a router header to use these.
4891 		 */
4892 		if (ipp->ipp_fields & IPPF_RTDSTOPTS) {
4893 			option_exists |= IPPF_RTDSTOPTS;
4894 			ip_hdr_len += ipp->ipp_rtdstoptslen;
4895 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) {
4896 			option_exists |= IPPF_RTDSTOPTS;
4897 			is_sticky |= IPPF_RTDSTOPTS;
4898 			ip_hdr_len +=
4899 			    icmp->icmp_sticky_ipp.ipp_rtdstoptslen;
4900 		}
4901 	}
4902 
4903 	if (!(ignore & IPPF_DSTOPTS)) {
4904 		if (ipp->ipp_fields & IPPF_DSTOPTS) {
4905 			option_exists |= IPPF_DSTOPTS;
4906 			ip_hdr_len += ipp->ipp_dstoptslen;
4907 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) {
4908 			option_exists |= IPPF_DSTOPTS;
4909 			is_sticky |= IPPF_DSTOPTS;
4910 			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen;
4911 		}
4912 	}
4913 
4914 	if (!(ignore & IPPF_IFINDEX)) {
4915 		if (ipp->ipp_fields & IPPF_IFINDEX) {
4916 			option_exists |= IPPF_IFINDEX;
4917 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) {
4918 			option_exists |= IPPF_IFINDEX;
4919 			is_sticky |= IPPF_IFINDEX;
4920 		}
4921 	}
4922 
4923 	if (!(ignore & IPPF_ADDR)) {
4924 		if (ipp->ipp_fields & IPPF_ADDR) {
4925 			option_exists |= IPPF_ADDR;
4926 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) {
4927 			option_exists |= IPPF_ADDR;
4928 			is_sticky |= IPPF_ADDR;
4929 		}
4930 	}
4931 
4932 	if (!(ignore & IPPF_DONTFRAG)) {
4933 		if (ipp->ipp_fields & IPPF_DONTFRAG) {
4934 			option_exists |= IPPF_DONTFRAG;
4935 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) {
4936 			option_exists |= IPPF_DONTFRAG;
4937 			is_sticky |= IPPF_DONTFRAG;
4938 		}
4939 	}
4940 
4941 	if (!(ignore & IPPF_USE_MIN_MTU)) {
4942 		if (ipp->ipp_fields & IPPF_USE_MIN_MTU) {
4943 			option_exists |= IPPF_USE_MIN_MTU;
4944 		} else if (icmp->icmp_sticky_ipp.ipp_fields &
4945 		    IPPF_USE_MIN_MTU) {
4946 			option_exists |= IPPF_USE_MIN_MTU;
4947 			is_sticky |= IPPF_USE_MIN_MTU;
4948 		}
4949 	}
4950 
4951 	if (!(ignore & IPPF_NEXTHOP)) {
4952 		if (ipp->ipp_fields & IPPF_NEXTHOP) {
4953 			option_exists |= IPPF_NEXTHOP;
4954 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) {
4955 			option_exists |= IPPF_NEXTHOP;
4956 			is_sticky |= IPPF_NEXTHOP;
4957 		}
4958 	}
4959 
4960 	if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT))
4961 		option_exists |= IPPF_HOPLIMIT;
4962 	/* IPV6_HOPLIMIT can never be sticky */
4963 	ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT));
4964 
4965 	if (!(ignore & IPPF_UNICAST_HOPS) &&
4966 	    (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) {
4967 		option_exists |= IPPF_UNICAST_HOPS;
4968 		is_sticky |= IPPF_UNICAST_HOPS;
4969 	}
4970 
4971 	if (!(ignore & IPPF_MULTICAST_HOPS) &&
4972 	    (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) {
4973 		option_exists |= IPPF_MULTICAST_HOPS;
4974 		is_sticky |= IPPF_MULTICAST_HOPS;
4975 	}
4976 
4977 	if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) {
4978 		/* This is a sticky socket option only */
4979 		option_exists |= IPPF_NO_CKSUM;
4980 		is_sticky |= IPPF_NO_CKSUM;
4981 	}
4982 
4983 	if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) {
4984 		/* This is a sticky socket option only */
4985 		option_exists |= IPPF_RAW_CKSUM;
4986 		is_sticky |= IPPF_RAW_CKSUM;
4987 	}
4988 
4989 	if (!(ignore & IPPF_TCLASS)) {
4990 		if (ipp->ipp_fields & IPPF_TCLASS) {
4991 			option_exists |= IPPF_TCLASS;
4992 		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) {
4993 			option_exists |= IPPF_TCLASS;
4994 			is_sticky |= IPPF_TCLASS;
4995 		}
4996 	}
4997 
4998 no_options:
4999 
5000 	/*
5001 	 * If any options carried in the ip6i_t were specified, we
5002 	 * need to account for the ip6i_t in the data we'll be sending
5003 	 * down.
5004 	 */
5005 	if (option_exists & IPPF_HAS_IP6I)
5006 		ip_hdr_len += sizeof (ip6i_t);
5007 
5008 	/* check/fix buffer config, setup pointers into it */
5009 	ip6h = (ip6_t *)&mp->b_rptr[-ip_hdr_len];
5010 	if ((mp->b_datap->db_ref != 1) ||
5011 	    ((unsigned char *)ip6h < mp->b_datap->db_base) ||
5012 	    !OK_32PTR(ip6h)) {
5013 		mblk_t	*mp1;
5014 
5015 		/* Try to get everything in a single mblk next time */
5016 		if (ip_hdr_len > icmp->icmp_max_hdr_len) {
5017 			icmp->icmp_max_hdr_len = ip_hdr_len;
5018 
5019 			(void) proto_set_tx_wroff(q == NULL ? NULL:RD(q), connp,
5020 			    icmp->icmp_max_hdr_len + is->is_wroff_extra);
5021 		}
5022 		mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO);
5023 		if (!mp1) {
5024 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
5025 			return (ENOMEM);
5026 		}
5027 		mp1->b_cont = mp;
5028 		mp1->b_wptr = mp1->b_datap->db_lim;
5029 		ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len);
5030 		mp = mp1;
5031 	}
5032 	mp->b_rptr = (unsigned char *)ip6h;
5033 	ip6i = (ip6i_t *)ip6h;
5034 
5035 #define	ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &icmp->icmp_sticky_ipp : ipp)
5036 	if (option_exists & IPPF_HAS_IP6I) {
5037 		ip6h = (ip6_t *)&ip6i[1];
5038 		ip6i->ip6i_flags = 0;
5039 		ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
5040 
5041 		/* sin6_scope_id takes precendence over IPPF_IFINDEX */
5042 		if (option_exists & IPPF_SCOPE_ID) {
5043 			ip6i->ip6i_flags |= IP6I_IFINDEX;
5044 			ip6i->ip6i_ifindex = sin6->sin6_scope_id;
5045 		} else if (option_exists & IPPF_IFINDEX) {
5046 			tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX);
5047 			ASSERT(tipp->ipp_ifindex != 0);
5048 			ip6i->ip6i_flags |= IP6I_IFINDEX;
5049 			ip6i->ip6i_ifindex = tipp->ipp_ifindex;
5050 		}
5051 
5052 		if (option_exists & IPPF_RAW_CKSUM) {
5053 			ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM;
5054 			ip6i->ip6i_checksum_off = icmp->icmp_checksum_off;
5055 		}
5056 
5057 		if (option_exists & IPPF_NO_CKSUM) {
5058 			ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM;
5059 		}
5060 
5061 		if (option_exists & IPPF_ADDR) {
5062 			/*
5063 			 * Enable per-packet source address verification if
5064 			 * IPV6_PKTINFO specified the source address.
5065 			 * ip6_src is set in the transport's _wput function.
5066 			 */
5067 			ip6i->ip6i_flags |= IP6I_VERIFY_SRC;
5068 		}
5069 
5070 		if (option_exists & IPPF_DONTFRAG) {
5071 			ip6i->ip6i_flags |= IP6I_DONTFRAG;
5072 		}
5073 
5074 		if (option_exists & IPPF_USE_MIN_MTU) {
5075 			ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU(
5076 			    ip6i->ip6i_flags, ipp->ipp_use_min_mtu);
5077 		}
5078 
5079 		if (option_exists & IPPF_NEXTHOP) {
5080 			tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP);
5081 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop));
5082 			ip6i->ip6i_flags |= IP6I_NEXTHOP;
5083 			ip6i->ip6i_nexthop = tipp->ipp_nexthop;
5084 		}
5085 
5086 		/*
5087 		 * tell IP this is an ip6i_t private header
5088 		 */
5089 		ip6i->ip6i_nxt = IPPROTO_RAW;
5090 	}
5091 
5092 	/* Initialize IPv6 header */
5093 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
5094 	bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src));
5095 
5096 	/* Set the hoplimit of the outgoing packet. */
5097 	if (option_exists & IPPF_HOPLIMIT) {
5098 		/* IPV6_HOPLIMIT ancillary data overrides all other settings. */
5099 		ip6h->ip6_hops = ipp->ipp_hoplimit;
5100 		ip6i->ip6i_flags |= IP6I_HOPLIMIT;
5101 	} else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
5102 		ip6h->ip6_hops = icmp->icmp_multicast_ttl;
5103 		if (option_exists & IPPF_MULTICAST_HOPS)
5104 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
5105 	} else {
5106 		ip6h->ip6_hops = icmp->icmp_ttl;
5107 		if (option_exists & IPPF_UNICAST_HOPS)
5108 			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
5109 	}
5110 
5111 	if (option_exists & IPPF_ADDR) {
5112 		tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR);
5113 		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr));
5114 		ip6h->ip6_src = tipp->ipp_addr;
5115 	} else {
5116 		/*
5117 		 * The source address was not set using IPV6_PKTINFO.
5118 		 * First look at the bound source.
5119 		 * If unspecified fallback to __sin6_src_id.
5120 		 */
5121 		ip6h->ip6_src = icmp->icmp_v6src;
5122 		if (sin6->__sin6_src_id != 0 &&
5123 		    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
5124 			ip_srcid_find_id(sin6->__sin6_src_id,
5125 			    &ip6h->ip6_src, icmp->icmp_zoneid,
5126 			    is->is_netstack);
5127 		}
5128 	}
5129 
5130 	nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
5131 	cp = (uint8_t *)&ip6h[1];
5132 
5133 	/*
5134 	 * Here's where we have to start stringing together
5135 	 * any extension headers in the right order:
5136 	 * Hop-by-hop, destination, routing, and final destination opts.
5137 	 */
5138 	if (option_exists & IPPF_HOPOPTS) {
5139 		/* Hop-by-hop options */
5140 		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
5141 		tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS);
5142 
5143 		*nxthdr_ptr = IPPROTO_HOPOPTS;
5144 		nxthdr_ptr = &hbh->ip6h_nxt;
5145 
5146 		bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen);
5147 		cp += tipp->ipp_hopoptslen;
5148 	}
5149 	/*
5150 	 * En-route destination options
5151 	 * Only do them if there's a routing header as well
5152 	 */
5153 	if (option_exists & IPPF_RTDSTOPTS) {
5154 		ip6_dest_t *dst = (ip6_dest_t *)cp;
5155 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS);
5156 
5157 		*nxthdr_ptr = IPPROTO_DSTOPTS;
5158 		nxthdr_ptr = &dst->ip6d_nxt;
5159 
5160 		bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen);
5161 		cp += tipp->ipp_rtdstoptslen;
5162 	}
5163 	/*
5164 	 * Routing header next
5165 	 */
5166 	if (option_exists & IPPF_RTHDR) {
5167 		ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
5168 		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR);
5169 
5170 		*nxthdr_ptr = IPPROTO_ROUTING;
5171 		nxthdr_ptr = &rt->ip6r_nxt;
5172 
5173 		bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen);
5174 		cp += tipp->ipp_rthdrlen;
5175 	}
5176 	/*
5177 	 * Do ultimate destination options
5178 	 */
5179 	if (option_exists & IPPF_DSTOPTS) {
5180 		ip6_dest_t *dest = (ip6_dest_t *)cp;
5181 		tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS);
5182 
5183 		*nxthdr_ptr = IPPROTO_DSTOPTS;
5184 		nxthdr_ptr = &dest->ip6d_nxt;
5185 
5186 		bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen);
5187 		cp += tipp->ipp_dstoptslen;
5188 	}
5189 
5190 	/*
5191 	 * Now set the last header pointer to the proto passed in
5192 	 */
5193 	ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len);
5194 	*nxthdr_ptr = icmp->icmp_proto;
5195 
5196 	/*
5197 	 * Copy in the destination address
5198 	 */
5199 	ip6h->ip6_dst = ip6_dst;
5200 
5201 	ip6h->ip6_vcf =
5202 	    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
5203 	    (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
5204 
5205 	if (option_exists & IPPF_TCLASS) {
5206 		tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS);
5207 		ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
5208 		    tipp->ipp_tclass);
5209 	}
5210 	if (option_exists & IPPF_RTHDR) {
5211 		ip6_rthdr_t	*rth;
5212 
5213 		/*
5214 		 * Perform any processing needed for source routing.
5215 		 * We know that all extension headers will be in the same mblk
5216 		 * as the IPv6 header.
5217 		 */
5218 		rth = ip_find_rthdr_v6(ip6h, mp->b_wptr);
5219 		if (rth != NULL && rth->ip6r_segleft != 0) {
5220 			if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) {
5221 				/*
5222 				 * Drop packet - only support Type 0 routing.
5223 				 * Notify the application as well.
5224 				 */
5225 				BUMP_MIB(&is->is_rawip_mib,
5226 				    rawipOutErrors);
5227 				return (EPROTO);
5228 			}
5229 			/*
5230 			 * rth->ip6r_len is twice the number of
5231 			 * addresses in the header
5232 			 */
5233 			if (rth->ip6r_len & 0x1) {
5234 				BUMP_MIB(&is->is_rawip_mib,
5235 				    rawipOutErrors);
5236 				return (EPROTO);
5237 			}
5238 			/*
5239 			 * Shuffle the routing header and ip6_dst
5240 			 * addresses, and get the checksum difference
5241 			 * between the first hop (in ip6_dst) and
5242 			 * the destination (in the last routing hdr entry).
5243 			 */
5244 			csum = ip_massage_options_v6(ip6h, rth,
5245 			    is->is_netstack);
5246 			/*
5247 			 * Verify that the first hop isn't a mapped address.
5248 			 * Routers along the path need to do this verification
5249 			 * for subsequent hops.
5250 			 */
5251 			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
5252 				BUMP_MIB(&is->is_rawip_mib,
5253 				    rawipOutErrors);
5254 				return (EADDRNOTAVAIL);
5255 			}
5256 		}
5257 	}
5258 
5259 	ip_len = mp->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN;
5260 	if (mp->b_cont != NULL)
5261 		ip_len += msgdsize(mp->b_cont);
5262 
5263 	/*
5264 	 * Set the length into the IP header.
5265 	 * If the length is greater than the maximum allowed by IP,
5266 	 * then free the message and return. Do not try and send it
5267 	 * as this can cause problems in layers below.
5268 	 */
5269 	if (ip_len > IP_MAXPACKET) {
5270 		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
5271 		return (EMSGSIZE);
5272 	}
5273 	if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) {
5274 		uint_t	cksum_off;	/* From ip6i == mp->b_rptr */
5275 		uint16_t *cksum_ptr;
5276 		uint_t	ext_hdrs_len;
5277 
5278 		/* ICMPv6 must have an offset matching icmp6_cksum offset */
5279 		ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 ||
5280 		    icmp->icmp_checksum_off == 2);
5281 
5282 		/*
5283 		 * We make it easy for IP to include our pseudo header
5284 		 * by putting our length in uh_checksum, modified (if
5285 		 * we have a routing header) by the checksum difference
5286 		 * between the ultimate destination and first hop addresses.
5287 		 * Note: ICMPv6 must always checksum the packet.
5288 		 */
5289 		cksum_off = ip_hdr_len + icmp->icmp_checksum_off;
5290 		if (cksum_off + sizeof (uint16_t) > mp->b_wptr - mp->b_rptr) {
5291 			if (!pullupmsg(mp, cksum_off + sizeof (uint16_t))) {
5292 				BUMP_MIB(&is->is_rawip_mib,
5293 				    rawipOutErrors);
5294 				freemsg(mp);
5295 				return (0);
5296 			}
5297 			ip6i = (ip6i_t *)mp->b_rptr;
5298 			if (ip6i->ip6i_nxt == IPPROTO_RAW)
5299 				ip6h = (ip6_t *)&ip6i[1];
5300 			else
5301 				ip6h = (ip6_t *)ip6i;
5302 		}
5303 		/* Add payload length to checksum */
5304 		ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN -
5305 		    (int)((uchar_t *)ip6h - (uchar_t *)ip6i);
5306 		csum += htons(ip_len - ext_hdrs_len);
5307 
5308 		cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off);
5309 		csum = (csum & 0xFFFF) + (csum >> 16);
5310 		*cksum_ptr = (uint16_t)csum;
5311 	}
5312 
5313 #ifdef _LITTLE_ENDIAN
5314 	ip_len = htons(ip_len);
5315 #endif
5316 	ip6h->ip6_plen = (uint16_t)ip_len;
5317 
5318 	/* We're done. Pass the packet to IP */
5319 	BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams);
5320 	ip_output_v6(icmp->icmp_connp, mp, q, IP_WPUT);
5321 	return (0);
5322 }
5323 
5324 static void
5325 icmp_wput_other(queue_t *q, mblk_t *mp)
5326 {
5327 	uchar_t	*rptr = mp->b_rptr;
5328 	struct iocblk *iocp;
5329 #define	tudr ((struct T_unitdata_req *)rptr)
5330 	conn_t	*connp = Q_TO_CONN(q);
5331 	icmp_t	*icmp = connp->conn_icmp;
5332 	icmp_stack_t *is = icmp->icmp_is;
5333 	cred_t *cr;
5334 
5335 	switch (mp->b_datap->db_type) {
5336 	case M_PROTO:
5337 	case M_PCPROTO:
5338 		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
5339 			/*
5340 			 * If the message does not contain a PRIM_type,
5341 			 * throw it away.
5342 			 */
5343 			freemsg(mp);
5344 			return;
5345 		}
5346 		switch (((union T_primitives *)rptr)->type) {
5347 		case T_ADDR_REQ:
5348 			icmp_addr_req(q, mp);
5349 			return;
5350 		case O_T_BIND_REQ:
5351 		case T_BIND_REQ:
5352 			icmp_tpi_bind(q, mp);
5353 			return;
5354 		case T_CONN_REQ:
5355 			icmp_tpi_connect(q, mp);
5356 			return;
5357 		case T_CAPABILITY_REQ:
5358 			icmp_capability_req(q, mp);
5359 			return;
5360 		case T_INFO_REQ:
5361 			icmp_info_req(q, mp);
5362 			return;
5363 		case T_UNITDATA_REQ:
5364 			/*
5365 			 * If a T_UNITDATA_REQ gets here, the address must
5366 			 * be bad.  Valid T_UNITDATA_REQs are found above
5367 			 * and break to below this switch.
5368 			 */
5369 			icmp_ud_err(q, mp, EADDRNOTAVAIL);
5370 			return;
5371 		case T_UNBIND_REQ:
5372 			icmp_tpi_unbind(q, mp);
5373 			return;
5374 
5375 		case T_SVR4_OPTMGMT_REQ:
5376 			/*
5377 			 * All Solaris components should pass a db_credp
5378 			 * for this TPI message, hence we ASSERT.
5379 			 * But in case there is some other M_PROTO that looks
5380 			 * like a TPI message sent by some other kernel
5381 			 * component, we check and return an error.
5382 			 */
5383 			cr = msg_getcred(mp, NULL);
5384 			ASSERT(cr != NULL);
5385 			if (cr == NULL) {
5386 				icmp_err_ack(q, mp, TSYSERR, EINVAL);
5387 				return;
5388 			}
5389 
5390 			if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get,
5391 			    cr)) {
5392 				/* Only IP can return anything meaningful */
5393 				(void) svr4_optcom_req(q, mp, cr,
5394 				    &icmp_opt_obj, B_TRUE);
5395 			}
5396 			return;
5397 
5398 		case T_OPTMGMT_REQ:
5399 			/*
5400 			 * All Solaris components should pass a db_credp
5401 			 * for this TPI message, hence we ASSERT.
5402 			 * But in case there is some other M_PROTO that looks
5403 			 * like a TPI message sent by some other kernel
5404 			 * component, we check and return an error.
5405 			 */
5406 			cr = msg_getcred(mp, NULL);
5407 			ASSERT(cr != NULL);
5408 			if (cr == NULL) {
5409 				icmp_err_ack(q, mp, TSYSERR, EINVAL);
5410 				return;
5411 			}
5412 			/* Only IP can return anything meaningful */
5413 			(void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE);
5414 			return;
5415 
5416 		case T_DISCON_REQ:
5417 			icmp_tpi_disconnect(q, mp);
5418 			return;
5419 
5420 		/* The following TPI message is not supported by icmp. */
5421 		case O_T_CONN_RES:
5422 		case T_CONN_RES:
5423 			icmp_err_ack(q, mp, TNOTSUPPORT, 0);
5424 			return;
5425 
5426 		/* The following 3 TPI requests are illegal for icmp. */
5427 		case T_DATA_REQ:
5428 		case T_EXDATA_REQ:
5429 		case T_ORDREL_REQ:
5430 			freemsg(mp);
5431 			(void) putctl1(RD(q), M_ERROR, EPROTO);
5432 			return;
5433 		default:
5434 			break;
5435 		}
5436 		break;
5437 	case M_IOCTL:
5438 		iocp = (struct iocblk *)mp->b_rptr;
5439 		switch (iocp->ioc_cmd) {
5440 		case TI_GETPEERNAME:
5441 			if (icmp->icmp_state != TS_DATA_XFER) {
5442 				/*
5443 				 * If a default destination address has not
5444 				 * been associated with the stream, then we
5445 				 * don't know the peer's name.
5446 				 */
5447 				iocp->ioc_error = ENOTCONN;
5448 		err_ret:;
5449 				iocp->ioc_count = 0;
5450 				mp->b_datap->db_type = M_IOCACK;
5451 				qreply(q, mp);
5452 				return;
5453 			}
5454 			/* FALLTHRU */
5455 		case TI_GETMYNAME:
5456 			/*
5457 			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
5458 			 * need to copyin the user's strbuf structure.
5459 			 * Processing will continue in the M_IOCDATA case
5460 			 * below.
5461 			 */
5462 			mi_copyin(q, mp, NULL,
5463 			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
5464 			return;
5465 		case ND_SET:
5466 			/* nd_getset performs the necessary error checking */
5467 		case ND_GET:
5468 			if (nd_getset(q, is->is_nd, mp)) {
5469 				qreply(q, mp);
5470 				return;
5471 			}
5472 			break;
5473 		case _SIOCSOCKFALLBACK:
5474 			/*
5475 			 * socket is falling back to be a
5476 			 * streams socket. Nothing  to do
5477 			 */
5478 			iocp->ioc_count = 0;
5479 			iocp->ioc_rval = 0;
5480 			qreply(q, mp);
5481 			return;
5482 		default:
5483 			break;
5484 		}
5485 		break;
5486 	case M_IOCDATA:
5487 		icmp_wput_iocdata(q, mp);
5488 		return;
5489 	default:
5490 		break;
5491 	}
5492 	ip_wput(q, mp);
5493 }
5494 
5495 /*
5496  * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA
5497  * messages.
5498  */
5499 static void
5500 icmp_wput_iocdata(queue_t *q, mblk_t *mp)
5501 {
5502 	mblk_t	*mp1;
5503 	STRUCT_HANDLE(strbuf, sb);
5504 	icmp_t	*icmp;
5505 	uint_t	addrlen;
5506 	uint_t	error;
5507 
5508 	/* Make sure it is one of ours. */
5509 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
5510 	case TI_GETMYNAME:
5511 	case TI_GETPEERNAME:
5512 		break;
5513 	default:
5514 		icmp = Q_TO_ICMP(q);
5515 		ip_output(icmp->icmp_connp, mp, q, IP_WPUT);
5516 		return;
5517 	}
5518 	switch (mi_copy_state(q, mp, &mp1)) {
5519 	case -1:
5520 		return;
5521 	case MI_COPY_CASE(MI_COPY_IN, 1):
5522 		break;
5523 	case MI_COPY_CASE(MI_COPY_OUT, 1):
5524 		/*
5525 		 * The address has been copied out, so now
5526 		 * copyout the strbuf.
5527 		 */
5528 		mi_copyout(q, mp);
5529 		return;
5530 	case MI_COPY_CASE(MI_COPY_OUT, 2):
5531 		/*
5532 		 * The address and strbuf have been copied out.
5533 		 * We're done, so just acknowledge the original
5534 		 * M_IOCTL.
5535 		 */
5536 		mi_copy_done(q, mp, 0);
5537 		return;
5538 	default:
5539 		/*
5540 		 * Something strange has happened, so acknowledge
5541 		 * the original M_IOCTL with an EPROTO error.
5542 		 */
5543 		mi_copy_done(q, mp, EPROTO);
5544 		return;
5545 	}
5546 	/*
5547 	 * Now we have the strbuf structure for TI_GETMYNAME
5548 	 * and TI_GETPEERNAME.  Next we copyout the requested
5549 	 * address and then we'll copyout the strbuf.
5550 	 */
5551 	STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag,
5552 	    (void *)mp1->b_rptr);
5553 	icmp = Q_TO_ICMP(q);
5554 	if (icmp->icmp_family == AF_INET)
5555 		addrlen = sizeof (sin_t);
5556 	else
5557 		addrlen = sizeof (sin6_t);
5558 
5559 	if (STRUCT_FGET(sb, maxlen) < addrlen) {
5560 		mi_copy_done(q, mp, EINVAL);
5561 		return;
5562 	}
5563 
5564 	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
5565 
5566 	if (mp1 == NULL)
5567 		return;
5568 
5569 	rw_enter(&icmp->icmp_rwlock, RW_READER);
5570 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
5571 	case TI_GETMYNAME:
5572 		error = rawip_do_getsockname(icmp, (void *)mp1->b_rptr,
5573 		    &addrlen);
5574 		break;
5575 	case TI_GETPEERNAME:
5576 		error = rawip_do_getpeername(icmp, (void *)mp1->b_rptr,
5577 		    &addrlen);
5578 		break;
5579 	}
5580 	rw_exit(&icmp->icmp_rwlock);
5581 
5582 	if (error != 0) {
5583 		mi_copy_done(q, mp, error);
5584 	} else {
5585 		mp1->b_wptr += addrlen;
5586 		STRUCT_FSET(sb, len, addrlen);
5587 
5588 		/* Copy out the address */
5589 		mi_copyout(q, mp);
5590 	}
5591 }
5592 
5593 static int
5594 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
5595     void *thisdg_attrs)
5596 {
5597 	struct T_unitdata_req *udreqp;
5598 	int is_absreq_failure;
5599 	cred_t *cr;
5600 
5601 	udreqp = (struct T_unitdata_req *)mp->b_rptr;
5602 	*errorp = 0;
5603 
5604 	/*
5605 	 * All Solaris components should pass a db_credp
5606 	 * for this TPI message, hence we ASSERT.
5607 	 * But in case there is some other M_PROTO that looks
5608 	 * like a TPI message sent by some other kernel
5609 	 * component, we check and return an error.
5610 	 */
5611 	cr = msg_getcred(mp, NULL);
5612 	ASSERT(cr != NULL);
5613 	if (cr == NULL)
5614 		return (-1);
5615 
5616 	*errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length,
5617 	    udreqp->OPT_offset, cr, &icmp_opt_obj,
5618 	    thisdg_attrs, &is_absreq_failure);
5619 
5620 	if (*errorp != 0) {
5621 		/*
5622 		 * Note: No special action needed in this
5623 		 * module for "is_absreq_failure"
5624 		 */
5625 		return (-1);		/* failure */
5626 	}
5627 	ASSERT(is_absreq_failure == 0);
5628 	return (0);	/* success */
5629 }
5630 
5631 void
5632 icmp_ddi_g_init(void)
5633 {
5634 	icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr,
5635 	    icmp_opt_obj.odb_opt_arr_cnt);
5636 
5637 	/*
5638 	 * We want to be informed each time a stack is created or
5639 	 * destroyed in the kernel, so we can maintain the
5640 	 * set of icmp_stack_t's.
5641 	 */
5642 	netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini);
5643 }
5644 
5645 void
5646 icmp_ddi_g_destroy(void)
5647 {
5648 	netstack_unregister(NS_ICMP);
5649 }
5650 
5651 #define	INET_NAME	"ip"
5652 
5653 /*
5654  * Initialize the ICMP stack instance.
5655  */
5656 static void *
5657 rawip_stack_init(netstackid_t stackid, netstack_t *ns)
5658 {
5659 	icmp_stack_t	*is;
5660 	icmpparam_t	*pa;
5661 	int		error = 0;
5662 	major_t		major;
5663 
5664 	is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP);
5665 	is->is_netstack = ns;
5666 
5667 	pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP);
5668 	is->is_param_arr = pa;
5669 	bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr));
5670 
5671 	(void) icmp_param_register(&is->is_nd,
5672 	    is->is_param_arr, A_CNT(icmp_param_arr));
5673 	is->is_ksp = rawip_kstat_init(stackid);
5674 
5675 	major = mod_name_to_major(INET_NAME);
5676 	error = ldi_ident_from_major(major, &is->is_ldi_ident);
5677 	ASSERT(error == 0);
5678 	return (is);
5679 }
5680 
5681 /*
5682  * Free the ICMP stack instance.
5683  */
5684 static void
5685 rawip_stack_fini(netstackid_t stackid, void *arg)
5686 {
5687 	icmp_stack_t *is = (icmp_stack_t *)arg;
5688 
5689 	nd_free(&is->is_nd);
5690 	kmem_free(is->is_param_arr, sizeof (icmp_param_arr));
5691 	is->is_param_arr = NULL;
5692 
5693 	rawip_kstat_fini(stackid, is->is_ksp);
5694 	is->is_ksp = NULL;
5695 	ldi_ident_release(is->is_ldi_ident);
5696 	kmem_free(is, sizeof (*is));
5697 }
5698 
5699 static void *
5700 rawip_kstat_init(netstackid_t stackid) {
5701 	kstat_t	*ksp;
5702 
5703 	rawip_named_kstat_t template = {
5704 		{ "inDatagrams",	KSTAT_DATA_UINT32, 0 },
5705 		{ "inCksumErrs",	KSTAT_DATA_UINT32, 0 },
5706 		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
5707 		{ "outDatagrams",	KSTAT_DATA_UINT32, 0 },
5708 		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
5709 	};
5710 
5711 	ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2",
5712 					KSTAT_TYPE_NAMED,
5713 					NUM_OF_FIELDS(rawip_named_kstat_t),
5714 					0, stackid);
5715 	if (ksp == NULL || ksp->ks_data == NULL)
5716 		return (NULL);
5717 
5718 	bcopy(&template, ksp->ks_data, sizeof (template));
5719 	ksp->ks_update = rawip_kstat_update;
5720 	ksp->ks_private = (void *)(uintptr_t)stackid;
5721 
5722 	kstat_install(ksp);
5723 	return (ksp);
5724 }
5725 
5726 static void
5727 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp)
5728 {
5729 	if (ksp != NULL) {
5730 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
5731 		kstat_delete_netstack(ksp, stackid);
5732 	}
5733 }
5734 
5735 static int
5736 rawip_kstat_update(kstat_t *ksp, int rw)
5737 {
5738 	rawip_named_kstat_t *rawipkp;
5739 	netstackid_t	stackid = (netstackid_t)(uintptr_t)ksp->ks_private;
5740 	netstack_t	*ns;
5741 	icmp_stack_t	*is;
5742 
5743 	if ((ksp == NULL) || (ksp->ks_data == NULL))
5744 		return (EIO);
5745 
5746 	if (rw == KSTAT_WRITE)
5747 		return (EACCES);
5748 
5749 	rawipkp = (rawip_named_kstat_t *)ksp->ks_data;
5750 
5751 	ns = netstack_find_by_stackid(stackid);
5752 	if (ns == NULL)
5753 		return (-1);
5754 	is = ns->netstack_icmp;
5755 	if (is == NULL) {
5756 		netstack_rele(ns);
5757 		return (-1);
5758 	}
5759 	rawipkp->inDatagrams.value.ui32 =  is->is_rawip_mib.rawipInDatagrams;
5760 	rawipkp->inCksumErrs.value.ui32 =  is->is_rawip_mib.rawipInCksumErrs;
5761 	rawipkp->inErrors.value.ui32 =	   is->is_rawip_mib.rawipInErrors;
5762 	rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams;
5763 	rawipkp->outErrors.value.ui32 =	   is->is_rawip_mib.rawipOutErrors;
5764 	netstack_rele(ns);
5765 	return (0);
5766 }
5767 
5768 /* ARGSUSED */
5769 int
5770 rawip_accept(sock_lower_handle_t lproto_handle,
5771     sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
5772     cred_t *cr)
5773 {
5774 	return (EOPNOTSUPP);
5775 }
5776 
5777 /* ARGSUSED */
5778 int
5779 rawip_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
5780     socklen_t len, cred_t *cr)
5781 {
5782 	conn_t  *connp = (conn_t *)proto_handle;
5783 	int error;
5784 
5785 	/* All Solaris components should pass a cred for this operation. */
5786 	ASSERT(cr != NULL);
5787 
5788 	/* Binding to a NULL address really means unbind */
5789 	if (sa == NULL)
5790 		error = rawip_do_unbind(connp);
5791 	else
5792 		error = rawip_do_bind(connp, sa, len);
5793 
5794 	if (error < 0) {
5795 		if (error == -TOUTSTATE)
5796 			error = EINVAL;
5797 		else
5798 			error = proto_tlitosyserr(-error);
5799 	}
5800 	return (error);
5801 }
5802 
5803 static int
5804 rawip_implicit_bind(conn_t *connp)
5805 {
5806 	sin6_t sin6addr;
5807 	sin_t *sin;
5808 	sin6_t *sin6;
5809 	socklen_t len;
5810 	int error;
5811 
5812 	if (connp->conn_icmp->icmp_family == AF_INET) {
5813 		len = sizeof (struct sockaddr_in);
5814 		sin = (sin_t *)&sin6addr;
5815 		*sin = sin_null;
5816 		sin->sin_family = AF_INET;
5817 		sin->sin_addr.s_addr = INADDR_ANY;
5818 	} else {
5819 		ASSERT(connp->conn_icmp->icmp_family == AF_INET6);
5820 		len = sizeof (sin6_t);
5821 		sin6 = (sin6_t *)&sin6addr;
5822 		*sin6 = sin6_null;
5823 		sin6->sin6_family = AF_INET6;
5824 		V6_SET_ZERO(sin6->sin6_addr);
5825 	}
5826 
5827 	error = rawip_do_bind(connp, (struct sockaddr *)&sin6addr, len);
5828 
5829 	return ((error < 0) ? proto_tlitosyserr(-error) : error);
5830 }
5831 
5832 static int
5833 rawip_unbind(conn_t *connp)
5834 {
5835 	int error;
5836 
5837 	error = rawip_do_unbind(connp);
5838 	if (error < 0) {
5839 		error = proto_tlitosyserr(-error);
5840 	}
5841 	return (error);
5842 }
5843 
5844 /* ARGSUSED */
5845 int
5846 rawip_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
5847 {
5848 	return (EOPNOTSUPP);
5849 }
5850 
5851 /* ARGSUSED */
5852 int
5853 rawip_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
5854     socklen_t len, sock_connid_t *id, cred_t *cr)
5855 {
5856 	conn_t	*connp = (conn_t *)proto_handle;
5857 	icmp_t *icmp = connp->conn_icmp;
5858 	int	error;
5859 	boolean_t did_bind = B_FALSE;
5860 
5861 	/* All Solaris components should pass a cred for this operation. */
5862 	ASSERT(cr != NULL);
5863 
5864 	if (sa == NULL) {
5865 		/*
5866 		 * Disconnect
5867 		 * Make sure we are connected
5868 		 */
5869 		if (icmp->icmp_state != TS_DATA_XFER)
5870 			return (EINVAL);
5871 
5872 		error = icmp_disconnect(connp);
5873 		return (error);
5874 	}
5875 
5876 	error = proto_verify_ip_addr(icmp->icmp_family, sa, len);
5877 	if (error != 0)
5878 		return (error);
5879 
5880 	/* do an implicit bind if necessary */
5881 	if (icmp->icmp_state == TS_UNBND) {
5882 		error = rawip_implicit_bind(connp);
5883 		/*
5884 		 * We could be racing with an actual bind, in which case
5885 		 * we would see EPROTO. We cross our fingers and try
5886 		 * to connect.
5887 		 */
5888 		if (!(error == 0 || error == EPROTO))
5889 			return (error);
5890 		did_bind = B_TRUE;
5891 	}
5892 
5893 	/*
5894 	 * set SO_DGRAM_ERRIND
5895 	 */
5896 	icmp->icmp_dgram_errind = B_TRUE;
5897 
5898 	error = rawip_do_connect(connp, sa, len, cr);
5899 
5900 	if (error != 0 && did_bind) {
5901 		int unbind_err;
5902 
5903 		unbind_err = rawip_unbind(connp);
5904 		ASSERT(unbind_err == 0);
5905 	}
5906 
5907 	if (error == 0) {
5908 		*id = 0;
5909 		(*connp->conn_upcalls->su_connected)
5910 		    (connp->conn_upper_handle, 0, NULL, -1);
5911 	} else if (error < 0) {
5912 		error = proto_tlitosyserr(-error);
5913 	}
5914 	return (error);
5915 }
5916 
5917 /* ARGSUSED */
5918 int
5919 rawip_fallback(sock_lower_handle_t proto_handle, queue_t *q,
5920     boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb)
5921 {
5922 	conn_t  *connp = (conn_t *)proto_handle;
5923 	icmp_t	*icmp;
5924 	struct T_capability_ack tca;
5925 	struct sockaddr_in6 laddr, faddr;
5926 	socklen_t laddrlen, faddrlen;
5927 	short opts;
5928 	struct stroptions *stropt;
5929 	mblk_t *stropt_mp;
5930 	int error;
5931 
5932 	icmp = connp->conn_icmp;
5933 
5934 	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
5935 
5936 	/*
5937 	 * setup the fallback stream that was allocated
5938 	 */
5939 	connp->conn_dev = (dev_t)RD(q)->q_ptr;
5940 	connp->conn_minor_arena = WR(q)->q_ptr;
5941 
5942 	RD(q)->q_ptr = WR(q)->q_ptr = connp;
5943 
5944 	WR(q)->q_qinfo = &icmpwinit;
5945 
5946 	connp->conn_rq = RD(q);
5947 	connp->conn_wq = WR(q);
5948 
5949 	/* Notify stream head about options before sending up data */
5950 	stropt_mp->b_datap->db_type = M_SETOPTS;
5951 	stropt_mp->b_wptr += sizeof (*stropt);
5952 	stropt = (struct stroptions *)stropt_mp->b_rptr;
5953 	stropt->so_flags = SO_WROFF | SO_HIWAT;
5954 	stropt->so_wroff =
5955 	    (ushort_t)(icmp->icmp_max_hdr_len + icmp->icmp_is->is_wroff_extra);
5956 	stropt->so_hiwat = icmp->icmp_recv_hiwat;
5957 	putnext(RD(q), stropt_mp);
5958 
5959 	/*
5960 	 * free helper stream
5961 	 */
5962 	ip_free_helper_stream(connp);
5963 
5964 	/*
5965 	 * Collect the information needed to sync with the sonode
5966 	 */
5967 	icmp_do_capability_ack(icmp, &tca, TC1_INFO);
5968 
5969 	laddrlen = faddrlen = sizeof (sin6_t);
5970 	(void) rawip_getsockname((sock_lower_handle_t)connp,
5971 	    (struct sockaddr *)&laddr, &laddrlen, CRED());
5972 	error = rawip_getpeername((sock_lower_handle_t)connp,
5973 	    (struct sockaddr *)&faddr, &faddrlen, CRED());
5974 	if (error != 0)
5975 		faddrlen = 0;
5976 	opts = 0;
5977 	if (icmp->icmp_dgram_errind)
5978 		opts |= SO_DGRAM_ERRIND;
5979 	if (icmp->icmp_dontroute)
5980 		opts |= SO_DONTROUTE;
5981 
5982 	(*quiesced_cb)(connp->conn_upper_handle, q, &tca,
5983 	    (struct sockaddr *)&laddr, laddrlen,
5984 	    (struct sockaddr *)&faddr, faddrlen, opts);
5985 
5986 	/*
5987 	 * Attempts to send data up during fallback will result in it being
5988 	 * queued in udp_t. Now we push up any queued packets.
5989 	 */
5990 	mutex_enter(&icmp->icmp_recv_lock);
5991 	while (icmp->icmp_fallback_queue_head != NULL) {
5992 		mblk_t	*mp;
5993 
5994 		mp = icmp->icmp_fallback_queue_head;
5995 		icmp->icmp_fallback_queue_head = mp->b_next;
5996 		mp->b_next = NULL;
5997 		mutex_exit(&icmp->icmp_recv_lock);
5998 		putnext(RD(q), mp);
5999 		mutex_enter(&icmp->icmp_recv_lock);
6000 	}
6001 	icmp->icmp_fallback_queue_tail = icmp->icmp_fallback_queue_head;
6002 
6003 	/*
6004 	 * No longer a streams less socket
6005 	 */
6006 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
6007 	connp->conn_flags &= ~IPCL_NONSTR;
6008 	rw_exit(&icmp->icmp_rwlock);
6009 
6010 	mutex_exit(&icmp->icmp_recv_lock);
6011 
6012 	ASSERT(icmp->icmp_fallback_queue_head == NULL &&
6013 	    icmp->icmp_fallback_queue_tail == NULL);
6014 
6015 	ASSERT(connp->conn_ref >= 1);
6016 
6017 	return (0);
6018 }
6019 
6020 /* ARGSUSED */
6021 sock_lower_handle_t
6022 rawip_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
6023     uint_t *smodep, int *errorp, int flags, cred_t *credp)
6024 {
6025 	conn_t *connp;
6026 
6027 	if (type != SOCK_RAW || (family != AF_INET && family != AF_INET6)) {
6028 		*errorp = EPROTONOSUPPORT;
6029 		return (NULL);
6030 	}
6031 
6032 	connp = icmp_open(family, credp, errorp, flags);
6033 	if (connp != NULL) {
6034 		icmp_stack_t *is;
6035 
6036 		is = connp->conn_icmp->icmp_is;
6037 		connp->conn_flags |= IPCL_NONSTR;
6038 
6039 		if (connp->conn_icmp->icmp_family == AF_INET6) {
6040 			/* Build initial header template for transmit */
6041 			rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER);
6042 			if ((*errorp =
6043 			    icmp_build_hdrs(connp->conn_icmp)) != 0) {
6044 				rw_exit(&connp->conn_icmp->icmp_rwlock);
6045 				ipcl_conn_destroy(connp);
6046 				return (NULL);
6047 			}
6048 			rw_exit(&connp->conn_icmp->icmp_rwlock);
6049 		}
6050 
6051 		connp->conn_icmp->icmp_recv_hiwat = is->is_recv_hiwat;
6052 		connp->conn_icmp->icmp_xmit_hiwat = is->is_xmit_hiwat;
6053 
6054 		if ((*errorp = ip_create_helper_stream(connp,
6055 		    is->is_ldi_ident)) != 0) {
6056 			cmn_err(CE_CONT, "create of IP helper stream failed\n");
6057 			(void) rawip_do_close(connp);
6058 			return (NULL);
6059 		}
6060 
6061 		mutex_enter(&connp->conn_lock);
6062 		connp->conn_state_flags &= ~CONN_INCIPIENT;
6063 		mutex_exit(&connp->conn_lock);
6064 		*sock_downcalls = &sock_rawip_downcalls;
6065 		*smodep = SM_ATOMIC;
6066 	} else {
6067 		ASSERT(*errorp != 0);
6068 	}
6069 
6070 	return ((sock_lower_handle_t)connp);
6071 }
6072 
6073 /* ARGSUSED */
6074 void
6075 rawip_activate(sock_lower_handle_t proto_handle,
6076     sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, int flags,
6077     cred_t *cr)
6078 {
6079 	conn_t 			*connp = (conn_t *)proto_handle;
6080 	icmp_stack_t 		*is = connp->conn_icmp->icmp_is;
6081 	struct sock_proto_props sopp;
6082 
6083 	/* All Solaris components should pass a cred for this operation. */
6084 	ASSERT(cr != NULL);
6085 
6086 	connp->conn_upcalls = sock_upcalls;
6087 	connp->conn_upper_handle = sock_handle;
6088 
6089 	sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
6090 	    SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
6091 	sopp.sopp_wroff = connp->conn_icmp->icmp_max_hdr_len +
6092 	    is->is_wroff_extra;
6093 	sopp.sopp_rxhiwat = is->is_recv_hiwat;
6094 	sopp.sopp_rxlowat = icmp_mod_info.mi_lowat;
6095 	sopp.sopp_maxblk = INFPSZ;
6096 	sopp.sopp_maxpsz = IP_MAXPACKET;
6097 	sopp.sopp_minpsz = (icmp_mod_info.mi_minpsz == 1) ? 0 :
6098 	    icmp_mod_info.mi_minpsz;
6099 
6100 	(*connp->conn_upcalls->su_set_proto_props)
6101 	    (connp->conn_upper_handle, &sopp);
6102 }
6103 
6104 static int
6105 rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp)
6106 {
6107 	sin_t	*sin = (sin_t *)sa;
6108 	sin6_t	*sin6 = (sin6_t *)sa;
6109 
6110 	ASSERT(icmp != NULL);
6111 	ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock));
6112 
6113 	switch (icmp->icmp_family) {
6114 	case AF_INET:
6115 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
6116 		if (*salenp < sizeof (sin_t))
6117 			return (EINVAL);
6118 
6119 		*salenp = sizeof (sin_t);
6120 		*sin = sin_null;
6121 		sin->sin_family = AF_INET;
6122 		if (icmp->icmp_state == TS_UNBND) {
6123 			break;
6124 		}
6125 
6126 		if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) &&
6127 		    !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
6128 			sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_v6src);
6129 		} else {
6130 			/*
6131 			 * INADDR_ANY
6132 			 * icmp_v6src is not set, we might be bound to
6133 			 * broadcast/multicast. Use icmp_bound_v6src as
6134 			 * local address instead (that could
6135 			 * also still be INADDR_ANY)
6136 			 */
6137 			sin->sin_addr.s_addr =
6138 			    V4_PART_OF_V6(icmp->icmp_bound_v6src);
6139 		}
6140 		break;
6141 	case AF_INET6:
6142 
6143 		if (*salenp < sizeof (sin6_t))
6144 			return (EINVAL);
6145 
6146 		*salenp = sizeof (sin6_t);
6147 		*sin6 = sin6_null;
6148 		sin6->sin6_family = AF_INET6;
6149 		if (icmp->icmp_state == TS_UNBND) {
6150 			break;
6151 		}
6152 		if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) {
6153 			sin6->sin6_addr = icmp->icmp_v6src;
6154 		} else {
6155 			/*
6156 			 * UNSPECIFIED
6157 			 * icmp_v6src is not set, we might be bound to
6158 			 * broadcast/multicast. Use icmp_bound_v6src as
6159 			 * local address instead (that could
6160 			 * also still be UNSPECIFIED)
6161 			 */
6162 
6163 			sin6->sin6_addr = icmp->icmp_bound_v6src;
6164 		}
6165 		break;
6166 	}
6167 	return (0);
6168 }
6169 
6170 static int
6171 rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp)
6172 {
6173 	sin_t   *sin = (sin_t *)sa;
6174 	sin6_t  *sin6 = (sin6_t *)sa;
6175 
6176 	ASSERT(icmp != NULL);
6177 	ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock));
6178 
6179 	if (icmp->icmp_state != TS_DATA_XFER)
6180 		return (ENOTCONN);
6181 
6182 	sa->sa_family = icmp->icmp_family;
6183 	switch (icmp->icmp_family) {
6184 	case AF_INET:
6185 		ASSERT(icmp->icmp_ipversion == IPV4_VERSION);
6186 
6187 		if (*salenp < sizeof (sin_t))
6188 			return (EINVAL);
6189 
6190 		*salenp = sizeof (sin_t);
6191 		*sin = sin_null;
6192 		sin->sin_family = AF_INET;
6193 		sin->sin_addr.s_addr =
6194 		    V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr);
6195 		break;
6196 	case AF_INET6:
6197 		if (*salenp < sizeof (sin6_t))
6198 			return (EINVAL);
6199 
6200 		*salenp = sizeof (sin6_t);
6201 		*sin6 = sin6_null;
6202 		*sin6 = icmp->icmp_v6dst;
6203 		break;
6204 	}
6205 	return (0);
6206 }
6207 
6208 /* ARGSUSED */
6209 int
6210 rawip_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6211     socklen_t *salenp, cred_t *cr)
6212 {
6213 	conn_t  *connp = (conn_t *)proto_handle;
6214 	icmp_t  *icmp = connp->conn_icmp;
6215 	int	error;
6216 
6217 	/* All Solaris components should pass a cred for this operation. */
6218 	ASSERT(cr != NULL);
6219 
6220 	ASSERT(icmp != NULL);
6221 
6222 	rw_enter(&icmp->icmp_rwlock, RW_READER);
6223 
6224 	error = rawip_do_getpeername(icmp, sa, salenp);
6225 
6226 	rw_exit(&icmp->icmp_rwlock);
6227 
6228 	return (error);
6229 }
6230 
6231 /* ARGSUSED */
6232 int
6233 rawip_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
6234     socklen_t *salenp, cred_t *cr)
6235 {
6236 	conn_t  *connp = (conn_t *)proto_handle;
6237 	icmp_t	*icmp = connp->conn_icmp;
6238 	int	error;
6239 
6240 	/* All Solaris components should pass a cred for this operation. */
6241 	ASSERT(cr != NULL);
6242 
6243 	ASSERT(icmp != NULL);
6244 	rw_enter(&icmp->icmp_rwlock, RW_READER);
6245 
6246 	error = rawip_do_getsockname(icmp, sa, salenp);
6247 
6248 	rw_exit(&icmp->icmp_rwlock);
6249 
6250 	return (error);
6251 }
6252 
6253 int
6254 rawip_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6255     const void *optvalp, socklen_t optlen, cred_t *cr)
6256 {
6257 	conn_t	*connp = (conn_t *)proto_handle;
6258 	icmp_t *icmp = connp->conn_icmp;
6259 	int error;
6260 
6261 	/* All Solaris components should pass a cred for this operation. */
6262 	ASSERT(cr != NULL);
6263 
6264 	error = proto_opt_check(level, option_name, optlen, NULL,
6265 	    icmp_opt_obj.odb_opt_des_arr,
6266 	    icmp_opt_obj.odb_opt_arr_cnt,
6267 	    icmp_opt_obj.odb_topmost_tpiprovider,
6268 	    B_TRUE, B_FALSE, cr);
6269 
6270 	if (error != 0) {
6271 		/*
6272 		 * option not recognized
6273 		 */
6274 		if (error < 0) {
6275 			error = proto_tlitosyserr(-error);
6276 		}
6277 		return (error);
6278 	}
6279 
6280 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
6281 	error = icmp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level,
6282 	    option_name, optlen, (uchar_t *)optvalp, (uint_t *)&optlen,
6283 	    (uchar_t *)optvalp, NULL, cr);
6284 	rw_exit(&icmp->icmp_rwlock);
6285 
6286 	if (error < 0) {
6287 		/*
6288 		 * Pass on to ip
6289 		 */
6290 		error = ip_set_options(connp, level, option_name, optvalp,
6291 		    optlen, cr);
6292 	}
6293 
6294 	ASSERT(error >= 0);
6295 
6296 	return (error);
6297 }
6298 
6299 int
6300 rawip_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
6301     void *optvalp, socklen_t *optlen, cred_t *cr)
6302 {
6303 	int		error;
6304 	conn_t		*connp = (conn_t *)proto_handle;
6305 	icmp_t		*icmp = connp->conn_icmp;
6306 	t_uscalar_t	max_optbuf_len;
6307 	void		*optvalp_buf;
6308 	int		len;
6309 
6310 	/* All Solaris components should pass a cred for this operation. */
6311 	ASSERT(cr != NULL);
6312 
6313 	error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
6314 	    icmp_opt_obj.odb_opt_des_arr,
6315 	    icmp_opt_obj.odb_opt_arr_cnt,
6316 	    icmp_opt_obj.odb_topmost_tpiprovider,
6317 	    B_FALSE, B_TRUE, cr);
6318 
6319 	if (error != 0) {
6320 		if (error < 0) {
6321 			error = proto_tlitosyserr(-error);
6322 		}
6323 		return (error);
6324 	}
6325 
6326 	optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
6327 	rw_enter(&icmp->icmp_rwlock, RW_READER);
6328 	len = icmp_opt_get(connp, level, option_name, optvalp_buf);
6329 	rw_exit(&icmp->icmp_rwlock);
6330 
6331 	if (len < 0) {
6332 		/*
6333 		 * Pass on to IP
6334 		 */
6335 		kmem_free(optvalp_buf, max_optbuf_len);
6336 		return (ip_get_options(connp, level, option_name, optvalp,
6337 		    optlen, cr));
6338 	} else {
6339 		/*
6340 		 * update optlen and copy option value
6341 		 */
6342 		t_uscalar_t size = MIN(len, *optlen);
6343 		bcopy(optvalp_buf, optvalp, size);
6344 		bcopy(&size, optlen, sizeof (size));
6345 
6346 		kmem_free(optvalp_buf, max_optbuf_len);
6347 		return (0);
6348 	}
6349 }
6350 
6351 /* ARGSUSED */
6352 int
6353 rawip_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
6354 {
6355 	conn_t	*connp = (conn_t *)proto_handle;
6356 
6357 	/* All Solaris components should pass a cred for this operation. */
6358 	ASSERT(cr != NULL);
6359 
6360 	(void) rawip_do_close(connp);
6361 	return (0);
6362 }
6363 
6364 /* ARGSUSED */
6365 int
6366 rawip_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
6367 {
6368 	conn_t  *connp = (conn_t *)proto_handle;
6369 
6370 	/* All Solaris components should pass a cred for this operation. */
6371 	ASSERT(cr != NULL);
6372 
6373 	/* shut down the send side */
6374 	if (how != SHUT_RD)
6375 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6376 		    SOCK_OPCTL_SHUT_SEND, 0);
6377 	/* shut down the recv side */
6378 	if (how != SHUT_WR)
6379 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
6380 		    SOCK_OPCTL_SHUT_RECV, 0);
6381 	return (0);
6382 }
6383 
6384 void
6385 rawip_clr_flowctrl(sock_lower_handle_t proto_handle)
6386 {
6387 	conn_t  *connp = (conn_t *)proto_handle;
6388 	icmp_t	*icmp = connp->conn_icmp;
6389 
6390 	mutex_enter(&icmp->icmp_recv_lock);
6391 	connp->conn_flow_cntrld = B_FALSE;
6392 	mutex_exit(&icmp->icmp_recv_lock);
6393 }
6394 
6395 int
6396 rawip_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
6397     int mode, int32_t *rvalp, cred_t *cr)
6398 {
6399 	conn_t  	*connp = (conn_t *)proto_handle;
6400 	int		error;
6401 
6402 	/* All Solaris components should pass a cred for this operation. */
6403 	ASSERT(cr != NULL);
6404 
6405 	switch (cmd) {
6406 	case ND_SET:
6407 	case ND_GET:
6408 	case _SIOCSOCKFALLBACK:
6409 	case TI_GETPEERNAME:
6410 	case TI_GETMYNAME:
6411 #ifdef DEBUG
6412 		cmn_err(CE_CONT, "icmp_ioctl cmd 0x%x on non streams"
6413 		    " socket", cmd);
6414 #endif
6415 		error = EINVAL;
6416 		break;
6417 	default:
6418 		/*
6419 		 * Pass on to IP using helper stream
6420 		 */
6421 		error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
6422 		    cmd, arg, mode, cr, rvalp);
6423 		break;
6424 	}
6425 	return (error);
6426 }
6427 
6428 /* ARGSUSED */
6429 int
6430 rawip_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
6431     cred_t *cr)
6432 {
6433 	conn_t *connp = (conn_t *)proto_handle;
6434 	icmp_t	*icmp = connp->conn_icmp;
6435 	icmp_stack_t *is = icmp->icmp_is;
6436 	int error = 0;
6437 	boolean_t bypass_dgram_errind = B_FALSE;
6438 
6439 	ASSERT(DB_TYPE(mp) == M_DATA);
6440 
6441 	/* All Solaris components should pass a cred for this operation. */
6442 	ASSERT(cr != NULL);
6443 
6444 	/* If labeled then sockfs should have already set db_credp */
6445 	ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL);
6446 
6447 	/* do an implicit bind if necessary */
6448 	if (icmp->icmp_state == TS_UNBND) {
6449 		error = rawip_implicit_bind(connp);
6450 		/*
6451 		 * We could be racing with an actual bind, in which case
6452 		 * we would see EPROTO. We cross our fingers and try
6453 		 * to connect.
6454 		 */
6455 		if (!(error == 0 || error == EPROTO)) {
6456 			freemsg(mp);
6457 			return (error);
6458 		}
6459 	}
6460 
6461 	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
6462 
6463 	if (msg->msg_name != NULL && icmp->icmp_state == TS_DATA_XFER) {
6464 		error = EISCONN;
6465 		goto done_lock;
6466 	}
6467 
6468 	switch (icmp->icmp_family) {
6469 	case AF_INET6: {
6470 		sin6_t	*sin6;
6471 		ip6_pkt_t	ipp_s;	/* For ancillary data options */
6472 		ip6_pkt_t	*ipp = &ipp_s;
6473 
6474 		sin6 = (sin6_t *)msg->msg_name;
6475 		if (sin6 != NULL) {
6476 			error = proto_verify_ip_addr(icmp->icmp_family,
6477 			    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
6478 			if (error != 0) {
6479 				bypass_dgram_errind = B_TRUE;
6480 				goto done_lock;
6481 			}
6482 			if (icmp->icmp_delayed_error != 0) {
6483 				sin6_t  *sin1 = (sin6_t *)msg->msg_name;
6484 				sin6_t  *sin2 = (sin6_t *)
6485 				    &icmp->icmp_delayed_addr;
6486 
6487 				error = icmp->icmp_delayed_error;
6488 				icmp->icmp_delayed_error = 0;
6489 
6490 				/* Compare IP address and port */
6491 
6492 				if (sin1->sin6_port == sin2->sin6_port &&
6493 				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
6494 				    &sin2->sin6_addr)) {
6495 					goto done_lock;
6496 				}
6497 			}
6498 		} else {
6499 			/*
6500 			 * Use connected address
6501 			 */
6502 			if (icmp->icmp_state != TS_DATA_XFER) {
6503 				BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
6504 				error = EDESTADDRREQ;
6505 				bypass_dgram_errind = B_TRUE;
6506 				goto done_lock;
6507 			}
6508 			sin6 = &icmp->icmp_v6dst;
6509 		}
6510 
6511 		/* No support for mapped addresses on raw sockets */
6512 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
6513 			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
6514 			error = EADDRNOTAVAIL;
6515 			goto done_lock;
6516 		}
6517 
6518 		ipp->ipp_fields = 0;
6519 		ipp->ipp_sticky_ignored = 0;
6520 
6521 		/*
6522 		 * If options passed in, feed it for verification and handling
6523 		 */
6524 		if (msg->msg_controllen != 0) {
6525 			error = process_auxiliary_options(connp,
6526 			    msg->msg_control, msg->msg_controllen,
6527 			    ipp, &icmp_opt_obj, icmp_opt_set, cr);
6528 			if (error != 0) {
6529 				goto done_lock;
6530 			}
6531 		}
6532 
6533 		rw_exit(&icmp->icmp_rwlock);
6534 
6535 		/*
6536 		 * Destination is a native IPv6 address.
6537 		 * Send out an IPv6 format packet.
6538 		 */
6539 
6540 		error = raw_ip_send_data_v6(connp->conn_wq, connp, mp, sin6,
6541 		    ipp);
6542 	}
6543 		break;
6544 	case AF_INET: {
6545 		sin_t	*sin;
6546 		ip4_pkt_t pktinfo;
6547 		ip4_pkt_t *pktinfop = &pktinfo;
6548 		ipaddr_t	v4dst;
6549 
6550 		sin = (sin_t *)msg->msg_name;
6551 		if (sin != NULL) {
6552 			error = proto_verify_ip_addr(icmp->icmp_family,
6553 			    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
6554 			if (error != 0) {
6555 				bypass_dgram_errind = B_TRUE;
6556 				goto done_lock;
6557 			}
6558 			v4dst = sin->sin_addr.s_addr;
6559 			if (icmp->icmp_delayed_error != 0) {
6560 				sin_t *sin1 = (sin_t *)msg->msg_name;
6561 				sin_t *sin2 = (sin_t *)&icmp->icmp_delayed_addr;
6562 
6563 				error = icmp->icmp_delayed_error;
6564 				icmp->icmp_delayed_error = 0;
6565 
6566 				/* Compare IP address and port */
6567 				if (sin1->sin_port == sin2->sin_port &&
6568 				    sin1->sin_addr.s_addr ==
6569 				    sin2->sin_addr.s_addr) {
6570 					goto done_lock;
6571 				}
6572 
6573 			}
6574 		} else {
6575 			/*
6576 			 * Use connected address
6577 			 */
6578 			if (icmp->icmp_state != TS_DATA_XFER) {
6579 				BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
6580 				error = EDESTADDRREQ;
6581 				bypass_dgram_errind = B_TRUE;
6582 				goto done_lock;
6583 			}
6584 			v4dst = V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr);
6585 		}
6586 
6587 
6588 		pktinfop->ip4_ill_index = 0;
6589 		pktinfop->ip4_addr = INADDR_ANY;
6590 
6591 		/*
6592 		 * If options passed in, feed it for verification and handling
6593 		 */
6594 		if (msg->msg_controllen != 0) {
6595 			error = process_auxiliary_options(connp,
6596 			    msg->msg_control, msg->msg_controllen,
6597 			    pktinfop, &icmp_opt_obj, icmp_opt_set, cr);
6598 			if (error != 0) {
6599 				goto done_lock;
6600 			}
6601 		}
6602 		rw_exit(&icmp->icmp_rwlock);
6603 
6604 		error = raw_ip_send_data_v4(connp->conn_wq, connp, mp,
6605 		    v4dst, pktinfop);
6606 		break;
6607 	}
6608 
6609 	default:
6610 		ASSERT(0);
6611 	}
6612 
6613 	goto done;
6614 
6615 done_lock:
6616 	rw_exit(&icmp->icmp_rwlock);
6617 	if (error != 0) {
6618 		ASSERT(mp != NULL);
6619 		freemsg(mp);
6620 	}
6621 done:
6622 	if (bypass_dgram_errind)
6623 		return (error);
6624 	return (icmp->icmp_dgram_errind ? error : 0);
6625 }
6626 
/*
 * Downcall vector for raw IP sockets.
 *
 * This is a positional initializer, so each rawip_* entry point lands in
 * the sock_downcalls_t member at the same position.  The three slots
 * between rawip_send and rawip_shutdown are NULL: this table supplies no
 * function for them.
 *
 * NOTE(review): the member names and order come from the sock_downcalls_t
 * declaration, which is not visible here -- confirm against it before
 * adding, removing or reordering entries.
 */
6627 sock_downcalls_t sock_rawip_downcalls = {
6628 	rawip_activate,
6629 	rawip_accept,
6630 	rawip_bind,
6631 	rawip_listen,
6632 	rawip_connect,
6633 	rawip_getpeername,
6634 	rawip_getsockname,
6635 	rawip_getsockopt,
6636 	rawip_setsockopt,
6637 	rawip_send,
6638 	NULL,	/* no entry point supplied for this slot */
6639 	NULL,	/* no entry point supplied for this slot */
6640 	NULL,	/* no entry point supplied for this slot */
6641 	rawip_shutdown,
6642 	rawip_clr_flowctrl,
6643 	rawip_ioctl,
6644 	rawip_close
6645 };
6646