1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/types.h> 31 #include <sys/stream.h> 32 #include <sys/stropts.h> 33 #include <sys/strlog.h> 34 #include <sys/strsun.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/priv.h> 46 #include <sys/zone.h> 47 #include <sys/time.h> 48 49 #include <sys/socket.h> 50 #include <sys/isa_defs.h> 51 #include <sys/suntpi.h> 52 #include <sys/xti_inet.h> 53 #include <sys/netstack.h> 54 55 #include <net/route.h> 56 #include <net/if.h> 57 58 #include <netinet/in.h> 59 #include <netinet/ip6.h> 60 #include <netinet/icmp6.h> 61 #include <inet/common.h> 62 #include <inet/ip.h> 63 #include <inet/ip6.h> 64 #include <inet/mi.h> 65 #include <inet/nd.h> 66 #include <inet/optcom.h> 67 #include <inet/snmpcom.h> 68 #include <inet/kstatcom.h> 69 #include <inet/rawip_impl.h> 70 71 #include <netinet/ip_mroute.h> 72 #include <inet/tcp.h> 73 #include <net/pfkeyv2.h> 74 #include <inet/ipsec_info.h> 75 #include <inet/ipclassifier.h> 76 77 #include <sys/tsol/label.h> 78 #include <sys/tsol/tnet.h> 79 80 #include <inet/ip_ire.h> 81 #include <inet/ip_if.h> 82 83 #include <inet/ip_impl.h> 84 85 #define ICMP6 "icmp6" 86 major_t ICMP6_MAJ; 87 88 /* 89 * Object to represent database of options to search passed to 90 * {sock,tpi}optcom_req() interface routine to take care of option 91 * management and associated methods. 92 * XXX These and other externs should really move to an icmp header.
93 */ 94 extern optdb_obj_t icmp_opt_obj; 95 extern uint_t icmp_max_optsize; 96 97 /* 98 * Synchronization notes: 99 * 100 * At all points in this code where exclusive access is required, we 101 * pass a message to a subroutine by invoking qwriter(..., PERIM_OUTER) 102 * which will arrange to call the routine only after all threads have 103 * exited the shared resource. 104 */ 105 106 static void icmp_addr_req(queue_t *q, mblk_t *mp); 107 static void icmp_bind(queue_t *q, mblk_t *mp); 108 static void icmp_bind_proto(queue_t *q); 109 static int icmp_build_hdrs(queue_t *q, icmp_t *icmp); 110 static void icmp_capability_req(queue_t *q, mblk_t *mp); 111 static int icmp_close(queue_t *q); 112 static void icmp_connect(queue_t *q, mblk_t *mp); 113 static void icmp_disconnect(queue_t *q, mblk_t *mp); 114 static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 115 int sys_error); 116 static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 117 t_scalar_t t_error, int sys_error); 118 static void icmp_icmp_error(queue_t *q, mblk_t *mp); 119 static void icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 120 static void icmp_info_req(queue_t *q, mblk_t *mp); 121 static mblk_t *icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, 122 t_scalar_t addr_length, in_port_t); 123 static int icmp_open(queue_t *q, dev_t *devp, int flag, 124 int sflag, cred_t *credp); 125 static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, 126 int *errorp, void *thisdg_attrs); 127 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 128 int icmp_opt_set(queue_t *q, uint_t optset_context, 129 int level, int name, uint_t inlen, 130 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 131 void *thisdg_attrs, cred_t *cr, mblk_t *mblk); 132 int icmp_opt_get(queue_t *q, int level, int name, 133 uchar_t *ptr); 134 static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 135 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t 
*icmppa, int cnt); 136 static int icmp_param_set(queue_t *q, mblk_t *mp, char *value, 137 caddr_t cp, cred_t *cr); 138 static void icmp_rput(queue_t *q, mblk_t *mp); 139 static void icmp_rput_bind_ack(queue_t *q, mblk_t *mp); 140 static int icmp_snmp_get(queue_t *q, mblk_t *mpctl); 141 static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 142 uchar_t *ptr, int len); 143 static int icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 144 cred_t *cr); 145 static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 146 static void icmp_unbind(queue_t *q, mblk_t *mp); 147 static void icmp_wput(queue_t *q, mblk_t *mp); 148 static void icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, 149 t_scalar_t tudr_optlen); 150 static void icmp_wput_other(queue_t *q, mblk_t *mp); 151 static void icmp_wput_iocdata(queue_t *q, mblk_t *mp); 152 static void icmp_wput_restricted(queue_t *q, mblk_t *mp); 153 154 static void *rawip_stack_init(netstackid_t stackid, netstack_t *ns); 155 static void rawip_stack_fini(netstackid_t stackid, void *arg); 156 157 static void *rawip_kstat_init(netstackid_t stackid); 158 static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp); 159 static int rawip_kstat_update(kstat_t *kp, int rw); 160 161 162 static struct module_info info = { 163 5707, "icmp", 1, INFPSZ, 512, 128 164 }; 165 166 static struct qinit rinit = { 167 (pfi_t)icmp_rput, NULL, icmp_open, icmp_close, NULL, &info 168 }; 169 170 static struct qinit winit = { 171 (pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &info 172 }; 173 174 struct streamtab icmpinfo = { 175 &rinit, &winit 176 }; 177 178 static sin_t sin_null; /* Zero address for quick clears */ 179 static sin6_t sin6_null; /* Zero address for quick clears */ 180 181 /* Default structure copied into T_INFO_ACK messages */ 182 static struct T_info_ack icmp_g_t_info_ack = { 183 T_INFO_ACK, 184 IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */ 185 T_INVALID, /* ETSDU_size. 
icmp does not support expedited data. */ 186 T_INVALID, /* CDATA_size. icmp does not support connect data. */ 187 T_INVALID, /* DDATA_size. icmp does not support disconnect data. */ 188 0, /* ADDR_size - filled in later. */ 189 0, /* OPT_size - not initialized here */ 190 IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */ 191 T_CLTS, /* SERV_type. icmp supports connection-less. */ 192 TS_UNBND, /* CURRENT_state. This is set from icmp_state. */ 193 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 194 }; 195 196 /* 197 * Table of ND variables supported by icmp. These are loaded into is_nd 198 * when the stack instance is created. 199 * All of these are alterable, within the min/max values given, at run time. 200 */ 201 static icmpparam_t icmp_param_arr[] = { 202 /* min max value name */ 203 { 0, 128, 32, "icmp_wroff_extra" }, 204 { 1, 255, 255, "icmp_ipv4_ttl" }, 205 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"}, 206 { 0, 1, 1, "icmp_bsd_compat" }, 207 { 4096, 65536, 8192, "icmp_xmit_hiwat"}, 208 { 0, 65536, 1024, "icmp_xmit_lowat"}, 209 { 4096, 65536, 8192, "icmp_recv_hiwat"}, 210 { 65536, 1024*1024*1024, 256*1024, "icmp_max_buf"}, 211 }; 212 #define is_wroff_extra is_param_arr[0].icmp_param_value 213 #define is_ipv4_ttl is_param_arr[1].icmp_param_value 214 #define is_ipv6_hoplimit is_param_arr[2].icmp_param_value 215 #define is_bsd_compat is_param_arr[3].icmp_param_value 216 #define is_xmit_hiwat is_param_arr[4].icmp_param_value 217 #define is_xmit_lowat is_param_arr[5].icmp_param_value 218 #define is_recv_hiwat is_param_arr[6].icmp_param_value 219 #define is_max_buf is_param_arr[7].icmp_param_value 220 221 /* 222 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 223 * passed to icmp_wput. 224 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP 225 * protocol type placed in the message following the address. A T_BIND_ACK 226 * message is passed upstream when ip acknowledges the request. 
227 * (Called as writer.) 228 */ 229 static void 230 icmp_bind(queue_t *q, mblk_t *mp) 231 { 232 sin_t *sin; 233 sin6_t *sin6; 234 mblk_t *mp1; 235 struct T_bind_req *tbr; 236 icmp_t *icmp; 237 238 icmp = (icmp_t *)q->q_ptr; 239 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 240 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 241 "icmp_bind: bad req, len %u", 242 (uint_t)(mp->b_wptr - mp->b_rptr)); 243 icmp_err_ack(q, mp, TPROTO, 0); 244 return; 245 } 246 if (icmp->icmp_state != TS_UNBND) { 247 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 248 "icmp_bind: bad state, %d", icmp->icmp_state); 249 icmp_err_ack(q, mp, TOUTSTATE, 0); 250 return; 251 } 252 /* 253 * Reallocate the message to make sure we have enough room for an 254 * address and the protocol type. 255 */ 256 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 257 if (!mp1) { 258 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 259 return; 260 } 261 mp = mp1; 262 tbr = (struct T_bind_req *)mp->b_rptr; 263 switch (tbr->ADDR_length) { 264 case 0: /* Generic request */ 265 tbr->ADDR_offset = sizeof (struct T_bind_req); 266 if (icmp->icmp_family == AF_INET) { 267 tbr->ADDR_length = sizeof (sin_t); 268 sin = (sin_t *)&tbr[1]; 269 *sin = sin_null; 270 sin->sin_family = AF_INET; 271 mp->b_wptr = (uchar_t *)&sin[1]; 272 } else { 273 ASSERT(icmp->icmp_family == AF_INET6); 274 tbr->ADDR_length = sizeof (sin6_t); 275 sin6 = (sin6_t *)&tbr[1]; 276 *sin6 = sin6_null; 277 sin6->sin6_family = AF_INET6; 278 mp->b_wptr = (uchar_t *)&sin6[1]; 279 } 280 break; 281 case sizeof (sin_t): /* Complete IP address */ 282 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 283 sizeof (sin_t)); 284 if (sin == NULL || !OK_32PTR((char *)sin)) { 285 icmp_err_ack(q, mp, TSYSERR, EINVAL); 286 return; 287 } 288 if (icmp->icmp_family != AF_INET || 289 sin->sin_family != AF_INET) { 290 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 291 return; 292 } 293 break; 294 case sizeof (sin6_t): /* Complete IP address */ 295 sin6 = (sin6_t 
*)mi_offset_param(mp, tbr->ADDR_offset, 296 sizeof (sin6_t)); 297 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 298 icmp_err_ack(q, mp, TSYSERR, EINVAL); 299 return; 300 } 301 if (icmp->icmp_family != AF_INET6 || 302 sin6->sin6_family != AF_INET6) { 303 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 304 return; 305 } 306 /* No support for mapped addresses on raw sockets */ 307 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 308 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 309 return; 310 } 311 break; 312 default: 313 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 314 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 315 icmp_err_ack(q, mp, TBADADDR, 0); 316 return; 317 } 318 /* 319 * Copy the source address into our icmp structure. This address 320 * may still be zero; if so, ip will fill in the correct address 321 * each time an outbound packet is passed to it. 322 * If we are binding to a broadcast or multicast address icmp_rput 323 * will clear the source address when it receives the T_BIND_ACK. 324 */ 325 icmp->icmp_state = TS_IDLE; 326 327 if (icmp->icmp_family == AF_INET) { 328 ASSERT(sin != NULL); 329 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 330 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 331 &icmp->icmp_v6src); 332 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 333 icmp->icmp_ip_snd_options_len; 334 icmp->icmp_bound_v6src = icmp->icmp_v6src; 335 } else { 336 int error; 337 338 ASSERT(sin6 != NULL); 339 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 340 icmp->icmp_v6src = sin6->sin6_addr; 341 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 342 icmp->icmp_bound_v6src = icmp->icmp_v6src; 343 344 /* Rebuild the header template */ 345 error = icmp_build_hdrs(q, icmp); 346 if (error != 0) { 347 icmp_err_ack(q, mp, TSYSERR, error); 348 return; 349 } 350 } 351 /* 352 * Place protocol type in the O_T_BIND_REQ/T_BIND_REQ following 353 * the address. 
354 */ 355 *mp->b_wptr++ = icmp->icmp_proto; 356 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 357 /* 358 * Append a request for an IRE if src not 0 (INADDR_ANY) 359 */ 360 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 361 if (!mp->b_cont) { 362 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 363 return; 364 } 365 mp->b_cont->b_wptr += sizeof (ire_t); 366 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 367 } 368 369 /* Pass the O_T_BIND_REQ/T_BIND_REQ to ip. */ 370 putnext(q, mp); 371 } 372 373 /* 374 * Send message to IP to just bind to the protocol. 375 */ 376 static void 377 icmp_bind_proto(queue_t *q) 378 { 379 mblk_t *mp; 380 struct T_bind_req *tbr; 381 icmp_t *icmp; 382 383 icmp = (icmp_t *)q->q_ptr; 384 mp = allocb(sizeof (struct T_bind_req) + sizeof (sin6_t) + 1, 385 BPRI_MED); 386 if (!mp) { 387 return; 388 } 389 mp->b_datap->db_type = M_PROTO; 390 tbr = (struct T_bind_req *)mp->b_rptr; 391 tbr->PRIM_type = O_T_BIND_REQ; /* change to T_BIND_REQ ? */ 392 tbr->ADDR_offset = sizeof (struct T_bind_req); 393 if (icmp->icmp_ipversion == IPV4_VERSION) { 394 sin_t *sin; 395 396 tbr->ADDR_length = sizeof (sin_t); 397 sin = (sin_t *)&tbr[1]; 398 *sin = sin_null; 399 sin->sin_family = AF_INET; 400 mp->b_wptr = (uchar_t *)&sin[1]; 401 } else { 402 sin6_t *sin6; 403 404 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 405 tbr->ADDR_length = sizeof (sin6_t); 406 sin6 = (sin6_t *)&tbr[1]; 407 *sin6 = sin6_null; 408 sin6->sin6_family = AF_INET6; 409 mp->b_wptr = (uchar_t *)&sin6[1]; 410 } 411 412 /* Place protocol type in the O_T_BIND_REQ following the address. */ 413 *mp->b_wptr++ = icmp->icmp_proto; 414 415 /* Pass the O_T_BIND_REQ to ip. */ 416 putnext(q, mp); 417 } 418 419 /* 420 * This routine handles each T_CONN_REQ message passed to icmp. It 421 * associates a default destination address with the stream. 422 * 423 * This routine sends down a T_BIND_REQ to IP with the following mblks: 424 * T_BIND_REQ - specifying local and remote address. 
425 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 426 * T_OK_ACK - for the T_CONN_REQ 427 * T_CONN_CON - to keep the TPI user happy 428 * 429 * The connect completes in icmp_rput. 430 * When a T_BIND_ACK is received information is extracted from the IRE 431 * and the two appended messages are sent to the TPI user. 432 * Should icmp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 433 * it to an error ack for the appropriate primitive. 434 */ 435 static void 436 icmp_connect(queue_t *q, mblk_t *mp) 437 { 438 sin_t *sin; 439 sin6_t *sin6; 440 mblk_t *mp1, *mp2; 441 struct T_conn_req *tcr; 442 icmp_t *icmp; 443 ipaddr_t v4dst; 444 in6_addr_t v6dst; 445 uint32_t flowinfo; 446 447 icmp = (icmp_t *)q->q_ptr; 448 tcr = (struct T_conn_req *)mp->b_rptr; 449 /* Sanity checks */ 450 if ((mp->b_wptr - mp->b_rptr < sizeof (struct T_conn_req))) { 451 icmp_err_ack(q, mp, TPROTO, 0); 452 return; 453 } 454 455 if (icmp->icmp_state == TS_DATA_XFER) { 456 /* Already connected - clear out state */ 457 icmp->icmp_v6src = icmp->icmp_bound_v6src; 458 icmp->icmp_state = TS_IDLE; 459 } 460 461 462 if (tcr->OPT_length != 0) { 463 icmp_err_ack(q, mp, TBADOPT, 0); 464 return; 465 } 466 switch (tcr->DEST_length) { 467 default: 468 icmp_err_ack(q, mp, TBADADDR, 0); 469 return; 470 471 case sizeof (sin_t): 472 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 473 sizeof (sin_t)); 474 if (sin == NULL || !OK_32PTR((char *)sin)) { 475 icmp_err_ack(q, mp, TSYSERR, EINVAL); 476 return; 477 } 478 if (icmp->icmp_family != AF_INET || 479 sin->sin_family != AF_INET) { 480 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 481 return; 482 } 483 v4dst = sin->sin_addr.s_addr; 484 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 485 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 486 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 487 icmp->icmp_ip_snd_options_len; 488 break; 489 490 case sizeof (sin6_t): 491 sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 492 sizeof (sin6_t)); 493 if 
(sin6 == NULL || !OK_32PTR((char *)sin6)) { 494 icmp_err_ack(q, mp, TSYSERR, EINVAL); 495 return; 496 } 497 if (icmp->icmp_family != AF_INET6 || 498 sin6->sin6_family != AF_INET6) { 499 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 500 return; 501 } 502 /* No support for mapped addresses on raw sockets */ 503 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 504 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 505 return; 506 } 507 v6dst = sin6->sin6_addr; 508 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 509 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 510 flowinfo = sin6->sin6_flowinfo; 511 break; 512 } 513 if (icmp->icmp_ipversion == IPV4_VERSION) { 514 /* 515 * Interpret a zero destination to mean loopback. 516 * Update the T_CONN_REQ (sin/sin6) since it is used to 517 * generate the T_CONN_CON. 518 */ 519 if (v4dst == INADDR_ANY) { 520 v4dst = htonl(INADDR_LOOPBACK); 521 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 522 if (icmp->icmp_family == AF_INET) { 523 sin->sin_addr.s_addr = v4dst; 524 } else { 525 sin6->sin6_addr = v6dst; 526 } 527 } 528 icmp->icmp_v6dst = v6dst; 529 icmp->icmp_flowinfo = 0; 530 531 /* 532 * If the destination address is multicast and 533 * an outgoing multicast interface has been set, 534 * use the address of that interface as our 535 * source address if no source address has been set. 536 */ 537 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 538 CLASSD(v4dst) && 539 icmp->icmp_multicast_if_addr != INADDR_ANY) { 540 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 541 &icmp->icmp_v6src); 542 } 543 } else { 544 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 545 /* 546 * Interpret a zero destination to mean loopback. 547 * Update the T_CONN_REQ (sin/sin6) since it is used to 548 * generate the T_CONN_CON. 
549 */ 550 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 551 v6dst = ipv6_loopback; 552 sin6->sin6_addr = v6dst; 553 } 554 icmp->icmp_v6dst = v6dst; 555 icmp->icmp_flowinfo = flowinfo; 556 /* 557 * If the destination address is multicast and 558 * an outgoing multicast interface has been set, 559 * then the ip bind logic will pick the correct source 560 * address (i.e. matching the outgoing multicast interface). 561 */ 562 } 563 564 /* 565 * Send down bind to IP to verify that there is a route 566 * and to determine the source address. 567 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 568 */ 569 if (icmp->icmp_family == AF_INET) { 570 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa_conn_t), 571 sin->sin_port); 572 } else { 573 ASSERT(icmp->icmp_family == AF_INET6); 574 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa6_conn_t), 575 sin6->sin6_port); 576 } 577 if (mp1 == NULL) { 578 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 579 return; 580 } 581 582 /* 583 * We also have to send a connection confirmation to 584 * keep TLI happy. Prepare it for icmp_rput. 585 */ 586 if (icmp->icmp_family == AF_INET) { 587 mp2 = mi_tpi_conn_con(NULL, (char *)sin, sizeof (*sin), NULL, 588 0); 589 } else { 590 ASSERT(icmp->icmp_family == AF_INET6); 591 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, sizeof (*sin6), NULL, 592 0); 593 } 594 if (mp2 == NULL) { 595 freemsg(mp1); 596 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 597 return; 598 } 599 600 mp = mi_tpi_ok_ack_alloc(mp); 601 if (mp == NULL) { 602 /* Unable to reuse the T_CONN_REQ for the ack. */ 603 freemsg(mp2); 604 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 605 return; 606 } 607 608 icmp->icmp_state = TS_DATA_XFER; 609 610 /* Hang onto the T_OK_ACK and T_CONN_CON for later. 
*/ 611 linkb(mp1, mp); 612 linkb(mp1, mp2); 613 614 mblk_setcred(mp1, icmp->icmp_credp); 615 putnext(q, mp1); 616 } 617 618 static int 619 icmp_close(queue_t *q) 620 { 621 icmp_t *icmp = (icmp_t *)q->q_ptr; 622 int i1; 623 icmp_stack_t *is = icmp->icmp_is; 624 625 /* tell IP that if we're not here, he can't trust labels */ 626 if (is_system_labeled()) 627 putnext(WR(q), icmp->icmp_delabel); 628 629 qprocsoff(q); 630 631 /* If there are any options associated with the stream, free them. */ 632 if (icmp->icmp_ip_snd_options) 633 mi_free((char *)icmp->icmp_ip_snd_options); 634 635 if (icmp->icmp_filter != NULL) 636 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 637 638 /* Free memory associated with sticky options */ 639 if (icmp->icmp_sticky_hdrs_len != 0) { 640 kmem_free(icmp->icmp_sticky_hdrs, 641 icmp->icmp_sticky_hdrs_len); 642 icmp->icmp_sticky_hdrs = NULL; 643 icmp->icmp_sticky_hdrs_len = 0; 644 } 645 646 ip6_pkt_free(&icmp->icmp_sticky_ipp); 647 648 crfree(icmp->icmp_credp); 649 netstack_rele(icmp->icmp_is->is_netstack); 650 651 /* Free the icmp structure and release the minor device number. */ 652 i1 = mi_close_comm(&is->is_head, q); 653 654 return (i1); 655 } 656 657 /* 658 * This routine handles each T_DISCON_REQ message passed to icmp 659 * as an indicating that ICMP is no longer connected. This results 660 * in sending a T_BIND_REQ to IP to restore the binding to just 661 * the local address. 662 * 663 * This routine sends down a T_BIND_REQ to IP with the following mblks: 664 * T_BIND_REQ - specifying just the local address. 665 * T_OK_ACK - for the T_DISCON_REQ 666 * 667 * The disconnect completes in icmp_rput. 668 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 669 * Should icmp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 670 * it to an error ack for the appropriate primitive. 
671 */ 672 static void 673 icmp_disconnect(queue_t *q, mblk_t *mp) 674 { 675 icmp_t *icmp; 676 mblk_t *mp1; 677 678 icmp = (icmp_t *)q->q_ptr; 679 680 if (icmp->icmp_state != TS_DATA_XFER) { 681 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 682 "icmp_disconnect: bad state, %d", icmp->icmp_state); 683 icmp_err_ack(q, mp, TOUTSTATE, 0); 684 return; 685 } 686 icmp->icmp_v6src = icmp->icmp_bound_v6src; 687 icmp->icmp_state = TS_IDLE; 688 689 /* 690 * Send down bind to IP to remove the full binding and revert 691 * to the local address binding. 692 */ 693 if (icmp->icmp_family == AF_INET) { 694 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin_t), 0); 695 } else { 696 ASSERT(icmp->icmp_family == AF_INET6); 697 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin6_t), 0); 698 } 699 if (mp1 == NULL) { 700 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 701 return; 702 } 703 mp = mi_tpi_ok_ack_alloc(mp); 704 if (mp == NULL) { 705 /* Unable to reuse the T_DISCON_REQ for the ack. */ 706 icmp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 707 return; 708 } 709 710 if (icmp->icmp_family == AF_INET6) { 711 int error; 712 713 /* Rebuild the header template */ 714 error = icmp_build_hdrs(q, icmp); 715 if (error != 0) { 716 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 717 freemsg(mp1); 718 return; 719 } 720 } 721 icmp->icmp_discon_pending = 1; 722 723 /* Append the T_OK_ACK to the T_BIND_REQ for icmp_rput */ 724 linkb(mp1, mp); 725 putnext(q, mp1); 726 } 727 728 /* This routine creates a T_ERROR_ACK message and passes it upstream. 
*/ 729 static void 730 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 731 { 732 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 733 qreply(q, mp); 734 } 735 736 /* Shorthand to generate and send TPI error acks to our client */ 737 static void 738 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 739 t_scalar_t t_error, int sys_error) 740 { 741 struct T_error_ack *teackp; 742 743 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 744 M_PCPROTO, T_ERROR_ACK)) != NULL) { 745 teackp = (struct T_error_ack *)mp->b_rptr; 746 teackp->ERROR_prim = primitive; 747 teackp->TLI_error = t_error; 748 teackp->UNIX_error = sys_error; 749 qreply(q, mp); 750 } 751 } 752 753 /* 754 * icmp_icmp_error is called by icmp_rput to process ICMP 755 * messages passed up by IP. 756 * Generates the appropriate T_UDERROR_IND for permanent 757 * (non-transient) errors. 758 * Assumes that IP has pulled up everything up to and including 759 * the ICMP header. 760 */ 761 static void 762 icmp_icmp_error(queue_t *q, mblk_t *mp) 763 { 764 icmph_t *icmph; 765 ipha_t *ipha; 766 int iph_hdr_length; 767 sin_t sin; 768 sin6_t sin6; 769 mblk_t *mp1; 770 int error = 0; 771 icmp_t *icmp = (icmp_t *)q->q_ptr; 772 773 /* 774 * Deliver T_UDERROR_IND when the application has asked for it. 775 * The socket layer enables this automatically when connected. 
776 */ 777 if (!icmp->icmp_dgram_errind) { 778 freemsg(mp); 779 return; 780 } 781 782 ipha = (ipha_t *)mp->b_rptr; 783 784 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 785 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 786 icmp_icmp_error_ipv6(q, mp); 787 return; 788 } 789 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 790 791 iph_hdr_length = IPH_HDR_LENGTH(ipha); 792 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 793 ipha = (ipha_t *)&icmph[1]; 794 iph_hdr_length = IPH_HDR_LENGTH(ipha); 795 796 switch (icmph->icmph_type) { 797 case ICMP_DEST_UNREACHABLE: 798 switch (icmph->icmph_code) { 799 case ICMP_FRAGMENTATION_NEEDED: 800 /* 801 * IP has already adjusted the path MTU. 802 * XXX Somehow pass MTU indication to application? 803 */ 804 break; 805 case ICMP_PORT_UNREACHABLE: 806 case ICMP_PROTOCOL_UNREACHABLE: 807 error = ECONNREFUSED; 808 break; 809 default: 810 /* Transient errors */ 811 break; 812 } 813 break; 814 default: 815 /* Transient errors */ 816 break; 817 } 818 if (error == 0) { 819 freemsg(mp); 820 return; 821 } 822 823 switch (icmp->icmp_family) { 824 case AF_INET: 825 sin = sin_null; 826 sin.sin_family = AF_INET; 827 sin.sin_addr.s_addr = ipha->ipha_dst; 828 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 829 error); 830 break; 831 case AF_INET6: 832 sin6 = sin6_null; 833 sin6.sin6_family = AF_INET6; 834 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 835 836 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 837 NULL, 0, error); 838 break; 839 } 840 if (mp1) 841 putnext(q, mp1); 842 freemsg(mp); 843 } 844 845 /* 846 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 847 * for IPv6 packets. 848 * Send permanent (non-transient) errors upstream. 849 * Assumes that IP has pulled up all the extension headers as well 850 * as the ICMPv6 header. 
851 */ 852 static void 853 icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 854 { 855 icmp6_t *icmp6; 856 ip6_t *ip6h, *outer_ip6h; 857 uint16_t iph_hdr_length; 858 uint8_t *nexthdrp; 859 sin6_t sin6; 860 mblk_t *mp1; 861 int error = 0; 862 icmp_t *icmp = (icmp_t *)q->q_ptr; 863 864 outer_ip6h = (ip6_t *)mp->b_rptr; 865 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 866 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 867 else 868 iph_hdr_length = IPV6_HDR_LEN; 869 870 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 871 ip6h = (ip6_t *)&icmp6[1]; 872 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 873 freemsg(mp); 874 return; 875 } 876 if (*nexthdrp != icmp->icmp_proto) { 877 /* 878 * Could have switched icmp_proto after while ip did fanout of 879 * this message 880 */ 881 freemsg(mp); 882 return; 883 } 884 switch (icmp6->icmp6_type) { 885 case ICMP6_DST_UNREACH: 886 switch (icmp6->icmp6_code) { 887 case ICMP6_DST_UNREACH_NOPORT: 888 error = ECONNREFUSED; 889 break; 890 case ICMP6_DST_UNREACH_ADMIN: 891 case ICMP6_DST_UNREACH_NOROUTE: 892 case ICMP6_DST_UNREACH_BEYONDSCOPE: 893 case ICMP6_DST_UNREACH_ADDR: 894 /* Transient errors */ 895 break; 896 default: 897 break; 898 } 899 break; 900 case ICMP6_PACKET_TOO_BIG: { 901 struct T_unitdata_ind *tudi; 902 struct T_opthdr *toh; 903 size_t udi_size; 904 mblk_t *newmp; 905 t_scalar_t opt_length = sizeof (struct T_opthdr) + 906 sizeof (struct ip6_mtuinfo); 907 sin6_t *sin6; 908 struct ip6_mtuinfo *mtuinfo; 909 910 /* 911 * If the application has requested to receive path mtu 912 * information, send up an empty message containing an 913 * IPV6_PATHMTU ancillary data item. 914 */ 915 if (!icmp->icmp_ipv6_recvpathmtu) 916 break; 917 918 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 919 opt_length; 920 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 921 BUMP_MIB(&icmp->icmp_rawip_mib, rawipInErrors); 922 break; 923 } 924 925 /* 926 * newmp->b_cont is left to NULL on purpose. 
This is an 927 * empty message containing only ancillary data. 928 */ 929 newmp->b_datap->db_type = M_PROTO; 930 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 931 newmp->b_wptr = (uchar_t *)tudi + udi_size; 932 tudi->PRIM_type = T_UNITDATA_IND; 933 tudi->SRC_length = sizeof (sin6_t); 934 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 935 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 936 tudi->OPT_length = opt_length; 937 938 sin6 = (sin6_t *)&tudi[1]; 939 bzero(sin6, sizeof (sin6_t)); 940 sin6->sin6_family = AF_INET6; 941 sin6->sin6_addr = icmp->icmp_v6dst; 942 943 toh = (struct T_opthdr *)&sin6[1]; 944 toh->level = IPPROTO_IPV6; 945 toh->name = IPV6_PATHMTU; 946 toh->len = opt_length; 947 toh->status = 0; 948 949 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 950 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 951 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 952 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 953 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 954 /* 955 * We've consumed everything we need from the original 956 * message. Free it, then send our empty message. 957 */ 958 freemsg(mp); 959 putnext(q, newmp); 960 return; 961 } 962 case ICMP6_TIME_EXCEEDED: 963 /* Transient errors */ 964 break; 965 case ICMP6_PARAM_PROB: 966 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 967 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 968 (uchar_t *)ip6h + icmp6->icmp6_pptr == 969 (uchar_t *)nexthdrp) { 970 error = ECONNREFUSED; 971 break; 972 } 973 break; 974 } 975 if (error == 0) { 976 freemsg(mp); 977 return; 978 } 979 980 sin6 = sin6_null; 981 sin6.sin6_family = AF_INET6; 982 sin6.sin6_addr = ip6h->ip6_dst; 983 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 984 985 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 986 error); 987 if (mp1) 988 putnext(q, mp1); 989 freemsg(mp); 990 } 991 992 /* 993 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 
994 * The local address is filled in if endpoint is bound. The remote address 995 * is filled in if remote address has been precified ("connected endpoint") 996 * (The concept of connected CLTS sockets is alien to published TPI 997 * but we support it anyway). 998 */ 999 static void 1000 icmp_addr_req(queue_t *q, mblk_t *mp) 1001 { 1002 icmp_t *icmp = (icmp_t *)q->q_ptr; 1003 mblk_t *ackmp; 1004 struct T_addr_ack *taa; 1005 1006 /* Make it large enough for worst case */ 1007 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1008 2 * sizeof (sin6_t), 1); 1009 if (ackmp == NULL) { 1010 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1011 return; 1012 } 1013 taa = (struct T_addr_ack *)ackmp->b_rptr; 1014 1015 bzero(taa, sizeof (struct T_addr_ack)); 1016 ackmp->b_wptr = (uchar_t *)&taa[1]; 1017 1018 taa->PRIM_type = T_ADDR_ACK; 1019 ackmp->b_datap->db_type = M_PCPROTO; 1020 1021 /* 1022 * Note: Following code assumes 32 bit alignment of basic 1023 * data structures like sin_t and struct T_addr_ack. 1024 */ 1025 if (icmp->icmp_state != TS_UNBND) { 1026 /* 1027 * Fill in local address 1028 */ 1029 taa->LOCADDR_offset = sizeof (*taa); 1030 if (icmp->icmp_family == AF_INET) { 1031 sin_t *sin; 1032 1033 taa->LOCADDR_length = sizeof (sin_t); 1034 sin = (sin_t *)&taa[1]; 1035 /* Fill zeroes and then intialize non-zero fields */ 1036 *sin = sin_null; 1037 sin->sin_family = AF_INET; 1038 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1039 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1040 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1041 sin->sin_addr.s_addr); 1042 } else { 1043 /* 1044 * INADDR_ANY 1045 * icmp_v6src is not set, we might be bound to 1046 * broadcast/multicast. 
Use icmp_bound_v6src as 1047 * local address instead (that could 1048 * also still be INADDR_ANY) 1049 */ 1050 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1051 sin->sin_addr.s_addr); 1052 } 1053 ackmp->b_wptr = (uchar_t *)&sin[1]; 1054 } else { 1055 sin6_t *sin6; 1056 1057 ASSERT(icmp->icmp_family == AF_INET6); 1058 taa->LOCADDR_length = sizeof (sin6_t); 1059 sin6 = (sin6_t *)&taa[1]; 1060 /* Fill zeroes and then intialize non-zero fields */ 1061 *sin6 = sin6_null; 1062 sin6->sin6_family = AF_INET6; 1063 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1064 sin6->sin6_addr = icmp->icmp_v6src; 1065 } else { 1066 /* 1067 * UNSPECIFIED 1068 * icmp_v6src is not set, we might be bound to 1069 * broadcast/multicast. Use icmp_bound_v6src as 1070 * local address instead (that could 1071 * also still be UNSPECIFIED) 1072 */ 1073 sin6->sin6_addr = icmp->icmp_bound_v6src; 1074 } 1075 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1076 } 1077 } 1078 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1079 qreply(q, ackmp); 1080 } 1081 1082 static void 1083 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1084 { 1085 *tap = icmp_g_t_info_ack; 1086 1087 if (icmp->icmp_family == AF_INET6) 1088 tap->ADDR_size = sizeof (sin6_t); 1089 else 1090 tap->ADDR_size = sizeof (sin_t); 1091 tap->CURRENT_state = icmp->icmp_state; 1092 tap->OPT_size = icmp_max_optsize; 1093 } 1094 1095 /* 1096 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1097 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1098 * icmp_g_t_info_ack. The current state of the stream is copied from 1099 * icmp_state. 
1100 */ 1101 static void 1102 icmp_capability_req(queue_t *q, mblk_t *mp) 1103 { 1104 icmp_t *icmp = (icmp_t *)q->q_ptr; 1105 t_uscalar_t cap_bits1; 1106 struct T_capability_ack *tcap; 1107 1108 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1109 1110 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1111 mp->b_datap->db_type, T_CAPABILITY_ACK); 1112 if (!mp) 1113 return; 1114 1115 tcap = (struct T_capability_ack *)mp->b_rptr; 1116 tcap->CAP_bits1 = 0; 1117 1118 if (cap_bits1 & TC1_INFO) { 1119 icmp_copy_info(&tcap->INFO_ack, icmp); 1120 tcap->CAP_bits1 |= TC1_INFO; 1121 } 1122 1123 qreply(q, mp); 1124 } 1125 1126 /* 1127 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1128 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1129 * The current state of the stream is copied from icmp_state. 1130 */ 1131 static void 1132 icmp_info_req(queue_t *q, mblk_t *mp) 1133 { 1134 icmp_t *icmp = (icmp_t *)q->q_ptr; 1135 1136 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1137 T_INFO_ACK); 1138 if (!mp) 1139 return; 1140 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1141 qreply(q, mp); 1142 } 1143 1144 /* 1145 * IP recognizes seven kinds of bind requests: 1146 * 1147 * - A zero-length address binds only to the protocol number. 1148 * 1149 * - A 4-byte address is treated as a request to 1150 * validate that the address is a valid local IPv4 1151 * address, appropriate for an application to bind to. 1152 * IP does the verification, but does not make any note 1153 * of the address at this time. 1154 * 1155 * - A 16-byte address contains is treated as a request 1156 * to validate a local IPv6 address, as the 4-byte 1157 * address case above. 1158 * 1159 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1160 * use it for the inbound fanout of packets. 
1161 * 1162 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1163 * use it for the inbound fanout of packets. 1164 * 1165 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1166 * information consisting of local and remote addresses 1167 * and ports (unused for raw sockets). In this case, the addresses are both 1168 * validated as appropriate for this operation, and, if 1169 * so, the information is retained for use in the 1170 * inbound fanout. 1171 * 1172 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1173 * fanout information, like the 12-byte case above. 1174 * 1175 * IP will also fill in the IRE request mblk with information 1176 * regarding our peer. In all cases, we notify IP of our protocol 1177 * type by appending a single protocol byte to the bind request. 1178 */ 1179 static mblk_t * 1180 icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length, 1181 in_port_t fport) 1182 { 1183 char *cp; 1184 mblk_t *mp; 1185 struct T_bind_req *tbr; 1186 ipa_conn_t *ac; 1187 ipa6_conn_t *ac6; 1188 sin_t *sin; 1189 sin6_t *sin6; 1190 1191 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1192 1193 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1194 if (mp == NULL) 1195 return (NULL); 1196 mp->b_datap->db_type = M_PROTO; 1197 tbr = (struct T_bind_req *)mp->b_rptr; 1198 tbr->PRIM_type = bind_prim; 1199 tbr->ADDR_offset = sizeof (*tbr); 1200 tbr->CONIND_number = 0; 1201 tbr->ADDR_length = addr_length; 1202 cp = (char *)&tbr[1]; 1203 switch (addr_length) { 1204 case sizeof (ipa_conn_t): 1205 ASSERT(icmp->icmp_family == AF_INET); 1206 /* Append a request for an IRE */ 1207 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1208 if (mp->b_cont == NULL) { 1209 freemsg(mp); 1210 return (NULL); 1211 } 1212 mp->b_cont->b_wptr += sizeof (ire_t); 1213 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1214 1215 /* cp known to be 32 bit aligned */ 1216 ac = (ipa_conn_t *)cp; 1217 ac->ac_laddr = 
V4_PART_OF_V6(icmp->icmp_v6src); 1218 ac->ac_faddr = V4_PART_OF_V6(icmp->icmp_v6dst); 1219 ac->ac_fport = fport; 1220 ac->ac_lport = 0; 1221 break; 1222 1223 case sizeof (ipa6_conn_t): 1224 ASSERT(icmp->icmp_family == AF_INET6); 1225 /* Append a request for an IRE */ 1226 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1227 if (mp->b_cont == NULL) { 1228 freemsg(mp); 1229 return (NULL); 1230 } 1231 mp->b_cont->b_wptr += sizeof (ire_t); 1232 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1233 1234 /* cp known to be 32 bit aligned */ 1235 ac6 = (ipa6_conn_t *)cp; 1236 ac6->ac6_laddr = icmp->icmp_v6src; 1237 ac6->ac6_faddr = icmp->icmp_v6dst; 1238 ac6->ac6_fport = fport; 1239 ac6->ac6_lport = 0; 1240 break; 1241 1242 case sizeof (sin_t): 1243 ASSERT(icmp->icmp_family == AF_INET); 1244 /* Append a request for an IRE */ 1245 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1246 if (!mp->b_cont) { 1247 freemsg(mp); 1248 return (NULL); 1249 } 1250 mp->b_cont->b_wptr += sizeof (ire_t); 1251 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1252 1253 sin = (sin_t *)cp; 1254 *sin = sin_null; 1255 sin->sin_family = AF_INET; 1256 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 1257 break; 1258 1259 case sizeof (sin6_t): 1260 ASSERT(icmp->icmp_family == AF_INET6); 1261 /* Append a request for an IRE */ 1262 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1263 if (!mp->b_cont) { 1264 freemsg(mp); 1265 return (NULL); 1266 } 1267 mp->b_cont->b_wptr += sizeof (ire_t); 1268 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1269 1270 sin6 = (sin6_t *)cp; 1271 *sin6 = sin6_null; 1272 sin6->sin6_family = AF_INET6; 1273 sin6->sin6_addr = icmp->icmp_bound_v6src; 1274 break; 1275 } 1276 /* Add protocol number to end */ 1277 cp[addr_length] = icmp->icmp_proto; 1278 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1279 return (mp); 1280 } 1281 1282 /* ARGSUSED */ 1283 static void 1284 dummy_func(void *arg) 1285 { 1286 } 1287 1288 static mblk_t * 1289 alloc_wait(queue_t *q, size_t len, int 
pri, int *errp) 1290 { 1291 mblk_t *mp; 1292 bufcall_id_t id; 1293 int retv; 1294 1295 while ((mp = allocb(len, pri)) == NULL) { 1296 id = qbufcall(q, len, pri, dummy_func, NULL); 1297 if (id == 0) { 1298 *errp = ENOMEM; 1299 break; 1300 } 1301 retv = qwait_sig(q); 1302 qunbufcall(q, id); 1303 if (retv == 0) { 1304 *errp = EINTR; 1305 break; 1306 } 1307 } 1308 if (mp != NULL) 1309 mp->b_wptr += len; 1310 return (mp); 1311 } 1312 1313 /* 1314 * This is the open routine for icmp. It allocates a icmp_t structure for 1315 * the stream and, on the first open of the module, creates an ND table. 1316 */ 1317 static int 1318 icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1319 { 1320 int err; 1321 icmp_t *icmp; 1322 mblk_t *mp; 1323 out_labeled_t *olp; 1324 netstack_t *ns; 1325 icmp_stack_t *is; 1326 zoneid_t zoneid; 1327 1328 /* If the stream is already open, return immediately. */ 1329 if (q->q_ptr != NULL) 1330 return (0); 1331 1332 /* If this is not a push of icmp as a module, fail. */ 1333 if (sflag != MODOPEN) 1334 return (EINVAL); 1335 1336 /* 1337 * Defer the qprocson until everything is initialized since 1338 * we are D_MTPERQ and after qprocson the rput routine can 1339 * run. (Could do qprocson earlier since icmp currently 1340 * has an outer perimeter.) 1341 */ 1342 1343 ns = netstack_find_by_cred(credp); 1344 ASSERT(ns != NULL); 1345 is = ns->netstack_icmp; 1346 ASSERT(is != NULL); 1347 1348 /* 1349 * For exclusive stacks we set the zoneid to zero 1350 * to make ICMP operate as if in the global zone. 1351 */ 1352 if (is->is_netstack->netstack_stackid != GLOBAL_NETSTACKID) 1353 zoneid = GLOBAL_ZONEID; 1354 else 1355 zoneid = crgetzoneid(credp); 1356 1357 /* 1358 * Create a icmp_t structure for this stream and link into the 1359 * list of open streams. 
1360 */ 1361 err = mi_open_comm(&is->is_head, sizeof (icmp_t), q, devp, 1362 flag, sflag, credp); 1363 if (err != 0) { 1364 netstack_rele(is->is_netstack); 1365 return (err); 1366 } 1367 1368 /* 1369 * The receive hiwat is only looked at on the stream head queue. 1370 * Store in q_hiwat in order to return on SO_RCVBUF getsockopts. 1371 */ 1372 q->q_hiwat = is->is_recv_hiwat; 1373 1374 /* Set the initial state of the stream and the privilege status. */ 1375 icmp = (icmp_t *)q->q_ptr; 1376 icmp->icmp_state = TS_UNBND; 1377 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1378 icmp->icmp_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1379 icmp->icmp_filter = NULL; 1380 1381 icmp->icmp_credp = credp; 1382 crhold(credp); 1383 1384 /* 1385 * If the caller has the process-wide flag set, then default to MAC 1386 * exempt mode. This allows read-down to unlabeled hosts. 1387 */ 1388 if (getpflags(NET_MAC_AWARE, credp) != 0) 1389 icmp->icmp_mac_exempt = B_TRUE; 1390 1391 icmp->icmp_zoneid = zoneid; 1392 icmp->icmp_is = is; 1393 1394 if (getmajor(*devp) == (major_t)ICMP6_MAJ) { 1395 icmp->icmp_ipversion = IPV6_VERSION; 1396 icmp->icmp_family = AF_INET6; 1397 /* May be changed by a SO_PROTOTYPE socket option. */ 1398 icmp->icmp_proto = IPPROTO_ICMPV6; 1399 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1400 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1401 icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; 1402 } else { 1403 icmp->icmp_ipversion = IPV4_VERSION; 1404 icmp->icmp_family = AF_INET; 1405 /* May be changed by a SO_PROTOTYPE socket option. */ 1406 icmp->icmp_proto = IPPROTO_ICMP; 1407 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1408 icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; 1409 } 1410 qprocson(q); 1411 1412 /* 1413 * Check if icmp is being I_PUSHed by a non-privileged user. 1414 * If so, we set icmp_restricted to indicate that only MIB 1415 * traffic may pass. 
1416 */ 1417 if (secpolicy_net_icmpaccess(credp) != 0) { 1418 icmp->icmp_restricted = 1; 1419 } 1420 1421 /* 1422 * The transmit hiwat is only looked at on IP's queue. 1423 * Store in q_hiwat in order to return on SO_SNDBUF 1424 * getsockopts. 1425 */ 1426 WR(q)->q_hiwat = is->is_xmit_hiwat; 1427 WR(q)->q_next->q_hiwat = WR(q)->q_hiwat; 1428 WR(q)->q_lowat = is->is_xmit_lowat; 1429 WR(q)->q_next->q_lowat = WR(q)->q_lowat; 1430 1431 if (icmp->icmp_family == AF_INET6) { 1432 /* Build initial header template for transmit */ 1433 err = icmp_build_hdrs(q, icmp); 1434 if (err != 0) 1435 goto open_error; 1436 } 1437 /* Set the Stream head write offset. */ 1438 (void) mi_set_sth_wroff(q, 1439 icmp->icmp_max_hdr_len + is->is_wroff_extra); 1440 (void) mi_set_sth_hiwat(q, q->q_hiwat); 1441 1442 if (is_system_labeled()) { 1443 /* notify IP that we know about labeling */ 1444 mp = alloc_wait(q, sizeof (*olp), BPRI_MED, &err); 1445 if (mp == NULL) 1446 goto open_error; 1447 mp->b_datap->db_type = M_CTL; 1448 olp = (out_labeled_t *)mp->b_rptr; 1449 olp->out_labeled_type = IP_ULP_OUT_LABELED; 1450 olp->out_qnext = WR(q)->q_next; 1451 putnext(WR(q), mp); 1452 1453 /* save off a copy for closing */ 1454 mp = alloc_wait(q, sizeof (*olp), BPRI_MED, &err); 1455 if (mp == NULL) 1456 goto open_error; 1457 mp->b_datap->db_type = M_CTL; 1458 olp = (out_labeled_t *)mp->b_rptr; 1459 olp->out_labeled_type = IP_ULP_OUT_LABELED; 1460 olp->out_qnext = NULL; 1461 icmp->icmp_delabel = mp; 1462 } 1463 1464 return (0); 1465 1466 open_error: 1467 qprocsoff(q); 1468 crfree(credp); 1469 (void) mi_close_comm(&is->is_head, q); 1470 netstack_rele(is->is_netstack); 1471 return (err); 1472 } 1473 1474 /* 1475 * Which ICMP options OK to set through T_UNITDATA_REQ... 
1476 */ 1477 /* ARGSUSED */ 1478 static boolean_t 1479 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1480 { 1481 return (B_TRUE); 1482 } 1483 1484 /* 1485 * This routine gets default values of certain options whose default 1486 * values are maintained by protcol specific code 1487 */ 1488 /* ARGSUSED */ 1489 int 1490 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1491 { 1492 icmp_t *icmp = (icmp_t *)q->q_ptr; 1493 icmp_stack_t *is = icmp->icmp_is; 1494 int *i1 = (int *)ptr; 1495 1496 switch (level) { 1497 case IPPROTO_IP: 1498 switch (name) { 1499 case IP_MULTICAST_TTL: 1500 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1501 return (sizeof (uchar_t)); 1502 case IP_MULTICAST_LOOP: 1503 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1504 return (sizeof (uchar_t)); 1505 } 1506 break; 1507 case IPPROTO_IPV6: 1508 switch (name) { 1509 case IPV6_MULTICAST_HOPS: 1510 *i1 = IP_DEFAULT_MULTICAST_TTL; 1511 return (sizeof (int)); 1512 case IPV6_MULTICAST_LOOP: 1513 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1514 return (sizeof (int)); 1515 case IPV6_UNICAST_HOPS: 1516 *i1 = is->is_ipv6_hoplimit; 1517 return (sizeof (int)); 1518 } 1519 break; 1520 case IPPROTO_ICMPV6: 1521 switch (name) { 1522 case ICMP6_FILTER: 1523 /* Make it look like "pass all" */ 1524 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1525 return (sizeof (icmp6_filter_t)); 1526 } 1527 break; 1528 } 1529 return (-1); 1530 } 1531 1532 /* 1533 * This routine retrieves the current status of socket options. 1534 * It returns the size of the option retrieved. 
1535 */ 1536 int 1537 icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 1538 { 1539 icmp_t *icmp = (icmp_t *)q->q_ptr; 1540 int *i1 = (int *)ptr; 1541 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1542 icmp_stack_t *is = icmp->icmp_is; 1543 1544 switch (level) { 1545 case SOL_SOCKET: 1546 switch (name) { 1547 case SO_DEBUG: 1548 *i1 = icmp->icmp_debug; 1549 break; 1550 case SO_TYPE: 1551 *i1 = SOCK_RAW; 1552 break; 1553 case SO_PROTOTYPE: 1554 *i1 = icmp->icmp_proto; 1555 break; 1556 case SO_REUSEADDR: 1557 *i1 = icmp->icmp_reuseaddr; 1558 break; 1559 1560 /* 1561 * The following three items are available here, 1562 * but are only meaningful to IP. 1563 */ 1564 case SO_DONTROUTE: 1565 *i1 = icmp->icmp_dontroute; 1566 break; 1567 case SO_USELOOPBACK: 1568 *i1 = icmp->icmp_useloopback; 1569 break; 1570 case SO_BROADCAST: 1571 *i1 = icmp->icmp_broadcast; 1572 break; 1573 1574 case SO_SNDBUF: 1575 ASSERT(q->q_hiwat <= INT_MAX); 1576 *i1 = (int)q->q_hiwat; 1577 break; 1578 case SO_RCVBUF: 1579 ASSERT(RD(q)->q_hiwat <= INT_MAX); 1580 *i1 = (int)RD(q)->q_hiwat; 1581 break; 1582 case SO_DGRAM_ERRIND: 1583 *i1 = icmp->icmp_dgram_errind; 1584 break; 1585 case SO_TIMESTAMP: 1586 *i1 = icmp->icmp_timestamp; 1587 break; 1588 case SO_MAC_EXEMPT: 1589 *i1 = icmp->icmp_mac_exempt; 1590 break; 1591 case SO_DOMAIN: 1592 *i1 = icmp->icmp_family; 1593 break; 1594 1595 /* 1596 * Following four not meaningful for icmp 1597 * Action is same as "default" to which we fallthrough 1598 * so we keep them in comments. 1599 * case SO_LINGER: 1600 * case SO_KEEPALIVE: 1601 * case SO_OOBINLINE: 1602 * case SO_ALLZONES: 1603 */ 1604 default: 1605 return (-1); 1606 } 1607 break; 1608 case IPPROTO_IP: 1609 /* 1610 * Only allow IPv4 option processing on IPv4 sockets. 
1611 */ 1612 if (icmp->icmp_family != AF_INET) 1613 return (-1); 1614 1615 switch (name) { 1616 case IP_OPTIONS: 1617 case T_IP_OPTIONS: 1618 /* Options are passed up with each packet */ 1619 return (0); 1620 case IP_HDRINCL: 1621 *i1 = (int)icmp->icmp_hdrincl; 1622 break; 1623 case IP_TOS: 1624 case T_IP_TOS: 1625 *i1 = (int)icmp->icmp_type_of_service; 1626 break; 1627 case IP_TTL: 1628 *i1 = (int)icmp->icmp_ttl; 1629 break; 1630 case IP_MULTICAST_IF: 1631 /* 0 address if not set */ 1632 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1633 return (sizeof (ipaddr_t)); 1634 case IP_MULTICAST_TTL: 1635 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1636 return (sizeof (uchar_t)); 1637 case IP_MULTICAST_LOOP: 1638 *ptr = icmp->icmp_multicast_loop; 1639 return (sizeof (uint8_t)); 1640 case IP_BOUND_IF: 1641 /* Zero if not set */ 1642 *i1 = icmp->icmp_bound_if; 1643 break; /* goto sizeof (int) option return */ 1644 case IP_UNSPEC_SRC: 1645 *ptr = icmp->icmp_unspec_source; 1646 break; /* goto sizeof (int) option return */ 1647 case IP_XMIT_IF: 1648 *i1 = icmp->icmp_xmit_if; 1649 break; /* goto sizeof (int) option return */ 1650 case IP_RECVIF: 1651 *ptr = icmp->icmp_recvif; 1652 break; /* goto sizeof (int) option return */ 1653 case IP_RECVPKTINFO: 1654 /* 1655 * This also handles IP_PKTINFO. 1656 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1657 * Differentiation is based on the size of the argument 1658 * passed in. 1659 * This option is handled in IP which will return an 1660 * error for IP_PKTINFO as it's not supported as a 1661 * sticky option. 1662 */ 1663 return (-EINVAL); 1664 /* 1665 * Cannot "get" the value of following options 1666 * at this level. Action is same as "default" to 1667 * which we fallthrough so we keep them in comments. 
1668 * 1669 * case IP_ADD_MEMBERSHIP: 1670 * case IP_DROP_MEMBERSHIP: 1671 * case IP_BLOCK_SOURCE: 1672 * case IP_UNBLOCK_SOURCE: 1673 * case IP_ADD_SOURCE_MEMBERSHIP: 1674 * case IP_DROP_SOURCE_MEMBERSHIP: 1675 * case MCAST_JOIN_GROUP: 1676 * case MCAST_LEAVE_GROUP: 1677 * case MCAST_BLOCK_SOURCE: 1678 * case MCAST_UNBLOCK_SOURCE: 1679 * case MCAST_JOIN_SOURCE_GROUP: 1680 * case MCAST_LEAVE_SOURCE_GROUP: 1681 * case MRT_INIT: 1682 * case MRT_DONE: 1683 * case MRT_ADD_VIF: 1684 * case MRT_DEL_VIF: 1685 * case MRT_ADD_MFC: 1686 * case MRT_DEL_MFC: 1687 * case MRT_VERSION: 1688 * case MRT_ASSERT: 1689 * case IP_SEC_OPT: 1690 * case IP_DONTFAILOVER_IF: 1691 * case IP_NEXTHOP: 1692 */ 1693 default: 1694 return (-1); 1695 } 1696 break; 1697 case IPPROTO_IPV6: 1698 /* 1699 * Only allow IPv6 option processing on native IPv6 sockets. 1700 */ 1701 if (icmp->icmp_family != AF_INET6) 1702 return (-1); 1703 switch (name) { 1704 case IPV6_UNICAST_HOPS: 1705 *i1 = (unsigned int)icmp->icmp_ttl; 1706 break; 1707 case IPV6_MULTICAST_IF: 1708 /* 0 index if not set */ 1709 *i1 = icmp->icmp_multicast_if_index; 1710 break; 1711 case IPV6_MULTICAST_HOPS: 1712 *i1 = icmp->icmp_multicast_ttl; 1713 break; 1714 case IPV6_MULTICAST_LOOP: 1715 *i1 = icmp->icmp_multicast_loop; 1716 break; 1717 case IPV6_BOUND_IF: 1718 /* Zero if not set */ 1719 *i1 = icmp->icmp_bound_if; 1720 break; 1721 case IPV6_UNSPEC_SRC: 1722 *i1 = icmp->icmp_unspec_source; 1723 break; 1724 case IPV6_CHECKSUM: 1725 /* 1726 * Return offset or -1 if no checksum offset. 
1727 * Does not apply to IPPROTO_ICMPV6 1728 */ 1729 if (icmp->icmp_proto == IPPROTO_ICMPV6) 1730 return (-1); 1731 1732 if (icmp->icmp_raw_checksum) { 1733 *i1 = icmp->icmp_checksum_off; 1734 } else { 1735 *i1 = -1; 1736 } 1737 break; 1738 case IPV6_JOIN_GROUP: 1739 case IPV6_LEAVE_GROUP: 1740 case MCAST_JOIN_GROUP: 1741 case MCAST_LEAVE_GROUP: 1742 case MCAST_BLOCK_SOURCE: 1743 case MCAST_UNBLOCK_SOURCE: 1744 case MCAST_JOIN_SOURCE_GROUP: 1745 case MCAST_LEAVE_SOURCE_GROUP: 1746 /* cannot "get" the value for these */ 1747 return (-1); 1748 case IPV6_RECVPKTINFO: 1749 *i1 = icmp->icmp_ip_recvpktinfo; 1750 break; 1751 case IPV6_RECVTCLASS: 1752 *i1 = icmp->icmp_ipv6_recvtclass; 1753 break; 1754 case IPV6_RECVPATHMTU: 1755 *i1 = icmp->icmp_ipv6_recvpathmtu; 1756 break; 1757 case IPV6_V6ONLY: 1758 *i1 = 1; 1759 break; 1760 case IPV6_RECVHOPLIMIT: 1761 *i1 = icmp->icmp_ipv6_recvhoplimit; 1762 break; 1763 case IPV6_RECVHOPOPTS: 1764 *i1 = icmp->icmp_ipv6_recvhopopts; 1765 break; 1766 case IPV6_RECVDSTOPTS: 1767 *i1 = icmp->icmp_ipv6_recvdstopts; 1768 break; 1769 case _OLD_IPV6_RECVDSTOPTS: 1770 *i1 = icmp->icmp_old_ipv6_recvdstopts; 1771 break; 1772 case IPV6_RECVRTHDRDSTOPTS: 1773 *i1 = icmp->icmp_ipv6_recvrtdstopts; 1774 break; 1775 case IPV6_RECVRTHDR: 1776 *i1 = icmp->icmp_ipv6_recvrthdr; 1777 break; 1778 case IPV6_PKTINFO: { 1779 /* XXX assumes that caller has room for max size! 
*/ 1780 struct in6_pktinfo *pkti; 1781 1782 pkti = (struct in6_pktinfo *)ptr; 1783 if (ipp->ipp_fields & IPPF_IFINDEX) 1784 pkti->ipi6_ifindex = ipp->ipp_ifindex; 1785 else 1786 pkti->ipi6_ifindex = 0; 1787 if (ipp->ipp_fields & IPPF_ADDR) 1788 pkti->ipi6_addr = ipp->ipp_addr; 1789 else 1790 pkti->ipi6_addr = ipv6_all_zeros; 1791 return (sizeof (struct in6_pktinfo)); 1792 } 1793 case IPV6_NEXTHOP: { 1794 sin6_t *sin6 = (sin6_t *)ptr; 1795 1796 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 1797 return (0); 1798 *sin6 = sin6_null; 1799 sin6->sin6_family = AF_INET6; 1800 sin6->sin6_addr = ipp->ipp_nexthop; 1801 return (sizeof (sin6_t)); 1802 } 1803 case IPV6_HOPOPTS: 1804 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 1805 return (0); 1806 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 1807 return (0); 1808 bcopy((char *)ipp->ipp_hopopts + 1809 icmp->icmp_label_len_v6, ptr, 1810 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1811 if (icmp->icmp_label_len_v6 > 0) { 1812 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 1813 ptr[1] = (ipp->ipp_hopoptslen - 1814 icmp->icmp_label_len_v6 + 7) / 8 - 1; 1815 } 1816 return (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1817 case IPV6_RTHDRDSTOPTS: 1818 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 1819 return (0); 1820 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 1821 return (ipp->ipp_rtdstoptslen); 1822 case IPV6_RTHDR: 1823 if (!(ipp->ipp_fields & IPPF_RTHDR)) 1824 return (0); 1825 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 1826 return (ipp->ipp_rthdrlen); 1827 case IPV6_DSTOPTS: 1828 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 1829 return (0); 1830 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 1831 return (ipp->ipp_dstoptslen); 1832 case IPV6_PATHMTU: 1833 if (!(ipp->ipp_fields & IPPF_PATHMTU)) 1834 return (0); 1835 1836 return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0, 1837 (struct ip6_mtuinfo *)ptr, 1838 is->is_netstack)); 1839 case IPV6_TCLASS: 1840 if (ipp->ipp_fields & IPPF_TCLASS) 1841 *i1 = ipp->ipp_tclass; 1842 else 1843 *i1 = 
IPV6_FLOW_TCLASS( 1844 IPV6_DEFAULT_VERS_AND_FLOW); 1845 break; 1846 default: 1847 return (-1); 1848 } 1849 break; 1850 case IPPROTO_ICMPV6: 1851 /* 1852 * Only allow IPv6 option processing on native IPv6 sockets. 1853 */ 1854 if (icmp->icmp_family != AF_INET6) 1855 return (-1); 1856 1857 if (icmp->icmp_proto != IPPROTO_ICMPV6) 1858 return (-1); 1859 1860 switch (name) { 1861 case ICMP6_FILTER: 1862 if (icmp->icmp_filter == NULL) { 1863 /* Make it look like "pass all" */ 1864 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1865 } else { 1866 (void) bcopy(icmp->icmp_filter, ptr, 1867 sizeof (icmp6_filter_t)); 1868 } 1869 return (sizeof (icmp6_filter_t)); 1870 default: 1871 return (-1); 1872 } 1873 default: 1874 return (-1); 1875 } 1876 return (sizeof (int)); 1877 } 1878 1879 /* This routine sets socket options. */ 1880 /* ARGSUSED */ 1881 int 1882 icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, 1883 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 1884 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 1885 { 1886 icmp_t *icmp = (icmp_t *)q->q_ptr; 1887 icmp_stack_t *is = icmp->icmp_is; 1888 int *i1 = (int *)invalp; 1889 boolean_t onoff = (*i1 == 0) ? 0 : 1; 1890 boolean_t checkonly; 1891 int error; 1892 1893 switch (optset_context) { 1894 case SETFN_OPTCOM_CHECKONLY: 1895 checkonly = B_TRUE; 1896 /* 1897 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 1898 * inlen != 0 implies value supplied and 1899 * we have to "pretend" to set it. 1900 * inlen == 0 implies that there is no 1901 * value part in T_CHECK request and just validation 1902 * done elsewhere should be enough, we just return here. 1903 */ 1904 if (inlen == 0) { 1905 *outlenp = 0; 1906 return (0); 1907 } 1908 break; 1909 case SETFN_OPTCOM_NEGOTIATE: 1910 checkonly = B_FALSE; 1911 break; 1912 case SETFN_UD_NEGOTIATE: 1913 case SETFN_CONN_NEGOTIATE: 1914 checkonly = B_FALSE; 1915 /* 1916 * Negotiating local and "association-related" options 1917 * through T_UNITDATA_REQ. 
1918 * 1919 * Following routine can filter out ones we do not 1920 * want to be "set" this way. 1921 */ 1922 if (!icmp_opt_allow_udr_set(level, name)) { 1923 *outlenp = 0; 1924 return (EINVAL); 1925 } 1926 break; 1927 default: 1928 /* 1929 * We should never get here 1930 */ 1931 *outlenp = 0; 1932 return (EINVAL); 1933 } 1934 1935 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 1936 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 1937 1938 /* 1939 * For fixed length options, no sanity check 1940 * of passed in length is done. It is assumed *_optcom_req() 1941 * routines do the right thing. 1942 */ 1943 1944 switch (level) { 1945 case SOL_SOCKET: 1946 switch (name) { 1947 case SO_DEBUG: 1948 if (!checkonly) 1949 icmp->icmp_debug = onoff; 1950 break; 1951 case SO_PROTOTYPE: 1952 if ((*i1 & 0xFF) != IPPROTO_ICMP && 1953 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 1954 secpolicy_net_rawaccess(cr) != 0) { 1955 *outlenp = 0; 1956 return (EACCES); 1957 } 1958 /* Can't use IPPROTO_RAW with IPv6 */ 1959 if ((*i1 & 0xFF) == IPPROTO_RAW && 1960 icmp->icmp_family == AF_INET6) { 1961 *outlenp = 0; 1962 return (EPROTONOSUPPORT); 1963 } 1964 if (checkonly) { 1965 /* T_CHECK case */ 1966 *(int *)outvalp = (*i1 & 0xFF); 1967 break; 1968 } 1969 icmp->icmp_proto = *i1 & 0xFF; 1970 if ((icmp->icmp_proto == IPPROTO_RAW || 1971 icmp->icmp_proto == IPPROTO_IGMP) && 1972 icmp->icmp_family == AF_INET) 1973 icmp->icmp_hdrincl = 1; 1974 else 1975 icmp->icmp_hdrincl = 0; 1976 1977 if (icmp->icmp_family == AF_INET6 && 1978 icmp->icmp_proto == IPPROTO_ICMPV6) { 1979 /* Set offset for icmp6_cksum */ 1980 icmp->icmp_raw_checksum = 0; 1981 icmp->icmp_checksum_off = 2; 1982 } 1983 if (icmp->icmp_proto == IPPROTO_UDP || 1984 icmp->icmp_proto == IPPROTO_TCP || 1985 icmp->icmp_proto == IPPROTO_SCTP) { 1986 icmp->icmp_no_tp_cksum = 1; 1987 icmp->icmp_sticky_ipp.ipp_fields |= 1988 IPPF_NO_CKSUM; 1989 } else { 1990 icmp->icmp_no_tp_cksum = 0; 1991 icmp->icmp_sticky_ipp.ipp_fields &= 1992 
~IPPF_NO_CKSUM; 1993 } 1994 1995 if (icmp->icmp_filter != NULL && 1996 icmp->icmp_proto != IPPROTO_ICMPV6) { 1997 kmem_free(icmp->icmp_filter, 1998 sizeof (icmp6_filter_t)); 1999 icmp->icmp_filter = NULL; 2000 } 2001 2002 /* Rebuild the header template */ 2003 error = icmp_build_hdrs(q, icmp); 2004 if (error != 0) { 2005 *outlenp = 0; 2006 return (error); 2007 } 2008 2009 /* 2010 * For SCTP, we don't use icmp_bind_proto() for 2011 * raw socket binding. Note that we do not need 2012 * to set *outlenp. 2013 */ 2014 if (icmp->icmp_proto == IPPROTO_SCTP) 2015 return (0); 2016 2017 icmp_bind_proto(q); 2018 *outlenp = sizeof (int); 2019 *(int *)outvalp = *i1 & 0xFF; 2020 return (0); 2021 case SO_REUSEADDR: 2022 if (!checkonly) 2023 icmp->icmp_reuseaddr = onoff; 2024 break; 2025 2026 /* 2027 * The following three items are available here, 2028 * but are only meaningful to IP. 2029 */ 2030 case SO_DONTROUTE: 2031 if (!checkonly) 2032 icmp->icmp_dontroute = onoff; 2033 break; 2034 case SO_USELOOPBACK: 2035 if (!checkonly) 2036 icmp->icmp_useloopback = onoff; 2037 break; 2038 case SO_BROADCAST: 2039 if (!checkonly) 2040 icmp->icmp_broadcast = onoff; 2041 break; 2042 2043 case SO_SNDBUF: 2044 if (*i1 > is->is_max_buf) { 2045 *outlenp = 0; 2046 return (ENOBUFS); 2047 } 2048 if (!checkonly) { 2049 q->q_hiwat = *i1; 2050 q->q_next->q_hiwat = *i1; 2051 } 2052 break; 2053 case SO_RCVBUF: 2054 if (*i1 > is->is_max_buf) { 2055 *outlenp = 0; 2056 return (ENOBUFS); 2057 } 2058 if (!checkonly) { 2059 RD(q)->q_hiwat = *i1; 2060 (void) mi_set_sth_hiwat(RD(q), *i1); 2061 } 2062 break; 2063 case SO_DGRAM_ERRIND: 2064 if (!checkonly) 2065 icmp->icmp_dgram_errind = onoff; 2066 break; 2067 case SO_ALLZONES: 2068 /* 2069 * "soft" error (negative) 2070 * option not handled at this level 2071 * Note: Do not modify *outlenp 2072 */ 2073 return (-EINVAL); 2074 case SO_TIMESTAMP: 2075 if (!checkonly) { 2076 icmp->icmp_timestamp = onoff; 2077 } 2078 break; 2079 case SO_MAC_EXEMPT: 2080 if 
(secpolicy_net_mac_aware(cr) != 0 || 2081 icmp->icmp_state != TS_UNBND) 2082 return (EACCES); 2083 if (!checkonly) 2084 icmp->icmp_mac_exempt = onoff; 2085 break; 2086 /* 2087 * Following three not meaningful for icmp 2088 * Action is same as "default" so we keep them 2089 * in comments. 2090 * case SO_LINGER: 2091 * case SO_KEEPALIVE: 2092 * case SO_OOBINLINE: 2093 */ 2094 default: 2095 *outlenp = 0; 2096 return (EINVAL); 2097 } 2098 break; 2099 case IPPROTO_IP: 2100 /* 2101 * Only allow IPv4 option processing on IPv4 sockets. 2102 */ 2103 if (icmp->icmp_family != AF_INET) { 2104 *outlenp = 0; 2105 return (ENOPROTOOPT); 2106 } 2107 switch (name) { 2108 case IP_OPTIONS: 2109 case T_IP_OPTIONS: 2110 /* Save options for use by IP. */ 2111 if ((inlen & 0x3) || 2112 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2113 *outlenp = 0; 2114 return (EINVAL); 2115 } 2116 if (checkonly) 2117 break; 2118 2119 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2120 &icmp->icmp_ip_snd_options_len, 2121 icmp->icmp_label_len, invalp, inlen)) { 2122 *outlenp = 0; 2123 return (ENOMEM); 2124 } 2125 2126 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2127 icmp->icmp_ip_snd_options_len; 2128 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2129 is->is_wroff_extra); 2130 break; 2131 case IP_HDRINCL: 2132 if (!checkonly) 2133 icmp->icmp_hdrincl = onoff; 2134 break; 2135 case IP_TOS: 2136 case T_IP_TOS: 2137 if (!checkonly) { 2138 icmp->icmp_type_of_service = (uint8_t)*i1; 2139 } 2140 break; 2141 case IP_TTL: 2142 if (!checkonly) { 2143 icmp->icmp_ttl = (uint8_t)*i1; 2144 } 2145 break; 2146 case IP_MULTICAST_IF: 2147 /* 2148 * TODO should check OPTMGMT reply and undo this if 2149 * there is an error. 
2150 */ 2151 if (!checkonly) 2152 icmp->icmp_multicast_if_addr = *i1; 2153 break; 2154 case IP_MULTICAST_TTL: 2155 if (!checkonly) 2156 icmp->icmp_multicast_ttl = *invalp; 2157 break; 2158 case IP_MULTICAST_LOOP: 2159 if (!checkonly) { 2160 icmp->icmp_multicast_loop = 2161 (*invalp == 0) ? 0 : 1; 2162 } 2163 break; 2164 case IP_BOUND_IF: 2165 if (!checkonly) 2166 icmp->icmp_bound_if = *i1; 2167 break; 2168 case IP_UNSPEC_SRC: 2169 if (!checkonly) 2170 icmp->icmp_unspec_source = onoff; 2171 break; 2172 case IP_XMIT_IF: 2173 if (!checkonly) 2174 icmp->icmp_xmit_if = *i1; 2175 break; 2176 case IP_RECVIF: 2177 if (!checkonly) 2178 icmp->icmp_recvif = onoff; 2179 break; 2180 2181 case IP_PKTINFO: { 2182 /* 2183 * This also handles IP_RECVPKTINFO. 2184 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2185 * Differentiation is based on the size of the argument 2186 * passed in. 2187 */ 2188 struct in_pktinfo *pktinfop; 2189 ip4_pkt_t *attr_pktinfop; 2190 2191 if (checkonly) 2192 break; 2193 2194 if (inlen == sizeof (int)) { 2195 /* 2196 * This is IP_RECVPKTINFO option. 2197 * Keep a local copy of wether this option is 2198 * set or not and pass it down to IP for 2199 * processing. 
2200 */ 2201 icmp->icmp_ip_recvpktinfo = onoff; 2202 return (-EINVAL); 2203 } 2204 2205 2206 if (inlen != sizeof (struct in_pktinfo)) 2207 return (EINVAL); 2208 2209 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2210 == NULL) { 2211 /* 2212 * sticky option is not supported 2213 */ 2214 return (EINVAL); 2215 } 2216 2217 pktinfop = (struct in_pktinfo *)invalp; 2218 2219 /* 2220 * Atleast one of the values should be specified 2221 */ 2222 if (pktinfop->ipi_ifindex == 0 && 2223 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2224 return (EINVAL); 2225 } 2226 2227 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2228 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2229 } 2230 break; 2231 case IP_ADD_MEMBERSHIP: 2232 case IP_DROP_MEMBERSHIP: 2233 case IP_BLOCK_SOURCE: 2234 case IP_UNBLOCK_SOURCE: 2235 case IP_ADD_SOURCE_MEMBERSHIP: 2236 case IP_DROP_SOURCE_MEMBERSHIP: 2237 case MCAST_JOIN_GROUP: 2238 case MCAST_LEAVE_GROUP: 2239 case MCAST_BLOCK_SOURCE: 2240 case MCAST_UNBLOCK_SOURCE: 2241 case MCAST_JOIN_SOURCE_GROUP: 2242 case MCAST_LEAVE_SOURCE_GROUP: 2243 case MRT_INIT: 2244 case MRT_DONE: 2245 case MRT_ADD_VIF: 2246 case MRT_DEL_VIF: 2247 case MRT_ADD_MFC: 2248 case MRT_DEL_MFC: 2249 case MRT_VERSION: 2250 case MRT_ASSERT: 2251 case IP_SEC_OPT: 2252 case IP_DONTFAILOVER_IF: 2253 case IP_NEXTHOP: 2254 /* 2255 * "soft" error (negative) 2256 * option not handled at this level 2257 * Note: Do not modify *outlenp 2258 */ 2259 return (-EINVAL); 2260 default: 2261 *outlenp = 0; 2262 return (EINVAL); 2263 } 2264 break; 2265 case IPPROTO_IPV6: { 2266 ip6_pkt_t *ipp; 2267 boolean_t sticky; 2268 2269 if (icmp->icmp_family != AF_INET6) { 2270 *outlenp = 0; 2271 return (ENOPROTOOPT); 2272 } 2273 /* 2274 * Deal with both sticky options and ancillary data 2275 */ 2276 if (thisdg_attrs == NULL) { 2277 /* sticky options, or none */ 2278 ipp = &icmp->icmp_sticky_ipp; 2279 sticky = B_TRUE; 2280 } else { 2281 /* ancillary data */ 2282 ipp = (ip6_pkt_t *)thisdg_attrs; 
2283 sticky = B_FALSE; 2284 } 2285 2286 switch (name) { 2287 case IPV6_MULTICAST_IF: 2288 if (!checkonly) 2289 icmp->icmp_multicast_if_index = *i1; 2290 break; 2291 case IPV6_UNICAST_HOPS: 2292 /* -1 means use default */ 2293 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2294 *outlenp = 0; 2295 return (EINVAL); 2296 } 2297 if (!checkonly) { 2298 if (*i1 == -1) { 2299 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2300 is->is_ipv6_hoplimit; 2301 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2302 /* Pass modified value to IP. */ 2303 *i1 = ipp->ipp_hoplimit; 2304 } else { 2305 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2306 (uint8_t)*i1; 2307 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2308 } 2309 /* Rebuild the header template */ 2310 error = icmp_build_hdrs(q, icmp); 2311 if (error != 0) { 2312 *outlenp = 0; 2313 return (error); 2314 } 2315 } 2316 break; 2317 case IPV6_MULTICAST_HOPS: 2318 /* -1 means use default */ 2319 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2320 *outlenp = 0; 2321 return (EINVAL); 2322 } 2323 if (!checkonly) { 2324 if (*i1 == -1) { 2325 icmp->icmp_multicast_ttl = 2326 ipp->ipp_multicast_hops = 2327 IP_DEFAULT_MULTICAST_TTL; 2328 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2329 /* Pass modified value to IP. */ 2330 *i1 = icmp->icmp_multicast_ttl; 2331 } else { 2332 icmp->icmp_multicast_ttl = 2333 ipp->ipp_multicast_hops = 2334 (uint8_t)*i1; 2335 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2336 } 2337 } 2338 break; 2339 case IPV6_MULTICAST_LOOP: 2340 if (*i1 != 0 && *i1 != 1) { 2341 *outlenp = 0; 2342 return (EINVAL); 2343 } 2344 if (!checkonly) 2345 icmp->icmp_multicast_loop = *i1; 2346 break; 2347 case IPV6_CHECKSUM: 2348 /* 2349 * Integer offset into the user data of where the 2350 * checksum is located. 2351 * Offset of -1 disables option. 2352 * Does not apply to IPPROTO_ICMPV6. 
2353 */ 2354 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2355 *outlenp = 0; 2356 return (EINVAL); 2357 } 2358 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2359 /* Negative or not 16 bit aligned offset */ 2360 *outlenp = 0; 2361 return (EINVAL); 2362 } 2363 if (checkonly) 2364 break; 2365 2366 if (*i1 == -1) { 2367 icmp->icmp_raw_checksum = 0; 2368 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2369 } else { 2370 icmp->icmp_raw_checksum = 1; 2371 icmp->icmp_checksum_off = *i1; 2372 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2373 } 2374 /* Rebuild the header template */ 2375 error = icmp_build_hdrs(q, icmp); 2376 if (error != 0) { 2377 *outlenp = 0; 2378 return (error); 2379 } 2380 break; 2381 case IPV6_JOIN_GROUP: 2382 case IPV6_LEAVE_GROUP: 2383 case MCAST_JOIN_GROUP: 2384 case MCAST_LEAVE_GROUP: 2385 case MCAST_BLOCK_SOURCE: 2386 case MCAST_UNBLOCK_SOURCE: 2387 case MCAST_JOIN_SOURCE_GROUP: 2388 case MCAST_LEAVE_SOURCE_GROUP: 2389 /* 2390 * "soft" error (negative) 2391 * option not handled at this level 2392 * Note: Do not modify *outlenp 2393 */ 2394 return (-EINVAL); 2395 case IPV6_BOUND_IF: 2396 if (!checkonly) 2397 icmp->icmp_bound_if = *i1; 2398 break; 2399 case IPV6_UNSPEC_SRC: 2400 if (!checkonly) 2401 icmp->icmp_unspec_source = onoff; 2402 break; 2403 case IPV6_RECVTCLASS: 2404 if (!checkonly) 2405 icmp->icmp_ipv6_recvtclass = onoff; 2406 break; 2407 /* 2408 * Set boolean switches for ancillary data delivery 2409 */ 2410 case IPV6_RECVPKTINFO: 2411 if (!checkonly) 2412 icmp->icmp_ip_recvpktinfo = onoff; 2413 break; 2414 case IPV6_RECVPATHMTU: 2415 if (!checkonly) 2416 icmp->icmp_ipv6_recvpathmtu = onoff; 2417 break; 2418 case IPV6_RECVHOPLIMIT: 2419 if (!checkonly) 2420 icmp->icmp_ipv6_recvhoplimit = onoff; 2421 break; 2422 case IPV6_RECVHOPOPTS: 2423 if (!checkonly) 2424 icmp->icmp_ipv6_recvhopopts = onoff; 2425 break; 2426 case IPV6_RECVDSTOPTS: 2427 if (!checkonly) 2428 icmp->icmp_ipv6_recvdstopts = onoff; 2429 break; 2430 case _OLD_IPV6_RECVDSTOPTS: 
2431 if (!checkonly) 2432 icmp->icmp_old_ipv6_recvdstopts = onoff; 2433 break; 2434 case IPV6_RECVRTHDRDSTOPTS: 2435 if (!checkonly) 2436 icmp->icmp_ipv6_recvrtdstopts = onoff; 2437 break; 2438 case IPV6_RECVRTHDR: 2439 if (!checkonly) 2440 icmp->icmp_ipv6_recvrthdr = onoff; 2441 break; 2442 /* 2443 * Set sticky options or ancillary data. 2444 * If sticky options, (re)build any extension headers 2445 * that might be needed as a result. 2446 */ 2447 case IPV6_PKTINFO: 2448 /* 2449 * The source address and ifindex are verified 2450 * in ip_opt_set(). For ancillary data the 2451 * source address is checked in ip_wput_v6. 2452 */ 2453 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2454 return (EINVAL); 2455 if (checkonly) 2456 break; 2457 2458 if (inlen == 0) { 2459 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2460 ipp->ipp_sticky_ignored |= 2461 (IPPF_IFINDEX|IPPF_ADDR); 2462 } else { 2463 struct in6_pktinfo *pkti; 2464 2465 pkti = (struct in6_pktinfo *)invalp; 2466 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2467 ipp->ipp_addr = pkti->ipi6_addr; 2468 if (ipp->ipp_ifindex != 0) 2469 ipp->ipp_fields |= IPPF_IFINDEX; 2470 else 2471 ipp->ipp_fields &= ~IPPF_IFINDEX; 2472 if (!IN6_IS_ADDR_UNSPECIFIED( 2473 &ipp->ipp_addr)) 2474 ipp->ipp_fields |= IPPF_ADDR; 2475 else 2476 ipp->ipp_fields &= ~IPPF_ADDR; 2477 } 2478 if (sticky) { 2479 error = icmp_build_hdrs(q, icmp); 2480 if (error != 0) 2481 return (error); 2482 } 2483 break; 2484 case IPV6_HOPLIMIT: 2485 /* This option can only be used as ancillary data. 
*/ 2486 if (sticky) 2487 return (EINVAL); 2488 if (inlen != 0 && inlen != sizeof (int)) 2489 return (EINVAL); 2490 if (checkonly) 2491 break; 2492 2493 if (inlen == 0) { 2494 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2495 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2496 } else { 2497 if (*i1 > 255 || *i1 < -1) 2498 return (EINVAL); 2499 if (*i1 == -1) 2500 ipp->ipp_hoplimit = 2501 is->is_ipv6_hoplimit; 2502 else 2503 ipp->ipp_hoplimit = *i1; 2504 ipp->ipp_fields |= IPPF_HOPLIMIT; 2505 } 2506 break; 2507 case IPV6_TCLASS: 2508 /* 2509 * IPV6_RECVTCLASS accepts -1 as use kernel default 2510 * and [0, 255] as the actualy traffic class. 2511 */ 2512 if (inlen != 0 && inlen != sizeof (int)) 2513 return (EINVAL); 2514 if (checkonly) 2515 break; 2516 2517 if (inlen == 0) { 2518 ipp->ipp_fields &= ~IPPF_TCLASS; 2519 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2520 } else { 2521 if (*i1 >= 256 || *i1 < -1) 2522 return (EINVAL); 2523 if (*i1 == -1) { 2524 ipp->ipp_tclass = 2525 IPV6_FLOW_TCLASS( 2526 IPV6_DEFAULT_VERS_AND_FLOW); 2527 } else { 2528 ipp->ipp_tclass = *i1; 2529 } 2530 ipp->ipp_fields |= IPPF_TCLASS; 2531 } 2532 if (sticky) { 2533 error = icmp_build_hdrs(q, icmp); 2534 if (error != 0) 2535 return (error); 2536 } 2537 break; 2538 case IPV6_NEXTHOP: 2539 /* 2540 * IP will verify that the nexthop is reachable 2541 * and fail for sticky options. 
2542 */ 2543 if (inlen != 0 && inlen != sizeof (sin6_t)) 2544 return (EINVAL); 2545 if (checkonly) 2546 break; 2547 2548 if (inlen == 0) { 2549 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2550 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2551 } else { 2552 sin6_t *sin6 = (sin6_t *)invalp; 2553 2554 if (sin6->sin6_family != AF_INET6) 2555 return (EAFNOSUPPORT); 2556 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 2557 return (EADDRNOTAVAIL); 2558 ipp->ipp_nexthop = sin6->sin6_addr; 2559 if (!IN6_IS_ADDR_UNSPECIFIED( 2560 &ipp->ipp_nexthop)) 2561 ipp->ipp_fields |= IPPF_NEXTHOP; 2562 else 2563 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2564 } 2565 if (sticky) { 2566 error = icmp_build_hdrs(q, icmp); 2567 if (error != 0) 2568 return (error); 2569 } 2570 break; 2571 case IPV6_HOPOPTS: { 2572 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2573 /* 2574 * Sanity checks - minimum size, size a multiple of 2575 * eight bytes, and matching size passed in. 2576 */ 2577 if (inlen != 0 && 2578 inlen != (8 * (hopts->ip6h_len + 1))) 2579 return (EINVAL); 2580 2581 if (checkonly) 2582 break; 2583 error = optcom_pkt_set(invalp, inlen, sticky, 2584 (uchar_t **)&ipp->ipp_hopopts, 2585 &ipp->ipp_hopoptslen, 2586 sticky ? icmp->icmp_label_len_v6 : 0); 2587 if (error != 0) 2588 return (error); 2589 if (ipp->ipp_hopoptslen == 0) { 2590 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2591 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2592 } else { 2593 ipp->ipp_fields |= IPPF_HOPOPTS; 2594 } 2595 if (sticky) { 2596 error = icmp_build_hdrs(q, icmp); 2597 if (error != 0) 2598 return (error); 2599 } 2600 break; 2601 } 2602 case IPV6_RTHDRDSTOPTS: { 2603 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2604 2605 /* 2606 * Sanity checks - minimum size, size a multiple of 2607 * eight bytes, and matching size passed in. 
2608 */ 2609 if (inlen != 0 && 2610 inlen != (8 * (dopts->ip6d_len + 1))) 2611 return (EINVAL); 2612 2613 if (checkonly) 2614 break; 2615 2616 if (inlen == 0) { 2617 if (sticky && 2618 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2619 kmem_free(ipp->ipp_rtdstopts, 2620 ipp->ipp_rtdstoptslen); 2621 ipp->ipp_rtdstopts = NULL; 2622 ipp->ipp_rtdstoptslen = 0; 2623 } 2624 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2625 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2626 } else { 2627 error = optcom_pkt_set(invalp, inlen, sticky, 2628 (uchar_t **)&ipp->ipp_rtdstopts, 2629 &ipp->ipp_rtdstoptslen, 0); 2630 if (error != 0) 2631 return (error); 2632 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2633 } 2634 if (sticky) { 2635 error = icmp_build_hdrs(q, icmp); 2636 if (error != 0) 2637 return (error); 2638 } 2639 break; 2640 } 2641 case IPV6_DSTOPTS: { 2642 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2643 2644 /* 2645 * Sanity checks - minimum size, size a multiple of 2646 * eight bytes, and matching size passed in. 2647 */ 2648 if (inlen != 0 && 2649 inlen != (8 * (dopts->ip6d_len + 1))) 2650 return (EINVAL); 2651 2652 if (checkonly) 2653 break; 2654 2655 if (inlen == 0) { 2656 if (sticky && 2657 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2658 kmem_free(ipp->ipp_dstopts, 2659 ipp->ipp_dstoptslen); 2660 ipp->ipp_dstopts = NULL; 2661 ipp->ipp_dstoptslen = 0; 2662 } 2663 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2664 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2665 } else { 2666 error = optcom_pkt_set(invalp, inlen, sticky, 2667 (uchar_t **)&ipp->ipp_dstopts, 2668 &ipp->ipp_dstoptslen, 0); 2669 if (error != 0) 2670 return (error); 2671 ipp->ipp_fields |= IPPF_DSTOPTS; 2672 } 2673 if (sticky) { 2674 error = icmp_build_hdrs(q, icmp); 2675 if (error != 0) 2676 return (error); 2677 } 2678 break; 2679 } 2680 case IPV6_RTHDR: { 2681 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2682 2683 /* 2684 * Sanity checks - minimum size, size a multiple of 2685 * eight bytes, and matching size passed in. 
2686 */ 2687 if (inlen != 0 && 2688 inlen != (8 * (rt->ip6r_len + 1))) 2689 return (EINVAL); 2690 2691 if (checkonly) 2692 break; 2693 2694 if (inlen == 0) { 2695 if (sticky && 2696 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2697 kmem_free(ipp->ipp_rthdr, 2698 ipp->ipp_rthdrlen); 2699 ipp->ipp_rthdr = NULL; 2700 ipp->ipp_rthdrlen = 0; 2701 } 2702 ipp->ipp_fields &= ~IPPF_RTHDR; 2703 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2704 } else { 2705 error = optcom_pkt_set(invalp, inlen, sticky, 2706 (uchar_t **)&ipp->ipp_rthdr, 2707 &ipp->ipp_rthdrlen, 0); 2708 if (error != 0) 2709 return (error); 2710 ipp->ipp_fields |= IPPF_RTHDR; 2711 } 2712 if (sticky) { 2713 error = icmp_build_hdrs(q, icmp); 2714 if (error != 0) 2715 return (error); 2716 } 2717 break; 2718 } 2719 2720 case IPV6_DONTFRAG: 2721 if (checkonly) 2722 break; 2723 2724 if (onoff) { 2725 ipp->ipp_fields |= IPPF_DONTFRAG; 2726 } else { 2727 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2728 } 2729 break; 2730 2731 case IPV6_USE_MIN_MTU: 2732 if (inlen != sizeof (int)) 2733 return (EINVAL); 2734 2735 if (*i1 < -1 || *i1 > 1) 2736 return (EINVAL); 2737 2738 if (checkonly) 2739 break; 2740 2741 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 2742 ipp->ipp_use_min_mtu = *i1; 2743 break; 2744 2745 /* 2746 * This option can't be set. Its only returned via 2747 * getsockopt() or ancillary data. 2748 */ 2749 case IPV6_PATHMTU: 2750 return (EINVAL); 2751 2752 case IPV6_BOUND_PIF: 2753 case IPV6_SEC_OPT: 2754 case IPV6_DONTFAILOVER_IF: 2755 case IPV6_SRC_PREFERENCES: 2756 case IPV6_V6ONLY: 2757 /* Handled at IP level */ 2758 return (-EINVAL); 2759 default: 2760 *outlenp = 0; 2761 return (EINVAL); 2762 } 2763 break; 2764 } /* end IPPROTO_IPV6 */ 2765 2766 case IPPROTO_ICMPV6: 2767 /* 2768 * Only allow IPv6 option processing on IPv6 sockets. 
2769 */ 2770 if (icmp->icmp_family != AF_INET6) { 2771 *outlenp = 0; 2772 return (ENOPROTOOPT); 2773 } 2774 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2775 *outlenp = 0; 2776 return (ENOPROTOOPT); 2777 } 2778 switch (name) { 2779 case ICMP6_FILTER: 2780 if (!checkonly) { 2781 if ((inlen != 0) && 2782 (inlen != sizeof (icmp6_filter_t))) 2783 return (EINVAL); 2784 2785 if (inlen == 0) { 2786 if (icmp->icmp_filter != NULL) { 2787 kmem_free(icmp->icmp_filter, 2788 sizeof (icmp6_filter_t)); 2789 icmp->icmp_filter = NULL; 2790 } 2791 } else { 2792 if (icmp->icmp_filter == NULL) { 2793 icmp->icmp_filter = kmem_alloc( 2794 sizeof (icmp6_filter_t), 2795 KM_NOSLEEP); 2796 if (icmp->icmp_filter == NULL) { 2797 *outlenp = 0; 2798 return (ENOBUFS); 2799 } 2800 } 2801 (void) bcopy(invalp, icmp->icmp_filter, 2802 inlen); 2803 } 2804 } 2805 break; 2806 2807 default: 2808 *outlenp = 0; 2809 return (EINVAL); 2810 } 2811 break; 2812 default: 2813 *outlenp = 0; 2814 return (EINVAL); 2815 } 2816 /* 2817 * Common case of OK return with outval same as inval. 2818 */ 2819 if (invalp != outvalp) { 2820 /* don't trust bcopy for identical src/dst */ 2821 (void) bcopy(invalp, outvalp, inlen); 2822 } 2823 *outlenp = inlen; 2824 return (0); 2825 } 2826 2827 /* 2828 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 2829 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 2830 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 2831 * headers. 2832 * Returns failure if can't allocate memory. 
2833 */ 2834 static int 2835 icmp_build_hdrs(queue_t *q, icmp_t *icmp) 2836 { 2837 icmp_stack_t *is = icmp->icmp_is; 2838 uchar_t *hdrs; 2839 uint_t hdrs_len; 2840 ip6_t *ip6h; 2841 ip6i_t *ip6i; 2842 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 2843 2844 hdrs_len = ip_total_hdrs_len_v6(ipp); 2845 ASSERT(hdrs_len != 0); 2846 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 2847 /* Need to reallocate */ 2848 if (hdrs_len != 0) { 2849 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 2850 if (hdrs == NULL) 2851 return (ENOMEM); 2852 } else { 2853 hdrs = NULL; 2854 } 2855 if (icmp->icmp_sticky_hdrs_len != 0) { 2856 kmem_free(icmp->icmp_sticky_hdrs, 2857 icmp->icmp_sticky_hdrs_len); 2858 } 2859 icmp->icmp_sticky_hdrs = hdrs; 2860 icmp->icmp_sticky_hdrs_len = hdrs_len; 2861 } 2862 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 2863 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 2864 2865 /* Set header fields not in ipp */ 2866 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 2867 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 2868 ip6h = (ip6_t *)&ip6i[1]; 2869 2870 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 2871 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 2872 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 2873 } 2874 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 2875 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 2876 } 2877 } else { 2878 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 2879 } 2880 2881 if (!(ipp->ipp_fields & IPPF_ADDR)) 2882 ip6h->ip6_src = icmp->icmp_v6src; 2883 2884 /* Try to get everything in a single mblk */ 2885 if (hdrs_len > icmp->icmp_max_hdr_len) { 2886 icmp->icmp_max_hdr_len = hdrs_len; 2887 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2888 is->is_wroff_extra); 2889 } 2890 return (0); 2891 } 2892 2893 /* 2894 * This routine retrieves the value of an ND variable in a icmpparam_t 2895 * structure. It is called through nd_getset when a user reads the 2896 * variable. 
2897 */ 2898 /* ARGSUSED */ 2899 static int 2900 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2901 { 2902 icmpparam_t *icmppa = (icmpparam_t *)cp; 2903 2904 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 2905 return (0); 2906 } 2907 2908 /* 2909 * Walk through the param array specified registering each element with the 2910 * named dispatch (ND) handler. 2911 */ 2912 static boolean_t 2913 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) 2914 { 2915 for (; cnt-- > 0; icmppa++) { 2916 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 2917 if (!nd_load(ndp, icmppa->icmp_param_name, 2918 icmp_param_get, icmp_param_set, 2919 (caddr_t)icmppa)) { 2920 nd_free(ndp); 2921 return (B_FALSE); 2922 } 2923 } 2924 } 2925 if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL, 2926 NULL)) { 2927 nd_free(ndp); 2928 return (B_FALSE); 2929 } 2930 return (B_TRUE); 2931 } 2932 2933 /* This routine sets an ND variable in a icmpparam_t structure. */ 2934 /* ARGSUSED */ 2935 static int 2936 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 2937 { 2938 long new_value; 2939 icmpparam_t *icmppa = (icmpparam_t *)cp; 2940 2941 /* 2942 * Fail the request if the new value does not lie within the 2943 * required bounds. 
2944 */ 2945 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2946 new_value < icmppa->icmp_param_min || 2947 new_value > icmppa->icmp_param_max) { 2948 return (EINVAL); 2949 } 2950 /* Set the new value */ 2951 icmppa->icmp_param_value = new_value; 2952 return (0); 2953 } 2954 2955 static void 2956 icmp_rput(queue_t *q, mblk_t *mp) 2957 { 2958 struct T_unitdata_ind *tudi; 2959 uchar_t *rptr; 2960 struct T_error_ack *tea; 2961 icmp_t *icmp = (icmp_t *)q->q_ptr; 2962 icmp_stack_t *is = icmp->icmp_is; 2963 sin_t *sin; 2964 sin6_t *sin6; 2965 ip6_t *ip6h; 2966 ip6i_t *ip6i; 2967 mblk_t *mp1; 2968 int hdr_len; 2969 ipha_t *ipha; 2970 int udi_size; /* Size of T_unitdata_ind */ 2971 uint_t ipvers; 2972 ip6_pkt_t ipp; 2973 uint8_t nexthdr; 2974 boolean_t recvif = B_FALSE; 2975 ip_pktinfo_t *pinfo = NULL; 2976 mblk_t *options_mp = NULL; 2977 uint_t icmp_opt = 0; 2978 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 2979 uint_t hopstrip; 2980 2981 if (icmp->icmp_restricted) { 2982 putnext(q, mp); 2983 return; 2984 } 2985 2986 if (mp->b_datap->db_type == M_CTL) { 2987 /* 2988 * IP sends up the IPSEC_IN message for handling IPSEC 2989 * policy at the TCP level. We don't need it here. 2990 */ 2991 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 2992 mp1 = mp->b_cont; 2993 freeb(mp); 2994 mp = mp1; 2995 } else { 2996 pinfo = (ip_pktinfo_t *)mp->b_rptr; 2997 if ((icmp->icmp_recvif != 0 || 2998 icmp->icmp_ip_recvpktinfo) && 2999 (pinfo->ip_pkt_ulp_type == IN_PKTINFO)) { 3000 /* 3001 * IP has passed the options in mp and the 3002 * actual data is in b_cont. 3003 */ 3004 recvif = B_TRUE; 3005 /* 3006 * We are here bcos IP_RECVIF is set so we need 3007 * to extract the options mblk and adjust the 3008 * rptr 3009 */ 3010 options_mp = mp; 3011 mp = mp->b_cont; 3012 } 3013 } 3014 } 3015 3016 rptr = mp->b_rptr; 3017 switch (mp->b_datap->db_type) { 3018 case M_DATA: 3019 /* 3020 * M_DATA messages contain IP packets. They are handled 3021 * following the switch. 
3022 */ 3023 break; 3024 case M_PROTO: 3025 case M_PCPROTO: 3026 /* M_PROTO messages contain some type of TPI message. */ 3027 if ((mp->b_wptr - rptr) < sizeof (t_scalar_t)) { 3028 freemsg(mp); 3029 return; 3030 } 3031 tea = (struct T_error_ack *)rptr; 3032 switch (tea->PRIM_type) { 3033 case T_ERROR_ACK: 3034 switch (tea->ERROR_prim) { 3035 case O_T_BIND_REQ: 3036 case T_BIND_REQ: 3037 /* 3038 * If our O_T_BIND_REQ/T_BIND_REQ fails, 3039 * clear out the source address before 3040 * passing the message upstream. 3041 * If this was caused by a T_CONN_REQ 3042 * revert back to bound state. 3043 */ 3044 if (icmp->icmp_state == TS_UNBND) { 3045 /* 3046 * TPI has not yet bound - bind sent by 3047 * icmp_bind_proto. 3048 */ 3049 freemsg(mp); 3050 return; 3051 } 3052 if (icmp->icmp_state == TS_DATA_XFER) { 3053 /* Connect failed */ 3054 tea->ERROR_prim = T_CONN_REQ; 3055 icmp->icmp_v6src = 3056 icmp->icmp_bound_v6src; 3057 icmp->icmp_state = TS_IDLE; 3058 if (icmp->icmp_family == AF_INET6) 3059 (void) icmp_build_hdrs(q, icmp); 3060 break; 3061 } 3062 3063 if (icmp->icmp_discon_pending) { 3064 tea->ERROR_prim = T_DISCON_REQ; 3065 icmp->icmp_discon_pending = 0; 3066 } 3067 V6_SET_ZERO(icmp->icmp_v6src); 3068 V6_SET_ZERO(icmp->icmp_bound_v6src); 3069 icmp->icmp_state = TS_UNBND; 3070 if (icmp->icmp_family == AF_INET6) 3071 (void) icmp_build_hdrs(q, icmp); 3072 break; 3073 default: 3074 break; 3075 } 3076 break; 3077 case T_BIND_ACK: 3078 icmp_rput_bind_ack(q, mp); 3079 return; 3080 3081 case T_OPTMGMT_ACK: 3082 case T_OK_ACK: 3083 if (tea->PRIM_type == T_OK_ACK) { 3084 struct T_ok_ack *toa; 3085 toa = (struct T_ok_ack *)rptr; 3086 if (toa->CORRECT_prim == T_UNBIND_REQ) { 3087 /* 3088 * If somebody sets IPSEC options, IP 3089 * sends some IPSEC info which is used 3090 * by the TCP for detached connections. 3091 * We don't need it here. 
3092 */ 3093 if ((mp1 = mp->b_cont) != NULL) { 3094 freemsg(mp1); 3095 mp->b_cont = NULL; 3096 } 3097 } 3098 } 3099 break; 3100 default: 3101 freemsg(mp); 3102 return; 3103 } 3104 putnext(q, mp); 3105 return; 3106 case M_CTL: 3107 if (recvif) { 3108 /* 3109 * IP has passed the options in mp and the actual data 3110 * is in b_cont. Jump to normal data processing. 3111 */ 3112 break; 3113 } 3114 3115 /* Contains ICMP packet from IP */ 3116 icmp_icmp_error(q, mp); 3117 return; 3118 default: 3119 putnext(q, mp); 3120 return; 3121 } 3122 3123 /* 3124 * Discard message if it is misaligned or smaller than the IP header. 3125 */ 3126 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3127 freemsg(mp); 3128 if (options_mp != NULL) 3129 freeb(options_mp); 3130 BUMP_MIB(&icmp->icmp_rawip_mib, rawipInErrors); 3131 return; 3132 } 3133 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3134 3135 /* Handle M_DATA messages containing IP packets messages */ 3136 if (ipvers == IPV4_VERSION) { 3137 /* 3138 * Special case where IP attaches 3139 * the IRE needs to be handled so that we don't send up 3140 * IRE to the user land. 3141 */ 3142 ipha = (ipha_t *)rptr; 3143 hdr_len = IPH_HDR_LENGTH(ipha); 3144 3145 if (ipha->ipha_protocol == IPPROTO_TCP) { 3146 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3147 3148 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3149 TH_SYN) && mp->b_cont != NULL) { 3150 mp1 = mp->b_cont; 3151 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3152 freeb(mp1); 3153 mp->b_cont = NULL; 3154 } 3155 } 3156 } 3157 if (is->is_bsd_compat) { 3158 ushort_t len; 3159 len = ntohs(ipha->ipha_length); 3160 3161 if (mp->b_datap->db_ref > 1) { 3162 /* 3163 * Allocate a new IP header so that we can 3164 * modify ipha_length. 
3165 */ 3166 mblk_t *mp1; 3167 3168 mp1 = allocb(hdr_len, BPRI_MED); 3169 if (!mp1) { 3170 freemsg(mp); 3171 if (options_mp != NULL) 3172 freeb(options_mp); 3173 BUMP_MIB(&icmp->icmp_rawip_mib, 3174 rawipInErrors); 3175 return; 3176 } 3177 bcopy(rptr, mp1->b_rptr, hdr_len); 3178 mp->b_rptr = rptr + hdr_len; 3179 rptr = mp1->b_rptr; 3180 ipha = (ipha_t *)rptr; 3181 mp1->b_cont = mp; 3182 mp1->b_wptr = rptr + hdr_len; 3183 mp = mp1; 3184 } 3185 len -= hdr_len; 3186 ipha->ipha_length = htons(len); 3187 } 3188 } 3189 3190 /* 3191 * This is the inbound data path. Packets are passed upstream as 3192 * T_UNITDATA_IND messages with full IP headers still attached. 3193 */ 3194 if (icmp->icmp_family == AF_INET) { 3195 ASSERT(ipvers == IPV4_VERSION); 3196 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3197 if (icmp->icmp_recvif && recvif && 3198 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3199 udi_size += sizeof (struct T_opthdr) + 3200 sizeof (uint_t); 3201 } 3202 3203 if (icmp->icmp_ip_recvpktinfo && recvif && 3204 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3205 udi_size += sizeof (struct T_opthdr) + 3206 sizeof (struct in_pktinfo); 3207 } 3208 3209 /* 3210 * If SO_TIMESTAMP is set allocate the appropriate sized 3211 * buffer. Since gethrestime() expects a pointer aligned 3212 * argument, we allocate space necessary for extra 3213 * alignment (even though it might not be used). 
3214 */ 3215 if (icmp->icmp_timestamp) { 3216 udi_size += sizeof (struct T_opthdr) + 3217 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3218 } 3219 mp1 = allocb(udi_size, BPRI_MED); 3220 if (mp1 == NULL) { 3221 freemsg(mp); 3222 if (options_mp != NULL) 3223 freeb(options_mp); 3224 BUMP_MIB(&icmp->icmp_rawip_mib, rawipInErrors); 3225 return; 3226 } 3227 mp1->b_cont = mp; 3228 mp = mp1; 3229 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3230 mp->b_datap->db_type = M_PROTO; 3231 mp->b_wptr = (uchar_t *)tudi + udi_size; 3232 tudi->PRIM_type = T_UNITDATA_IND; 3233 tudi->SRC_length = sizeof (sin_t); 3234 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3235 sin = (sin_t *)&tudi[1]; 3236 *sin = sin_null; 3237 sin->sin_family = AF_INET; 3238 sin->sin_addr.s_addr = ipha->ipha_src; 3239 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3240 sizeof (sin_t); 3241 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3242 tudi->OPT_length = udi_size; 3243 3244 /* 3245 * Add options if IP_RECVIF is set 3246 */ 3247 if (udi_size != 0) { 3248 char *dstopt; 3249 3250 dstopt = (char *)&sin[1]; 3251 if (icmp->icmp_recvif && recvif && 3252 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3253 3254 struct T_opthdr *toh; 3255 uint_t *dstptr; 3256 3257 toh = (struct T_opthdr *)dstopt; 3258 toh->level = IPPROTO_IP; 3259 toh->name = IP_RECVIF; 3260 toh->len = sizeof (struct T_opthdr) + 3261 sizeof (uint_t); 3262 toh->status = 0; 3263 dstopt += sizeof (struct T_opthdr); 3264 dstptr = (uint_t *)dstopt; 3265 *dstptr = pinfo->ip_pkt_ifindex; 3266 dstopt += sizeof (uint_t); 3267 freeb(options_mp); 3268 udi_size -= toh->len; 3269 } 3270 if (icmp->icmp_timestamp) { 3271 struct T_opthdr *toh; 3272 3273 toh = (struct T_opthdr *)dstopt; 3274 toh->level = SOL_SOCKET; 3275 toh->name = SCM_TIMESTAMP; 3276 toh->len = sizeof (struct T_opthdr) + 3277 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3278 toh->status = 0; 3279 dstopt += sizeof (struct T_opthdr); 3280 /* Align for gethrestime() */ 3281 dstopt = (char 
*)P2ROUNDUP((intptr_t)dstopt, 3282 sizeof (intptr_t)); 3283 gethrestime((timestruc_t *)dstopt); 3284 dstopt = (char *)toh + toh->len; 3285 udi_size -= toh->len; 3286 } 3287 if (icmp->icmp_ip_recvpktinfo && recvif && 3288 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3289 struct T_opthdr *toh; 3290 struct in_pktinfo *pktinfop; 3291 3292 toh = (struct T_opthdr *)dstopt; 3293 toh->level = IPPROTO_IP; 3294 toh->name = IP_PKTINFO; 3295 toh->len = sizeof (struct T_opthdr) + 3296 sizeof (in_pktinfo_t); 3297 toh->status = 0; 3298 dstopt += sizeof (struct T_opthdr); 3299 pktinfop = (struct in_pktinfo *)dstopt; 3300 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3301 pktinfop->ipi_spec_dst = 3302 pinfo->ip_pkt_match_addr; 3303 3304 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3305 3306 dstopt += sizeof (struct in_pktinfo); 3307 udi_size -= toh->len; 3308 } 3309 3310 /* Consumed all of allocated space */ 3311 ASSERT(udi_size == 0); 3312 } 3313 3314 BUMP_MIB(&icmp->icmp_rawip_mib, rawipInDatagrams); 3315 putnext(q, mp); 3316 return; 3317 } 3318 3319 /* 3320 * We don't need options_mp in the IPv6 path. 3321 */ 3322 if (options_mp != NULL) { 3323 freeb(options_mp); 3324 options_mp = NULL; 3325 } 3326 3327 /* 3328 * Discard message if it is smaller than the IPv6 header 3329 * or if the header is malformed. 3330 */ 3331 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3332 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3333 icmp->icmp_family != AF_INET6) { 3334 freemsg(mp); 3335 BUMP_MIB(&icmp->icmp_rawip_mib, rawipInErrors); 3336 return; 3337 } 3338 3339 /* Initialize */ 3340 ipp.ipp_fields = 0; 3341 hopstrip = 0; 3342 3343 ip6h = (ip6_t *)rptr; 3344 /* 3345 * Call on ip_find_hdr_v6 which gets the total hdr len 3346 * as well as individual lenghts of ext hdrs (and ptrs to 3347 * them). 
3348 */ 3349 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3350 /* Look for ifindex information */ 3351 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3352 ip6i = (ip6i_t *)ip6h; 3353 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3354 ASSERT(ip6i->ip6i_ifindex != 0); 3355 ipp.ipp_fields |= IPPF_IFINDEX; 3356 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3357 } 3358 rptr = (uchar_t *)&ip6i[1]; 3359 mp->b_rptr = rptr; 3360 if (rptr == mp->b_wptr) { 3361 mp1 = mp->b_cont; 3362 freeb(mp); 3363 mp = mp1; 3364 rptr = mp->b_rptr; 3365 } 3366 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3367 ip6h = (ip6_t *)rptr; 3368 } 3369 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3370 3371 /* 3372 * We need to lie a bit to the user because users inside 3373 * labeled compartments should not see their own labels. We 3374 * assume that in all other respects IP has checked the label, 3375 * and that the label is always first among the options. (If 3376 * it's not first, then this code won't see it, and the option 3377 * will be passed along to the user.) 3378 * 3379 * If we had multilevel ICMP sockets, then the following code 3380 * should be skipped for them to allow the user to see the 3381 * label. 3382 * 3383 * Alignment restrictions in the definition of IP options 3384 * (namely, the requirement that the 4-octet DOI goes on a 3385 * 4-octet boundary) mean that we know exactly where the option 3386 * should start, but we're lenient for other hosts. 3387 * 3388 * Note that there are no multilevel ICMP or raw IP sockets 3389 * yet, thus nobody ever sees the IP6OPT_LS option. 
3390 */ 3391 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3392 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3393 const uchar_t *ucp = 3394 (const uchar_t *)ipp.ipp_hopopts + 2; 3395 int remlen = ipp.ipp_hopoptslen - 2; 3396 3397 while (remlen > 0) { 3398 if (*ucp == IP6OPT_PAD1) { 3399 remlen--; 3400 ucp++; 3401 } else if (*ucp == IP6OPT_PADN) { 3402 remlen -= ucp[1] + 2; 3403 ucp += ucp[1] + 2; 3404 } else if (*ucp == ip6opt_ls) { 3405 hopstrip = (ucp - 3406 (const uchar_t *)ipp.ipp_hopopts) + 3407 ucp[1] + 2; 3408 hopstrip = (hopstrip + 7) & ~7; 3409 break; 3410 } else { 3411 /* label option must be first */ 3412 break; 3413 } 3414 } 3415 } 3416 } else { 3417 hdr_len = IPV6_HDR_LEN; 3418 ip6i = NULL; 3419 nexthdr = ip6h->ip6_nxt; 3420 } 3421 /* 3422 * One special case where IP attaches the IRE needs to 3423 * be handled so that we don't send up IRE to the user land. 3424 */ 3425 if (nexthdr == IPPROTO_TCP) { 3426 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3427 3428 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3429 mp->b_cont != NULL) { 3430 mp1 = mp->b_cont; 3431 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3432 freeb(mp1); 3433 mp->b_cont = NULL; 3434 } 3435 } 3436 } 3437 /* 3438 * Check a filter for ICMPv6 types if needed. 3439 * Verify raw checksums if needed. 
3440 */ 3441 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3442 if (icmp->icmp_filter != NULL) { 3443 int type; 3444 3445 /* Assumes that IP has done the pullupmsg */ 3446 type = mp->b_rptr[hdr_len]; 3447 3448 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3449 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3450 freemsg(mp); 3451 return; 3452 } 3453 } else { 3454 /* Checksum */ 3455 uint16_t *up; 3456 uint32_t sum; 3457 int remlen; 3458 3459 up = (uint16_t *)&ip6h->ip6_src; 3460 3461 remlen = msgdsize(mp) - hdr_len; 3462 sum = htons(icmp->icmp_proto + remlen) 3463 + up[0] + up[1] + up[2] + up[3] 3464 + up[4] + up[5] + up[6] + up[7] 3465 + up[8] + up[9] + up[10] + up[11] 3466 + up[12] + up[13] + up[14] + up[15]; 3467 sum = (sum & 0xffff) + (sum >> 16); 3468 sum = IP_CSUM(mp, hdr_len, sum); 3469 if (sum != 0) { 3470 /* IPv6 RAW checksum failed */ 3471 ip0dbg(("icmp_rput: RAW checksum " 3472 "failed %x\n", sum)); 3473 freemsg(mp); 3474 BUMP_MIB(&icmp->icmp_rawip_mib, 3475 rawipInCksumErrs); 3476 return; 3477 } 3478 } 3479 } 3480 /* Skip all the IPv6 headers per API */ 3481 mp->b_rptr += hdr_len; 3482 3483 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3484 3485 /* 3486 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3487 * maintain state information, instead of relying on icmp_t 3488 * structure, since there arent any locks protecting these members 3489 * and there is a window where there might be a race between a 3490 * thread setting options on the write side and a thread reading 3491 * these options on the read size. 
3492 */ 3493 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3494 IPPF_RTHDR|IPPF_IFINDEX)) { 3495 if (icmp->icmp_ipv6_recvhopopts && 3496 (ipp.ipp_fields & IPPF_HOPOPTS) && 3497 ipp.ipp_hopoptslen > hopstrip) { 3498 udi_size += sizeof (struct T_opthdr) + 3499 ipp.ipp_hopoptslen - hopstrip; 3500 icmp_opt |= IPPF_HOPOPTS; 3501 } 3502 if ((icmp->icmp_ipv6_recvdstopts || 3503 icmp->icmp_old_ipv6_recvdstopts) && 3504 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3505 udi_size += sizeof (struct T_opthdr) + 3506 ipp.ipp_dstoptslen; 3507 icmp_opt |= IPPF_DSTOPTS; 3508 } 3509 if (((icmp->icmp_ipv6_recvdstopts && 3510 icmp->icmp_ipv6_recvrthdr && 3511 (ipp.ipp_fields & IPPF_RTHDR)) || 3512 icmp->icmp_ipv6_recvrtdstopts) && 3513 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3514 udi_size += sizeof (struct T_opthdr) + 3515 ipp.ipp_rtdstoptslen; 3516 icmp_opt |= IPPF_RTDSTOPTS; 3517 } 3518 if (icmp->icmp_ipv6_recvrthdr && 3519 (ipp.ipp_fields & IPPF_RTHDR)) { 3520 udi_size += sizeof (struct T_opthdr) + 3521 ipp.ipp_rthdrlen; 3522 icmp_opt |= IPPF_RTHDR; 3523 } 3524 if (icmp->icmp_ip_recvpktinfo && 3525 (ipp.ipp_fields & IPPF_IFINDEX)) { 3526 udi_size += sizeof (struct T_opthdr) + 3527 sizeof (struct in6_pktinfo); 3528 icmp_opt |= IPPF_IFINDEX; 3529 } 3530 } 3531 if (icmp->icmp_ipv6_recvhoplimit) { 3532 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3533 icmp_ipv6_recvhoplimit = B_TRUE; 3534 } 3535 3536 if (icmp->icmp_ipv6_recvtclass) 3537 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3538 3539 mp1 = allocb(udi_size, BPRI_MED); 3540 if (mp1 == NULL) { 3541 freemsg(mp); 3542 BUMP_MIB(&icmp->icmp_rawip_mib, rawipInErrors); 3543 return; 3544 } 3545 mp1->b_cont = mp; 3546 mp = mp1; 3547 mp->b_datap->db_type = M_PROTO; 3548 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3549 mp->b_wptr = (uchar_t *)tudi + udi_size; 3550 tudi->PRIM_type = T_UNITDATA_IND; 3551 tudi->SRC_length = sizeof (sin6_t); 3552 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3553 tudi->OPT_offset = 
sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3554 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3555 tudi->OPT_length = udi_size; 3556 sin6 = (sin6_t *)&tudi[1]; 3557 sin6->sin6_port = 0; 3558 sin6->sin6_family = AF_INET6; 3559 3560 sin6->sin6_addr = ip6h->ip6_src; 3561 /* No sin6_flowinfo per API */ 3562 sin6->sin6_flowinfo = 0; 3563 /* For link-scope source pass up scope id */ 3564 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3565 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3566 sin6->sin6_scope_id = ipp.ipp_ifindex; 3567 else 3568 sin6->sin6_scope_id = 0; 3569 3570 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3571 icmp->icmp_zoneid, is->is_netstack); 3572 3573 if (udi_size != 0) { 3574 uchar_t *dstopt; 3575 3576 dstopt = (uchar_t *)&sin6[1]; 3577 if (icmp_opt & IPPF_IFINDEX) { 3578 struct T_opthdr *toh; 3579 struct in6_pktinfo *pkti; 3580 3581 toh = (struct T_opthdr *)dstopt; 3582 toh->level = IPPROTO_IPV6; 3583 toh->name = IPV6_PKTINFO; 3584 toh->len = sizeof (struct T_opthdr) + 3585 sizeof (*pkti); 3586 toh->status = 0; 3587 dstopt += sizeof (struct T_opthdr); 3588 pkti = (struct in6_pktinfo *)dstopt; 3589 pkti->ipi6_addr = ip6h->ip6_dst; 3590 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3591 dstopt += sizeof (*pkti); 3592 udi_size -= toh->len; 3593 } 3594 if (icmp_ipv6_recvhoplimit) { 3595 struct T_opthdr *toh; 3596 3597 toh = (struct T_opthdr *)dstopt; 3598 toh->level = IPPROTO_IPV6; 3599 toh->name = IPV6_HOPLIMIT; 3600 toh->len = sizeof (struct T_opthdr) + 3601 sizeof (uint_t); 3602 toh->status = 0; 3603 dstopt += sizeof (struct T_opthdr); 3604 *(uint_t *)dstopt = ip6h->ip6_hops; 3605 dstopt += sizeof (uint_t); 3606 udi_size -= toh->len; 3607 } 3608 if (icmp->icmp_ipv6_recvtclass) { 3609 struct T_opthdr *toh; 3610 3611 toh = (struct T_opthdr *)dstopt; 3612 toh->level = IPPROTO_IPV6; 3613 toh->name = IPV6_TCLASS; 3614 toh->len = sizeof (struct T_opthdr) + 3615 sizeof (uint_t); 3616 toh->status = 0; 3617 dstopt += sizeof (struct T_opthdr); 
3618 *(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3619 dstopt += sizeof (uint_t); 3620 udi_size -= toh->len; 3621 } 3622 if (icmp_opt & IPPF_HOPOPTS) { 3623 struct T_opthdr *toh; 3624 3625 toh = (struct T_opthdr *)dstopt; 3626 toh->level = IPPROTO_IPV6; 3627 toh->name = IPV6_HOPOPTS; 3628 toh->len = sizeof (struct T_opthdr) + 3629 ipp.ipp_hopoptslen - hopstrip; 3630 toh->status = 0; 3631 dstopt += sizeof (struct T_opthdr); 3632 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 3633 ipp.ipp_hopoptslen - hopstrip); 3634 if (hopstrip > 0) { 3635 /* copy next header value and fake length */ 3636 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 3637 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 3638 hopstrip / 8; 3639 } 3640 dstopt += ipp.ipp_hopoptslen - hopstrip; 3641 udi_size -= toh->len; 3642 } 3643 if (icmp_opt & IPPF_RTDSTOPTS) { 3644 struct T_opthdr *toh; 3645 3646 toh = (struct T_opthdr *)dstopt; 3647 toh->level = IPPROTO_IPV6; 3648 toh->name = IPV6_DSTOPTS; 3649 toh->len = sizeof (struct T_opthdr) + 3650 ipp.ipp_rtdstoptslen; 3651 toh->status = 0; 3652 dstopt += sizeof (struct T_opthdr); 3653 bcopy(ipp.ipp_rtdstopts, dstopt, 3654 ipp.ipp_rtdstoptslen); 3655 dstopt += ipp.ipp_rtdstoptslen; 3656 udi_size -= toh->len; 3657 } 3658 if (icmp_opt & IPPF_RTHDR) { 3659 struct T_opthdr *toh; 3660 3661 toh = (struct T_opthdr *)dstopt; 3662 toh->level = IPPROTO_IPV6; 3663 toh->name = IPV6_RTHDR; 3664 toh->len = sizeof (struct T_opthdr) + 3665 ipp.ipp_rthdrlen; 3666 toh->status = 0; 3667 dstopt += sizeof (struct T_opthdr); 3668 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 3669 dstopt += ipp.ipp_rthdrlen; 3670 udi_size -= toh->len; 3671 } 3672 if (icmp_opt & IPPF_DSTOPTS) { 3673 struct T_opthdr *toh; 3674 3675 toh = (struct T_opthdr *)dstopt; 3676 toh->level = IPPROTO_IPV6; 3677 toh->name = IPV6_DSTOPTS; 3678 toh->len = sizeof (struct T_opthdr) + 3679 ipp.ipp_dstoptslen; 3680 toh->status = 0; 3681 dstopt += sizeof (struct T_opthdr); 3682 bcopy(ipp.ipp_dstopts, 
dstopt, 3683 ipp.ipp_dstoptslen); 3684 dstopt += ipp.ipp_dstoptslen; 3685 udi_size -= toh->len; 3686 } 3687 /* Consumed all of allocated space */ 3688 ASSERT(udi_size == 0); 3689 } 3690 BUMP_MIB(&icmp->icmp_rawip_mib, rawipInDatagrams); 3691 putnext(q, mp); 3692 } 3693 3694 /* 3695 * Process a T_BIND_ACK 3696 */ 3697 static void 3698 icmp_rput_bind_ack(queue_t *q, mblk_t *mp) 3699 { 3700 icmp_t *icmp = (icmp_t *)q->q_ptr; 3701 mblk_t *mp1; 3702 ire_t *ire; 3703 struct T_bind_ack *tba; 3704 uchar_t *addrp; 3705 ipa_conn_t *ac; 3706 ipa6_conn_t *ac6; 3707 3708 /* 3709 * We know if headers are included or not so we can 3710 * safely do this. 3711 */ 3712 if (icmp->icmp_state == TS_UNBND) { 3713 /* 3714 * TPI has not yet bound - bind sent by 3715 * icmp_bind_proto. 3716 */ 3717 freemsg(mp); 3718 return; 3719 } 3720 if (icmp->icmp_discon_pending) 3721 icmp->icmp_discon_pending = 0; 3722 3723 /* 3724 * If a broadcast/multicast address was bound set 3725 * the source address to 0. 3726 * This ensures no datagrams with broadcast address 3727 * as source address are emitted (which would violate 3728 * RFC1122 - Hosts requirements) 3729 * 3730 * Note that when connecting the returned IRE is 3731 * for the destination address and we only perform 3732 * the broadcast check for the source address (it 3733 * is OK to connect to a broadcast/multicast address.) 3734 */ 3735 mp1 = mp->b_cont; 3736 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 3737 ire = (ire_t *)mp1->b_rptr; 3738 3739 /* 3740 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 3741 * local address. 
3742 */ 3743 if (ire->ire_type == IRE_BROADCAST && 3744 icmp->icmp_state != TS_DATA_XFER) { 3745 /* This was just a local bind to a MC/broadcast addr */ 3746 V6_SET_ZERO(icmp->icmp_v6src); 3747 if (icmp->icmp_family == AF_INET6) 3748 (void) icmp_build_hdrs(q, icmp); 3749 } else if (V6_OR_V4_INADDR_ANY(icmp->icmp_v6src)) { 3750 /* 3751 * Local address not yet set - pick it from the 3752 * T_bind_ack 3753 */ 3754 tba = (struct T_bind_ack *)mp->b_rptr; 3755 addrp = &mp->b_rptr[tba->ADDR_offset]; 3756 switch (icmp->icmp_family) { 3757 case AF_INET: 3758 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 3759 ac = (ipa_conn_t *)addrp; 3760 } else { 3761 ASSERT(tba->ADDR_length == 3762 sizeof (ipa_conn_x_t)); 3763 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 3764 } 3765 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 3766 &icmp->icmp_v6src); 3767 break; 3768 case AF_INET6: 3769 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 3770 ac6 = (ipa6_conn_t *)addrp; 3771 } else { 3772 ASSERT(tba->ADDR_length == 3773 sizeof (ipa6_conn_x_t)); 3774 ac6 = &((ipa6_conn_x_t *) 3775 addrp)->ac6x_conn; 3776 } 3777 icmp->icmp_v6src = ac6->ac6_laddr; 3778 (void) icmp_build_hdrs(q, icmp); 3779 } 3780 } 3781 mp1 = mp1->b_cont; 3782 } 3783 /* 3784 * Look for one or more appended ACK message added by 3785 * icmp_connect or icmp_disconnect. 3786 * If none found just send up the T_BIND_ACK. 3787 * icmp_connect has appended a T_OK_ACK and a 3788 * T_CONN_CON. 3789 * icmp_disconnect has appended a T_OK_ACK. 
3790 */ 3791 if (mp1 != NULL) { 3792 if (mp->b_cont == mp1) 3793 mp->b_cont = NULL; 3794 else { 3795 ASSERT(mp->b_cont->b_cont == mp1); 3796 mp->b_cont->b_cont = NULL; 3797 } 3798 freemsg(mp); 3799 mp = mp1; 3800 while (mp != NULL) { 3801 mp1 = mp->b_cont; 3802 mp->b_cont = NULL; 3803 putnext(q, mp); 3804 mp = mp1; 3805 } 3806 return; 3807 } 3808 freemsg(mp->b_cont); 3809 mp->b_cont = NULL; 3810 putnext(q, mp); 3811 } 3812 3813 /* 3814 * return SNMP stuff in buffer in mpdata 3815 */ 3816 static int 3817 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 3818 { 3819 mblk_t *mpdata; 3820 struct opthdr *optp; 3821 icmp_t *icmp = (icmp_t *)q->q_ptr; 3822 3823 if (mpctl == NULL || 3824 (mpdata = mpctl->b_cont) == NULL) { 3825 return (0); 3826 } 3827 3828 /* fixed length structure for IPv4 and IPv6 counters */ 3829 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 3830 optp->level = EXPER_RAWIP; 3831 optp->name = 0; 3832 (void) snmp_append_data(mpdata, (char *)&icmp->icmp_rawip_mib, 3833 sizeof (icmp->icmp_rawip_mib)); 3834 optp->len = msgdsize(mpdata); 3835 qreply(q, mpctl); 3836 3837 return (1); 3838 } 3839 3840 /* 3841 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 3842 * TODO: If this ever actually tries to set anything, it needs to be 3843 * to do the appropriate locking. 
3844 */ 3845 /* ARGSUSED */ 3846 static int 3847 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 3848 uchar_t *ptr, int len) 3849 { 3850 switch (level) { 3851 case EXPER_RAWIP: 3852 return (0); 3853 default: 3854 return (1); 3855 } 3856 } 3857 3858 /* Report for ndd "icmp_status" */ 3859 /* ARGSUSED */ 3860 static int 3861 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3862 { 3863 IDP idp; 3864 icmp_t *icmp; 3865 char *state; 3866 char laddrbuf[INET6_ADDRSTRLEN]; 3867 char faddrbuf[INET6_ADDRSTRLEN]; 3868 icmp_stack_t *is; 3869 3870 icmp = (icmp_t *)q->q_ptr; 3871 is = icmp->icmp_is; 3872 3873 (void) mi_mpprintf(mp, 3874 "RAWIP " MI_COL_HDRPAD_STR 3875 /* 01234567[89ABCDEF] */ 3876 " src addr dest addr state"); 3877 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 3878 3879 3880 for (idp = mi_first_ptr(&is->is_head); 3881 (icmp = (icmp_t *)idp) != NULL; 3882 idp = mi_next_ptr(&is->is_head, idp)) { 3883 if (icmp->icmp_state == TS_UNBND) 3884 state = "UNBOUND"; 3885 else if (icmp->icmp_state == TS_IDLE) 3886 state = "IDLE"; 3887 else if (icmp->icmp_state == TS_DATA_XFER) 3888 state = "CONNECTED"; 3889 else 3890 state = "UnkState"; 3891 3892 (void) mi_mpprintf(mp, 3893 MI_COL_PTRFMT_STR "%s %s %s", 3894 (void *)icmp, 3895 inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf, 3896 sizeof (faddrbuf)), 3897 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 3898 sizeof (laddrbuf)), 3899 state); 3900 } 3901 return (0); 3902 } 3903 3904 /* 3905 * This routine creates a T_UDERROR_IND message and passes it upstream. 3906 * The address and options are copied from the T_UNITDATA_REQ message 3907 * passed in mp. This message is freed. 
3908 */ 3909 static void 3910 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 3911 { 3912 mblk_t *mp1; 3913 uchar_t *rptr = mp->b_rptr; 3914 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 3915 3916 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 3917 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 3918 tudr->OPT_length, err); 3919 if (mp1) 3920 qreply(q, mp1); 3921 freemsg(mp); 3922 } 3923 3924 /* 3925 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 3926 * After some error checking, the message is passed downstream to ip. 3927 */ 3928 static void 3929 icmp_unbind(queue_t *q, mblk_t *mp) 3930 { 3931 icmp_t *icmp = (icmp_t *)q->q_ptr; 3932 3933 /* If a bind has not been done, we can't unbind. */ 3934 if (icmp->icmp_state == TS_UNBND) { 3935 icmp_err_ack(q, mp, TOUTSTATE, 0); 3936 return; 3937 } 3938 V6_SET_ZERO(icmp->icmp_v6src); 3939 V6_SET_ZERO(icmp->icmp_bound_v6src); 3940 icmp->icmp_state = TS_UNBND; 3941 3942 if (icmp->icmp_family == AF_INET6) { 3943 int error; 3944 3945 /* Rebuild the header template */ 3946 error = icmp_build_hdrs(q, icmp); 3947 if (error != 0) { 3948 icmp_err_ack(q, mp, TSYSERR, error); 3949 return; 3950 } 3951 } 3952 /* Pass the unbind to IP. */ 3953 putnext(q, mp); 3954 } 3955 3956 /* 3957 * Process IPv4 packets that already include an IP header. 3958 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 3959 * IPPROTO_IGMP). 
3960 */ 3961 static void 3962 icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop, 3963 boolean_t use_putnext) 3964 { 3965 icmp_stack_t *is = icmp->icmp_is; 3966 ipha_t *ipha; 3967 int ip_hdr_length; 3968 int tp_hdr_len; 3969 mblk_t *mp1; 3970 uint_t pkt_len; 3971 ip_opt_info_t optinfo; 3972 3973 optinfo.ip_opt_flags = 0; 3974 optinfo.ip_opt_ill_index = 0; 3975 ipha = (ipha_t *)mp->b_rptr; 3976 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 3977 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 3978 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 3979 ASSERT(icmp != NULL); 3980 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); 3981 freemsg(mp); 3982 return; 3983 } 3984 ipha = (ipha_t *)mp->b_rptr; 3985 } 3986 ipha->ipha_version_and_hdr_length = 3987 (IP_VERSION<<4) | (ip_hdr_length>>2); 3988 3989 /* 3990 * For the socket of SOCK_RAW type, the checksum is provided in the 3991 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 3992 * tell IP that the application has sent a complete IP header and not 3993 * to compute the transport checksum nor change the DF flag. 3994 */ 3995 ipha->ipha_ident = IP_HDR_INCLUDED; 3996 ipha->ipha_hdr_checksum = 0; 3997 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 3998 /* Insert options if any */ 3999 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4000 /* 4001 * Put the IP header plus any transport header that is 4002 * checksumed by ip_wput into the first mblk. (ip_wput assumes 4003 * that at least the checksum field is in the first mblk.) 4004 */ 4005 switch (ipha->ipha_protocol) { 4006 case IPPROTO_UDP: 4007 tp_hdr_len = 8; 4008 break; 4009 case IPPROTO_TCP: 4010 tp_hdr_len = 20; 4011 break; 4012 default: 4013 tp_hdr_len = 0; 4014 break; 4015 } 4016 /* 4017 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 4018 * tp_hdr_len bytes will be in a single mblk. 
4019 */ 4020 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 4021 tp_hdr_len)) { 4022 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 4023 tp_hdr_len)) { 4024 BUMP_MIB(&icmp->icmp_rawip_mib, 4025 rawipOutErrors); 4026 freemsg(mp); 4027 return; 4028 } 4029 ipha = (ipha_t *)mp->b_rptr; 4030 } 4031 4032 /* 4033 * if the length is larger then the max allowed IP packet, 4034 * then send an error and abort the processing. 4035 */ 4036 pkt_len = ntohs(ipha->ipha_length) 4037 + icmp->icmp_ip_snd_options_len; 4038 if (pkt_len > IP_MAXPACKET) { 4039 icmp_ud_err(q, mp, EMSGSIZE); 4040 return; 4041 } 4042 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra + 4043 tp_hdr_len, BPRI_LO))) { 4044 icmp_ud_err(q, mp, ENOMEM); 4045 return; 4046 } 4047 mp1->b_rptr += is->is_wroff_extra; 4048 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4049 4050 ipha->ipha_length = htons((uint16_t)pkt_len); 4051 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4052 4053 /* Copy transport header if any */ 4054 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4055 mp1->b_wptr += tp_hdr_len; 4056 4057 /* Add options */ 4058 ipha = (ipha_t *)mp1->b_rptr; 4059 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 4060 icmp->icmp_ip_snd_options_len); 4061 4062 /* Drop IP header and transport header from original */ 4063 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4064 4065 mp1->b_cont = mp; 4066 mp = mp1; 4067 /* 4068 * Massage source route putting first source 4069 * route in ipha_dst. 
4070 */ 4071 (void) ip_massage_options(ipha, icmp->icmp_is->is_netstack); 4072 } 4073 4074 if (pktinfop != NULL) { 4075 /* 4076 * Over write the source address provided in the header 4077 */ 4078 if (pktinfop->ip4_addr != INADDR_ANY) { 4079 ipha->ipha_src = pktinfop->ip4_addr; 4080 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4081 ASSERT(use_putnext == B_FALSE); 4082 } 4083 4084 if (pktinfop->ip4_ill_index != 0) { 4085 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4086 ASSERT(use_putnext == B_FALSE); 4087 } 4088 } 4089 4090 mblk_setcred(mp, icmp->icmp_credp); 4091 if (use_putnext) { 4092 putnext(q, mp); 4093 } else { 4094 ip_output_options(Q_TO_CONN(q->q_next), mp, q->q_next, IP_WPUT, 4095 &optinfo); 4096 } 4097 } 4098 4099 static boolean_t 4100 icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4101 { 4102 int err; 4103 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4104 4105 err = tsol_compute_label(DB_CREDDEF(mp, icmp->icmp_credp), dst, 4106 opt_storage, icmp->icmp_mac_exempt, 4107 icmp->icmp_is->is_netstack->netstack_ip); 4108 if (err == 0) { 4109 err = tsol_update_options(&icmp->icmp_ip_snd_options, 4110 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4111 opt_storage); 4112 } 4113 if (err != 0) { 4114 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); 4115 DTRACE_PROBE4( 4116 tx__ip__log__drop__updatelabel__icmp, 4117 char *, "queue(1) failed to update options(2) on mp(3)", 4118 queue_t *, q, char *, opt_storage, mblk_t *, mp); 4119 icmp_ud_err(q, mp, err); 4120 return (B_FALSE); 4121 } 4122 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4123 return (B_TRUE); 4124 } 4125 4126 /* 4127 * This routine handles all messages passed downstream. It either 4128 * consumes the message or passes it downstream; it never queues a 4129 * a message. 
4130 */ 4131 static void 4132 icmp_wput(queue_t *q, mblk_t *mp) 4133 { 4134 uchar_t *rptr = mp->b_rptr; 4135 ipha_t *ipha; 4136 mblk_t *mp1; 4137 int ip_hdr_length; 4138 #define tudr ((struct T_unitdata_req *)rptr) 4139 size_t ip_len; 4140 icmp_t *icmp = (icmp_t *)q->q_ptr; 4141 icmp_stack_t *is = icmp->icmp_is; 4142 sin6_t *sin6; 4143 sin_t *sin; 4144 ipaddr_t v4dst; 4145 ip4_pkt_t pktinfo; 4146 ip4_pkt_t *pktinfop = &pktinfo; 4147 ip_opt_info_t optinfo; 4148 queue_t *ip_wq; 4149 boolean_t use_putnext = B_TRUE; 4150 4151 if (icmp->icmp_restricted) { 4152 icmp_wput_restricted(q, mp); 4153 return; 4154 } 4155 4156 switch (mp->b_datap->db_type) { 4157 case M_DATA: 4158 if (icmp->icmp_hdrincl) { 4159 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4160 ipha = (ipha_t *)mp->b_rptr; 4161 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4162 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4163 BUMP_MIB(&icmp->icmp_rawip_mib, 4164 rawipOutErrors); 4165 freemsg(mp); 4166 return; 4167 } 4168 ipha = (ipha_t *)mp->b_rptr; 4169 } 4170 /* 4171 * If this connection was used for v6 (inconceivable!) 4172 * or if we have a new destination, then it's time to 4173 * figure a new label. 4174 */ 4175 if (is_system_labeled() && 4176 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4177 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4178 ipha->ipha_dst) && 4179 !icmp_update_label(q, icmp, mp, ipha->ipha_dst)) { 4180 return; 4181 } 4182 icmp_wput_hdrincl(q, mp, icmp, NULL, use_putnext); 4183 return; 4184 } 4185 freemsg(mp); 4186 return; 4187 case M_PROTO: 4188 case M_PCPROTO: 4189 ip_len = mp->b_wptr - rptr; 4190 if (ip_len >= sizeof (struct T_unitdata_req)) { 4191 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4192 if (((union T_primitives *)rptr)->type 4193 == T_UNITDATA_REQ) 4194 break; 4195 } 4196 /* FALLTHRU */ 4197 default: 4198 icmp_wput_other(q, mp); 4199 return; 4200 } 4201 4202 /* Handle T_UNITDATA_REQ messages here. 
*/ 4203 4204 4205 4206 if (icmp->icmp_state == TS_UNBND) { 4207 /* If a port has not been bound to the stream, fail. */ 4208 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); 4209 icmp_ud_err(q, mp, EPROTO); 4210 return; 4211 } 4212 mp1 = mp->b_cont; 4213 if (mp1 == NULL) { 4214 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); 4215 icmp_ud_err(q, mp, EPROTO); 4216 return; 4217 } 4218 4219 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4220 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); 4221 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4222 return; 4223 } 4224 4225 switch (icmp->icmp_family) { 4226 case AF_INET6: 4227 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4228 if (!OK_32PTR((char *)sin6) || 4229 tudr->DEST_length != sizeof (sin6_t) || 4230 sin6->sin6_family != AF_INET6) { 4231 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); 4232 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4233 return; 4234 } 4235 4236 /* No support for mapped addresses on raw sockets */ 4237 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4238 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); 4239 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4240 return; 4241 } 4242 4243 /* 4244 * Destination is a native IPv6 address. 4245 * Send out an IPv6 format packet. 
4246 */ 4247 icmp_wput_ipv6(q, mp, sin6, tudr->OPT_length); 4248 return; 4249 4250 case AF_INET: 4251 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4252 if (!OK_32PTR((char *)sin) || 4253 tudr->DEST_length != sizeof (sin_t) || 4254 sin->sin_family != AF_INET) { 4255 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); 4256 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4257 return; 4258 } 4259 /* Extract and ipaddr */ 4260 v4dst = sin->sin_addr.s_addr; 4261 break; 4262 4263 default: 4264 ASSERT(0); 4265 } 4266 4267 pktinfop->ip4_ill_index = 0; 4268 pktinfop->ip4_addr = INADDR_ANY; 4269 optinfo.ip_opt_flags = 0; 4270 optinfo.ip_opt_ill_index = 0; 4271 4272 4273 /* 4274 * If options passed in, feed it for verification and handling 4275 */ 4276 if (tudr->OPT_length != 0) { 4277 int error; 4278 4279 error = 0; 4280 if (icmp_unitdata_opt_process(q, mp, &error, 4281 (void *)pktinfop) < 0) { 4282 /* failure */ 4283 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); 4284 icmp_ud_err(q, mp, error); 4285 return; 4286 } 4287 ASSERT(error == 0); 4288 /* 4289 * Note: Success in processing options. 4290 * mp option buffer represented by 4291 * OPT_length/offset now potentially modified 4292 * and contain option setting results 4293 */ 4294 4295 if (pktinfop->ip4_ill_index != 0 || 4296 pktinfop->ip4_addr != INADDR_ANY) { 4297 /* 4298 * PKTINFO option is supported only when ICMP is 4299 * over IP. 
4300 */ 4301 ip_wq = WR(q)->q_next; 4302 if (NOT_OVER_IP(ip_wq)) { 4303 icmp_ud_err(q, mp, EINVAL); 4304 return; 4305 } 4306 use_putnext = B_FALSE; 4307 } 4308 } 4309 4310 if (v4dst == INADDR_ANY) 4311 v4dst = htonl(INADDR_LOOPBACK); 4312 4313 /* Check if our saved options are valid; update if not */ 4314 if (is_system_labeled() && 4315 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4316 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst) && 4317 !icmp_update_label(q, icmp, mp, v4dst)) { 4318 return; 4319 } 4320 4321 /* Protocol 255 contains full IP headers */ 4322 if (icmp->icmp_hdrincl) { 4323 freeb(mp); 4324 icmp_wput_hdrincl(q, mp1, icmp, pktinfop, use_putnext); 4325 return; 4326 } 4327 4328 4329 /* Add an IP header */ 4330 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4331 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 4332 if ((uchar_t *)ipha < mp1->b_datap->db_base || 4333 mp1->b_datap->db_ref != 1 || 4334 !OK_32PTR(ipha)) { 4335 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, 4336 BPRI_LO))) { 4337 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); 4338 icmp_ud_err(q, mp, ENOMEM); 4339 return; 4340 } 4341 mp1->b_cont = mp->b_cont; 4342 ipha = (ipha_t *)mp1->b_datap->db_lim; 4343 mp1->b_wptr = (uchar_t *)ipha; 4344 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4345 } 4346 #ifdef _BIG_ENDIAN 4347 /* Set version, header length, and tos */ 4348 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4349 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4350 icmp->icmp_type_of_service); 4351 /* Set ttl and protocol */ 4352 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4353 #else 4354 /* Set version, header length, and tos */ 4355 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4356 ((icmp->icmp_type_of_service << 8) | 4357 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4358 /* Set ttl and protocol */ 4359 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4360 #endif 4361 if 
(pktinfop->ip4_addr != INADDR_ANY) { 4362 ASSERT(use_putnext == B_FALSE); 4363 ipha->ipha_src = pktinfop->ip4_addr; 4364 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4365 } else { 4366 4367 /* 4368 * Copy our address into the packet. If this is zero, 4369 * ip will fill in the real source address. 4370 */ 4371 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4372 } 4373 4374 ipha->ipha_fragment_offset_and_flags = 0; 4375 4376 if (pktinfop->ip4_ill_index != 0) { 4377 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4378 ASSERT(use_putnext == B_FALSE); 4379 } 4380 4381 4382 /* 4383 * For the socket of SOCK_RAW type, the checksum is provided in the 4384 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4385 * tell IP that the application has sent a complete IP header and not 4386 * to compute the transport checksum nor change the DF flag. 4387 */ 4388 ipha->ipha_ident = IP_HDR_INCLUDED; 4389 4390 /* Finish common formatting of the packet. */ 4391 mp1->b_rptr = (uchar_t *)ipha; 4392 4393 ip_len = mp1->b_wptr - (uchar_t *)ipha; 4394 if (mp1->b_cont != NULL) 4395 ip_len += msgdsize(mp1->b_cont); 4396 4397 /* 4398 * Set the length into the IP header. 4399 * If the length is greater than the maximum allowed by IP, 4400 * then free the message and return. Do not try and send it 4401 * as this can cause problems in layers below. 4402 */ 4403 if (ip_len > IP_MAXPACKET) { 4404 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); 4405 icmp_ud_err(q, mp, EMSGSIZE); 4406 return; 4407 } 4408 ipha->ipha_length = htons((uint16_t)ip_len); 4409 /* 4410 * Copy in the destination address from the T_UNITDATA 4411 * request 4412 */ 4413 ipha->ipha_dst = v4dst; 4414 4415 /* 4416 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 
4417 */ 4418 if (CLASSD(v4dst)) 4419 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4420 4421 /* Copy in options if any */ 4422 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4423 bcopy(icmp->icmp_ip_snd_options, 4424 &ipha[1], icmp->icmp_ip_snd_options_len); 4425 /* 4426 * Massage source route putting first source route in ipha_dst. 4427 * Ignore the destination in the T_unitdata_req. 4428 */ 4429 (void) ip_massage_options(ipha, icmp->icmp_is->is_netstack); 4430 } 4431 4432 freeb(mp); 4433 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutDatagrams); 4434 mblk_setcred(mp1, icmp->icmp_credp); 4435 if (use_putnext) { 4436 putnext(q, mp1); 4437 } else { 4438 ip_output_options(Q_TO_CONN(q->q_next), mp1, q->q_next, IP_WPUT, 4439 &optinfo); 4440 } 4441 #undef ipha 4442 #undef tudr 4443 } 4444 4445 static boolean_t 4446 icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4447 { 4448 int err; 4449 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4450 4451 err = tsol_compute_label_v6(DB_CREDDEF(mp, icmp->icmp_credp), dst, 4452 opt_storage, icmp->icmp_mac_exempt, 4453 icmp->icmp_is->is_netstack->netstack_ip); 4454 if (err == 0) { 4455 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4456 &icmp->icmp_label_len_v6, opt_storage); 4457 } 4458 if (err != 0) { 4459 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); 4460 DTRACE_PROBE4( 4461 tx__ip__log__drop__updatelabel__icmp6, 4462 char *, "queue(1) failed to update options(2) on mp(3)", 4463 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4464 icmp_ud_err(wq, mp, err); 4465 return (B_FALSE); 4466 } 4467 4468 icmp->icmp_v6lastdst = *dst; 4469 return (B_TRUE); 4470 } 4471 4472 /* 4473 * icmp_wput_ipv6(): 4474 * Assumes that icmp_wput did some sanity checking on the destination 4475 * address, but that the label may not yet be correct. 
4476 */ 4477 void 4478 icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) 4479 { 4480 ip6_t *ip6h; 4481 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 4482 mblk_t *mp1; 4483 int ip_hdr_len = IPV6_HDR_LEN; 4484 size_t ip_len; 4485 icmp_t *icmp = (icmp_t *)q->q_ptr; 4486 icmp_stack_t *is = icmp->icmp_is; 4487 ip6_pkt_t ipp_s; /* For ancillary data options */ 4488 ip6_pkt_t *ipp = &ipp_s; 4489 ip6_pkt_t *tipp; 4490 uint32_t csum = 0; 4491 uint_t ignore = 0; 4492 uint_t option_exists = 0, is_sticky = 0; 4493 uint8_t *cp; 4494 uint8_t *nxthdr_ptr; 4495 in6_addr_t ip6_dst; 4496 4497 /* 4498 * If the local address is a mapped address return 4499 * an error. 4500 * It would be possible to send an IPv6 packet but the 4501 * response would never make it back to the application 4502 * since it is bound to a mapped address. 4503 */ 4504 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4505 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); 4506 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4507 return; 4508 } 4509 4510 ipp->ipp_fields = 0; 4511 ipp->ipp_sticky_ignored = 0; 4512 4513 /* 4514 * If TPI options passed in, feed it for verification and handling 4515 */ 4516 if (tudr_optlen != 0) { 4517 int error; 4518 4519 if (icmp_unitdata_opt_process(q, mp, &error, 4520 (void *)ipp) < 0) { 4521 /* failure */ 4522 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); 4523 icmp_ud_err(q, mp, error); 4524 return; 4525 } 4526 ignore = ipp->ipp_sticky_ignored; 4527 ASSERT(error == 0); 4528 } 4529 4530 if (sin6->sin6_scope_id != 0 && 4531 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4532 /* 4533 * IPPF_SCOPE_ID is special. It's neither a sticky 4534 * option nor ancillary data. It needs to be 4535 * explicitly set in options_exists. 
4536 */ 4537 option_exists |= IPPF_SCOPE_ID; 4538 } 4539 4540 /* 4541 * Compute the destination address 4542 */ 4543 ip6_dst = sin6->sin6_addr; 4544 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4545 ip6_dst = ipv6_loopback; 4546 4547 /* 4548 * If we're not going to the same destination as last time, then 4549 * recompute the label required. This is done in a separate routine to 4550 * avoid blowing up our stack here. 4551 */ 4552 if (is_system_labeled() && 4553 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst) && 4554 !icmp_update_label_v6(q, icmp, mp, &ip6_dst)) { 4555 return; 4556 } 4557 4558 /* 4559 * If there's a security label here, then we ignore any options the 4560 * user may try to set. We keep the peer's label as a hidden sticky 4561 * option. 4562 */ 4563 if (icmp->icmp_label_len_v6 > 0) { 4564 ignore &= ~IPPF_HOPOPTS; 4565 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4566 } 4567 4568 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4569 (ipp->ipp_fields == 0)) { 4570 /* No sticky options nor ancillary data. */ 4571 goto no_options; 4572 } 4573 4574 /* 4575 * Go through the options figuring out where each is going to 4576 * come from and build two masks. The first mask indicates if 4577 * the option exists at all. The second mask indicates if the 4578 * option is sticky or ancillary. 
4579 */ 4580 if (!(ignore & IPPF_HOPOPTS)) { 4581 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4582 option_exists |= IPPF_HOPOPTS; 4583 ip_hdr_len += ipp->ipp_hopoptslen; 4584 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4585 option_exists |= IPPF_HOPOPTS; 4586 is_sticky |= IPPF_HOPOPTS; 4587 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4588 } 4589 } 4590 4591 if (!(ignore & IPPF_RTHDR)) { 4592 if (ipp->ipp_fields & IPPF_RTHDR) { 4593 option_exists |= IPPF_RTHDR; 4594 ip_hdr_len += ipp->ipp_rthdrlen; 4595 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4596 option_exists |= IPPF_RTHDR; 4597 is_sticky |= IPPF_RTHDR; 4598 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4599 } 4600 } 4601 4602 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4603 /* 4604 * Need to have a router header to use these. 4605 */ 4606 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4607 option_exists |= IPPF_RTDSTOPTS; 4608 ip_hdr_len += ipp->ipp_rtdstoptslen; 4609 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4610 option_exists |= IPPF_RTDSTOPTS; 4611 is_sticky |= IPPF_RTDSTOPTS; 4612 ip_hdr_len += 4613 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4614 } 4615 } 4616 4617 if (!(ignore & IPPF_DSTOPTS)) { 4618 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4619 option_exists |= IPPF_DSTOPTS; 4620 ip_hdr_len += ipp->ipp_dstoptslen; 4621 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4622 option_exists |= IPPF_DSTOPTS; 4623 is_sticky |= IPPF_DSTOPTS; 4624 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4625 } 4626 } 4627 4628 if (!(ignore & IPPF_IFINDEX)) { 4629 if (ipp->ipp_fields & IPPF_IFINDEX) { 4630 option_exists |= IPPF_IFINDEX; 4631 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4632 option_exists |= IPPF_IFINDEX; 4633 is_sticky |= IPPF_IFINDEX; 4634 } 4635 } 4636 4637 if (!(ignore & IPPF_ADDR)) { 4638 if (ipp->ipp_fields & IPPF_ADDR) { 4639 option_exists |= IPPF_ADDR; 4640 } else if 
(icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4641 option_exists |= IPPF_ADDR; 4642 is_sticky |= IPPF_ADDR; 4643 } 4644 } 4645 4646 if (!(ignore & IPPF_DONTFRAG)) { 4647 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4648 option_exists |= IPPF_DONTFRAG; 4649 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4650 option_exists |= IPPF_DONTFRAG; 4651 is_sticky |= IPPF_DONTFRAG; 4652 } 4653 } 4654 4655 if (!(ignore & IPPF_USE_MIN_MTU)) { 4656 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4657 option_exists |= IPPF_USE_MIN_MTU; 4658 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4659 IPPF_USE_MIN_MTU) { 4660 option_exists |= IPPF_USE_MIN_MTU; 4661 is_sticky |= IPPF_USE_MIN_MTU; 4662 } 4663 } 4664 4665 if (!(ignore & IPPF_NEXTHOP)) { 4666 if (ipp->ipp_fields & IPPF_NEXTHOP) { 4667 option_exists |= IPPF_NEXTHOP; 4668 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) { 4669 option_exists |= IPPF_NEXTHOP; 4670 is_sticky |= IPPF_NEXTHOP; 4671 } 4672 } 4673 4674 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 4675 option_exists |= IPPF_HOPLIMIT; 4676 /* IPV6_HOPLIMIT can never be sticky */ 4677 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 4678 4679 if (!(ignore & IPPF_UNICAST_HOPS) && 4680 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 4681 option_exists |= IPPF_UNICAST_HOPS; 4682 is_sticky |= IPPF_UNICAST_HOPS; 4683 } 4684 4685 if (!(ignore & IPPF_MULTICAST_HOPS) && 4686 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 4687 option_exists |= IPPF_MULTICAST_HOPS; 4688 is_sticky |= IPPF_MULTICAST_HOPS; 4689 } 4690 4691 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 4692 /* This is a sticky socket option only */ 4693 option_exists |= IPPF_NO_CKSUM; 4694 is_sticky |= IPPF_NO_CKSUM; 4695 } 4696 4697 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 4698 /* This is a sticky socket option only */ 4699 option_exists |= IPPF_RAW_CKSUM; 4700 is_sticky |= IPPF_RAW_CKSUM; 4701 } 4702 4703 if (!(ignore 
& IPPF_TCLASS)) { 4704 if (ipp->ipp_fields & IPPF_TCLASS) { 4705 option_exists |= IPPF_TCLASS; 4706 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 4707 option_exists |= IPPF_TCLASS; 4708 is_sticky |= IPPF_TCLASS; 4709 } 4710 } 4711 4712 no_options: 4713 4714 /* 4715 * If any options carried in the ip6i_t were specified, we 4716 * need to account for the ip6i_t in the data we'll be sending 4717 * down. 4718 */ 4719 if (option_exists & IPPF_HAS_IP6I) 4720 ip_hdr_len += sizeof (ip6i_t); 4721 4722 /* check/fix buffer config, setup pointers into it */ 4723 mp1 = mp->b_cont; 4724 ip6h = (ip6_t *)&mp1->b_rptr[-ip_hdr_len]; 4725 if ((mp1->b_datap->db_ref != 1) || 4726 ((unsigned char *)ip6h < mp1->b_datap->db_base) || 4727 !OK_32PTR(ip6h)) { 4728 /* Try to get everything in a single mblk next time */ 4729 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 4730 icmp->icmp_max_hdr_len = ip_hdr_len; 4731 (void) mi_set_sth_wroff(RD(q), 4732 icmp->icmp_max_hdr_len + is->is_wroff_extra); 4733 } 4734 mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); 4735 if (!mp1) { 4736 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); 4737 icmp_ud_err(q, mp, ENOMEM); 4738 return; 4739 } 4740 mp1->b_cont = mp->b_cont; 4741 mp1->b_wptr = mp1->b_datap->db_lim; 4742 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 4743 } 4744 mp1->b_rptr = (unsigned char *)ip6h; 4745 ip6i = (ip6i_t *)ip6h; 4746 4747 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 4748 if (option_exists & IPPF_HAS_IP6I) { 4749 ip6h = (ip6_t *)&ip6i[1]; 4750 ip6i->ip6i_flags = 0; 4751 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4752 4753 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 4754 if (option_exists & IPPF_SCOPE_ID) { 4755 ip6i->ip6i_flags |= IP6I_IFINDEX; 4756 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 4757 } else if (option_exists & IPPF_IFINDEX) { 4758 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 4759 ASSERT(tipp->ipp_ifindex != 0); 4760 ip6i->ip6i_flags |= IP6I_IFINDEX; 4761 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 4762 } 4763 4764 if (option_exists & IPPF_RAW_CKSUM) { 4765 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 4766 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 4767 } 4768 4769 if (option_exists & IPPF_NO_CKSUM) { 4770 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 4771 } 4772 4773 if (option_exists & IPPF_ADDR) { 4774 /* 4775 * Enable per-packet source address verification if 4776 * IPV6_PKTINFO specified the source address. 4777 * ip6_src is set in the transport's _wput function. 4778 */ 4779 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 4780 } 4781 4782 if (option_exists & IPPF_DONTFRAG) { 4783 ip6i->ip6i_flags |= IP6I_DONTFRAG; 4784 } 4785 4786 if (option_exists & IPPF_USE_MIN_MTU) { 4787 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 4788 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 4789 } 4790 4791 if (option_exists & IPPF_NEXTHOP) { 4792 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 4793 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 4794 ip6i->ip6i_flags |= IP6I_NEXTHOP; 4795 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 4796 } 4797 4798 /* 4799 * tell IP this is an ip6i_t private header 4800 */ 4801 ip6i->ip6i_nxt = IPPROTO_RAW; 4802 } 4803 4804 /* Initialize IPv6 header */ 4805 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4806 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 4807 4808 /* Set the hoplimit of the outgoing packet. 
*/ 4809 if (option_exists & IPPF_HOPLIMIT) { 4810 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 4811 ip6h->ip6_hops = ipp->ipp_hoplimit; 4812 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4813 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 4814 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 4815 if (option_exists & IPPF_MULTICAST_HOPS) 4816 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4817 } else { 4818 ip6h->ip6_hops = icmp->icmp_ttl; 4819 if (option_exists & IPPF_UNICAST_HOPS) 4820 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4821 } 4822 4823 if (option_exists & IPPF_ADDR) { 4824 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 4825 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 4826 ip6h->ip6_src = tipp->ipp_addr; 4827 } else { 4828 /* 4829 * The source address was not set using IPV6_PKTINFO. 4830 * First look at the bound source. 4831 * If unspecified fallback to __sin6_src_id. 4832 */ 4833 ip6h->ip6_src = icmp->icmp_v6src; 4834 if (sin6->__sin6_src_id != 0 && 4835 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4836 ip_srcid_find_id(sin6->__sin6_src_id, 4837 &ip6h->ip6_src, icmp->icmp_zoneid, 4838 is->is_netstack); 4839 } 4840 } 4841 4842 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 4843 cp = (uint8_t *)&ip6h[1]; 4844 4845 /* 4846 * Here's where we have to start stringing together 4847 * any extension headers in the right order: 4848 * Hop-by-hop, destination, routing, and final destination opts. 
4849 */ 4850 if (option_exists & IPPF_HOPOPTS) { 4851 /* Hop-by-hop options */ 4852 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4853 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 4854 4855 *nxthdr_ptr = IPPROTO_HOPOPTS; 4856 nxthdr_ptr = &hbh->ip6h_nxt; 4857 4858 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 4859 cp += tipp->ipp_hopoptslen; 4860 } 4861 /* 4862 * En-route destination options 4863 * Only do them if there's a routing header as well 4864 */ 4865 if (option_exists & IPPF_RTDSTOPTS) { 4866 ip6_dest_t *dst = (ip6_dest_t *)cp; 4867 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 4868 4869 *nxthdr_ptr = IPPROTO_DSTOPTS; 4870 nxthdr_ptr = &dst->ip6d_nxt; 4871 4872 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 4873 cp += tipp->ipp_rtdstoptslen; 4874 } 4875 /* 4876 * Routing header next 4877 */ 4878 if (option_exists & IPPF_RTHDR) { 4879 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 4880 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 4881 4882 *nxthdr_ptr = IPPROTO_ROUTING; 4883 nxthdr_ptr = &rt->ip6r_nxt; 4884 4885 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 4886 cp += tipp->ipp_rthdrlen; 4887 } 4888 /* 4889 * Do ultimate destination options 4890 */ 4891 if (option_exists & IPPF_DSTOPTS) { 4892 ip6_dest_t *dest = (ip6_dest_t *)cp; 4893 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 4894 4895 *nxthdr_ptr = IPPROTO_DSTOPTS; 4896 nxthdr_ptr = &dest->ip6d_nxt; 4897 4898 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 4899 cp += tipp->ipp_dstoptslen; 4900 } 4901 4902 /* 4903 * Now set the last header pointer to the proto passed in 4904 */ 4905 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 4906 *nxthdr_ptr = icmp->icmp_proto; 4907 4908 /* 4909 * Copy in the destination address 4910 */ 4911 ip6h->ip6_dst = ip6_dst; 4912 4913 ip6h->ip6_vcf = 4914 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 4915 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 4916 4917 if (option_exists & IPPF_TCLASS) { 4918 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 4919 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 4920 tipp->ipp_tclass); 4921 } 4922 if (option_exists & IPPF_RTHDR) { 4923 ip6_rthdr_t *rth; 4924 4925 /* 4926 * Perform any processing needed for source routing. 4927 * We know that all extension headers will be in the same mblk 4928 * as the IPv6 header. 4929 */ 4930 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 4931 if (rth != NULL && rth->ip6r_segleft != 0) { 4932 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 4933 /* 4934 * Drop packet - only support Type 0 routing. 4935 * Notify the application as well. 4936 */ 4937 icmp_ud_err(q, mp, EPROTO); 4938 BUMP_MIB(&icmp->icmp_rawip_mib, 4939 rawipOutErrors); 4940 return; 4941 } 4942 /* 4943 * rth->ip6r_len is twice the number of 4944 * addresses in the header 4945 */ 4946 if (rth->ip6r_len & 0x1) { 4947 icmp_ud_err(q, mp, EPROTO); 4948 BUMP_MIB(&icmp->icmp_rawip_mib, 4949 rawipOutErrors); 4950 return; 4951 } 4952 /* 4953 * Shuffle the routing header and ip6_dst 4954 * addresses, and get the checksum difference 4955 * between the first hop (in ip6_dst) and 4956 * the destination (in the last routing hdr entry). 4957 */ 4958 csum = ip_massage_options_v6(ip6h, rth, 4959 icmp->icmp_is->is_netstack); 4960 /* 4961 * Verify that the first hop isn't a mapped address. 4962 * Routers along the path need to do this verification 4963 * for subsequent hops. 4964 */ 4965 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 4966 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4967 BUMP_MIB(&icmp->icmp_rawip_mib, 4968 rawipOutErrors); 4969 return; 4970 } 4971 } 4972 } 4973 4974 ip_len = mp1->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 4975 if (mp1->b_cont != NULL) 4976 ip_len += msgdsize(mp1->b_cont); 4977 4978 /* 4979 * Set the length into the IP header. 4980 * If the length is greater than the maximum allowed by IP, 4981 * then free the message and return. Do not try and send it 4982 * as this can cause problems in layers below. 
4983 */ 4984 if (ip_len > IP_MAXPACKET) { 4985 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutErrors); 4986 icmp_ud_err(q, mp, EMSGSIZE); 4987 return; 4988 } 4989 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 4990 uint_t cksum_off; /* From ip6i == mp1->b_rptr */ 4991 uint16_t *cksum_ptr; 4992 uint_t ext_hdrs_len; 4993 4994 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 4995 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 4996 icmp->icmp_checksum_off == 2); 4997 4998 /* 4999 * We make it easy for IP to include our pseudo header 5000 * by putting our length in uh_checksum, modified (if 5001 * we have a routing header) by the checksum difference 5002 * between the ultimate destination and first hop addresses. 5003 * Note: ICMPv6 must always checksum the packet. 5004 */ 5005 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 5006 if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) { 5007 if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) { 5008 BUMP_MIB(&icmp->icmp_rawip_mib, 5009 rawipOutErrors); 5010 freemsg(mp); 5011 return; 5012 } 5013 ip6i = (ip6i_t *)mp1->b_rptr; 5014 if (ip6i->ip6i_nxt == IPPROTO_RAW) 5015 ip6h = (ip6_t *)&ip6i[1]; 5016 else 5017 ip6h = (ip6_t *)ip6i; 5018 } 5019 /* Add payload length to checksum */ 5020 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 5021 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 5022 csum += htons(ip_len - ext_hdrs_len); 5023 5024 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 5025 csum = (csum & 0xFFFF) + (csum >> 16); 5026 *cksum_ptr = (uint16_t)csum; 5027 } 5028 5029 #ifdef _LITTLE_ENDIAN 5030 ip_len = htons(ip_len); 5031 #endif 5032 ip6h->ip6_plen = (uint16_t)ip_len; 5033 5034 freeb(mp); 5035 5036 /* We're done. 
Pass the packet to IP */ 5037 BUMP_MIB(&icmp->icmp_rawip_mib, rawipOutDatagrams); 5038 mblk_setcred(mp1, icmp->icmp_credp); 5039 putnext(q, mp1); 5040 } 5041 5042 static void 5043 icmp_wput_other(queue_t *q, mblk_t *mp) 5044 { 5045 uchar_t *rptr = mp->b_rptr; 5046 struct iocblk *iocp; 5047 #define tudr ((struct T_unitdata_req *)rptr) 5048 icmp_t *icmp; 5049 cred_t *cr; 5050 5051 icmp = (icmp_t *)q->q_ptr; 5052 5053 cr = DB_CREDDEF(mp, icmp->icmp_credp); 5054 5055 switch (mp->b_datap->db_type) { 5056 case M_PROTO: 5057 case M_PCPROTO: 5058 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5059 /* 5060 * If the message does not contain a PRIM_type, 5061 * throw it away. 5062 */ 5063 freemsg(mp); 5064 return; 5065 } 5066 switch (((union T_primitives *)rptr)->type) { 5067 case T_ADDR_REQ: 5068 icmp_addr_req(q, mp); 5069 return; 5070 case O_T_BIND_REQ: 5071 case T_BIND_REQ: 5072 qwriter(q, mp, icmp_bind, PERIM_OUTER); 5073 return; 5074 case T_CONN_REQ: 5075 icmp_connect(q, mp); 5076 return; 5077 case T_CAPABILITY_REQ: 5078 icmp_capability_req(q, mp); 5079 return; 5080 case T_INFO_REQ: 5081 icmp_info_req(q, mp); 5082 return; 5083 case T_UNITDATA_REQ: 5084 /* 5085 * If a T_UNITDATA_REQ gets here, the address must 5086 * be bad. Valid T_UNITDATA_REQs are found above 5087 * and break to below this switch. 5088 */ 5089 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5090 return; 5091 case T_UNBIND_REQ: 5092 icmp_unbind(q, mp); 5093 return; 5094 5095 case T_SVR4_OPTMGMT_REQ: 5096 if (!snmpcom_req(q, mp, icmp_snmp_set, icmp_snmp_get, 5097 cr)) 5098 /* Only IP can return anything meaningful */ 5099 (void) svr4_optcom_req(q, mp, cr, 5100 &icmp_opt_obj); 5101 return; 5102 5103 case T_OPTMGMT_REQ: 5104 /* Only IP can return anything meaningful */ 5105 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj); 5106 return; 5107 5108 case T_DISCON_REQ: 5109 icmp_disconnect(q, mp); 5110 return; 5111 5112 /* The following TPI message is not supported by icmp. 
*/ 5113 case O_T_CONN_RES: 5114 case T_CONN_RES: 5115 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5116 return; 5117 5118 /* The following 3 TPI requests are illegal for icmp. */ 5119 case T_DATA_REQ: 5120 case T_EXDATA_REQ: 5121 case T_ORDREL_REQ: 5122 freemsg(mp); 5123 (void) putctl1(RD(q), M_ERROR, EPROTO); 5124 return; 5125 default: 5126 break; 5127 } 5128 break; 5129 case M_IOCTL: 5130 iocp = (struct iocblk *)mp->b_rptr; 5131 switch (iocp->ioc_cmd) { 5132 case TI_GETPEERNAME: 5133 if (icmp->icmp_state != TS_DATA_XFER) { 5134 /* 5135 * If a default destination address has not 5136 * been associated with the stream, then we 5137 * don't know the peer's name. 5138 */ 5139 iocp->ioc_error = ENOTCONN; 5140 err_ret:; 5141 iocp->ioc_count = 0; 5142 mp->b_datap->db_type = M_IOCACK; 5143 qreply(q, mp); 5144 return; 5145 } 5146 /* FALLTHRU */ 5147 case TI_GETMYNAME: 5148 /* 5149 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5150 * need to copyin the user's strbuf structure. 5151 * Processing will continue in the M_IOCDATA case 5152 * below. 5153 */ 5154 mi_copyin(q, mp, NULL, 5155 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5156 return; 5157 case ND_SET: 5158 /* nd_getset performs the necessary error checking */ 5159 case ND_GET: 5160 if (nd_getset(q, icmp->icmp_is->is_nd, mp)) { 5161 qreply(q, mp); 5162 return; 5163 } 5164 break; 5165 default: 5166 break; 5167 } 5168 break; 5169 case M_IOCDATA: 5170 icmp_wput_iocdata(q, mp); 5171 return; 5172 default: 5173 break; 5174 } 5175 putnext(q, mp); 5176 } 5177 5178 /* 5179 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5180 * messages. 5181 */ 5182 static void 5183 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5184 { 5185 mblk_t *mp1; 5186 STRUCT_HANDLE(strbuf, sb); 5187 icmp_t *icmp; 5188 in6_addr_t v6addr; 5189 ipaddr_t v4addr; 5190 uint32_t flowinfo = 0; 5191 int addrlen; 5192 5193 /* Make sure it is one of ours. 
*/ 5194 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5195 case TI_GETMYNAME: 5196 case TI_GETPEERNAME: 5197 break; 5198 default: 5199 putnext(q, mp); 5200 return; 5201 } 5202 switch (mi_copy_state(q, mp, &mp1)) { 5203 case -1: 5204 return; 5205 case MI_COPY_CASE(MI_COPY_IN, 1): 5206 break; 5207 case MI_COPY_CASE(MI_COPY_OUT, 1): 5208 /* 5209 * The address has been copied out, so now 5210 * copyout the strbuf. 5211 */ 5212 mi_copyout(q, mp); 5213 return; 5214 case MI_COPY_CASE(MI_COPY_OUT, 2): 5215 /* 5216 * The address and strbuf have been copied out. 5217 * We're done, so just acknowledge the original 5218 * M_IOCTL. 5219 */ 5220 mi_copy_done(q, mp, 0); 5221 return; 5222 default: 5223 /* 5224 * Something strange has happened, so acknowledge 5225 * the original M_IOCTL with an EPROTO error. 5226 */ 5227 mi_copy_done(q, mp, EPROTO); 5228 return; 5229 } 5230 /* 5231 * Now we have the strbuf structure for TI_GETMYNAME 5232 * and TI_GETPEERNAME. Next we copyout the requested 5233 * address and then we'll copyout the strbuf. 5234 */ 5235 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5236 (void *)mp1->b_rptr); 5237 icmp = (icmp_t *)q->q_ptr; 5238 if (icmp->icmp_family == AF_INET) 5239 addrlen = sizeof (sin_t); 5240 else 5241 addrlen = sizeof (sin6_t); 5242 5243 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5244 mi_copy_done(q, mp, EINVAL); 5245 return; 5246 } 5247 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5248 case TI_GETMYNAME: 5249 if (icmp->icmp_family == AF_INET) { 5250 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5251 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 5252 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5253 v4addr = V4_PART_OF_V6(icmp->icmp_v6src); 5254 } else { 5255 /* 5256 * INADDR_ANY 5257 * icmp_v6src is not set, we might be bound to 5258 * broadcast/multicast. 
Use icmp_bound_v6src as 5259 * local address instead (that could 5260 * also still be INADDR_ANY) 5261 */ 5262 v4addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 5263 } 5264 } else { 5265 /* icmp->icmp_family == AF_INET6 */ 5266 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5267 v6addr = icmp->icmp_v6src; 5268 } else { 5269 /* 5270 * UNSPECIFIED 5271 * icmp_v6src is not set, we might be bound to 5272 * broadcast/multicast. Use icmp_bound_v6src as 5273 * local address instead (that could 5274 * also still be UNSPECIFIED) 5275 */ 5276 v6addr = icmp->icmp_bound_v6src; 5277 } 5278 } 5279 break; 5280 case TI_GETPEERNAME: 5281 if (icmp->icmp_family == AF_INET) { 5282 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5283 v4addr = V4_PART_OF_V6(icmp->icmp_v6dst); 5284 } else { 5285 /* icmp->icmp_family == AF_INET6) */ 5286 v6addr = icmp->icmp_v6dst; 5287 flowinfo = icmp->icmp_flowinfo; 5288 } 5289 break; 5290 default: 5291 mi_copy_done(q, mp, EPROTO); 5292 return; 5293 } 5294 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5295 if (!mp1) 5296 return; 5297 5298 if (icmp->icmp_family == AF_INET) { 5299 sin_t *sin; 5300 5301 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 5302 sin = (sin_t *)mp1->b_rptr; 5303 mp1->b_wptr = (uchar_t *)&sin[1]; 5304 *sin = sin_null; 5305 sin->sin_family = AF_INET; 5306 sin->sin_addr.s_addr = v4addr; 5307 } else { 5308 /* icmp->icmp_family == AF_INET6 */ 5309 sin6_t *sin6; 5310 5311 ASSERT(icmp->icmp_family == AF_INET6); 5312 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 5313 sin6 = (sin6_t *)mp1->b_rptr; 5314 mp1->b_wptr = (uchar_t *)&sin6[1]; 5315 *sin6 = sin6_null; 5316 sin6->sin6_family = AF_INET6; 5317 sin6->sin6_flowinfo = flowinfo; 5318 sin6->sin6_addr = v6addr; 5319 } 5320 /* Copy out the address */ 5321 mi_copyout(q, mp); 5322 } 5323 5324 /* 5325 * Only allow MIB requests and M_FLUSHes to pass. 5326 * All other messages are nacked or dropped. 
5327 */ 5328 static void 5329 icmp_wput_restricted(queue_t *q, mblk_t *mp) 5330 { 5331 cred_t *cr; 5332 icmp_t *icmp; 5333 5334 switch (DB_TYPE(mp)) { 5335 case M_PROTO: 5336 case M_PCPROTO: 5337 if (MBLKL(mp) < sizeof (t_scalar_t)) { 5338 freemsg(mp); 5339 return; 5340 } 5341 icmp = (icmp_t *)q->q_ptr; 5342 cr = DB_CREDDEF(mp, icmp->icmp_credp); 5343 5344 switch (((union T_primitives *)mp->b_rptr)->type) { 5345 case T_SVR4_OPTMGMT_REQ: 5346 if (!snmpcom_req(q, mp, 5347 icmp_snmp_set, icmp_snmp_get, cr)) 5348 (void) svr4_optcom_req(q, mp, cr, 5349 &icmp_opt_obj); 5350 return; 5351 case T_OPTMGMT_REQ: 5352 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj); 5353 return; 5354 default: 5355 icmp_err_ack(q, mp, TSYSERR, ENOTSUP); 5356 return; 5357 } 5358 /* NOTREACHED */ 5359 case M_IOCTL: 5360 miocnak(q, mp, 0, ENOTSUP); 5361 break; 5362 case M_FLUSH: 5363 putnext(q, mp); 5364 break; 5365 default: 5366 freemsg(mp); 5367 break; 5368 } 5369 } 5370 5371 static int 5372 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5373 void *thisdg_attrs) 5374 { 5375 icmp_t *icmp; 5376 struct T_unitdata_req *udreqp; 5377 int is_absreq_failure; 5378 cred_t *cr; 5379 5380 icmp = (icmp_t *)q->q_ptr; 5381 5382 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5383 *errorp = 0; 5384 5385 cr = DB_CREDDEF(mp, icmp->icmp_credp); 5386 5387 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5388 udreqp->OPT_offset, cr, &icmp_opt_obj, 5389 thisdg_attrs, &is_absreq_failure); 5390 5391 if (*errorp != 0) { 5392 /* 5393 * Note: No special action needed in this 5394 * module for "is_absreq_failure" 5395 */ 5396 return (-1); /* failure */ 5397 } 5398 ASSERT(is_absreq_failure == 0); 5399 return (0); /* success */ 5400 } 5401 5402 void 5403 icmp_ddi_init(void) 5404 { 5405 ICMP6_MAJ = ddi_name_to_major(ICMP6); 5406 icmp_max_optsize = 5407 optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5408 icmp_opt_obj.odb_opt_arr_cnt); 5409 5410 /* 5411 * We want to be informed each time a stack is created or 
5412 * destroyed in the kernel, so we can maintain the 5413 * set of icmp_stack_t's. 5414 */ 5415 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); 5416 } 5417 5418 void 5419 icmp_ddi_destroy(void) 5420 { 5421 netstack_unregister(NS_ICMP); 5422 } 5423 5424 /* 5425 * Initialize the ICMP stack instance. 5426 */ 5427 static void * 5428 rawip_stack_init(netstackid_t stackid, netstack_t *ns) 5429 { 5430 icmp_stack_t *is; 5431 icmpparam_t *pa; 5432 5433 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); 5434 is->is_netstack = ns; 5435 5436 pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); 5437 is->is_param_arr = pa; 5438 bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); 5439 5440 (void) icmp_param_register(&is->is_nd, 5441 is->is_param_arr, A_CNT(icmp_param_arr)); 5442 is->is_ksp = rawip_kstat_init(stackid); 5443 return (is); 5444 } 5445 5446 /* 5447 * Free the ICMP stack instance. 5448 */ 5449 static void 5450 rawip_stack_fini(netstackid_t stackid, void *arg) 5451 { 5452 icmp_stack_t *is = (icmp_stack_t *)arg; 5453 5454 nd_free(&is->is_nd); 5455 kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); 5456 is->is_param_arr = NULL; 5457 5458 rawip_kstat_fini(stackid, is->is_ksp); 5459 is->is_ksp = NULL; 5460 kmem_free(is, sizeof (*is)); 5461 } 5462 5463 static void * 5464 rawip_kstat_init(netstackid_t stackid) { 5465 kstat_t *ksp; 5466 5467 rawip_named_kstat_t template = { 5468 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5469 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5470 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5471 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5472 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5473 }; 5474 5475 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", 5476 KSTAT_TYPE_NAMED, 5477 NUM_OF_FIELDS(rawip_named_kstat_t), 5478 0, stackid); 5479 if (ksp == NULL || ksp->ks_data == NULL) 5480 return (NULL); 5481 5482 bcopy(&template, ksp->ks_data, sizeof (template)); 5483 ksp->ks_update = rawip_kstat_update; 
5484 ksp->ks_private = (void *)(uintptr_t)stackid; 5485 5486 kstat_install(ksp); 5487 return (ksp); 5488 } 5489 5490 static void 5491 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) 5492 { 5493 if (ksp != NULL) { 5494 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 5495 kstat_delete_netstack(ksp, stackid); 5496 } 5497 } 5498 5499 static int 5500 rawip_kstat_update(kstat_t *ksp, int rw) 5501 { 5502 rawip_named_kstat_t *rawipkp; 5503 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; 5504 netstack_t *ns; 5505 icmp_stack_t *is; 5506 5507 if ((ksp == NULL) || (ksp->ks_data == NULL)) 5508 return (EIO); 5509 5510 if (rw == KSTAT_WRITE) 5511 return (EACCES); 5512 5513 rawipkp = (rawip_named_kstat_t *)ksp->ks_data; 5514 5515 ns = netstack_find_by_stackid(stackid); 5516 if (ns == NULL) 5517 return (-1); 5518 is = ns->netstack_icmp; 5519 if (is == NULL) { 5520 netstack_rele(ns); 5521 return (-1); 5522 } 5523 rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; 5524 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; 5525 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; 5526 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; 5527 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; 5528 netstack_rele(ns); 5529 return (0); 5530 } 5531