1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/types.h> 31 #include <sys/stream.h> 32 #include <sys/stropts.h> 33 #include <sys/strlog.h> 34 #include <sys/strsun.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/priv.h> 46 #include <sys/zone.h> 47 #include <sys/time.h> 48 49 #include <sys/socket.h> 50 #include <sys/isa_defs.h> 51 #include <sys/suntpi.h> 52 #include <sys/xti_inet.h> 53 54 #include <net/route.h> 55 #include <net/if.h> 56 57 #include <netinet/in.h> 58 #include <netinet/ip6.h> 59 #include <netinet/icmp6.h> 60 #include <inet/common.h> 61 #include <inet/ip.h> 62 #include <inet/ip6.h> 63 #include <inet/mi.h> 64 #include <inet/nd.h> 65 #include <inet/optcom.h> 66 #include <inet/snmpcom.h> 67 #include <inet/kstatcom.h> 68 #include <inet/rawip_impl.h> 69 70 #include <netinet/ip_mroute.h> 71 #include <inet/tcp.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 #include <inet/ipclassifier.h> 75 76 #include <sys/tsol/label.h> 77 #include <sys/tsol/tnet.h> 78 79 #include <inet/ip_ire.h> 80 #include <inet/ip_if.h> 81 82 #include <inet/ip_impl.h> 83 84 #define ICMP6 "icmp6" 85 major_t ICMP6_MAJ; 86 87 /* 88 * Object to represent database of options to search passed to 89 * {sock,tpi}optcom_req() interface routine to take care of option 90 * management and associated methods. 91 * XXX These and other extern's should really move to a icmp header. 92 */ 93 extern optdb_obj_t icmp_opt_obj; 94 extern uint_t icmp_max_optsize; 95 96 /* 97 * Synchronization notes: 98 * 99 * At all points in this code where exclusive access is required, we 100 * pass a message to a subroutine by invoking qwriter(..., PERIM_OUTER) 101 * which will arrange to call the routine only after all threads have 102 * exited the shared resource. 103 */ 104 105 /* Named Dispatch Parameter Management Structure */ 106 typedef struct icmpparam_s { 107 uint_t icmp_param_min; 108 uint_t icmp_param_max; 109 uint_t icmp_param_value; 110 char *icmp_param_name; 111 } icmpparam_t; 112 113 static void icmp_addr_req(queue_t *q, mblk_t *mp); 114 static void icmp_bind(queue_t *q, mblk_t *mp); 115 static void icmp_bind_proto(queue_t *q); 116 static int icmp_build_hdrs(queue_t *q, icmp_t *icmp); 117 static void icmp_capability_req(queue_t *q, mblk_t *mp); 118 static int icmp_close(queue_t *q); 119 static void icmp_connect(queue_t *q, mblk_t *mp); 120 static void icmp_disconnect(queue_t *q, mblk_t *mp); 121 static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 122 int sys_error); 123 static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 124 t_scalar_t t_error, int sys_error); 125 static void icmp_icmp_error(queue_t *q, mblk_t *mp); 126 static void icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 127 static void icmp_info_req(queue_t *q, mblk_t *mp); 128 static mblk_t *icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, 129 t_scalar_t addr_length, in_port_t); 130 static int icmp_open(queue_t *q, dev_t *devp, int flag, 131 int sflag, cred_t *credp); 132 static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, 133 int *errorp, void *thisdg_attrs); 134 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 135 int icmp_opt_set(queue_t *q, uint_t optset_context, 136 int level, int name, uint_t inlen, 137 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 138 void *thisdg_attrs, cred_t *cr, mblk_t *mblk); 139 int icmp_opt_get(queue_t *q, int level, int name, 140 uchar_t *ptr); 141 static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 142 static boolean_t icmp_param_register(icmpparam_t *icmppa, int cnt); 143 static int icmp_param_set(queue_t *q, mblk_t *mp, char *value, 144 caddr_t cp, cred_t *cr); 145 static void icmp_rput(queue_t *q, mblk_t *mp); 146 static void icmp_rput_bind_ack(queue_t *q, mblk_t *mp); 147 static int icmp_snmp_get(queue_t *q, mblk_t *mpctl); 148 static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 149 uchar_t *ptr, int len); 150 static int icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 151 cred_t *cr); 152 static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 153 static void icmp_unbind(queue_t *q, mblk_t *mp); 154 static void icmp_wput(queue_t *q, mblk_t *mp); 155 static void icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, 156 t_scalar_t tudr_optlen); 157 static void icmp_wput_other(queue_t *q, mblk_t *mp); 158 static void icmp_wput_iocdata(queue_t *q, mblk_t *mp); 159 static void icmp_wput_restricted(queue_t *q, mblk_t *mp); 160 161 static void rawip_kstat_init(void); 162 static void rawip_kstat_fini(void); 163 static int rawip_kstat_update(kstat_t *kp, int rw); 164 165 166 static struct module_info info = { 167 5707, "icmp", 1, INFPSZ, 512, 128 168 }; 169 170 static struct qinit rinit = { 171 (pfi_t)icmp_rput, NULL, icmp_open, icmp_close, NULL, &info 172 }; 173 174 static struct qinit winit = { 175 (pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &info 176 }; 177 178 struct streamtab icmpinfo = { 179 &rinit, &winit 180 }; 181 182 static sin_t sin_null; /* Zero address for quick clears */ 183 static sin6_t sin6_null; /* Zero address for quick clears */ 184 static void *icmp_g_head; /* Head for list of open icmp streams. */ 185 static IDP icmp_g_nd; /* Points to table of ICMP ND variables. */ 186 187 /* MIB-2 stuff for SNMP */ 188 static mib2_rawip_t rawip_mib; /* SNMP fixed size info */ 189 static kstat_t *rawip_mibkp; /* kstat exporting rawip_mib data */ 190 191 /* Default structure copied into T_INFO_ACK messages */ 192 static struct T_info_ack icmp_g_t_info_ack = { 193 T_INFO_ACK, 194 IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */ 195 T_INVALID, /* ETSDU_size. icmp does not support expedited data. */ 196 T_INVALID, /* CDATA_size. icmp does not support connect data. */ 197 T_INVALID, /* DDATA_size. icmp does not support disconnect data. */ 198 0, /* ADDR_size - filled in later. */ 199 0, /* OPT_size - not initialized here */ 200 IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */ 201 T_CLTS, /* SERV_type. icmp supports connection-less. */ 202 TS_UNBND, /* CURRENT_state. This is set from icmp_state. */ 203 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 204 }; 205 206 /* 207 * Table of ND variables supported by icmp. These are loaded into icmp_g_nd 208 * in icmp_open. 209 * All of these are alterable, within the min/max values given, at run time. 210 */ 211 static icmpparam_t icmp_param_arr[] = { 212 /* min max value name */ 213 { 0, 128, 32, "icmp_wroff_extra" }, 214 { 1, 255, 255, "icmp_ipv4_ttl" }, 215 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"}, 216 { 0, 1, 1, "icmp_bsd_compat" }, 217 { 4096, 65536, 8192, "icmp_xmit_hiwat"}, 218 { 0, 65536, 1024, "icmp_xmit_lowat"}, 219 { 4096, 65536, 8192, "icmp_recv_hiwat"}, 220 { 65536, 1024*1024*1024, 256*1024, "icmp_max_buf"}, 221 }; 222 #define icmp_wroff_extra icmp_param_arr[0].icmp_param_value 223 #define icmp_ipv4_ttl icmp_param_arr[1].icmp_param_value 224 #define icmp_ipv6_hoplimit icmp_param_arr[2].icmp_param_value 225 #define icmp_bsd_compat icmp_param_arr[3].icmp_param_value 226 #define icmp_xmit_hiwat icmp_param_arr[4].icmp_param_value 227 #define icmp_xmit_lowat icmp_param_arr[5].icmp_param_value 228 #define icmp_recv_hiwat icmp_param_arr[6].icmp_param_value 229 #define icmp_max_buf icmp_param_arr[7].icmp_param_value 230 231 /* 232 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 233 * passed to icmp_wput. 234 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP 235 * protocol type placed in the message following the address. A T_BIND_ACK 236 * message is passed upstream when ip acknowledges the request. 237 * (Called as writer.) 238 */ 239 static void 240 icmp_bind(queue_t *q, mblk_t *mp) 241 { 242 sin_t *sin; 243 sin6_t *sin6; 244 mblk_t *mp1; 245 struct T_bind_req *tbr; 246 icmp_t *icmp; 247 248 icmp = (icmp_t *)q->q_ptr; 249 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 250 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 251 "icmp_bind: bad req, len %u", 252 (uint_t)(mp->b_wptr - mp->b_rptr)); 253 icmp_err_ack(q, mp, TPROTO, 0); 254 return; 255 } 256 if (icmp->icmp_state != TS_UNBND) { 257 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 258 "icmp_bind: bad state, %d", icmp->icmp_state); 259 icmp_err_ack(q, mp, TOUTSTATE, 0); 260 return; 261 } 262 /* 263 * Reallocate the message to make sure we have enough room for an 264 * address and the protocol type. 265 */ 266 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 267 if (!mp1) { 268 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 269 return; 270 } 271 mp = mp1; 272 tbr = (struct T_bind_req *)mp->b_rptr; 273 switch (tbr->ADDR_length) { 274 case 0: /* Generic request */ 275 tbr->ADDR_offset = sizeof (struct T_bind_req); 276 if (icmp->icmp_family == AF_INET) { 277 tbr->ADDR_length = sizeof (sin_t); 278 sin = (sin_t *)&tbr[1]; 279 *sin = sin_null; 280 sin->sin_family = AF_INET; 281 mp->b_wptr = (uchar_t *)&sin[1]; 282 } else { 283 ASSERT(icmp->icmp_family == AF_INET6); 284 tbr->ADDR_length = sizeof (sin6_t); 285 sin6 = (sin6_t *)&tbr[1]; 286 *sin6 = sin6_null; 287 sin6->sin6_family = AF_INET6; 288 mp->b_wptr = (uchar_t *)&sin6[1]; 289 } 290 break; 291 case sizeof (sin_t): /* Complete IP address */ 292 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 293 sizeof (sin_t)); 294 if (sin == NULL || !OK_32PTR((char *)sin)) { 295 icmp_err_ack(q, mp, TSYSERR, EINVAL); 296 return; 297 } 298 if (icmp->icmp_family != AF_INET || 299 sin->sin_family != AF_INET) { 300 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 301 return; 302 } 303 break; 304 case sizeof (sin6_t): /* Complete IP address */ 305 sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset, 306 sizeof (sin6_t)); 307 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 308 icmp_err_ack(q, mp, TSYSERR, EINVAL); 309 return; 310 } 311 if (icmp->icmp_family != AF_INET6 || 312 sin6->sin6_family != AF_INET6) { 313 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 314 return; 315 } 316 /* No support for mapped addresses on raw sockets */ 317 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 318 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 319 return; 320 } 321 break; 322 default: 323 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 324 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 325 icmp_err_ack(q, mp, TBADADDR, 0); 326 return; 327 } 328 /* 329 * Copy the source address into our icmp structure. This address 330 * may still be zero; if so, ip will fill in the correct address 331 * each time an outbound packet is passed to it. 332 * If we are binding to a broadcast or multicast address icmp_rput 333 * will clear the source address when it receives the T_BIND_ACK. 334 */ 335 icmp->icmp_state = TS_IDLE; 336 337 if (icmp->icmp_family == AF_INET) { 338 ASSERT(sin != NULL); 339 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 340 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 341 &icmp->icmp_v6src); 342 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 343 icmp->icmp_ip_snd_options_len; 344 icmp->icmp_bound_v6src = icmp->icmp_v6src; 345 } else { 346 int error; 347 348 ASSERT(sin6 != NULL); 349 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 350 icmp->icmp_v6src = sin6->sin6_addr; 351 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 352 icmp->icmp_bound_v6src = icmp->icmp_v6src; 353 354 /* Rebuild the header template */ 355 error = icmp_build_hdrs(q, icmp); 356 if (error != 0) { 357 icmp_err_ack(q, mp, TSYSERR, error); 358 return; 359 } 360 } 361 /* 362 * Place protocol type in the O_T_BIND_REQ/T_BIND_REQ following 363 * the address. 364 */ 365 *mp->b_wptr++ = icmp->icmp_proto; 366 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 367 /* 368 * Append a request for an IRE if src not 0 (INADDR_ANY) 369 */ 370 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 371 if (!mp->b_cont) { 372 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 373 return; 374 } 375 mp->b_cont->b_wptr += sizeof (ire_t); 376 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 377 } 378 379 /* Pass the O_T_BIND_REQ/T_BIND_REQ to ip. */ 380 putnext(q, mp); 381 } 382 383 /* 384 * Send message to IP to just bind to the protocol. 385 */ 386 static void 387 icmp_bind_proto(queue_t *q) 388 { 389 mblk_t *mp; 390 struct T_bind_req *tbr; 391 icmp_t *icmp; 392 393 icmp = (icmp_t *)q->q_ptr; 394 mp = allocb(sizeof (struct T_bind_req) + sizeof (sin6_t) + 1, 395 BPRI_MED); 396 if (!mp) { 397 return; 398 } 399 mp->b_datap->db_type = M_PROTO; 400 tbr = (struct T_bind_req *)mp->b_rptr; 401 tbr->PRIM_type = O_T_BIND_REQ; /* change to T_BIND_REQ ? */ 402 tbr->ADDR_offset = sizeof (struct T_bind_req); 403 if (icmp->icmp_ipversion == IPV4_VERSION) { 404 sin_t *sin; 405 406 tbr->ADDR_length = sizeof (sin_t); 407 sin = (sin_t *)&tbr[1]; 408 *sin = sin_null; 409 sin->sin_family = AF_INET; 410 mp->b_wptr = (uchar_t *)&sin[1]; 411 } else { 412 sin6_t *sin6; 413 414 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 415 tbr->ADDR_length = sizeof (sin6_t); 416 sin6 = (sin6_t *)&tbr[1]; 417 *sin6 = sin6_null; 418 sin6->sin6_family = AF_INET6; 419 mp->b_wptr = (uchar_t *)&sin6[1]; 420 } 421 422 /* Place protocol type in the O_T_BIND_REQ following the address. */ 423 *mp->b_wptr++ = icmp->icmp_proto; 424 425 /* Pass the O_T_BIND_REQ to ip. */ 426 putnext(q, mp); 427 } 428 429 /* 430 * This routine handles each T_CONN_REQ message passed to icmp. It 431 * associates a default destination address with the stream. 432 * 433 * This routine sends down a T_BIND_REQ to IP with the following mblks: 434 * T_BIND_REQ - specifying local and remote address. 435 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 436 * T_OK_ACK - for the T_CONN_REQ 437 * T_CONN_CON - to keep the TPI user happy 438 * 439 * The connect completes in icmp_rput. 440 * When a T_BIND_ACK is received information is extracted from the IRE 441 * and the two appended messages are sent to the TPI user. 442 * Should icmp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 443 * it to an error ack for the appropriate primitive. 444 */ 445 static void 446 icmp_connect(queue_t *q, mblk_t *mp) 447 { 448 sin_t *sin; 449 sin6_t *sin6; 450 mblk_t *mp1, *mp2; 451 struct T_conn_req *tcr; 452 icmp_t *icmp; 453 ipaddr_t v4dst; 454 in6_addr_t v6dst; 455 uint32_t flowinfo; 456 457 icmp = (icmp_t *)q->q_ptr; 458 tcr = (struct T_conn_req *)mp->b_rptr; 459 /* Sanity checks */ 460 if ((mp->b_wptr - mp->b_rptr < sizeof (struct T_conn_req))) { 461 icmp_err_ack(q, mp, TPROTO, 0); 462 return; 463 } 464 465 if (icmp->icmp_state == TS_DATA_XFER) { 466 /* Already connected - clear out state */ 467 icmp->icmp_v6src = icmp->icmp_bound_v6src; 468 icmp->icmp_state = TS_IDLE; 469 } 470 471 472 if (tcr->OPT_length != 0) { 473 icmp_err_ack(q, mp, TBADOPT, 0); 474 return; 475 } 476 switch (tcr->DEST_length) { 477 default: 478 icmp_err_ack(q, mp, TBADADDR, 0); 479 return; 480 481 case sizeof (sin_t): 482 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 483 sizeof (sin_t)); 484 if (sin == NULL || !OK_32PTR((char *)sin)) { 485 icmp_err_ack(q, mp, TSYSERR, EINVAL); 486 return; 487 } 488 if (icmp->icmp_family != AF_INET || 489 sin->sin_family != AF_INET) { 490 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 491 return; 492 } 493 v4dst = sin->sin_addr.s_addr; 494 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 495 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 496 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 497 icmp->icmp_ip_snd_options_len; 498 break; 499 500 case sizeof (sin6_t): 501 sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 502 sizeof (sin6_t)); 503 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 504 icmp_err_ack(q, mp, TSYSERR, EINVAL); 505 return; 506 } 507 if (icmp->icmp_family != AF_INET6 || 508 sin6->sin6_family != AF_INET6) { 509 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 510 return; 511 } 512 /* No support for mapped addresses on raw sockets */ 513 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 514 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 515 return; 516 } 517 v6dst = sin6->sin6_addr; 518 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 519 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 520 flowinfo = sin6->sin6_flowinfo; 521 break; 522 } 523 if (icmp->icmp_ipversion == IPV4_VERSION) { 524 /* 525 * Interpret a zero destination to mean loopback. 526 * Update the T_CONN_REQ (sin/sin6) since it is used to 527 * generate the T_CONN_CON. 528 */ 529 if (v4dst == INADDR_ANY) { 530 v4dst = htonl(INADDR_LOOPBACK); 531 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 532 if (icmp->icmp_family == AF_INET) { 533 sin->sin_addr.s_addr = v4dst; 534 } else { 535 sin6->sin6_addr = v6dst; 536 } 537 } 538 icmp->icmp_v6dst = v6dst; 539 icmp->icmp_flowinfo = 0; 540 541 /* 542 * If the destination address is multicast and 543 * an outgoing multicast interface has been set, 544 * use the address of that interface as our 545 * source address if no source address has been set. 546 */ 547 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 548 CLASSD(v4dst) && 549 icmp->icmp_multicast_if_addr != INADDR_ANY) { 550 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 551 &icmp->icmp_v6src); 552 } 553 } else { 554 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 555 /* 556 * Interpret a zero destination to mean loopback. 557 * Update the T_CONN_REQ (sin/sin6) since it is used to 558 * generate the T_CONN_CON. 559 */ 560 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 561 v6dst = ipv6_loopback; 562 sin6->sin6_addr = v6dst; 563 } 564 icmp->icmp_v6dst = v6dst; 565 icmp->icmp_flowinfo = flowinfo; 566 /* 567 * If the destination address is multicast and 568 * an outgoing multicast interface has been set, 569 * then the ip bind logic will pick the correct source 570 * address (i.e. matching the outgoing multicast interface). 571 */ 572 } 573 574 /* 575 * Send down bind to IP to verify that there is a route 576 * and to determine the source address. 577 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 578 */ 579 if (icmp->icmp_family == AF_INET) { 580 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa_conn_t), 581 sin->sin_port); 582 } else { 583 ASSERT(icmp->icmp_family == AF_INET6); 584 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa6_conn_t), 585 sin6->sin6_port); 586 } 587 if (mp1 == NULL) { 588 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 589 return; 590 } 591 592 /* 593 * We also have to send a connection confirmation to 594 * keep TLI happy. Prepare it for icmp_rput. 595 */ 596 if (icmp->icmp_family == AF_INET) { 597 mp2 = mi_tpi_conn_con(NULL, (char *)sin, sizeof (*sin), NULL, 598 0); 599 } else { 600 ASSERT(icmp->icmp_family == AF_INET6); 601 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, sizeof (*sin6), NULL, 602 0); 603 } 604 if (mp2 == NULL) { 605 freemsg(mp1); 606 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 607 return; 608 } 609 610 mp = mi_tpi_ok_ack_alloc(mp); 611 if (mp == NULL) { 612 /* Unable to reuse the T_CONN_REQ for the ack. */ 613 freemsg(mp2); 614 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 615 return; 616 } 617 618 icmp->icmp_state = TS_DATA_XFER; 619 620 /* Hang onto the T_OK_ACK and T_CONN_CON for later. */ 621 linkb(mp1, mp); 622 linkb(mp1, mp2); 623 624 mblk_setcred(mp1, icmp->icmp_credp); 625 putnext(q, mp1); 626 } 627 628 static int 629 icmp_close(queue_t *q) 630 { 631 icmp_t *icmp = (icmp_t *)q->q_ptr; 632 int i1; 633 634 /* tell IP that if we're not here, he can't trust labels */ 635 if (is_system_labeled()) 636 putnext(WR(q), icmp->icmp_delabel); 637 638 qprocsoff(q); 639 640 /* If there are any options associated with the stream, free them. */ 641 if (icmp->icmp_ip_snd_options) 642 mi_free((char *)icmp->icmp_ip_snd_options); 643 644 if (icmp->icmp_filter != NULL) 645 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 646 647 /* Free memory associated with sticky options */ 648 if (icmp->icmp_sticky_hdrs_len != 0) { 649 kmem_free(icmp->icmp_sticky_hdrs, 650 icmp->icmp_sticky_hdrs_len); 651 icmp->icmp_sticky_hdrs = NULL; 652 icmp->icmp_sticky_hdrs_len = 0; 653 } 654 655 ip6_pkt_free(&icmp->icmp_sticky_ipp); 656 657 crfree(icmp->icmp_credp); 658 659 /* Free the icmp structure and release the minor device number. */ 660 i1 = mi_close_comm(&icmp_g_head, q); 661 662 return (i1); 663 } 664 665 /* 666 * This routine handles each T_DISCON_REQ message passed to icmp 667 * as an indicating that ICMP is no longer connected. This results 668 * in sending a T_BIND_REQ to IP to restore the binding to just 669 * the local address. 670 * 671 * This routine sends down a T_BIND_REQ to IP with the following mblks: 672 * T_BIND_REQ - specifying just the local address. 673 * T_OK_ACK - for the T_DISCON_REQ 674 * 675 * The disconnect completes in icmp_rput. 676 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 677 * Should icmp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 678 * it to an error ack for the appropriate primitive. 679 */ 680 static void 681 icmp_disconnect(queue_t *q, mblk_t *mp) 682 { 683 icmp_t *icmp; 684 mblk_t *mp1; 685 686 icmp = (icmp_t *)q->q_ptr; 687 688 if (icmp->icmp_state != TS_DATA_XFER) { 689 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 690 "icmp_disconnect: bad state, %d", icmp->icmp_state); 691 icmp_err_ack(q, mp, TOUTSTATE, 0); 692 return; 693 } 694 icmp->icmp_v6src = icmp->icmp_bound_v6src; 695 icmp->icmp_state = TS_IDLE; 696 697 /* 698 * Send down bind to IP to remove the full binding and revert 699 * to the local address binding. 700 */ 701 if (icmp->icmp_family == AF_INET) { 702 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin_t), 0); 703 } else { 704 ASSERT(icmp->icmp_family == AF_INET6); 705 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin6_t), 0); 706 } 707 if (mp1 == NULL) { 708 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 709 return; 710 } 711 mp = mi_tpi_ok_ack_alloc(mp); 712 if (mp == NULL) { 713 /* Unable to reuse the T_DISCON_REQ for the ack. */ 714 icmp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 715 return; 716 } 717 718 if (icmp->icmp_family == AF_INET6) { 719 int error; 720 721 /* Rebuild the header template */ 722 error = icmp_build_hdrs(q, icmp); 723 if (error != 0) { 724 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 725 freemsg(mp1); 726 return; 727 } 728 } 729 icmp->icmp_discon_pending = 1; 730 731 /* Append the T_OK_ACK to the T_BIND_REQ for icmp_rput */ 732 linkb(mp1, mp); 733 putnext(q, mp1); 734 } 735 736 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 737 static void 738 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 739 { 740 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 741 qreply(q, mp); 742 } 743 744 /* Shorthand to generate and send TPI error acks to our client */ 745 static void 746 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 747 t_scalar_t t_error, int sys_error) 748 { 749 struct T_error_ack *teackp; 750 751 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 752 M_PCPROTO, T_ERROR_ACK)) != NULL) { 753 teackp = (struct T_error_ack *)mp->b_rptr; 754 teackp->ERROR_prim = primitive; 755 teackp->TLI_error = t_error; 756 teackp->UNIX_error = sys_error; 757 qreply(q, mp); 758 } 759 } 760 761 /* 762 * icmp_icmp_error is called by icmp_rput to process ICMP 763 * messages passed up by IP. 764 * Generates the appropriate T_UDERROR_IND for permanent 765 * (non-transient) errors. 766 * Assumes that IP has pulled up everything up to and including 767 * the ICMP header. 768 */ 769 static void 770 icmp_icmp_error(queue_t *q, mblk_t *mp) 771 { 772 icmph_t *icmph; 773 ipha_t *ipha; 774 int iph_hdr_length; 775 sin_t sin; 776 sin6_t sin6; 777 mblk_t *mp1; 778 int error = 0; 779 icmp_t *icmp = (icmp_t *)q->q_ptr; 780 781 /* 782 * Deliver T_UDERROR_IND when the application has asked for it. 783 * The socket layer enables this automatically when connected. 784 */ 785 if (!icmp->icmp_dgram_errind) { 786 freemsg(mp); 787 return; 788 } 789 790 ipha = (ipha_t *)mp->b_rptr; 791 792 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 793 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 794 icmp_icmp_error_ipv6(q, mp); 795 return; 796 } 797 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 798 799 iph_hdr_length = IPH_HDR_LENGTH(ipha); 800 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 801 ipha = (ipha_t *)&icmph[1]; 802 iph_hdr_length = IPH_HDR_LENGTH(ipha); 803 804 switch (icmph->icmph_type) { 805 case ICMP_DEST_UNREACHABLE: 806 switch (icmph->icmph_code) { 807 case ICMP_FRAGMENTATION_NEEDED: 808 /* 809 * IP has already adjusted the path MTU. 810 * XXX Somehow pass MTU indication to application? 811 */ 812 break; 813 case ICMP_PORT_UNREACHABLE: 814 case ICMP_PROTOCOL_UNREACHABLE: 815 error = ECONNREFUSED; 816 break; 817 default: 818 /* Transient errors */ 819 break; 820 } 821 break; 822 default: 823 /* Transient errors */ 824 break; 825 } 826 if (error == 0) { 827 freemsg(mp); 828 return; 829 } 830 831 switch (icmp->icmp_family) { 832 case AF_INET: 833 sin = sin_null; 834 sin.sin_family = AF_INET; 835 sin.sin_addr.s_addr = ipha->ipha_dst; 836 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 837 error); 838 break; 839 case AF_INET6: 840 sin6 = sin6_null; 841 sin6.sin6_family = AF_INET6; 842 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 843 844 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 845 NULL, 0, error); 846 break; 847 } 848 if (mp1) 849 putnext(q, mp1); 850 freemsg(mp); 851 } 852 853 /* 854 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 855 * for IPv6 packets. 856 * Send permanent (non-transient) errors upstream. 857 * Assumes that IP has pulled up all the extension headers as well 858 * as the ICMPv6 header. 859 */ 860 static void 861 icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 862 { 863 icmp6_t *icmp6; 864 ip6_t *ip6h, *outer_ip6h; 865 uint16_t iph_hdr_length; 866 uint8_t *nexthdrp; 867 sin6_t sin6; 868 mblk_t *mp1; 869 int error = 0; 870 icmp_t *icmp = (icmp_t *)q->q_ptr; 871 872 outer_ip6h = (ip6_t *)mp->b_rptr; 873 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 874 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 875 else 876 iph_hdr_length = IPV6_HDR_LEN; 877 878 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 879 ip6h = (ip6_t *)&icmp6[1]; 880 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 881 freemsg(mp); 882 return; 883 } 884 if (*nexthdrp != icmp->icmp_proto) { 885 /* 886 * Could have switched icmp_proto after while ip did fanout of 887 * this message 888 */ 889 freemsg(mp); 890 return; 891 } 892 switch (icmp6->icmp6_type) { 893 case ICMP6_DST_UNREACH: 894 switch (icmp6->icmp6_code) { 895 case ICMP6_DST_UNREACH_NOPORT: 896 error = ECONNREFUSED; 897 break; 898 case ICMP6_DST_UNREACH_ADMIN: 899 case ICMP6_DST_UNREACH_NOROUTE: 900 case ICMP6_DST_UNREACH_BEYONDSCOPE: 901 case ICMP6_DST_UNREACH_ADDR: 902 /* Transient errors */ 903 break; 904 default: 905 break; 906 } 907 break; 908 case ICMP6_PACKET_TOO_BIG: { 909 struct T_unitdata_ind *tudi; 910 struct T_opthdr *toh; 911 size_t udi_size; 912 mblk_t *newmp; 913 t_scalar_t opt_length = sizeof (struct T_opthdr) + 914 sizeof (struct ip6_mtuinfo); 915 sin6_t *sin6; 916 struct ip6_mtuinfo *mtuinfo; 917 918 /* 919 * If the application has requested to receive path mtu 920 * information, send up an empty message containing an 921 * IPV6_PATHMTU ancillary data item. 922 */ 923 if (!icmp->icmp_ipv6_recvpathmtu) 924 break; 925 926 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 927 opt_length; 928 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 929 BUMP_MIB(&rawip_mib, rawipInErrors); 930 break; 931 } 932 933 /* 934 * newmp->b_cont is left to NULL on purpose. This is an 935 * empty message containing only ancillary data. 936 */ 937 newmp->b_datap->db_type = M_PROTO; 938 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 939 newmp->b_wptr = (uchar_t *)tudi + udi_size; 940 tudi->PRIM_type = T_UNITDATA_IND; 941 tudi->SRC_length = sizeof (sin6_t); 942 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 943 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 944 tudi->OPT_length = opt_length; 945 946 sin6 = (sin6_t *)&tudi[1]; 947 bzero(sin6, sizeof (sin6_t)); 948 sin6->sin6_family = AF_INET6; 949 sin6->sin6_addr = icmp->icmp_v6dst; 950 951 toh = (struct T_opthdr *)&sin6[1]; 952 toh->level = IPPROTO_IPV6; 953 toh->name = IPV6_PATHMTU; 954 toh->len = opt_length; 955 toh->status = 0; 956 957 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 958 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 959 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 960 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 961 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 962 /* 963 * We've consumed everything we need from the original 964 * message. Free it, then send our empty message. 965 */ 966 freemsg(mp); 967 putnext(q, newmp); 968 return; 969 } 970 case ICMP6_TIME_EXCEEDED: 971 /* Transient errors */ 972 break; 973 case ICMP6_PARAM_PROB: 974 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 975 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 976 (uchar_t *)ip6h + icmp6->icmp6_pptr == 977 (uchar_t *)nexthdrp) { 978 error = ECONNREFUSED; 979 break; 980 } 981 break; 982 } 983 if (error == 0) { 984 freemsg(mp); 985 return; 986 } 987 988 sin6 = sin6_null; 989 sin6.sin6_family = AF_INET6; 990 sin6.sin6_addr = ip6h->ip6_dst; 991 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 992 993 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 994 error); 995 if (mp1) 996 putnext(q, mp1); 997 freemsg(mp); 998 } 999 1000 /* 1001 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1002 * The local address is filled in if endpoint is bound. The remote address 1003 * is filled in if remote address has been precified ("connected endpoint") 1004 * (The concept of connected CLTS sockets is alien to published TPI 1005 * but we support it anyway). 1006 */ 1007 static void 1008 icmp_addr_req(queue_t *q, mblk_t *mp) 1009 { 1010 icmp_t *icmp = (icmp_t *)q->q_ptr; 1011 mblk_t *ackmp; 1012 struct T_addr_ack *taa; 1013 1014 /* Make it large enough for worst case */ 1015 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1016 2 * sizeof (sin6_t), 1); 1017 if (ackmp == NULL) { 1018 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1019 return; 1020 } 1021 taa = (struct T_addr_ack *)ackmp->b_rptr; 1022 1023 bzero(taa, sizeof (struct T_addr_ack)); 1024 ackmp->b_wptr = (uchar_t *)&taa[1]; 1025 1026 taa->PRIM_type = T_ADDR_ACK; 1027 ackmp->b_datap->db_type = M_PCPROTO; 1028 1029 /* 1030 * Note: Following code assumes 32 bit alignment of basic 1031 * data structures like sin_t and struct T_addr_ack. 1032 */ 1033 if (icmp->icmp_state != TS_UNBND) { 1034 /* 1035 * Fill in local address 1036 */ 1037 taa->LOCADDR_offset = sizeof (*taa); 1038 if (icmp->icmp_family == AF_INET) { 1039 sin_t *sin; 1040 1041 taa->LOCADDR_length = sizeof (sin_t); 1042 sin = (sin_t *)&taa[1]; 1043 /* Fill zeroes and then intialize non-zero fields */ 1044 *sin = sin_null; 1045 sin->sin_family = AF_INET; 1046 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1047 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1048 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1049 sin->sin_addr.s_addr); 1050 } else { 1051 /* 1052 * INADDR_ANY 1053 * icmp_v6src is not set, we might be bound to 1054 * broadcast/multicast. Use icmp_bound_v6src as 1055 * local address instead (that could 1056 * also still be INADDR_ANY) 1057 */ 1058 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1059 sin->sin_addr.s_addr); 1060 } 1061 ackmp->b_wptr = (uchar_t *)&sin[1]; 1062 } else { 1063 sin6_t *sin6; 1064 1065 ASSERT(icmp->icmp_family == AF_INET6); 1066 taa->LOCADDR_length = sizeof (sin6_t); 1067 sin6 = (sin6_t *)&taa[1]; 1068 /* Fill zeroes and then intialize non-zero fields */ 1069 *sin6 = sin6_null; 1070 sin6->sin6_family = AF_INET6; 1071 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1072 sin6->sin6_addr = icmp->icmp_v6src; 1073 } else { 1074 /* 1075 * UNSPECIFIED 1076 * icmp_v6src is not set, we might be bound to 1077 * broadcast/multicast. Use icmp_bound_v6src as 1078 * local address instead (that could 1079 * also still be UNSPECIFIED) 1080 */ 1081 sin6->sin6_addr = icmp->icmp_bound_v6src; 1082 } 1083 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1084 } 1085 } 1086 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1087 qreply(q, ackmp); 1088 } 1089 1090 static void 1091 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1092 { 1093 *tap = icmp_g_t_info_ack; 1094 1095 if (icmp->icmp_family == AF_INET6) 1096 tap->ADDR_size = sizeof (sin6_t); 1097 else 1098 tap->ADDR_size = sizeof (sin_t); 1099 tap->CURRENT_state = icmp->icmp_state; 1100 tap->OPT_size = icmp_max_optsize; 1101 } 1102 1103 /* 1104 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1105 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1106 * icmp_g_t_info_ack. The current state of the stream is copied from 1107 * icmp_state. 1108 */ 1109 static void 1110 icmp_capability_req(queue_t *q, mblk_t *mp) 1111 { 1112 icmp_t *icmp = (icmp_t *)q->q_ptr; 1113 t_uscalar_t cap_bits1; 1114 struct T_capability_ack *tcap; 1115 1116 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1117 1118 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1119 mp->b_datap->db_type, T_CAPABILITY_ACK); 1120 if (!mp) 1121 return; 1122 1123 tcap = (struct T_capability_ack *)mp->b_rptr; 1124 tcap->CAP_bits1 = 0; 1125 1126 if (cap_bits1 & TC1_INFO) { 1127 icmp_copy_info(&tcap->INFO_ack, icmp); 1128 tcap->CAP_bits1 |= TC1_INFO; 1129 } 1130 1131 qreply(q, mp); 1132 } 1133 1134 /* 1135 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1136 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1137 * The current state of the stream is copied from icmp_state. 1138 */ 1139 static void 1140 icmp_info_req(queue_t *q, mblk_t *mp) 1141 { 1142 icmp_t *icmp = (icmp_t *)q->q_ptr; 1143 1144 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1145 T_INFO_ACK); 1146 if (!mp) 1147 return; 1148 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1149 qreply(q, mp); 1150 } 1151 1152 /* 1153 * IP recognizes seven kinds of bind requests: 1154 * 1155 * - A zero-length address binds only to the protocol number. 1156 * 1157 * - A 4-byte address is treated as a request to 1158 * validate that the address is a valid local IPv4 1159 * address, appropriate for an application to bind to. 1160 * IP does the verification, but does not make any note 1161 * of the address at this time. 1162 * 1163 * - A 16-byte address contains is treated as a request 1164 * to validate a local IPv6 address, as the 4-byte 1165 * address case above. 1166 * 1167 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1168 * use it for the inbound fanout of packets. 1169 * 1170 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1171 * use it for the inbound fanout of packets. 1172 * 1173 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1174 * information consisting of local and remote addresses 1175 * and ports (unused for raw sockets). In this case, the addresses are both 1176 * validated as appropriate for this operation, and, if 1177 * so, the information is retained for use in the 1178 * inbound fanout. 1179 * 1180 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1181 * fanout information, like the 12-byte case above. 1182 * 1183 * IP will also fill in the IRE request mblk with information 1184 * regarding our peer. In all cases, we notify IP of our protocol 1185 * type by appending a single protocol byte to the bind request. 1186 */ 1187 static mblk_t * 1188 icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length, 1189 in_port_t fport) 1190 { 1191 char *cp; 1192 mblk_t *mp; 1193 struct T_bind_req *tbr; 1194 ipa_conn_t *ac; 1195 ipa6_conn_t *ac6; 1196 sin_t *sin; 1197 sin6_t *sin6; 1198 1199 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1200 1201 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1202 if (mp == NULL) 1203 return (NULL); 1204 mp->b_datap->db_type = M_PROTO; 1205 tbr = (struct T_bind_req *)mp->b_rptr; 1206 tbr->PRIM_type = bind_prim; 1207 tbr->ADDR_offset = sizeof (*tbr); 1208 tbr->CONIND_number = 0; 1209 tbr->ADDR_length = addr_length; 1210 cp = (char *)&tbr[1]; 1211 switch (addr_length) { 1212 case sizeof (ipa_conn_t): 1213 ASSERT(icmp->icmp_family == AF_INET); 1214 /* Append a request for an IRE */ 1215 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1216 if (mp->b_cont == NULL) { 1217 freemsg(mp); 1218 return (NULL); 1219 } 1220 mp->b_cont->b_wptr += sizeof (ire_t); 1221 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1222 1223 /* cp known to be 32 bit aligned */ 1224 ac = (ipa_conn_t *)cp; 1225 ac->ac_laddr = V4_PART_OF_V6(icmp->icmp_v6src); 1226 ac->ac_faddr = V4_PART_OF_V6(icmp->icmp_v6dst); 1227 ac->ac_fport = fport; 1228 ac->ac_lport = 0; 1229 break; 1230 1231 case sizeof (ipa6_conn_t): 1232 ASSERT(icmp->icmp_family == AF_INET6); 1233 /* Append a request for an IRE */ 1234 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1235 if (mp->b_cont == NULL) { 1236 freemsg(mp); 1237 return (NULL); 1238 } 1239 mp->b_cont->b_wptr += sizeof (ire_t); 1240 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1241 1242 /* cp known to be 32 bit aligned */ 1243 ac6 = (ipa6_conn_t *)cp; 1244 ac6->ac6_laddr = icmp->icmp_v6src; 1245 ac6->ac6_faddr = icmp->icmp_v6dst; 1246 ac6->ac6_fport = fport; 1247 ac6->ac6_lport = 0; 1248 break; 1249 1250 case sizeof (sin_t): 1251 ASSERT(icmp->icmp_family == AF_INET); 1252 /* Append a request for an IRE */ 1253 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1254 if (!mp->b_cont) { 1255 freemsg(mp); 1256 return (NULL); 1257 } 1258 mp->b_cont->b_wptr += sizeof (ire_t); 1259 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1260 1261 sin = (sin_t *)cp; 1262 *sin = sin_null; 1263 sin->sin_family = AF_INET; 1264 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 1265 break; 1266 1267 case sizeof (sin6_t): 1268 ASSERT(icmp->icmp_family == AF_INET6); 1269 /* Append a request for an IRE */ 1270 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1271 if (!mp->b_cont) { 1272 freemsg(mp); 1273 return (NULL); 1274 } 1275 mp->b_cont->b_wptr += sizeof (ire_t); 1276 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1277 1278 sin6 = (sin6_t *)cp; 1279 *sin6 = sin6_null; 1280 sin6->sin6_family = AF_INET6; 1281 sin6->sin6_addr = icmp->icmp_bound_v6src; 1282 break; 1283 } 1284 /* Add protocol number to end */ 1285 cp[addr_length] = icmp->icmp_proto; 1286 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1287 return (mp); 1288 } 1289 1290 /* ARGSUSED */ 1291 static void 1292 dummy_func(void *arg) 1293 { 1294 } 1295 1296 static mblk_t * 1297 alloc_wait(queue_t *q, size_t len, int pri, int *errp) 1298 { 1299 mblk_t *mp; 1300 bufcall_id_t id; 1301 int retv; 1302 1303 while ((mp = allocb(len, pri)) == NULL) { 1304 id = qbufcall(q, len, pri, dummy_func, NULL); 1305 if (id == 0) { 1306 *errp = ENOMEM; 1307 break; 1308 } 1309 retv = qwait_sig(q); 1310 qunbufcall(q, id); 1311 if (retv == 0) { 1312 *errp = EINTR; 1313 break; 1314 } 1315 } 1316 if (mp != NULL) 1317 mp->b_wptr += len; 1318 return (mp); 1319 } 1320 1321 /* 1322 * This is the open routine for icmp. It allocates a icmp_t structure for 1323 * the stream and, on the first open of the module, creates an ND table. 1324 */ 1325 static int 1326 icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1327 { 1328 int err; 1329 icmp_t *icmp; 1330 mblk_t *mp; 1331 out_labeled_t *olp; 1332 1333 /* If the stream is already open, return immediately. */ 1334 if (q->q_ptr != NULL) 1335 return (0); 1336 1337 /* If this is not a push of icmp as a module, fail. */ 1338 if (sflag != MODOPEN) 1339 return (EINVAL); 1340 1341 /* 1342 * Defer the qprocson until everything is initialized since 1343 * we are D_MTPERQ and after qprocson the rput routine can 1344 * run. (Could do qprocson earlier since icmp currently 1345 * has an outer perimeter.) 1346 */ 1347 1348 /* 1349 * Create a icmp_t structure for this stream and link into the 1350 * list of open streams. 1351 */ 1352 err = mi_open_comm(&icmp_g_head, sizeof (icmp_t), q, devp, 1353 flag, sflag, credp); 1354 if (err != 0) 1355 return (err); 1356 1357 /* 1358 * The receive hiwat is only looked at on the stream head queue. 1359 * Store in q_hiwat in order to return on SO_RCVBUF getsockopts. 1360 */ 1361 q->q_hiwat = icmp_recv_hiwat; 1362 1363 /* Set the initial state of the stream and the privilege status. */ 1364 icmp = (icmp_t *)q->q_ptr; 1365 icmp->icmp_state = TS_UNBND; 1366 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1367 icmp->icmp_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1368 icmp->icmp_filter = NULL; 1369 1370 icmp->icmp_credp = credp; 1371 crhold(credp); 1372 1373 /* 1374 * If the caller has the process-wide flag set, then default to MAC 1375 * exempt mode. This allows read-down to unlabeled hosts. 1376 */ 1377 if (getpflags(NET_MAC_AWARE, credp) != 0) 1378 icmp->icmp_mac_exempt = B_TRUE; 1379 1380 icmp->icmp_zoneid = getzoneid(); 1381 1382 if (getmajor(*devp) == (major_t)ICMP6_MAJ) { 1383 icmp->icmp_ipversion = IPV6_VERSION; 1384 icmp->icmp_family = AF_INET6; 1385 /* May be changed by a SO_PROTOTYPE socket option. */ 1386 icmp->icmp_proto = IPPROTO_ICMPV6; 1387 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1388 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1389 icmp->icmp_ttl = (uint8_t)icmp_ipv6_hoplimit; 1390 } else { 1391 icmp->icmp_ipversion = IPV4_VERSION; 1392 icmp->icmp_family = AF_INET; 1393 /* May be changed by a SO_PROTOTYPE socket option. */ 1394 icmp->icmp_proto = IPPROTO_ICMP; 1395 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1396 icmp->icmp_ttl = (uint8_t)icmp_ipv4_ttl; 1397 } 1398 qprocson(q); 1399 1400 /* 1401 * Check if icmp is being I_PUSHed by a non-privileged user. 1402 * If so, we set icmp_restricted to indicate that only MIB 1403 * traffic may pass. 1404 */ 1405 if (secpolicy_net_icmpaccess(credp) != 0) { 1406 icmp->icmp_restricted = 1; 1407 } 1408 1409 /* 1410 * The transmit hiwat is only looked at on IP's queue. 1411 * Store in q_hiwat in order to return on SO_SNDBUF 1412 * getsockopts. 1413 */ 1414 WR(q)->q_hiwat = icmp_xmit_hiwat; 1415 WR(q)->q_next->q_hiwat = WR(q)->q_hiwat; 1416 WR(q)->q_lowat = icmp_xmit_lowat; 1417 WR(q)->q_next->q_lowat = WR(q)->q_lowat; 1418 1419 if (icmp->icmp_family == AF_INET6) { 1420 /* Build initial header template for transmit */ 1421 err = icmp_build_hdrs(q, icmp); 1422 if (err != 0) 1423 goto open_error; 1424 } 1425 /* Set the Stream head write offset. */ 1426 (void) mi_set_sth_wroff(q, icmp->icmp_max_hdr_len + icmp_wroff_extra); 1427 (void) mi_set_sth_hiwat(q, q->q_hiwat); 1428 1429 if (is_system_labeled()) { 1430 /* notify IP that we know about labeling */ 1431 mp = alloc_wait(q, sizeof (*olp), BPRI_MED, &err); 1432 if (mp == NULL) 1433 goto open_error; 1434 mp->b_datap->db_type = M_CTL; 1435 olp = (out_labeled_t *)mp->b_rptr; 1436 olp->out_labeled_type = IP_ULP_OUT_LABELED; 1437 olp->out_qnext = WR(q)->q_next; 1438 putnext(WR(q), mp); 1439 1440 /* save off a copy for closing */ 1441 mp = alloc_wait(q, sizeof (*olp), BPRI_MED, &err); 1442 if (mp == NULL) 1443 goto open_error; 1444 mp->b_datap->db_type = M_CTL; 1445 olp = (out_labeled_t *)mp->b_rptr; 1446 olp->out_labeled_type = IP_ULP_OUT_LABELED; 1447 olp->out_qnext = NULL; 1448 icmp->icmp_delabel = mp; 1449 } 1450 1451 return (0); 1452 1453 open_error: 1454 qprocsoff(q); 1455 crfree(credp); 1456 (void) mi_close_comm(&icmp_g_head, q); 1457 return (err); 1458 } 1459 1460 /* 1461 * Which ICMP options OK to set through T_UNITDATA_REQ... 1462 */ 1463 /* ARGSUSED */ 1464 static boolean_t 1465 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1466 { 1467 return (B_TRUE); 1468 } 1469 1470 /* 1471 * This routine gets default values of certain options whose default 1472 * values are maintained by protcol specific code 1473 */ 1474 /* ARGSUSED */ 1475 int 1476 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1477 { 1478 int *i1 = (int *)ptr; 1479 1480 switch (level) { 1481 case IPPROTO_IP: 1482 switch (name) { 1483 case IP_MULTICAST_TTL: 1484 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1485 return (sizeof (uchar_t)); 1486 case IP_MULTICAST_LOOP: 1487 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1488 return (sizeof (uchar_t)); 1489 } 1490 break; 1491 case IPPROTO_IPV6: 1492 switch (name) { 1493 case IPV6_MULTICAST_HOPS: 1494 *i1 = IP_DEFAULT_MULTICAST_TTL; 1495 return (sizeof (int)); 1496 case IPV6_MULTICAST_LOOP: 1497 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1498 return (sizeof (int)); 1499 case IPV6_UNICAST_HOPS: 1500 *i1 = icmp_ipv6_hoplimit; 1501 return (sizeof (int)); 1502 } 1503 break; 1504 case IPPROTO_ICMPV6: 1505 switch (name) { 1506 case ICMP6_FILTER: 1507 /* Make it look like "pass all" */ 1508 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1509 return (sizeof (icmp6_filter_t)); 1510 } 1511 break; 1512 } 1513 return (-1); 1514 } 1515 1516 /* 1517 * This routine retrieves the current status of socket options. 1518 * It returns the size of the option retrieved. 1519 */ 1520 int 1521 icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 1522 { 1523 icmp_t *icmp = (icmp_t *)q->q_ptr; 1524 int *i1 = (int *)ptr; 1525 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1526 1527 switch (level) { 1528 case SOL_SOCKET: 1529 switch (name) { 1530 case SO_DEBUG: 1531 *i1 = icmp->icmp_debug; 1532 break; 1533 case SO_TYPE: 1534 *i1 = SOCK_RAW; 1535 break; 1536 case SO_PROTOTYPE: 1537 *i1 = icmp->icmp_proto; 1538 break; 1539 case SO_REUSEADDR: 1540 *i1 = icmp->icmp_reuseaddr; 1541 break; 1542 1543 /* 1544 * The following three items are available here, 1545 * but are only meaningful to IP. 1546 */ 1547 case SO_DONTROUTE: 1548 *i1 = icmp->icmp_dontroute; 1549 break; 1550 case SO_USELOOPBACK: 1551 *i1 = icmp->icmp_useloopback; 1552 break; 1553 case SO_BROADCAST: 1554 *i1 = icmp->icmp_broadcast; 1555 break; 1556 1557 case SO_SNDBUF: 1558 ASSERT(q->q_hiwat <= INT_MAX); 1559 *i1 = (int)q->q_hiwat; 1560 break; 1561 case SO_RCVBUF: 1562 ASSERT(RD(q)->q_hiwat <= INT_MAX); 1563 *i1 = (int)RD(q)->q_hiwat; 1564 break; 1565 case SO_DGRAM_ERRIND: 1566 *i1 = icmp->icmp_dgram_errind; 1567 break; 1568 case SO_TIMESTAMP: 1569 *i1 = icmp->icmp_timestamp; 1570 break; 1571 case SO_MAC_EXEMPT: 1572 *i1 = icmp->icmp_mac_exempt; 1573 break; 1574 /* 1575 * Following four not meaningful for icmp 1576 * Action is same as "default" to which we fallthrough 1577 * so we keep them in comments. 1578 * case SO_LINGER: 1579 * case SO_KEEPALIVE: 1580 * case SO_OOBINLINE: 1581 * case SO_ALLZONES: 1582 */ 1583 default: 1584 return (-1); 1585 } 1586 break; 1587 case IPPROTO_IP: 1588 /* 1589 * Only allow IPv4 option processing on IPv4 sockets. 1590 */ 1591 if (icmp->icmp_family != AF_INET) 1592 return (-1); 1593 1594 switch (name) { 1595 case IP_OPTIONS: 1596 case T_IP_OPTIONS: 1597 /* Options are passed up with each packet */ 1598 return (0); 1599 case IP_HDRINCL: 1600 *i1 = (int)icmp->icmp_hdrincl; 1601 break; 1602 case IP_TOS: 1603 case T_IP_TOS: 1604 *i1 = (int)icmp->icmp_type_of_service; 1605 break; 1606 case IP_TTL: 1607 *i1 = (int)icmp->icmp_ttl; 1608 break; 1609 case IP_MULTICAST_IF: 1610 /* 0 address if not set */ 1611 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1612 return (sizeof (ipaddr_t)); 1613 case IP_MULTICAST_TTL: 1614 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1615 return (sizeof (uchar_t)); 1616 case IP_MULTICAST_LOOP: 1617 *ptr = icmp->icmp_multicast_loop; 1618 return (sizeof (uint8_t)); 1619 case IP_BOUND_IF: 1620 /* Zero if not set */ 1621 *i1 = icmp->icmp_bound_if; 1622 break; /* goto sizeof (int) option return */ 1623 case IP_UNSPEC_SRC: 1624 *ptr = icmp->icmp_unspec_source; 1625 break; /* goto sizeof (int) option return */ 1626 case IP_XMIT_IF: 1627 *i1 = icmp->icmp_xmit_if; 1628 break; /* goto sizeof (int) option return */ 1629 case IP_RECVIF: 1630 *ptr = icmp->icmp_recvif; 1631 break; /* goto sizeof (int) option return */ 1632 case IP_RECVPKTINFO: 1633 /* 1634 * This also handles IP_PKTINFO. 1635 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1636 * Differentiation is based on the size of the argument 1637 * passed in. 1638 * This option is handled in IP which will return an 1639 * error for IP_PKTINFO as it's not supported as a 1640 * sticky option. 1641 */ 1642 return (-EINVAL); 1643 /* 1644 * Cannot "get" the value of following options 1645 * at this level. Action is same as "default" to 1646 * which we fallthrough so we keep them in comments. 1647 * 1648 * case IP_ADD_MEMBERSHIP: 1649 * case IP_DROP_MEMBERSHIP: 1650 * case IP_BLOCK_SOURCE: 1651 * case IP_UNBLOCK_SOURCE: 1652 * case IP_ADD_SOURCE_MEMBERSHIP: 1653 * case IP_DROP_SOURCE_MEMBERSHIP: 1654 * case MCAST_JOIN_GROUP: 1655 * case MCAST_LEAVE_GROUP: 1656 * case MCAST_BLOCK_SOURCE: 1657 * case MCAST_UNBLOCK_SOURCE: 1658 * case MCAST_JOIN_SOURCE_GROUP: 1659 * case MCAST_LEAVE_SOURCE_GROUP: 1660 * case MRT_INIT: 1661 * case MRT_DONE: 1662 * case MRT_ADD_VIF: 1663 * case MRT_DEL_VIF: 1664 * case MRT_ADD_MFC: 1665 * case MRT_DEL_MFC: 1666 * case MRT_VERSION: 1667 * case MRT_ASSERT: 1668 * case IP_SEC_OPT: 1669 * case IP_DONTFAILOVER_IF: 1670 * case IP_NEXTHOP: 1671 */ 1672 default: 1673 return (-1); 1674 } 1675 break; 1676 case IPPROTO_IPV6: 1677 /* 1678 * Only allow IPv6 option processing on native IPv6 sockets. 1679 */ 1680 if (icmp->icmp_family != AF_INET6) 1681 return (-1); 1682 switch (name) { 1683 case IPV6_UNICAST_HOPS: 1684 *i1 = (unsigned int)icmp->icmp_ttl; 1685 break; 1686 case IPV6_MULTICAST_IF: 1687 /* 0 index if not set */ 1688 *i1 = icmp->icmp_multicast_if_index; 1689 break; 1690 case IPV6_MULTICAST_HOPS: 1691 *i1 = icmp->icmp_multicast_ttl; 1692 break; 1693 case IPV6_MULTICAST_LOOP: 1694 *i1 = icmp->icmp_multicast_loop; 1695 break; 1696 case IPV6_BOUND_IF: 1697 /* Zero if not set */ 1698 *i1 = icmp->icmp_bound_if; 1699 break; 1700 case IPV6_UNSPEC_SRC: 1701 *i1 = icmp->icmp_unspec_source; 1702 break; 1703 case IPV6_CHECKSUM: 1704 /* 1705 * Return offset or -1 if no checksum offset. 1706 * Does not apply to IPPROTO_ICMPV6 1707 */ 1708 if (icmp->icmp_proto == IPPROTO_ICMPV6) 1709 return (-1); 1710 1711 if (icmp->icmp_raw_checksum) { 1712 *i1 = icmp->icmp_checksum_off; 1713 } else { 1714 *i1 = -1; 1715 } 1716 break; 1717 case IPV6_JOIN_GROUP: 1718 case IPV6_LEAVE_GROUP: 1719 case MCAST_JOIN_GROUP: 1720 case MCAST_LEAVE_GROUP: 1721 case MCAST_BLOCK_SOURCE: 1722 case MCAST_UNBLOCK_SOURCE: 1723 case MCAST_JOIN_SOURCE_GROUP: 1724 case MCAST_LEAVE_SOURCE_GROUP: 1725 /* cannot "get" the value for these */ 1726 return (-1); 1727 case IPV6_RECVPKTINFO: 1728 *i1 = icmp->icmp_ip_recvpktinfo; 1729 break; 1730 case IPV6_RECVTCLASS: 1731 *i1 = icmp->icmp_ipv6_recvtclass; 1732 break; 1733 case IPV6_RECVPATHMTU: 1734 *i1 = icmp->icmp_ipv6_recvpathmtu; 1735 break; 1736 case IPV6_V6ONLY: 1737 *i1 = 1; 1738 break; 1739 case IPV6_RECVHOPLIMIT: 1740 *i1 = icmp->icmp_ipv6_recvhoplimit; 1741 break; 1742 case IPV6_RECVHOPOPTS: 1743 *i1 = icmp->icmp_ipv6_recvhopopts; 1744 break; 1745 case IPV6_RECVDSTOPTS: 1746 *i1 = icmp->icmp_ipv6_recvdstopts; 1747 break; 1748 case _OLD_IPV6_RECVDSTOPTS: 1749 *i1 = icmp->icmp_old_ipv6_recvdstopts; 1750 break; 1751 case IPV6_RECVRTHDRDSTOPTS: 1752 *i1 = icmp->icmp_ipv6_recvrtdstopts; 1753 break; 1754 case IPV6_RECVRTHDR: 1755 *i1 = icmp->icmp_ipv6_recvrthdr; 1756 break; 1757 case IPV6_PKTINFO: { 1758 /* XXX assumes that caller has room for max size! */ 1759 struct in6_pktinfo *pkti; 1760 1761 pkti = (struct in6_pktinfo *)ptr; 1762 if (ipp->ipp_fields & IPPF_IFINDEX) 1763 pkti->ipi6_ifindex = ipp->ipp_ifindex; 1764 else 1765 pkti->ipi6_ifindex = 0; 1766 if (ipp->ipp_fields & IPPF_ADDR) 1767 pkti->ipi6_addr = ipp->ipp_addr; 1768 else 1769 pkti->ipi6_addr = ipv6_all_zeros; 1770 return (sizeof (struct in6_pktinfo)); 1771 } 1772 case IPV6_NEXTHOP: { 1773 sin6_t *sin6 = (sin6_t *)ptr; 1774 1775 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 1776 return (0); 1777 *sin6 = sin6_null; 1778 sin6->sin6_family = AF_INET6; 1779 sin6->sin6_addr = ipp->ipp_nexthop; 1780 return (sizeof (sin6_t)); 1781 } 1782 case IPV6_HOPOPTS: 1783 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 1784 return (0); 1785 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 1786 return (0); 1787 bcopy((char *)ipp->ipp_hopopts + 1788 icmp->icmp_label_len_v6, ptr, 1789 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1790 if (icmp->icmp_label_len_v6 > 0) { 1791 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 1792 ptr[1] = (ipp->ipp_hopoptslen - 1793 icmp->icmp_label_len_v6 + 7) / 8 - 1; 1794 } 1795 return (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1796 case IPV6_RTHDRDSTOPTS: 1797 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 1798 return (0); 1799 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 1800 return (ipp->ipp_rtdstoptslen); 1801 case IPV6_RTHDR: 1802 if (!(ipp->ipp_fields & IPPF_RTHDR)) 1803 return (0); 1804 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 1805 return (ipp->ipp_rthdrlen); 1806 case IPV6_DSTOPTS: 1807 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 1808 return (0); 1809 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 1810 return (ipp->ipp_dstoptslen); 1811 case IPV6_PATHMTU: 1812 if (!(ipp->ipp_fields & IPPF_PATHMTU)) 1813 return (0); 1814 1815 return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0, 1816 (struct ip6_mtuinfo *)ptr)); 1817 case IPV6_TCLASS: 1818 if (ipp->ipp_fields & IPPF_TCLASS) 1819 *i1 = ipp->ipp_tclass; 1820 else 1821 *i1 = IPV6_FLOW_TCLASS( 1822 IPV6_DEFAULT_VERS_AND_FLOW); 1823 break; 1824 default: 1825 return (-1); 1826 } 1827 break; 1828 case IPPROTO_ICMPV6: 1829 /* 1830 * Only allow IPv6 option processing on native IPv6 sockets. 1831 */ 1832 if (icmp->icmp_family != AF_INET6) 1833 return (-1); 1834 1835 if (icmp->icmp_proto != IPPROTO_ICMPV6) 1836 return (-1); 1837 1838 switch (name) { 1839 case ICMP6_FILTER: 1840 if (icmp->icmp_filter == NULL) { 1841 /* Make it look like "pass all" */ 1842 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1843 } else { 1844 (void) bcopy(icmp->icmp_filter, ptr, 1845 sizeof (icmp6_filter_t)); 1846 } 1847 return (sizeof (icmp6_filter_t)); 1848 default: 1849 return (-1); 1850 } 1851 default: 1852 return (-1); 1853 } 1854 return (sizeof (int)); 1855 } 1856 1857 /* This routine sets socket options. */ 1858 /* ARGSUSED */ 1859 int 1860 icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, 1861 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 1862 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 1863 { 1864 icmp_t *icmp = (icmp_t *)q->q_ptr; 1865 int *i1 = (int *)invalp; 1866 boolean_t onoff = (*i1 == 0) ? 0 : 1; 1867 boolean_t checkonly; 1868 int error; 1869 1870 switch (optset_context) { 1871 case SETFN_OPTCOM_CHECKONLY: 1872 checkonly = B_TRUE; 1873 /* 1874 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 1875 * inlen != 0 implies value supplied and 1876 * we have to "pretend" to set it. 1877 * inlen == 0 implies that there is no 1878 * value part in T_CHECK request and just validation 1879 * done elsewhere should be enough, we just return here. 1880 */ 1881 if (inlen == 0) { 1882 *outlenp = 0; 1883 return (0); 1884 } 1885 break; 1886 case SETFN_OPTCOM_NEGOTIATE: 1887 checkonly = B_FALSE; 1888 break; 1889 case SETFN_UD_NEGOTIATE: 1890 case SETFN_CONN_NEGOTIATE: 1891 checkonly = B_FALSE; 1892 /* 1893 * Negotiating local and "association-related" options 1894 * through T_UNITDATA_REQ. 1895 * 1896 * Following routine can filter out ones we do not 1897 * want to be "set" this way. 1898 */ 1899 if (!icmp_opt_allow_udr_set(level, name)) { 1900 *outlenp = 0; 1901 return (EINVAL); 1902 } 1903 break; 1904 default: 1905 /* 1906 * We should never get here 1907 */ 1908 *outlenp = 0; 1909 return (EINVAL); 1910 } 1911 1912 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 1913 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 1914 1915 /* 1916 * For fixed length options, no sanity check 1917 * of passed in length is done. It is assumed *_optcom_req() 1918 * routines do the right thing. 1919 */ 1920 1921 switch (level) { 1922 case SOL_SOCKET: 1923 switch (name) { 1924 case SO_DEBUG: 1925 if (!checkonly) 1926 icmp->icmp_debug = onoff; 1927 break; 1928 case SO_PROTOTYPE: 1929 if ((*i1 & 0xFF) != IPPROTO_ICMP && 1930 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 1931 secpolicy_net_rawaccess(cr) != 0) { 1932 *outlenp = 0; 1933 return (EACCES); 1934 } 1935 /* Can't use IPPROTO_RAW with IPv6 */ 1936 if ((*i1 & 0xFF) == IPPROTO_RAW && 1937 icmp->icmp_family == AF_INET6) { 1938 *outlenp = 0; 1939 return (EPROTONOSUPPORT); 1940 } 1941 if (checkonly) { 1942 /* T_CHECK case */ 1943 *(int *)outvalp = (*i1 & 0xFF); 1944 break; 1945 } 1946 icmp->icmp_proto = *i1 & 0xFF; 1947 if ((icmp->icmp_proto == IPPROTO_RAW || 1948 icmp->icmp_proto == IPPROTO_IGMP) && 1949 icmp->icmp_family == AF_INET) 1950 icmp->icmp_hdrincl = 1; 1951 else 1952 icmp->icmp_hdrincl = 0; 1953 1954 if (icmp->icmp_family == AF_INET6 && 1955 icmp->icmp_proto == IPPROTO_ICMPV6) { 1956 /* Set offset for icmp6_cksum */ 1957 icmp->icmp_raw_checksum = 0; 1958 icmp->icmp_checksum_off = 2; 1959 } 1960 if (icmp->icmp_proto == IPPROTO_UDP || 1961 icmp->icmp_proto == IPPROTO_TCP || 1962 icmp->icmp_proto == IPPROTO_SCTP) { 1963 icmp->icmp_no_tp_cksum = 1; 1964 icmp->icmp_sticky_ipp.ipp_fields |= 1965 IPPF_NO_CKSUM; 1966 } else { 1967 icmp->icmp_no_tp_cksum = 0; 1968 icmp->icmp_sticky_ipp.ipp_fields &= 1969 ~IPPF_NO_CKSUM; 1970 } 1971 1972 if (icmp->icmp_filter != NULL && 1973 icmp->icmp_proto != IPPROTO_ICMPV6) { 1974 kmem_free(icmp->icmp_filter, 1975 sizeof (icmp6_filter_t)); 1976 icmp->icmp_filter = NULL; 1977 } 1978 1979 /* Rebuild the header template */ 1980 error = icmp_build_hdrs(q, icmp); 1981 if (error != 0) { 1982 *outlenp = 0; 1983 return (error); 1984 } 1985 1986 /* 1987 * For SCTP, we don't use icmp_bind_proto() for 1988 * raw socket binding. Note that we do not need 1989 * to set *outlenp. 1990 */ 1991 if (icmp->icmp_proto == IPPROTO_SCTP) 1992 return (0); 1993 1994 icmp_bind_proto(q); 1995 *outlenp = sizeof (int); 1996 *(int *)outvalp = *i1 & 0xFF; 1997 return (0); 1998 case SO_REUSEADDR: 1999 if (!checkonly) 2000 icmp->icmp_reuseaddr = onoff; 2001 break; 2002 2003 /* 2004 * The following three items are available here, 2005 * but are only meaningful to IP. 2006 */ 2007 case SO_DONTROUTE: 2008 if (!checkonly) 2009 icmp->icmp_dontroute = onoff; 2010 break; 2011 case SO_USELOOPBACK: 2012 if (!checkonly) 2013 icmp->icmp_useloopback = onoff; 2014 break; 2015 case SO_BROADCAST: 2016 if (!checkonly) 2017 icmp->icmp_broadcast = onoff; 2018 break; 2019 2020 case SO_SNDBUF: 2021 if (*i1 > icmp_max_buf) { 2022 *outlenp = 0; 2023 return (ENOBUFS); 2024 } 2025 if (!checkonly) { 2026 q->q_hiwat = *i1; 2027 q->q_next->q_hiwat = *i1; 2028 } 2029 break; 2030 case SO_RCVBUF: 2031 if (*i1 > icmp_max_buf) { 2032 *outlenp = 0; 2033 return (ENOBUFS); 2034 } 2035 if (!checkonly) { 2036 RD(q)->q_hiwat = *i1; 2037 (void) mi_set_sth_hiwat(RD(q), *i1); 2038 } 2039 break; 2040 case SO_DGRAM_ERRIND: 2041 if (!checkonly) 2042 icmp->icmp_dgram_errind = onoff; 2043 break; 2044 case SO_ALLZONES: 2045 /* 2046 * "soft" error (negative) 2047 * option not handled at this level 2048 * Note: Do not modify *outlenp 2049 */ 2050 return (-EINVAL); 2051 case SO_TIMESTAMP: 2052 if (!checkonly) { 2053 icmp->icmp_timestamp = onoff; 2054 } 2055 break; 2056 case SO_MAC_EXEMPT: 2057 if (secpolicy_net_mac_aware(cr) != 0 || 2058 icmp->icmp_state != TS_UNBND) 2059 return (EACCES); 2060 if (!checkonly) 2061 icmp->icmp_mac_exempt = onoff; 2062 break; 2063 /* 2064 * Following three not meaningful for icmp 2065 * Action is same as "default" so we keep them 2066 * in comments. 2067 * case SO_LINGER: 2068 * case SO_KEEPALIVE: 2069 * case SO_OOBINLINE: 2070 */ 2071 default: 2072 *outlenp = 0; 2073 return (EINVAL); 2074 } 2075 break; 2076 case IPPROTO_IP: 2077 /* 2078 * Only allow IPv4 option processing on IPv4 sockets. 2079 */ 2080 if (icmp->icmp_family != AF_INET) { 2081 *outlenp = 0; 2082 return (ENOPROTOOPT); 2083 } 2084 switch (name) { 2085 case IP_OPTIONS: 2086 case T_IP_OPTIONS: 2087 /* Save options for use by IP. */ 2088 if ((inlen & 0x3) || 2089 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2090 *outlenp = 0; 2091 return (EINVAL); 2092 } 2093 if (checkonly) 2094 break; 2095 2096 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2097 &icmp->icmp_ip_snd_options_len, 2098 icmp->icmp_label_len, invalp, inlen)) { 2099 *outlenp = 0; 2100 return (ENOMEM); 2101 } 2102 2103 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2104 icmp->icmp_ip_snd_options_len; 2105 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2106 icmp_wroff_extra); 2107 break; 2108 case IP_HDRINCL: 2109 if (!checkonly) 2110 icmp->icmp_hdrincl = onoff; 2111 break; 2112 case IP_TOS: 2113 case T_IP_TOS: 2114 if (!checkonly) { 2115 icmp->icmp_type_of_service = (uint8_t)*i1; 2116 } 2117 break; 2118 case IP_TTL: 2119 if (!checkonly) { 2120 icmp->icmp_ttl = (uint8_t)*i1; 2121 } 2122 break; 2123 case IP_MULTICAST_IF: 2124 /* 2125 * TODO should check OPTMGMT reply and undo this if 2126 * there is an error. 2127 */ 2128 if (!checkonly) 2129 icmp->icmp_multicast_if_addr = *i1; 2130 break; 2131 case IP_MULTICAST_TTL: 2132 if (!checkonly) 2133 icmp->icmp_multicast_ttl = *invalp; 2134 break; 2135 case IP_MULTICAST_LOOP: 2136 if (!checkonly) { 2137 icmp->icmp_multicast_loop = 2138 (*invalp == 0) ? 0 : 1; 2139 } 2140 break; 2141 case IP_BOUND_IF: 2142 if (!checkonly) 2143 icmp->icmp_bound_if = *i1; 2144 break; 2145 case IP_UNSPEC_SRC: 2146 if (!checkonly) 2147 icmp->icmp_unspec_source = onoff; 2148 break; 2149 case IP_XMIT_IF: 2150 if (!checkonly) 2151 icmp->icmp_xmit_if = *i1; 2152 break; 2153 case IP_RECVIF: 2154 if (!checkonly) 2155 icmp->icmp_recvif = onoff; 2156 break; 2157 2158 case IP_PKTINFO: { 2159 /* 2160 * This also handles IP_RECVPKTINFO. 2161 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2162 * Differentiation is based on the size of the argument 2163 * passed in. 2164 */ 2165 struct in_pktinfo *pktinfop; 2166 ip4_pkt_t *attr_pktinfop; 2167 2168 if (checkonly) 2169 break; 2170 2171 if (inlen == sizeof (int)) { 2172 /* 2173 * This is IP_RECVPKTINFO option. 2174 * Keep a local copy of wether this option is 2175 * set or not and pass it down to IP for 2176 * processing. 2177 */ 2178 icmp->icmp_ip_recvpktinfo = onoff; 2179 return (-EINVAL); 2180 } 2181 2182 2183 if (inlen != sizeof (struct in_pktinfo)) 2184 return (EINVAL); 2185 2186 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2187 == NULL) { 2188 /* 2189 * sticky option is not supported 2190 */ 2191 return (EINVAL); 2192 } 2193 2194 pktinfop = (struct in_pktinfo *)invalp; 2195 2196 /* 2197 * Atleast one of the values should be specified 2198 */ 2199 if (pktinfop->ipi_ifindex == 0 && 2200 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2201 return (EINVAL); 2202 } 2203 2204 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2205 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2206 } 2207 break; 2208 case IP_ADD_MEMBERSHIP: 2209 case IP_DROP_MEMBERSHIP: 2210 case IP_BLOCK_SOURCE: 2211 case IP_UNBLOCK_SOURCE: 2212 case IP_ADD_SOURCE_MEMBERSHIP: 2213 case IP_DROP_SOURCE_MEMBERSHIP: 2214 case MCAST_JOIN_GROUP: 2215 case MCAST_LEAVE_GROUP: 2216 case MCAST_BLOCK_SOURCE: 2217 case MCAST_UNBLOCK_SOURCE: 2218 case MCAST_JOIN_SOURCE_GROUP: 2219 case MCAST_LEAVE_SOURCE_GROUP: 2220 case MRT_INIT: 2221 case MRT_DONE: 2222 case MRT_ADD_VIF: 2223 case MRT_DEL_VIF: 2224 case MRT_ADD_MFC: 2225 case MRT_DEL_MFC: 2226 case MRT_VERSION: 2227 case MRT_ASSERT: 2228 case IP_SEC_OPT: 2229 case IP_DONTFAILOVER_IF: 2230 case IP_NEXTHOP: 2231 /* 2232 * "soft" error (negative) 2233 * option not handled at this level 2234 * Note: Do not modify *outlenp 2235 */ 2236 return (-EINVAL); 2237 default: 2238 *outlenp = 0; 2239 return (EINVAL); 2240 } 2241 break; 2242 case IPPROTO_IPV6: { 2243 ip6_pkt_t *ipp; 2244 boolean_t sticky; 2245 2246 if (icmp->icmp_family != AF_INET6) { 2247 *outlenp = 0; 2248 return (ENOPROTOOPT); 2249 } 2250 /* 2251 * Deal with both sticky options and ancillary data 2252 */ 2253 if (thisdg_attrs == NULL) { 2254 /* sticky options, or none */ 2255 ipp = &icmp->icmp_sticky_ipp; 2256 sticky = B_TRUE; 2257 } else { 2258 /* ancillary data */ 2259 ipp = (ip6_pkt_t *)thisdg_attrs; 2260 sticky = B_FALSE; 2261 } 2262 2263 switch (name) { 2264 case IPV6_MULTICAST_IF: 2265 if (!checkonly) 2266 icmp->icmp_multicast_if_index = *i1; 2267 break; 2268 case IPV6_UNICAST_HOPS: 2269 /* -1 means use default */ 2270 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2271 *outlenp = 0; 2272 return (EINVAL); 2273 } 2274 if (!checkonly) { 2275 if (*i1 == -1) { 2276 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2277 icmp_ipv6_hoplimit; 2278 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2279 /* Pass modified value to IP. */ 2280 *i1 = ipp->ipp_hoplimit; 2281 } else { 2282 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2283 (uint8_t)*i1; 2284 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2285 } 2286 /* Rebuild the header template */ 2287 error = icmp_build_hdrs(q, icmp); 2288 if (error != 0) { 2289 *outlenp = 0; 2290 return (error); 2291 } 2292 } 2293 break; 2294 case IPV6_MULTICAST_HOPS: 2295 /* -1 means use default */ 2296 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2297 *outlenp = 0; 2298 return (EINVAL); 2299 } 2300 if (!checkonly) { 2301 if (*i1 == -1) { 2302 icmp->icmp_multicast_ttl = 2303 ipp->ipp_multicast_hops = 2304 IP_DEFAULT_MULTICAST_TTL; 2305 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2306 /* Pass modified value to IP. */ 2307 *i1 = icmp->icmp_multicast_ttl; 2308 } else { 2309 icmp->icmp_multicast_ttl = 2310 ipp->ipp_multicast_hops = 2311 (uint8_t)*i1; 2312 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2313 } 2314 } 2315 break; 2316 case IPV6_MULTICAST_LOOP: 2317 if (*i1 != 0 && *i1 != 1) { 2318 *outlenp = 0; 2319 return (EINVAL); 2320 } 2321 if (!checkonly) 2322 icmp->icmp_multicast_loop = *i1; 2323 break; 2324 case IPV6_CHECKSUM: 2325 /* 2326 * Integer offset into the user data of where the 2327 * checksum is located. 2328 * Offset of -1 disables option. 2329 * Does not apply to IPPROTO_ICMPV6. 2330 */ 2331 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2332 *outlenp = 0; 2333 return (EINVAL); 2334 } 2335 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2336 /* Negative or not 16 bit aligned offset */ 2337 *outlenp = 0; 2338 return (EINVAL); 2339 } 2340 if (checkonly) 2341 break; 2342 2343 if (*i1 == -1) { 2344 icmp->icmp_raw_checksum = 0; 2345 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2346 } else { 2347 icmp->icmp_raw_checksum = 1; 2348 icmp->icmp_checksum_off = *i1; 2349 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2350 } 2351 /* Rebuild the header template */ 2352 error = icmp_build_hdrs(q, icmp); 2353 if (error != 0) { 2354 *outlenp = 0; 2355 return (error); 2356 } 2357 break; 2358 case IPV6_JOIN_GROUP: 2359 case IPV6_LEAVE_GROUP: 2360 case MCAST_JOIN_GROUP: 2361 case MCAST_LEAVE_GROUP: 2362 case MCAST_BLOCK_SOURCE: 2363 case MCAST_UNBLOCK_SOURCE: 2364 case MCAST_JOIN_SOURCE_GROUP: 2365 case MCAST_LEAVE_SOURCE_GROUP: 2366 /* 2367 * "soft" error (negative) 2368 * option not handled at this level 2369 * Note: Do not modify *outlenp 2370 */ 2371 return (-EINVAL); 2372 case IPV6_BOUND_IF: 2373 if (!checkonly) 2374 icmp->icmp_bound_if = *i1; 2375 break; 2376 case IPV6_UNSPEC_SRC: 2377 if (!checkonly) 2378 icmp->icmp_unspec_source = onoff; 2379 break; 2380 case IPV6_RECVTCLASS: 2381 if (!checkonly) 2382 icmp->icmp_ipv6_recvtclass = onoff; 2383 break; 2384 /* 2385 * Set boolean switches for ancillary data delivery 2386 */ 2387 case IPV6_RECVPKTINFO: 2388 if (!checkonly) 2389 icmp->icmp_ip_recvpktinfo = onoff; 2390 break; 2391 case IPV6_RECVPATHMTU: 2392 if (!checkonly) 2393 icmp->icmp_ipv6_recvpathmtu = onoff; 2394 break; 2395 case IPV6_RECVHOPLIMIT: 2396 if (!checkonly) 2397 icmp->icmp_ipv6_recvhoplimit = onoff; 2398 break; 2399 case IPV6_RECVHOPOPTS: 2400 if (!checkonly) 2401 icmp->icmp_ipv6_recvhopopts = onoff; 2402 break; 2403 case IPV6_RECVDSTOPTS: 2404 if (!checkonly) 2405 icmp->icmp_ipv6_recvdstopts = onoff; 2406 break; 2407 case _OLD_IPV6_RECVDSTOPTS: 2408 if (!checkonly) 2409 icmp->icmp_old_ipv6_recvdstopts = onoff; 2410 break; 2411 case IPV6_RECVRTHDRDSTOPTS: 2412 if (!checkonly) 2413 icmp->icmp_ipv6_recvrtdstopts = onoff; 2414 break; 2415 case IPV6_RECVRTHDR: 2416 if (!checkonly) 2417 icmp->icmp_ipv6_recvrthdr = onoff; 2418 break; 2419 /* 2420 * Set sticky options or ancillary data. 2421 * If sticky options, (re)build any extension headers 2422 * that might be needed as a result. 2423 */ 2424 case IPV6_PKTINFO: 2425 /* 2426 * The source address and ifindex are verified 2427 * in ip_opt_set(). For ancillary data the 2428 * source address is checked in ip_wput_v6. 2429 */ 2430 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2431 return (EINVAL); 2432 if (checkonly) 2433 break; 2434 2435 if (inlen == 0) { 2436 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2437 ipp->ipp_sticky_ignored |= 2438 (IPPF_IFINDEX|IPPF_ADDR); 2439 } else { 2440 struct in6_pktinfo *pkti; 2441 2442 pkti = (struct in6_pktinfo *)invalp; 2443 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2444 ipp->ipp_addr = pkti->ipi6_addr; 2445 if (ipp->ipp_ifindex != 0) 2446 ipp->ipp_fields |= IPPF_IFINDEX; 2447 else 2448 ipp->ipp_fields &= ~IPPF_IFINDEX; 2449 if (!IN6_IS_ADDR_UNSPECIFIED( 2450 &ipp->ipp_addr)) 2451 ipp->ipp_fields |= IPPF_ADDR; 2452 else 2453 ipp->ipp_fields &= ~IPPF_ADDR; 2454 } 2455 if (sticky) { 2456 error = icmp_build_hdrs(q, icmp); 2457 if (error != 0) 2458 return (error); 2459 } 2460 break; 2461 case IPV6_HOPLIMIT: 2462 /* This option can only be used as ancillary data. */ 2463 if (sticky) 2464 return (EINVAL); 2465 if (inlen != 0 && inlen != sizeof (int)) 2466 return (EINVAL); 2467 if (checkonly) 2468 break; 2469 2470 if (inlen == 0) { 2471 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2472 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2473 } else { 2474 if (*i1 > 255 || *i1 < -1) 2475 return (EINVAL); 2476 if (*i1 == -1) 2477 ipp->ipp_hoplimit = icmp_ipv6_hoplimit; 2478 else 2479 ipp->ipp_hoplimit = *i1; 2480 ipp->ipp_fields |= IPPF_HOPLIMIT; 2481 } 2482 break; 2483 case IPV6_TCLASS: 2484 /* 2485 * IPV6_RECVTCLASS accepts -1 as use kernel default 2486 * and [0, 255] as the actualy traffic class. 2487 */ 2488 if (inlen != 0 && inlen != sizeof (int)) 2489 return (EINVAL); 2490 if (checkonly) 2491 break; 2492 2493 if (inlen == 0) { 2494 ipp->ipp_fields &= ~IPPF_TCLASS; 2495 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2496 } else { 2497 if (*i1 >= 256 || *i1 < -1) 2498 return (EINVAL); 2499 if (*i1 == -1) { 2500 ipp->ipp_tclass = 2501 IPV6_FLOW_TCLASS( 2502 IPV6_DEFAULT_VERS_AND_FLOW); 2503 } else { 2504 ipp->ipp_tclass = *i1; 2505 } 2506 ipp->ipp_fields |= IPPF_TCLASS; 2507 } 2508 if (sticky) { 2509 error = icmp_build_hdrs(q, icmp); 2510 if (error != 0) 2511 return (error); 2512 } 2513 break; 2514 case IPV6_NEXTHOP: 2515 /* 2516 * IP will verify that the nexthop is reachable 2517 * and fail for sticky options. 2518 */ 2519 if (inlen != 0 && inlen != sizeof (sin6_t)) 2520 return (EINVAL); 2521 if (checkonly) 2522 break; 2523 2524 if (inlen == 0) { 2525 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2526 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2527 } else { 2528 sin6_t *sin6 = (sin6_t *)invalp; 2529 2530 if (sin6->sin6_family != AF_INET6) 2531 return (EAFNOSUPPORT); 2532 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 2533 return (EADDRNOTAVAIL); 2534 ipp->ipp_nexthop = sin6->sin6_addr; 2535 if (!IN6_IS_ADDR_UNSPECIFIED( 2536 &ipp->ipp_nexthop)) 2537 ipp->ipp_fields |= IPPF_NEXTHOP; 2538 else 2539 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2540 } 2541 if (sticky) { 2542 error = icmp_build_hdrs(q, icmp); 2543 if (error != 0) 2544 return (error); 2545 } 2546 break; 2547 case IPV6_HOPOPTS: { 2548 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2549 /* 2550 * Sanity checks - minimum size, size a multiple of 2551 * eight bytes, and matching size passed in. 2552 */ 2553 if (inlen != 0 && 2554 inlen != (8 * (hopts->ip6h_len + 1))) 2555 return (EINVAL); 2556 2557 if (checkonly) 2558 break; 2559 error = optcom_pkt_set(invalp, inlen, sticky, 2560 (uchar_t **)&ipp->ipp_hopopts, 2561 &ipp->ipp_hopoptslen, 2562 sticky ? icmp->icmp_label_len_v6 : 0); 2563 if (error != 0) 2564 return (error); 2565 if (ipp->ipp_hopoptslen == 0) { 2566 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2567 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2568 } else { 2569 ipp->ipp_fields |= IPPF_HOPOPTS; 2570 } 2571 if (sticky) { 2572 error = icmp_build_hdrs(q, icmp); 2573 if (error != 0) 2574 return (error); 2575 } 2576 break; 2577 } 2578 case IPV6_RTHDRDSTOPTS: { 2579 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2580 2581 /* 2582 * Sanity checks - minimum size, size a multiple of 2583 * eight bytes, and matching size passed in. 2584 */ 2585 if (inlen != 0 && 2586 inlen != (8 * (dopts->ip6d_len + 1))) 2587 return (EINVAL); 2588 2589 if (checkonly) 2590 break; 2591 2592 if (inlen == 0) { 2593 if (sticky && 2594 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2595 kmem_free(ipp->ipp_rtdstopts, 2596 ipp->ipp_rtdstoptslen); 2597 ipp->ipp_rtdstopts = NULL; 2598 ipp->ipp_rtdstoptslen = 0; 2599 } 2600 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2601 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2602 } else { 2603 error = optcom_pkt_set(invalp, inlen, sticky, 2604 (uchar_t **)&ipp->ipp_rtdstopts, 2605 &ipp->ipp_rtdstoptslen, 0); 2606 if (error != 0) 2607 return (error); 2608 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2609 } 2610 if (sticky) { 2611 error = icmp_build_hdrs(q, icmp); 2612 if (error != 0) 2613 return (error); 2614 } 2615 break; 2616 } 2617 case IPV6_DSTOPTS: { 2618 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2619 2620 /* 2621 * Sanity checks - minimum size, size a multiple of 2622 * eight bytes, and matching size passed in. 2623 */ 2624 if (inlen != 0 && 2625 inlen != (8 * (dopts->ip6d_len + 1))) 2626 return (EINVAL); 2627 2628 if (checkonly) 2629 break; 2630 2631 if (inlen == 0) { 2632 if (sticky && 2633 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2634 kmem_free(ipp->ipp_dstopts, 2635 ipp->ipp_dstoptslen); 2636 ipp->ipp_dstopts = NULL; 2637 ipp->ipp_dstoptslen = 0; 2638 } 2639 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2640 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2641 } else { 2642 error = optcom_pkt_set(invalp, inlen, sticky, 2643 (uchar_t **)&ipp->ipp_dstopts, 2644 &ipp->ipp_dstoptslen, 0); 2645 if (error != 0) 2646 return (error); 2647 ipp->ipp_fields |= IPPF_DSTOPTS; 2648 } 2649 if (sticky) { 2650 error = icmp_build_hdrs(q, icmp); 2651 if (error != 0) 2652 return (error); 2653 } 2654 break; 2655 } 2656 case IPV6_RTHDR: { 2657 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2658 2659 /* 2660 * Sanity checks - minimum size, size a multiple of 2661 * eight bytes, and matching size passed in. 2662 */ 2663 if (inlen != 0 && 2664 inlen != (8 * (rt->ip6r_len + 1))) 2665 return (EINVAL); 2666 2667 if (checkonly) 2668 break; 2669 2670 if (inlen == 0) { 2671 if (sticky && 2672 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2673 kmem_free(ipp->ipp_rthdr, 2674 ipp->ipp_rthdrlen); 2675 ipp->ipp_rthdr = NULL; 2676 ipp->ipp_rthdrlen = 0; 2677 } 2678 ipp->ipp_fields &= ~IPPF_RTHDR; 2679 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2680 } else { 2681 error = optcom_pkt_set(invalp, inlen, sticky, 2682 (uchar_t **)&ipp->ipp_rthdr, 2683 &ipp->ipp_rthdrlen, 0); 2684 if (error != 0) 2685 return (error); 2686 ipp->ipp_fields |= IPPF_RTHDR; 2687 } 2688 if (sticky) { 2689 error = icmp_build_hdrs(q, icmp); 2690 if (error != 0) 2691 return (error); 2692 } 2693 break; 2694 } 2695 2696 case IPV6_DONTFRAG: 2697 if (checkonly) 2698 break; 2699 2700 if (onoff) { 2701 ipp->ipp_fields |= IPPF_DONTFRAG; 2702 } else { 2703 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2704 } 2705 break; 2706 2707 case IPV6_USE_MIN_MTU: 2708 if (inlen != sizeof (int)) 2709 return (EINVAL); 2710 2711 if (*i1 < -1 || *i1 > 1) 2712 return (EINVAL); 2713 2714 if (checkonly) 2715 break; 2716 2717 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 2718 ipp->ipp_use_min_mtu = *i1; 2719 break; 2720 2721 /* 2722 * This option can't be set. Its only returned via 2723 * getsockopt() or ancillary data. 2724 */ 2725 case IPV6_PATHMTU: 2726 return (EINVAL); 2727 2728 case IPV6_BOUND_PIF: 2729 case IPV6_SEC_OPT: 2730 case IPV6_DONTFAILOVER_IF: 2731 case IPV6_SRC_PREFERENCES: 2732 case IPV6_V6ONLY: 2733 /* Handled at IP level */ 2734 return (-EINVAL); 2735 default: 2736 *outlenp = 0; 2737 return (EINVAL); 2738 } 2739 break; 2740 } /* end IPPROTO_IPV6 */ 2741 2742 case IPPROTO_ICMPV6: 2743 /* 2744 * Only allow IPv6 option processing on IPv6 sockets. 2745 */ 2746 if (icmp->icmp_family != AF_INET6) { 2747 *outlenp = 0; 2748 return (ENOPROTOOPT); 2749 } 2750 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2751 *outlenp = 0; 2752 return (ENOPROTOOPT); 2753 } 2754 switch (name) { 2755 case ICMP6_FILTER: 2756 if (!checkonly) { 2757 if ((inlen != 0) && 2758 (inlen != sizeof (icmp6_filter_t))) 2759 return (EINVAL); 2760 2761 if (inlen == 0) { 2762 if (icmp->icmp_filter != NULL) { 2763 kmem_free(icmp->icmp_filter, 2764 sizeof (icmp6_filter_t)); 2765 icmp->icmp_filter = NULL; 2766 } 2767 } else { 2768 if (icmp->icmp_filter == NULL) { 2769 icmp->icmp_filter = kmem_alloc( 2770 sizeof (icmp6_filter_t), 2771 KM_NOSLEEP); 2772 if (icmp->icmp_filter == NULL) { 2773 *outlenp = 0; 2774 return (ENOBUFS); 2775 } 2776 } 2777 (void) bcopy(invalp, icmp->icmp_filter, 2778 inlen); 2779 } 2780 } 2781 break; 2782 2783 default: 2784 *outlenp = 0; 2785 return (EINVAL); 2786 } 2787 break; 2788 default: 2789 *outlenp = 0; 2790 return (EINVAL); 2791 } 2792 /* 2793 * Common case of OK return with outval same as inval. 2794 */ 2795 if (invalp != outvalp) { 2796 /* don't trust bcopy for identical src/dst */ 2797 (void) bcopy(invalp, outvalp, inlen); 2798 } 2799 *outlenp = inlen; 2800 return (0); 2801 } 2802 2803 /* 2804 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 2805 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 2806 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 2807 * headers. 2808 * Returns failure if can't allocate memory. 2809 */ 2810 static int 2811 icmp_build_hdrs(queue_t *q, icmp_t *icmp) 2812 { 2813 uchar_t *hdrs; 2814 uint_t hdrs_len; 2815 ip6_t *ip6h; 2816 ip6i_t *ip6i; 2817 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 2818 2819 hdrs_len = ip_total_hdrs_len_v6(ipp); 2820 ASSERT(hdrs_len != 0); 2821 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 2822 /* Need to reallocate */ 2823 if (hdrs_len != 0) { 2824 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 2825 if (hdrs == NULL) 2826 return (ENOMEM); 2827 } else { 2828 hdrs = NULL; 2829 } 2830 if (icmp->icmp_sticky_hdrs_len != 0) { 2831 kmem_free(icmp->icmp_sticky_hdrs, 2832 icmp->icmp_sticky_hdrs_len); 2833 } 2834 icmp->icmp_sticky_hdrs = hdrs; 2835 icmp->icmp_sticky_hdrs_len = hdrs_len; 2836 } 2837 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 2838 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 2839 2840 /* Set header fields not in ipp */ 2841 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 2842 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 2843 ip6h = (ip6_t *)&ip6i[1]; 2844 2845 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 2846 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 2847 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 2848 } 2849 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 2850 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 2851 } 2852 } else { 2853 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 2854 } 2855 2856 if (!(ipp->ipp_fields & IPPF_ADDR)) 2857 ip6h->ip6_src = icmp->icmp_v6src; 2858 2859 /* Try to get everything in a single mblk */ 2860 if (hdrs_len > icmp->icmp_max_hdr_len) { 2861 icmp->icmp_max_hdr_len = hdrs_len; 2862 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2863 icmp_wroff_extra); 2864 } 2865 return (0); 2866 } 2867 2868 /* 2869 * This routine retrieves the value of an ND variable in a icmpparam_t 2870 * structure. It is called through nd_getset when a user reads the 2871 * variable. 2872 */ 2873 /* ARGSUSED */ 2874 static int 2875 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2876 { 2877 icmpparam_t *icmppa = (icmpparam_t *)cp; 2878 2879 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 2880 return (0); 2881 } 2882 2883 /* 2884 * Walk through the param array specified registering each element with the 2885 * named dispatch (ND) handler. 2886 */ 2887 static boolean_t 2888 icmp_param_register(icmpparam_t *icmppa, int cnt) 2889 { 2890 for (; cnt-- > 0; icmppa++) { 2891 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 2892 if (!nd_load(&icmp_g_nd, icmppa->icmp_param_name, 2893 icmp_param_get, icmp_param_set, 2894 (caddr_t)icmppa)) { 2895 nd_free(&icmp_g_nd); 2896 return (B_FALSE); 2897 } 2898 } 2899 } 2900 if (!nd_load(&icmp_g_nd, "icmp_status", icmp_status_report, NULL, 2901 NULL)) { 2902 nd_free(&icmp_g_nd); 2903 return (B_FALSE); 2904 } 2905 return (B_TRUE); 2906 } 2907 2908 /* This routine sets an ND variable in a icmpparam_t structure. */ 2909 /* ARGSUSED */ 2910 static int 2911 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 2912 { 2913 long new_value; 2914 icmpparam_t *icmppa = (icmpparam_t *)cp; 2915 2916 /* 2917 * Fail the request if the new value does not lie within the 2918 * required bounds. 2919 */ 2920 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2921 new_value < icmppa->icmp_param_min || 2922 new_value > icmppa->icmp_param_max) { 2923 return (EINVAL); 2924 } 2925 /* Set the new value */ 2926 icmppa->icmp_param_value = new_value; 2927 return (0); 2928 } 2929 2930 static void 2931 icmp_rput(queue_t *q, mblk_t *mp) 2932 { 2933 struct T_unitdata_ind *tudi; 2934 uchar_t *rptr; 2935 struct T_error_ack *tea; 2936 icmp_t *icmp; 2937 sin_t *sin; 2938 sin6_t *sin6; 2939 ip6_t *ip6h; 2940 ip6i_t *ip6i; 2941 mblk_t *mp1; 2942 int hdr_len; 2943 ipha_t *ipha; 2944 int udi_size; /* Size of T_unitdata_ind */ 2945 uint_t ipvers; 2946 ip6_pkt_t ipp; 2947 uint8_t nexthdr; 2948 boolean_t recvif = B_FALSE; 2949 ip_pktinfo_t *pinfo = NULL; 2950 mblk_t *options_mp = NULL; 2951 uint_t icmp_opt = 0; 2952 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 2953 uint_t hopstrip; 2954 2955 icmp = (icmp_t *)q->q_ptr; 2956 if (icmp->icmp_restricted) { 2957 putnext(q, mp); 2958 return; 2959 } 2960 2961 if (mp->b_datap->db_type == M_CTL) { 2962 /* 2963 * IP sends up the IPSEC_IN message for handling IPSEC 2964 * policy at the TCP level. We don't need it here. 2965 */ 2966 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 2967 mp1 = mp->b_cont; 2968 freeb(mp); 2969 mp = mp1; 2970 } else { 2971 pinfo = (ip_pktinfo_t *)mp->b_rptr; 2972 if ((icmp->icmp_recvif != 0 || 2973 icmp->icmp_ip_recvpktinfo) && 2974 (pinfo->ip_pkt_ulp_type == IN_PKTINFO)) { 2975 /* 2976 * IP has passed the options in mp and the 2977 * actual data is in b_cont. 2978 */ 2979 recvif = B_TRUE; 2980 /* 2981 * We are here bcos IP_RECVIF is set so we need 2982 * to extract the options mblk and adjust the 2983 * rptr 2984 */ 2985 options_mp = mp; 2986 mp = mp->b_cont; 2987 } 2988 } 2989 } 2990 2991 rptr = mp->b_rptr; 2992 switch (mp->b_datap->db_type) { 2993 case M_DATA: 2994 /* 2995 * M_DATA messages contain IP packets. They are handled 2996 * following the switch. 2997 */ 2998 break; 2999 case M_PROTO: 3000 case M_PCPROTO: 3001 /* M_PROTO messages contain some type of TPI message. */ 3002 if ((mp->b_wptr - rptr) < sizeof (t_scalar_t)) { 3003 freemsg(mp); 3004 return; 3005 } 3006 tea = (struct T_error_ack *)rptr; 3007 switch (tea->PRIM_type) { 3008 case T_ERROR_ACK: 3009 switch (tea->ERROR_prim) { 3010 case O_T_BIND_REQ: 3011 case T_BIND_REQ: 3012 /* 3013 * If our O_T_BIND_REQ/T_BIND_REQ fails, 3014 * clear out the source address before 3015 * passing the message upstream. 3016 * If this was caused by a T_CONN_REQ 3017 * revert back to bound state. 3018 */ 3019 if (icmp->icmp_state == TS_UNBND) { 3020 /* 3021 * TPI has not yet bound - bind sent by 3022 * icmp_bind_proto. 3023 */ 3024 freemsg(mp); 3025 return; 3026 } 3027 if (icmp->icmp_state == TS_DATA_XFER) { 3028 /* Connect failed */ 3029 tea->ERROR_prim = T_CONN_REQ; 3030 icmp->icmp_v6src = 3031 icmp->icmp_bound_v6src; 3032 icmp->icmp_state = TS_IDLE; 3033 if (icmp->icmp_family == AF_INET6) 3034 (void) icmp_build_hdrs(q, icmp); 3035 break; 3036 } 3037 3038 if (icmp->icmp_discon_pending) { 3039 tea->ERROR_prim = T_DISCON_REQ; 3040 icmp->icmp_discon_pending = 0; 3041 } 3042 V6_SET_ZERO(icmp->icmp_v6src); 3043 V6_SET_ZERO(icmp->icmp_bound_v6src); 3044 icmp->icmp_state = TS_UNBND; 3045 if (icmp->icmp_family == AF_INET6) 3046 (void) icmp_build_hdrs(q, icmp); 3047 break; 3048 default: 3049 break; 3050 } 3051 break; 3052 case T_BIND_ACK: 3053 icmp_rput_bind_ack(q, mp); 3054 return; 3055 3056 case T_OPTMGMT_ACK: 3057 case T_OK_ACK: 3058 if (tea->PRIM_type == T_OK_ACK) { 3059 struct T_ok_ack *toa; 3060 toa = (struct T_ok_ack *)rptr; 3061 if (toa->CORRECT_prim == T_UNBIND_REQ) { 3062 /* 3063 * If somebody sets IPSEC options, IP 3064 * sends some IPSEC info which is used 3065 * by the TCP for detached connections. 3066 * We don't need it here. 3067 */ 3068 if ((mp1 = mp->b_cont) != NULL) { 3069 freemsg(mp1); 3070 mp->b_cont = NULL; 3071 } 3072 } 3073 } 3074 break; 3075 default: 3076 freemsg(mp); 3077 return; 3078 } 3079 putnext(q, mp); 3080 return; 3081 case M_CTL: 3082 if (recvif) { 3083 /* 3084 * IP has passed the options in mp and the actual data 3085 * is in b_cont. Jump to normal data processing. 3086 */ 3087 break; 3088 } 3089 3090 /* Contains ICMP packet from IP */ 3091 icmp_icmp_error(q, mp); 3092 return; 3093 default: 3094 putnext(q, mp); 3095 return; 3096 } 3097 3098 /* 3099 * Discard message if it is misaligned or smaller than the IP header. 3100 */ 3101 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3102 freemsg(mp); 3103 if (options_mp != NULL) 3104 freeb(options_mp); 3105 BUMP_MIB(&rawip_mib, rawipInErrors); 3106 return; 3107 } 3108 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3109 3110 /* Handle M_DATA messages containing IP packets messages */ 3111 if (ipvers == IPV4_VERSION) { 3112 /* 3113 * Special case where IP attaches 3114 * the IRE needs to be handled so that we don't send up 3115 * IRE to the user land. 3116 */ 3117 ipha = (ipha_t *)rptr; 3118 hdr_len = IPH_HDR_LENGTH(ipha); 3119 3120 if (ipha->ipha_protocol == IPPROTO_TCP) { 3121 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3122 3123 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3124 TH_SYN) && mp->b_cont != NULL) { 3125 mp1 = mp->b_cont; 3126 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3127 freeb(mp1); 3128 mp->b_cont = NULL; 3129 } 3130 } 3131 } 3132 if (icmp_bsd_compat) { 3133 ushort_t len; 3134 len = ntohs(ipha->ipha_length); 3135 3136 if (mp->b_datap->db_ref > 1) { 3137 /* 3138 * Allocate a new IP header so that we can 3139 * modify ipha_length. 3140 */ 3141 mblk_t *mp1; 3142 3143 mp1 = allocb(hdr_len, BPRI_MED); 3144 if (!mp1) { 3145 freemsg(mp); 3146 if (options_mp != NULL) 3147 freeb(options_mp); 3148 BUMP_MIB(&rawip_mib, rawipInErrors); 3149 return; 3150 } 3151 bcopy(rptr, mp1->b_rptr, hdr_len); 3152 mp->b_rptr = rptr + hdr_len; 3153 rptr = mp1->b_rptr; 3154 ipha = (ipha_t *)rptr; 3155 mp1->b_cont = mp; 3156 mp1->b_wptr = rptr + hdr_len; 3157 mp = mp1; 3158 } 3159 len -= hdr_len; 3160 ipha->ipha_length = htons(len); 3161 } 3162 } 3163 3164 /* 3165 * This is the inbound data path. Packets are passed upstream as 3166 * T_UNITDATA_IND messages with full IP headers still attached. 3167 */ 3168 if (icmp->icmp_family == AF_INET) { 3169 ASSERT(ipvers == IPV4_VERSION); 3170 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3171 if (icmp->icmp_recvif && recvif && 3172 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3173 udi_size += sizeof (struct T_opthdr) + 3174 sizeof (uint_t); 3175 } 3176 3177 if (icmp->icmp_ip_recvpktinfo && recvif && 3178 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3179 udi_size += sizeof (struct T_opthdr) + 3180 sizeof (struct in_pktinfo); 3181 } 3182 3183 /* 3184 * If SO_TIMESTAMP is set allocate the appropriate sized 3185 * buffer. Since gethrestime() expects a pointer aligned 3186 * argument, we allocate space necessary for extra 3187 * alignment (even though it might not be used). 3188 */ 3189 if (icmp->icmp_timestamp) { 3190 udi_size += sizeof (struct T_opthdr) + 3191 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3192 } 3193 mp1 = allocb(udi_size, BPRI_MED); 3194 if (mp1 == NULL) { 3195 freemsg(mp); 3196 if (options_mp != NULL) 3197 freeb(options_mp); 3198 BUMP_MIB(&rawip_mib, rawipInErrors); 3199 return; 3200 } 3201 mp1->b_cont = mp; 3202 mp = mp1; 3203 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3204 mp->b_datap->db_type = M_PROTO; 3205 mp->b_wptr = (uchar_t *)tudi + udi_size; 3206 tudi->PRIM_type = T_UNITDATA_IND; 3207 tudi->SRC_length = sizeof (sin_t); 3208 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3209 sin = (sin_t *)&tudi[1]; 3210 *sin = sin_null; 3211 sin->sin_family = AF_INET; 3212 sin->sin_addr.s_addr = ipha->ipha_src; 3213 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3214 sizeof (sin_t); 3215 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3216 tudi->OPT_length = udi_size; 3217 3218 /* 3219 * Add options if IP_RECVIF is set 3220 */ 3221 if (udi_size != 0) { 3222 char *dstopt; 3223 3224 dstopt = (char *)&sin[1]; 3225 if (icmp->icmp_recvif && recvif && 3226 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3227 3228 struct T_opthdr *toh; 3229 uint_t *dstptr; 3230 3231 toh = (struct T_opthdr *)dstopt; 3232 toh->level = IPPROTO_IP; 3233 toh->name = IP_RECVIF; 3234 toh->len = sizeof (struct T_opthdr) + 3235 sizeof (uint_t); 3236 toh->status = 0; 3237 dstopt += sizeof (struct T_opthdr); 3238 dstptr = (uint_t *)dstopt; 3239 *dstptr = pinfo->ip_pkt_ifindex; 3240 dstopt += sizeof (uint_t); 3241 freeb(options_mp); 3242 udi_size -= toh->len; 3243 } 3244 if (icmp->icmp_timestamp) { 3245 struct T_opthdr *toh; 3246 3247 toh = (struct T_opthdr *)dstopt; 3248 toh->level = SOL_SOCKET; 3249 toh->name = SCM_TIMESTAMP; 3250 toh->len = sizeof (struct T_opthdr) + 3251 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3252 toh->status = 0; 3253 dstopt += sizeof (struct T_opthdr); 3254 /* Align for gethrestime() */ 3255 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 3256 sizeof (intptr_t)); 3257 gethrestime((timestruc_t *)dstopt); 3258 dstopt = (char *)toh + toh->len; 3259 udi_size -= toh->len; 3260 } 3261 if (icmp->icmp_ip_recvpktinfo && recvif && 3262 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3263 struct T_opthdr *toh; 3264 struct in_pktinfo *pktinfop; 3265 3266 toh = (struct T_opthdr *)dstopt; 3267 toh->level = IPPROTO_IP; 3268 toh->name = IP_PKTINFO; 3269 toh->len = sizeof (struct T_opthdr) + 3270 sizeof (in_pktinfo_t); 3271 toh->status = 0; 3272 dstopt += sizeof (struct T_opthdr); 3273 pktinfop = (struct in_pktinfo *)dstopt; 3274 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3275 pktinfop->ipi_spec_dst = 3276 pinfo->ip_pkt_match_addr; 3277 3278 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3279 3280 dstopt += sizeof (struct in_pktinfo); 3281 udi_size -= toh->len; 3282 } 3283 3284 /* Consumed all of allocated space */ 3285 ASSERT(udi_size == 0); 3286 } 3287 3288 BUMP_MIB(&rawip_mib, rawipInDatagrams); 3289 putnext(q, mp); 3290 return; 3291 } 3292 3293 /* 3294 * We don't need options_mp in the IPv6 path. 3295 */ 3296 if (options_mp != NULL) { 3297 freeb(options_mp); 3298 options_mp = NULL; 3299 } 3300 3301 /* 3302 * Discard message if it is smaller than the IPv6 header 3303 * or if the header is malformed. 3304 */ 3305 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3306 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3307 icmp->icmp_family != AF_INET6) { 3308 freemsg(mp); 3309 BUMP_MIB(&rawip_mib, rawipInErrors); 3310 return; 3311 } 3312 3313 /* Initialize */ 3314 ipp.ipp_fields = 0; 3315 hopstrip = 0; 3316 3317 ip6h = (ip6_t *)rptr; 3318 /* 3319 * Call on ip_find_hdr_v6 which gets the total hdr len 3320 * as well as individual lenghts of ext hdrs (and ptrs to 3321 * them). 3322 */ 3323 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3324 /* Look for ifindex information */ 3325 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3326 ip6i = (ip6i_t *)ip6h; 3327 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3328 ASSERT(ip6i->ip6i_ifindex != 0); 3329 ipp.ipp_fields |= IPPF_IFINDEX; 3330 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3331 } 3332 rptr = (uchar_t *)&ip6i[1]; 3333 mp->b_rptr = rptr; 3334 if (rptr == mp->b_wptr) { 3335 mp1 = mp->b_cont; 3336 freeb(mp); 3337 mp = mp1; 3338 rptr = mp->b_rptr; 3339 } 3340 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3341 ip6h = (ip6_t *)rptr; 3342 } 3343 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3344 3345 /* 3346 * We need to lie a bit to the user because users inside 3347 * labeled compartments should not see their own labels. We 3348 * assume that in all other respects IP has checked the label, 3349 * and that the label is always first among the options. (If 3350 * it's not first, then this code won't see it, and the option 3351 * will be passed along to the user.) 3352 * 3353 * If we had multilevel ICMP sockets, then the following code 3354 * should be skipped for them to allow the user to see the 3355 * label. 3356 * 3357 * Alignment restrictions in the definition of IP options 3358 * (namely, the requirement that the 4-octet DOI goes on a 3359 * 4-octet boundary) mean that we know exactly where the option 3360 * should start, but we're lenient for other hosts. 3361 * 3362 * Note that there are no multilevel ICMP or raw IP sockets 3363 * yet, thus nobody ever sees the IP6OPT_LS option. 3364 */ 3365 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3366 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3367 const uchar_t *ucp = 3368 (const uchar_t *)ipp.ipp_hopopts + 2; 3369 int remlen = ipp.ipp_hopoptslen - 2; 3370 3371 while (remlen > 0) { 3372 if (*ucp == IP6OPT_PAD1) { 3373 remlen--; 3374 ucp++; 3375 } else if (*ucp == IP6OPT_PADN) { 3376 remlen -= ucp[1] + 2; 3377 ucp += ucp[1] + 2; 3378 } else if (*ucp == ip6opt_ls) { 3379 hopstrip = (ucp - 3380 (const uchar_t *)ipp.ipp_hopopts) + 3381 ucp[1] + 2; 3382 hopstrip = (hopstrip + 7) & ~7; 3383 break; 3384 } else { 3385 /* label option must be first */ 3386 break; 3387 } 3388 } 3389 } 3390 } else { 3391 hdr_len = IPV6_HDR_LEN; 3392 ip6i = NULL; 3393 nexthdr = ip6h->ip6_nxt; 3394 } 3395 /* 3396 * One special case where IP attaches the IRE needs to 3397 * be handled so that we don't send up IRE to the user land. 3398 */ 3399 if (nexthdr == IPPROTO_TCP) { 3400 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3401 3402 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3403 mp->b_cont != NULL) { 3404 mp1 = mp->b_cont; 3405 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3406 freeb(mp1); 3407 mp->b_cont = NULL; 3408 } 3409 } 3410 } 3411 /* 3412 * Check a filter for ICMPv6 types if needed. 3413 * Verify raw checksums if needed. 3414 */ 3415 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3416 if (icmp->icmp_filter != NULL) { 3417 int type; 3418 3419 /* Assumes that IP has done the pullupmsg */ 3420 type = mp->b_rptr[hdr_len]; 3421 3422 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3423 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3424 freemsg(mp); 3425 return; 3426 } 3427 } else { 3428 /* Checksum */ 3429 uint16_t *up; 3430 uint32_t sum; 3431 int remlen; 3432 3433 up = (uint16_t *)&ip6h->ip6_src; 3434 3435 remlen = msgdsize(mp) - hdr_len; 3436 sum = htons(icmp->icmp_proto + remlen) 3437 + up[0] + up[1] + up[2] + up[3] 3438 + up[4] + up[5] + up[6] + up[7] 3439 + up[8] + up[9] + up[10] + up[11] 3440 + up[12] + up[13] + up[14] + up[15]; 3441 sum = (sum & 0xffff) + (sum >> 16); 3442 sum = IP_CSUM(mp, hdr_len, sum); 3443 if (sum != 0) { 3444 /* IPv6 RAW checksum failed */ 3445 ip0dbg(("icmp_rput: RAW checksum " 3446 "failed %x\n", sum)); 3447 freemsg(mp); 3448 BUMP_MIB(&rawip_mib, rawipInCksumErrs); 3449 return; 3450 } 3451 } 3452 } 3453 /* Skip all the IPv6 headers per API */ 3454 mp->b_rptr += hdr_len; 3455 3456 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3457 3458 /* 3459 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3460 * maintain state information, instead of relying on icmp_t 3461 * structure, since there arent any locks protecting these members 3462 * and there is a window where there might be a race between a 3463 * thread setting options on the write side and a thread reading 3464 * these options on the read size. 3465 */ 3466 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3467 IPPF_RTHDR|IPPF_IFINDEX)) { 3468 if (icmp->icmp_ipv6_recvhopopts && 3469 (ipp.ipp_fields & IPPF_HOPOPTS) && 3470 ipp.ipp_hopoptslen > hopstrip) { 3471 udi_size += sizeof (struct T_opthdr) + 3472 ipp.ipp_hopoptslen - hopstrip; 3473 icmp_opt |= IPPF_HOPOPTS; 3474 } 3475 if ((icmp->icmp_ipv6_recvdstopts || 3476 icmp->icmp_old_ipv6_recvdstopts) && 3477 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3478 udi_size += sizeof (struct T_opthdr) + 3479 ipp.ipp_dstoptslen; 3480 icmp_opt |= IPPF_DSTOPTS; 3481 } 3482 if (((icmp->icmp_ipv6_recvdstopts && 3483 icmp->icmp_ipv6_recvrthdr && 3484 (ipp.ipp_fields & IPPF_RTHDR)) || 3485 icmp->icmp_ipv6_recvrtdstopts) && 3486 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3487 udi_size += sizeof (struct T_opthdr) + 3488 ipp.ipp_rtdstoptslen; 3489 icmp_opt |= IPPF_RTDSTOPTS; 3490 } 3491 if (icmp->icmp_ipv6_recvrthdr && 3492 (ipp.ipp_fields & IPPF_RTHDR)) { 3493 udi_size += sizeof (struct T_opthdr) + 3494 ipp.ipp_rthdrlen; 3495 icmp_opt |= IPPF_RTHDR; 3496 } 3497 if (icmp->icmp_ip_recvpktinfo && 3498 (ipp.ipp_fields & IPPF_IFINDEX)) { 3499 udi_size += sizeof (struct T_opthdr) + 3500 sizeof (struct in6_pktinfo); 3501 icmp_opt |= IPPF_IFINDEX; 3502 } 3503 } 3504 if (icmp->icmp_ipv6_recvhoplimit) { 3505 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3506 icmp_ipv6_recvhoplimit = B_TRUE; 3507 } 3508 3509 if (icmp->icmp_ipv6_recvtclass) 3510 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3511 3512 mp1 = allocb(udi_size, BPRI_MED); 3513 if (mp1 == NULL) { 3514 freemsg(mp); 3515 BUMP_MIB(&rawip_mib, rawipInErrors); 3516 return; 3517 } 3518 mp1->b_cont = mp; 3519 mp = mp1; 3520 mp->b_datap->db_type = M_PROTO; 3521 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3522 mp->b_wptr = (uchar_t *)tudi + udi_size; 3523 tudi->PRIM_type = T_UNITDATA_IND; 3524 tudi->SRC_length = sizeof (sin6_t); 3525 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3526 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3527 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3528 tudi->OPT_length = udi_size; 3529 sin6 = (sin6_t *)&tudi[1]; 3530 sin6->sin6_port = 0; 3531 sin6->sin6_family = AF_INET6; 3532 3533 sin6->sin6_addr = ip6h->ip6_src; 3534 /* No sin6_flowinfo per API */ 3535 sin6->sin6_flowinfo = 0; 3536 /* For link-scope source pass up scope id */ 3537 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3538 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3539 sin6->sin6_scope_id = ipp.ipp_ifindex; 3540 else 3541 sin6->sin6_scope_id = 0; 3542 3543 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3544 icmp->icmp_zoneid); 3545 3546 if (udi_size != 0) { 3547 uchar_t *dstopt; 3548 3549 dstopt = (uchar_t *)&sin6[1]; 3550 if (icmp_opt & IPPF_IFINDEX) { 3551 struct T_opthdr *toh; 3552 struct in6_pktinfo *pkti; 3553 3554 toh = (struct T_opthdr *)dstopt; 3555 toh->level = IPPROTO_IPV6; 3556 toh->name = IPV6_PKTINFO; 3557 toh->len = sizeof (struct T_opthdr) + 3558 sizeof (*pkti); 3559 toh->status = 0; 3560 dstopt += sizeof (struct T_opthdr); 3561 pkti = (struct in6_pktinfo *)dstopt; 3562 pkti->ipi6_addr = ip6h->ip6_dst; 3563 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3564 dstopt += sizeof (*pkti); 3565 udi_size -= toh->len; 3566 } 3567 if (icmp_ipv6_recvhoplimit) { 3568 struct T_opthdr *toh; 3569 3570 toh = (struct T_opthdr *)dstopt; 3571 toh->level = IPPROTO_IPV6; 3572 toh->name = IPV6_HOPLIMIT; 3573 toh->len = sizeof (struct T_opthdr) + 3574 sizeof (uint_t); 3575 toh->status = 0; 3576 dstopt += sizeof (struct T_opthdr); 3577 *(uint_t *)dstopt = ip6h->ip6_hops; 3578 dstopt += sizeof (uint_t); 3579 udi_size -= toh->len; 3580 } 3581 if (icmp->icmp_ipv6_recvtclass) { 3582 struct T_opthdr *toh; 3583 3584 toh = (struct T_opthdr *)dstopt; 3585 toh->level = IPPROTO_IPV6; 3586 toh->name = IPV6_TCLASS; 3587 toh->len = sizeof (struct T_opthdr) + 3588 sizeof (uint_t); 3589 toh->status = 0; 3590 dstopt += sizeof (struct T_opthdr); 3591 *(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3592 dstopt += sizeof (uint_t); 3593 udi_size -= toh->len; 3594 } 3595 if (icmp_opt & IPPF_HOPOPTS) { 3596 struct T_opthdr *toh; 3597 3598 toh = (struct T_opthdr *)dstopt; 3599 toh->level = IPPROTO_IPV6; 3600 toh->name = IPV6_HOPOPTS; 3601 toh->len = sizeof (struct T_opthdr) + 3602 ipp.ipp_hopoptslen - hopstrip; 3603 toh->status = 0; 3604 dstopt += sizeof (struct T_opthdr); 3605 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 3606 ipp.ipp_hopoptslen - hopstrip); 3607 if (hopstrip > 0) { 3608 /* copy next header value and fake length */ 3609 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 3610 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 3611 hopstrip / 8; 3612 } 3613 dstopt += ipp.ipp_hopoptslen - hopstrip; 3614 udi_size -= toh->len; 3615 } 3616 if (icmp_opt & IPPF_RTDSTOPTS) { 3617 struct T_opthdr *toh; 3618 3619 toh = (struct T_opthdr *)dstopt; 3620 toh->level = IPPROTO_IPV6; 3621 toh->name = IPV6_DSTOPTS; 3622 toh->len = sizeof (struct T_opthdr) + 3623 ipp.ipp_rtdstoptslen; 3624 toh->status = 0; 3625 dstopt += sizeof (struct T_opthdr); 3626 bcopy(ipp.ipp_rtdstopts, dstopt, 3627 ipp.ipp_rtdstoptslen); 3628 dstopt += ipp.ipp_rtdstoptslen; 3629 udi_size -= toh->len; 3630 } 3631 if (icmp_opt & IPPF_RTHDR) { 3632 struct T_opthdr *toh; 3633 3634 toh = (struct T_opthdr *)dstopt; 3635 toh->level = IPPROTO_IPV6; 3636 toh->name = IPV6_RTHDR; 3637 toh->len = sizeof (struct T_opthdr) + 3638 ipp.ipp_rthdrlen; 3639 toh->status = 0; 3640 dstopt += sizeof (struct T_opthdr); 3641 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 3642 dstopt += ipp.ipp_rthdrlen; 3643 udi_size -= toh->len; 3644 } 3645 if (icmp_opt & IPPF_DSTOPTS) { 3646 struct T_opthdr *toh; 3647 3648 toh = (struct T_opthdr *)dstopt; 3649 toh->level = IPPROTO_IPV6; 3650 toh->name = IPV6_DSTOPTS; 3651 toh->len = sizeof (struct T_opthdr) + 3652 ipp.ipp_dstoptslen; 3653 toh->status = 0; 3654 dstopt += sizeof (struct T_opthdr); 3655 bcopy(ipp.ipp_dstopts, dstopt, 3656 ipp.ipp_dstoptslen); 3657 dstopt += ipp.ipp_dstoptslen; 3658 udi_size -= toh->len; 3659 } 3660 /* Consumed all of allocated space */ 3661 ASSERT(udi_size == 0); 3662 } 3663 BUMP_MIB(&rawip_mib, rawipInDatagrams); 3664 putnext(q, mp); 3665 } 3666 3667 /* 3668 * Process a T_BIND_ACK 3669 */ 3670 static void 3671 icmp_rput_bind_ack(queue_t *q, mblk_t *mp) 3672 { 3673 icmp_t *icmp = (icmp_t *)q->q_ptr; 3674 mblk_t *mp1; 3675 ire_t *ire; 3676 struct T_bind_ack *tba; 3677 uchar_t *addrp; 3678 ipa_conn_t *ac; 3679 ipa6_conn_t *ac6; 3680 3681 /* 3682 * We know if headers are included or not so we can 3683 * safely do this. 3684 */ 3685 if (icmp->icmp_state == TS_UNBND) { 3686 /* 3687 * TPI has not yet bound - bind sent by 3688 * icmp_bind_proto. 3689 */ 3690 freemsg(mp); 3691 return; 3692 } 3693 if (icmp->icmp_discon_pending) 3694 icmp->icmp_discon_pending = 0; 3695 3696 /* 3697 * If a broadcast/multicast address was bound set 3698 * the source address to 0. 3699 * This ensures no datagrams with broadcast address 3700 * as source address are emitted (which would violate 3701 * RFC1122 - Hosts requirements) 3702 * 3703 * Note that when connecting the returned IRE is 3704 * for the destination address and we only perform 3705 * the broadcast check for the source address (it 3706 * is OK to connect to a broadcast/multicast address.) 3707 */ 3708 mp1 = mp->b_cont; 3709 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 3710 ire = (ire_t *)mp1->b_rptr; 3711 3712 /* 3713 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 3714 * local address. 3715 */ 3716 if (ire->ire_type == IRE_BROADCAST && 3717 icmp->icmp_state != TS_DATA_XFER) { 3718 /* This was just a local bind to a MC/broadcast addr */ 3719 V6_SET_ZERO(icmp->icmp_v6src); 3720 if (icmp->icmp_family == AF_INET6) 3721 (void) icmp_build_hdrs(q, icmp); 3722 } else if (V6_OR_V4_INADDR_ANY(icmp->icmp_v6src)) { 3723 /* 3724 * Local address not yet set - pick it from the 3725 * T_bind_ack 3726 */ 3727 tba = (struct T_bind_ack *)mp->b_rptr; 3728 addrp = &mp->b_rptr[tba->ADDR_offset]; 3729 switch (icmp->icmp_family) { 3730 case AF_INET: 3731 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 3732 ac = (ipa_conn_t *)addrp; 3733 } else { 3734 ASSERT(tba->ADDR_length == 3735 sizeof (ipa_conn_x_t)); 3736 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 3737 } 3738 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 3739 &icmp->icmp_v6src); 3740 break; 3741 case AF_INET6: 3742 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 3743 ac6 = (ipa6_conn_t *)addrp; 3744 } else { 3745 ASSERT(tba->ADDR_length == 3746 sizeof (ipa6_conn_x_t)); 3747 ac6 = &((ipa6_conn_x_t *) 3748 addrp)->ac6x_conn; 3749 } 3750 icmp->icmp_v6src = ac6->ac6_laddr; 3751 (void) icmp_build_hdrs(q, icmp); 3752 } 3753 } 3754 mp1 = mp1->b_cont; 3755 } 3756 /* 3757 * Look for one or more appended ACK message added by 3758 * icmp_connect or icmp_disconnect. 3759 * If none found just send up the T_BIND_ACK. 3760 * icmp_connect has appended a T_OK_ACK and a 3761 * T_CONN_CON. 3762 * icmp_disconnect has appended a T_OK_ACK. 3763 */ 3764 if (mp1 != NULL) { 3765 if (mp->b_cont == mp1) 3766 mp->b_cont = NULL; 3767 else { 3768 ASSERT(mp->b_cont->b_cont == mp1); 3769 mp->b_cont->b_cont = NULL; 3770 } 3771 freemsg(mp); 3772 mp = mp1; 3773 while (mp != NULL) { 3774 mp1 = mp->b_cont; 3775 mp->b_cont = NULL; 3776 putnext(q, mp); 3777 mp = mp1; 3778 } 3779 return; 3780 } 3781 freemsg(mp->b_cont); 3782 mp->b_cont = NULL; 3783 putnext(q, mp); 3784 } 3785 3786 /* 3787 * return SNMP stuff in buffer in mpdata 3788 */ 3789 static int 3790 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 3791 { 3792 mblk_t *mpdata; 3793 struct opthdr *optp; 3794 3795 if (mpctl == NULL || 3796 (mpdata = mpctl->b_cont) == NULL) { 3797 return (0); 3798 } 3799 3800 /* fixed length structure for IPv4 and IPv6 counters */ 3801 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 3802 optp->level = EXPER_RAWIP; 3803 optp->name = 0; 3804 (void) snmp_append_data(mpdata, (char *)&rawip_mib, sizeof (rawip_mib)); 3805 optp->len = msgdsize(mpdata); 3806 qreply(q, mpctl); 3807 3808 return (1); 3809 } 3810 3811 /* 3812 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 3813 * TODO: If this ever actually tries to set anything, it needs to be 3814 * to do the appropriate locking. 3815 */ 3816 /* ARGSUSED */ 3817 static int 3818 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 3819 uchar_t *ptr, int len) 3820 { 3821 switch (level) { 3822 case EXPER_RAWIP: 3823 return (0); 3824 default: 3825 return (1); 3826 } 3827 } 3828 3829 /* Report for ndd "icmp_status" */ 3830 /* ARGSUSED */ 3831 static int 3832 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3833 { 3834 IDP idp; 3835 icmp_t *icmp; 3836 char *state; 3837 char laddrbuf[INET6_ADDRSTRLEN]; 3838 char faddrbuf[INET6_ADDRSTRLEN]; 3839 3840 (void) mi_mpprintf(mp, 3841 "RAWIP " MI_COL_HDRPAD_STR 3842 /* 01234567[89ABCDEF] */ 3843 " src addr dest addr state"); 3844 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 3845 3846 3847 for (idp = mi_first_ptr(&icmp_g_head); 3848 (icmp = (icmp_t *)idp) != NULL; 3849 idp = mi_next_ptr(&icmp_g_head, idp)) { 3850 if (icmp->icmp_state == TS_UNBND) 3851 state = "UNBOUND"; 3852 else if (icmp->icmp_state == TS_IDLE) 3853 state = "IDLE"; 3854 else if (icmp->icmp_state == TS_DATA_XFER) 3855 state = "CONNECTED"; 3856 else 3857 state = "UnkState"; 3858 3859 (void) mi_mpprintf(mp, 3860 MI_COL_PTRFMT_STR "%s %s %s", 3861 (void *)icmp, 3862 inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf, 3863 sizeof (faddrbuf)), 3864 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 3865 sizeof (laddrbuf)), 3866 state); 3867 } 3868 return (0); 3869 } 3870 3871 /* 3872 * This routine creates a T_UDERROR_IND message and passes it upstream. 3873 * The address and options are copied from the T_UNITDATA_REQ message 3874 * passed in mp. This message is freed. 3875 */ 3876 static void 3877 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 3878 { 3879 mblk_t *mp1; 3880 uchar_t *rptr = mp->b_rptr; 3881 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 3882 3883 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 3884 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 3885 tudr->OPT_length, err); 3886 if (mp1) 3887 qreply(q, mp1); 3888 freemsg(mp); 3889 } 3890 3891 /* 3892 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 3893 * After some error checking, the message is passed downstream to ip. 3894 */ 3895 static void 3896 icmp_unbind(queue_t *q, mblk_t *mp) 3897 { 3898 icmp_t *icmp = (icmp_t *)q->q_ptr; 3899 3900 /* If a bind has not been done, we can't unbind. */ 3901 if (icmp->icmp_state == TS_UNBND) { 3902 icmp_err_ack(q, mp, TOUTSTATE, 0); 3903 return; 3904 } 3905 V6_SET_ZERO(icmp->icmp_v6src); 3906 V6_SET_ZERO(icmp->icmp_bound_v6src); 3907 icmp->icmp_state = TS_UNBND; 3908 3909 if (icmp->icmp_family == AF_INET6) { 3910 int error; 3911 3912 /* Rebuild the header template */ 3913 error = icmp_build_hdrs(q, icmp); 3914 if (error != 0) { 3915 icmp_err_ack(q, mp, TSYSERR, error); 3916 return; 3917 } 3918 } 3919 /* Pass the unbind to IP. */ 3920 putnext(q, mp); 3921 } 3922 3923 /* 3924 * Process IPv4 packets that already include an IP header. 3925 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 3926 * IPPROTO_IGMP). 3927 */ 3928 static void 3929 icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop, 3930 boolean_t use_putnext) 3931 { 3932 ipha_t *ipha; 3933 int ip_hdr_length; 3934 int tp_hdr_len; 3935 mblk_t *mp1; 3936 uint_t pkt_len; 3937 ip_opt_info_t optinfo; 3938 3939 optinfo.ip_opt_flags = 0; 3940 optinfo.ip_opt_ill_index = 0; 3941 ipha = (ipha_t *)mp->b_rptr; 3942 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 3943 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 3944 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 3945 BUMP_MIB(&rawip_mib, rawipOutErrors); 3946 freemsg(mp); 3947 return; 3948 } 3949 ipha = (ipha_t *)mp->b_rptr; 3950 } 3951 ipha->ipha_version_and_hdr_length = 3952 (IP_VERSION<<4) | (ip_hdr_length>>2); 3953 3954 /* 3955 * For the socket of SOCK_RAW type, the checksum is provided in the 3956 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 3957 * tell IP that the application has sent a complete IP header and not 3958 * to compute the transport checksum nor change the DF flag. 3959 */ 3960 ipha->ipha_ident = IP_HDR_INCLUDED; 3961 ipha->ipha_hdr_checksum = 0; 3962 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 3963 /* Insert options if any */ 3964 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 3965 /* 3966 * Put the IP header plus any transport header that is 3967 * checksumed by ip_wput into the first mblk. (ip_wput assumes 3968 * that at least the checksum field is in the first mblk.) 3969 */ 3970 switch (ipha->ipha_protocol) { 3971 case IPPROTO_UDP: 3972 tp_hdr_len = 8; 3973 break; 3974 case IPPROTO_TCP: 3975 tp_hdr_len = 20; 3976 break; 3977 default: 3978 tp_hdr_len = 0; 3979 break; 3980 } 3981 /* 3982 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 3983 * tp_hdr_len bytes will be in a single mblk. 3984 */ 3985 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 3986 tp_hdr_len)) { 3987 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 3988 tp_hdr_len)) { 3989 BUMP_MIB(&rawip_mib, rawipOutErrors); 3990 freemsg(mp); 3991 return; 3992 } 3993 ipha = (ipha_t *)mp->b_rptr; 3994 } 3995 3996 /* 3997 * if the length is larger then the max allowed IP packet, 3998 * then send an error and abort the processing. 3999 */ 4000 pkt_len = ntohs(ipha->ipha_length) 4001 + icmp->icmp_ip_snd_options_len; 4002 if (pkt_len > IP_MAXPACKET) { 4003 icmp_ud_err(q, mp, EMSGSIZE); 4004 return; 4005 } 4006 if (!(mp1 = allocb(ip_hdr_length + icmp_wroff_extra + 4007 tp_hdr_len, BPRI_LO))) { 4008 icmp_ud_err(q, mp, ENOMEM); 4009 return; 4010 } 4011 mp1->b_rptr += icmp_wroff_extra; 4012 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4013 4014 ipha->ipha_length = htons((uint16_t)pkt_len); 4015 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4016 4017 /* Copy transport header if any */ 4018 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4019 mp1->b_wptr += tp_hdr_len; 4020 4021 /* Add options */ 4022 ipha = (ipha_t *)mp1->b_rptr; 4023 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 4024 icmp->icmp_ip_snd_options_len); 4025 4026 /* Drop IP header and transport header from original */ 4027 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4028 4029 mp1->b_cont = mp; 4030 mp = mp1; 4031 /* 4032 * Massage source route putting first source 4033 * route in ipha_dst. 4034 */ 4035 (void) ip_massage_options(ipha); 4036 } 4037 4038 if (pktinfop != NULL) { 4039 /* 4040 * Over write the source address provided in the header 4041 */ 4042 if (pktinfop->ip4_addr != INADDR_ANY) { 4043 ipha->ipha_src = pktinfop->ip4_addr; 4044 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4045 ASSERT(use_putnext == B_FALSE); 4046 } 4047 4048 if (pktinfop->ip4_ill_index != 0) { 4049 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4050 ASSERT(use_putnext == B_FALSE); 4051 } 4052 } 4053 4054 mblk_setcred(mp, icmp->icmp_credp); 4055 if (use_putnext) { 4056 putnext(q, mp); 4057 } else { 4058 ip_output_options(Q_TO_CONN(q->q_next), mp, q->q_next, IP_WPUT, 4059 &optinfo); 4060 } 4061 } 4062 4063 static boolean_t 4064 icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4065 { 4066 int err; 4067 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4068 4069 err = tsol_compute_label(DB_CREDDEF(mp, icmp->icmp_credp), dst, 4070 opt_storage, icmp->icmp_mac_exempt); 4071 if (err == 0) { 4072 err = tsol_update_options(&icmp->icmp_ip_snd_options, 4073 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4074 opt_storage); 4075 } 4076 if (err != 0) { 4077 BUMP_MIB(&rawip_mib, rawipOutErrors); 4078 DTRACE_PROBE4( 4079 tx__ip__log__drop__updatelabel__icmp, 4080 char *, "queue(1) failed to update options(2) on mp(3)", 4081 queue_t *, q, char *, opt_storage, mblk_t *, mp); 4082 icmp_ud_err(q, mp, err); 4083 return (B_FALSE); 4084 } 4085 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4086 return (B_TRUE); 4087 } 4088 4089 /* 4090 * This routine handles all messages passed downstream. It either 4091 * consumes the message or passes it downstream; it never queues a 4092 * a message. 4093 */ 4094 static void 4095 icmp_wput(queue_t *q, mblk_t *mp) 4096 { 4097 uchar_t *rptr = mp->b_rptr; 4098 ipha_t *ipha; 4099 mblk_t *mp1; 4100 int ip_hdr_length; 4101 #define tudr ((struct T_unitdata_req *)rptr) 4102 size_t ip_len; 4103 icmp_t *icmp; 4104 sin6_t *sin6; 4105 sin_t *sin; 4106 ipaddr_t v4dst; 4107 ip4_pkt_t pktinfo; 4108 ip4_pkt_t *pktinfop = &pktinfo; 4109 ip_opt_info_t optinfo; 4110 queue_t *ip_wq; 4111 boolean_t use_putnext = B_TRUE; 4112 4113 icmp = (icmp_t *)q->q_ptr; 4114 if (icmp->icmp_restricted) { 4115 icmp_wput_restricted(q, mp); 4116 return; 4117 } 4118 4119 switch (mp->b_datap->db_type) { 4120 case M_DATA: 4121 if (icmp->icmp_hdrincl) { 4122 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4123 ipha = (ipha_t *)mp->b_rptr; 4124 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4125 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4126 BUMP_MIB(&rawip_mib, rawipOutErrors); 4127 freemsg(mp); 4128 return; 4129 } 4130 ipha = (ipha_t *)mp->b_rptr; 4131 } 4132 /* 4133 * If this connection was used for v6 (inconceivable!) 4134 * or if we have a new destination, then it's time to 4135 * figure a new label. 4136 */ 4137 if (is_system_labeled() && 4138 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4139 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4140 ipha->ipha_dst) && 4141 !icmp_update_label(q, icmp, mp, ipha->ipha_dst)) { 4142 return; 4143 } 4144 icmp_wput_hdrincl(q, mp, icmp, NULL, use_putnext); 4145 return; 4146 } 4147 freemsg(mp); 4148 return; 4149 case M_PROTO: 4150 case M_PCPROTO: 4151 ip_len = mp->b_wptr - rptr; 4152 if (ip_len >= sizeof (struct T_unitdata_req)) { 4153 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4154 if (((union T_primitives *)rptr)->type 4155 == T_UNITDATA_REQ) 4156 break; 4157 } 4158 /* FALLTHRU */ 4159 default: 4160 icmp_wput_other(q, mp); 4161 return; 4162 } 4163 4164 /* Handle T_UNITDATA_REQ messages here. */ 4165 4166 4167 4168 if (icmp->icmp_state == TS_UNBND) { 4169 /* If a port has not been bound to the stream, fail. */ 4170 BUMP_MIB(&rawip_mib, rawipOutErrors); 4171 icmp_ud_err(q, mp, EPROTO); 4172 return; 4173 } 4174 mp1 = mp->b_cont; 4175 if (mp1 == NULL) { 4176 BUMP_MIB(&rawip_mib, rawipOutErrors); 4177 icmp_ud_err(q, mp, EPROTO); 4178 return; 4179 } 4180 4181 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4182 BUMP_MIB(&rawip_mib, rawipOutErrors); 4183 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4184 return; 4185 } 4186 4187 switch (icmp->icmp_family) { 4188 case AF_INET6: 4189 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4190 if (!OK_32PTR((char *)sin6) || 4191 tudr->DEST_length != sizeof (sin6_t) || 4192 sin6->sin6_family != AF_INET6) { 4193 BUMP_MIB(&rawip_mib, rawipOutErrors); 4194 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4195 return; 4196 } 4197 4198 /* No support for mapped addresses on raw sockets */ 4199 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4200 BUMP_MIB(&rawip_mib, rawipOutErrors); 4201 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4202 return; 4203 } 4204 4205 /* 4206 * Destination is a native IPv6 address. 4207 * Send out an IPv6 format packet. 4208 */ 4209 icmp_wput_ipv6(q, mp, sin6, tudr->OPT_length); 4210 return; 4211 4212 case AF_INET: 4213 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4214 if (!OK_32PTR((char *)sin) || 4215 tudr->DEST_length != sizeof (sin_t) || 4216 sin->sin_family != AF_INET) { 4217 BUMP_MIB(&rawip_mib, rawipOutErrors); 4218 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4219 return; 4220 } 4221 /* Extract and ipaddr */ 4222 v4dst = sin->sin_addr.s_addr; 4223 break; 4224 4225 default: 4226 ASSERT(0); 4227 } 4228 4229 pktinfop->ip4_ill_index = 0; 4230 pktinfop->ip4_addr = INADDR_ANY; 4231 optinfo.ip_opt_flags = 0; 4232 optinfo.ip_opt_ill_index = 0; 4233 4234 4235 /* 4236 * If options passed in, feed it for verification and handling 4237 */ 4238 if (tudr->OPT_length != 0) { 4239 int error; 4240 4241 error = 0; 4242 if (icmp_unitdata_opt_process(q, mp, &error, 4243 (void *)pktinfop) < 0) { 4244 /* failure */ 4245 BUMP_MIB(&rawip_mib, rawipOutErrors); 4246 icmp_ud_err(q, mp, error); 4247 return; 4248 } 4249 ASSERT(error == 0); 4250 /* 4251 * Note: Success in processing options. 4252 * mp option buffer represented by 4253 * OPT_length/offset now potentially modified 4254 * and contain option setting results 4255 */ 4256 4257 if (pktinfop->ip4_ill_index != 0 || 4258 pktinfop->ip4_addr != INADDR_ANY) { 4259 /* 4260 * PKTINFO option is supported only when ICMP is 4261 * over IP. 4262 */ 4263 ip_wq = WR(q)->q_next; 4264 if (NOT_OVER_IP(ip_wq)) { 4265 icmp_ud_err(q, mp, EINVAL); 4266 return; 4267 } 4268 use_putnext = B_FALSE; 4269 } 4270 } 4271 4272 if (v4dst == INADDR_ANY) 4273 v4dst = htonl(INADDR_LOOPBACK); 4274 4275 /* Check if our saved options are valid; update if not */ 4276 if (is_system_labeled() && 4277 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4278 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst) && 4279 !icmp_update_label(q, icmp, mp, v4dst)) { 4280 return; 4281 } 4282 4283 /* Protocol 255 contains full IP headers */ 4284 if (icmp->icmp_hdrincl) { 4285 freeb(mp); 4286 icmp_wput_hdrincl(q, mp1, icmp, pktinfop, use_putnext); 4287 return; 4288 } 4289 4290 4291 /* Add an IP header */ 4292 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4293 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 4294 if ((uchar_t *)ipha < mp1->b_datap->db_base || 4295 mp1->b_datap->db_ref != 1 || 4296 !OK_32PTR(ipha)) { 4297 if (!(mp1 = allocb(ip_hdr_length + icmp_wroff_extra, 4298 BPRI_LO))) { 4299 BUMP_MIB(&rawip_mib, rawipOutErrors); 4300 icmp_ud_err(q, mp, ENOMEM); 4301 return; 4302 } 4303 mp1->b_cont = mp->b_cont; 4304 ipha = (ipha_t *)mp1->b_datap->db_lim; 4305 mp1->b_wptr = (uchar_t *)ipha; 4306 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4307 } 4308 #ifdef _BIG_ENDIAN 4309 /* Set version, header length, and tos */ 4310 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4311 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4312 icmp->icmp_type_of_service); 4313 /* Set ttl and protocol */ 4314 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4315 #else 4316 /* Set version, header length, and tos */ 4317 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4318 ((icmp->icmp_type_of_service << 8) | 4319 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4320 /* Set ttl and protocol */ 4321 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4322 #endif 4323 if (pktinfop->ip4_addr != INADDR_ANY) { 4324 ASSERT(use_putnext == B_FALSE); 4325 ipha->ipha_src = pktinfop->ip4_addr; 4326 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4327 } else { 4328 4329 /* 4330 * Copy our address into the packet. If this is zero, 4331 * ip will fill in the real source address. 4332 */ 4333 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4334 } 4335 4336 ipha->ipha_fragment_offset_and_flags = 0; 4337 4338 if (pktinfop->ip4_ill_index != 0) { 4339 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4340 ASSERT(use_putnext == B_FALSE); 4341 } 4342 4343 4344 /* 4345 * For the socket of SOCK_RAW type, the checksum is provided in the 4346 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4347 * tell IP that the application has sent a complete IP header and not 4348 * to compute the transport checksum nor change the DF flag. 4349 */ 4350 ipha->ipha_ident = IP_HDR_INCLUDED; 4351 4352 /* Finish common formatting of the packet. */ 4353 mp1->b_rptr = (uchar_t *)ipha; 4354 4355 ip_len = mp1->b_wptr - (uchar_t *)ipha; 4356 if (mp1->b_cont != NULL) 4357 ip_len += msgdsize(mp1->b_cont); 4358 4359 /* 4360 * Set the length into the IP header. 4361 * If the length is greater than the maximum allowed by IP, 4362 * then free the message and return. Do not try and send it 4363 * as this can cause problems in layers below. 4364 */ 4365 if (ip_len > IP_MAXPACKET) { 4366 BUMP_MIB(&rawip_mib, rawipOutErrors); 4367 icmp_ud_err(q, mp, EMSGSIZE); 4368 return; 4369 } 4370 ipha->ipha_length = htons((uint16_t)ip_len); 4371 /* 4372 * Copy in the destination address from the T_UNITDATA 4373 * request 4374 */ 4375 ipha->ipha_dst = v4dst; 4376 4377 /* 4378 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4379 */ 4380 if (CLASSD(v4dst)) 4381 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4382 4383 /* Copy in options if any */ 4384 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4385 bcopy(icmp->icmp_ip_snd_options, 4386 &ipha[1], icmp->icmp_ip_snd_options_len); 4387 /* 4388 * Massage source route putting first source route in ipha_dst. 4389 * Ignore the destination in the T_unitdata_req. 4390 */ 4391 (void) ip_massage_options(ipha); 4392 } 4393 4394 freeb(mp); 4395 BUMP_MIB(&rawip_mib, rawipOutDatagrams); 4396 mblk_setcred(mp1, icmp->icmp_credp); 4397 if (use_putnext) { 4398 putnext(q, mp1); 4399 } else { 4400 ip_output_options(Q_TO_CONN(q->q_next), mp1, q->q_next, IP_WPUT, 4401 &optinfo); 4402 } 4403 #undef ipha 4404 #undef tudr 4405 } 4406 4407 static boolean_t 4408 icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4409 { 4410 int err; 4411 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4412 4413 err = tsol_compute_label_v6(DB_CREDDEF(mp, icmp->icmp_credp), dst, 4414 opt_storage, icmp->icmp_mac_exempt); 4415 if (err == 0) { 4416 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4417 &icmp->icmp_label_len_v6, opt_storage); 4418 } 4419 if (err != 0) { 4420 BUMP_MIB(&rawip_mib, rawipOutErrors); 4421 DTRACE_PROBE4( 4422 tx__ip__log__drop__updatelabel__icmp6, 4423 char *, "queue(1) failed to update options(2) on mp(3)", 4424 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4425 icmp_ud_err(wq, mp, err); 4426 return (B_FALSE); 4427 } 4428 4429 icmp->icmp_v6lastdst = *dst; 4430 return (B_TRUE); 4431 } 4432 4433 /* 4434 * icmp_wput_ipv6(): 4435 * Assumes that icmp_wput did some sanity checking on the destination 4436 * address, but that the label may not yet be correct. 4437 */ 4438 void 4439 icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) 4440 { 4441 ip6_t *ip6h; 4442 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 4443 mblk_t *mp1; 4444 int ip_hdr_len = IPV6_HDR_LEN; 4445 size_t ip_len; 4446 icmp_t *icmp; 4447 ip6_pkt_t ipp_s; /* For ancillary data options */ 4448 ip6_pkt_t *ipp = &ipp_s; 4449 ip6_pkt_t *tipp; 4450 uint32_t csum = 0; 4451 uint_t ignore = 0; 4452 uint_t option_exists = 0, is_sticky = 0; 4453 uint8_t *cp; 4454 uint8_t *nxthdr_ptr; 4455 in6_addr_t ip6_dst; 4456 4457 icmp = (icmp_t *)q->q_ptr; 4458 4459 /* 4460 * If the local address is a mapped address return 4461 * an error. 4462 * It would be possible to send an IPv6 packet but the 4463 * response would never make it back to the application 4464 * since it is bound to a mapped address. 4465 */ 4466 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4467 BUMP_MIB(&rawip_mib, rawipOutErrors); 4468 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4469 return; 4470 } 4471 4472 ipp->ipp_fields = 0; 4473 ipp->ipp_sticky_ignored = 0; 4474 4475 /* 4476 * If TPI options passed in, feed it for verification and handling 4477 */ 4478 if (tudr_optlen != 0) { 4479 int error; 4480 4481 if (icmp_unitdata_opt_process(q, mp, &error, 4482 (void *)ipp) < 0) { 4483 /* failure */ 4484 BUMP_MIB(&rawip_mib, rawipOutErrors); 4485 icmp_ud_err(q, mp, error); 4486 return; 4487 } 4488 ignore = ipp->ipp_sticky_ignored; 4489 ASSERT(error == 0); 4490 } 4491 4492 if (sin6->sin6_scope_id != 0 && 4493 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4494 /* 4495 * IPPF_SCOPE_ID is special. It's neither a sticky 4496 * option nor ancillary data. It needs to be 4497 * explicitly set in options_exists. 4498 */ 4499 option_exists |= IPPF_SCOPE_ID; 4500 } 4501 4502 /* 4503 * Compute the destination address 4504 */ 4505 ip6_dst = sin6->sin6_addr; 4506 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4507 ip6_dst = ipv6_loopback; 4508 4509 /* 4510 * If we're not going to the same destination as last time, then 4511 * recompute the label required. This is done in a separate routine to 4512 * avoid blowing up our stack here. 4513 */ 4514 if (is_system_labeled() && 4515 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst) && 4516 !icmp_update_label_v6(q, icmp, mp, &ip6_dst)) { 4517 return; 4518 } 4519 4520 /* 4521 * If there's a security label here, then we ignore any options the 4522 * user may try to set. We keep the peer's label as a hidden sticky 4523 * option. 4524 */ 4525 if (icmp->icmp_label_len_v6 > 0) { 4526 ignore &= ~IPPF_HOPOPTS; 4527 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4528 } 4529 4530 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4531 (ipp->ipp_fields == 0)) { 4532 /* No sticky options nor ancillary data. */ 4533 goto no_options; 4534 } 4535 4536 /* 4537 * Go through the options figuring out where each is going to 4538 * come from and build two masks. The first mask indicates if 4539 * the option exists at all. The second mask indicates if the 4540 * option is sticky or ancillary. 4541 */ 4542 if (!(ignore & IPPF_HOPOPTS)) { 4543 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4544 option_exists |= IPPF_HOPOPTS; 4545 ip_hdr_len += ipp->ipp_hopoptslen; 4546 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4547 option_exists |= IPPF_HOPOPTS; 4548 is_sticky |= IPPF_HOPOPTS; 4549 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4550 } 4551 } 4552 4553 if (!(ignore & IPPF_RTHDR)) { 4554 if (ipp->ipp_fields & IPPF_RTHDR) { 4555 option_exists |= IPPF_RTHDR; 4556 ip_hdr_len += ipp->ipp_rthdrlen; 4557 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4558 option_exists |= IPPF_RTHDR; 4559 is_sticky |= IPPF_RTHDR; 4560 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4561 } 4562 } 4563 4564 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4565 /* 4566 * Need to have a router header to use these. 4567 */ 4568 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4569 option_exists |= IPPF_RTDSTOPTS; 4570 ip_hdr_len += ipp->ipp_rtdstoptslen; 4571 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4572 option_exists |= IPPF_RTDSTOPTS; 4573 is_sticky |= IPPF_RTDSTOPTS; 4574 ip_hdr_len += 4575 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4576 } 4577 } 4578 4579 if (!(ignore & IPPF_DSTOPTS)) { 4580 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4581 option_exists |= IPPF_DSTOPTS; 4582 ip_hdr_len += ipp->ipp_dstoptslen; 4583 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4584 option_exists |= IPPF_DSTOPTS; 4585 is_sticky |= IPPF_DSTOPTS; 4586 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4587 } 4588 } 4589 4590 if (!(ignore & IPPF_IFINDEX)) { 4591 if (ipp->ipp_fields & IPPF_IFINDEX) { 4592 option_exists |= IPPF_IFINDEX; 4593 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4594 option_exists |= IPPF_IFINDEX; 4595 is_sticky |= IPPF_IFINDEX; 4596 } 4597 } 4598 4599 if (!(ignore & IPPF_ADDR)) { 4600 if (ipp->ipp_fields & IPPF_ADDR) { 4601 option_exists |= IPPF_ADDR; 4602 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4603 option_exists |= IPPF_ADDR; 4604 is_sticky |= IPPF_ADDR; 4605 } 4606 } 4607 4608 if (!(ignore & IPPF_DONTFRAG)) { 4609 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4610 option_exists |= IPPF_DONTFRAG; 4611 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4612 option_exists |= IPPF_DONTFRAG; 4613 is_sticky |= IPPF_DONTFRAG; 4614 } 4615 } 4616 4617 if (!(ignore & IPPF_USE_MIN_MTU)) { 4618 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4619 option_exists |= IPPF_USE_MIN_MTU; 4620 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4621 IPPF_USE_MIN_MTU) { 4622 option_exists |= IPPF_USE_MIN_MTU; 4623 is_sticky |= IPPF_USE_MIN_MTU; 4624 } 4625 } 4626 4627 if (!(ignore & IPPF_NEXTHOP)) { 4628 if (ipp->ipp_fields & IPPF_NEXTHOP) { 4629 option_exists |= IPPF_NEXTHOP; 4630 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) { 4631 option_exists |= IPPF_NEXTHOP; 4632 is_sticky |= IPPF_NEXTHOP; 4633 } 4634 } 4635 4636 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 4637 option_exists |= IPPF_HOPLIMIT; 4638 /* IPV6_HOPLIMIT can never be sticky */ 4639 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 4640 4641 if (!(ignore & IPPF_UNICAST_HOPS) && 4642 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 4643 option_exists |= IPPF_UNICAST_HOPS; 4644 is_sticky |= IPPF_UNICAST_HOPS; 4645 } 4646 4647 if (!(ignore & IPPF_MULTICAST_HOPS) && 4648 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 4649 option_exists |= IPPF_MULTICAST_HOPS; 4650 is_sticky |= IPPF_MULTICAST_HOPS; 4651 } 4652 4653 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 4654 /* This is a sticky socket option only */ 4655 option_exists |= IPPF_NO_CKSUM; 4656 is_sticky |= IPPF_NO_CKSUM; 4657 } 4658 4659 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 4660 /* This is a sticky socket option only */ 4661 option_exists |= IPPF_RAW_CKSUM; 4662 is_sticky |= IPPF_RAW_CKSUM; 4663 } 4664 4665 if (!(ignore & IPPF_TCLASS)) { 4666 if (ipp->ipp_fields & IPPF_TCLASS) { 4667 option_exists |= IPPF_TCLASS; 4668 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 4669 option_exists |= IPPF_TCLASS; 4670 is_sticky |= IPPF_TCLASS; 4671 } 4672 } 4673 4674 no_options: 4675 4676 /* 4677 * If any options carried in the ip6i_t were specified, we 4678 * need to account for the ip6i_t in the data we'll be sending 4679 * down. 4680 */ 4681 if (option_exists & IPPF_HAS_IP6I) 4682 ip_hdr_len += sizeof (ip6i_t); 4683 4684 /* check/fix buffer config, setup pointers into it */ 4685 mp1 = mp->b_cont; 4686 ip6h = (ip6_t *)&mp1->b_rptr[-ip_hdr_len]; 4687 if ((mp1->b_datap->db_ref != 1) || 4688 ((unsigned char *)ip6h < mp1->b_datap->db_base) || 4689 !OK_32PTR(ip6h)) { 4690 /* Try to get everything in a single mblk next time */ 4691 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 4692 icmp->icmp_max_hdr_len = ip_hdr_len; 4693 (void) mi_set_sth_wroff(RD(q), 4694 icmp->icmp_max_hdr_len + icmp_wroff_extra); 4695 } 4696 mp1 = allocb(ip_hdr_len + icmp_wroff_extra, BPRI_LO); 4697 if (!mp1) { 4698 BUMP_MIB(&rawip_mib, rawipOutErrors); 4699 icmp_ud_err(q, mp, ENOMEM); 4700 return; 4701 } 4702 mp1->b_cont = mp->b_cont; 4703 mp1->b_wptr = mp1->b_datap->db_lim; 4704 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 4705 } 4706 mp1->b_rptr = (unsigned char *)ip6h; 4707 ip6i = (ip6i_t *)ip6h; 4708 4709 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &icmp->icmp_sticky_ipp : ipp) 4710 if (option_exists & IPPF_HAS_IP6I) { 4711 ip6h = (ip6_t *)&ip6i[1]; 4712 ip6i->ip6i_flags = 0; 4713 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4714 4715 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 4716 if (option_exists & IPPF_SCOPE_ID) { 4717 ip6i->ip6i_flags |= IP6I_IFINDEX; 4718 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 4719 } else if (option_exists & IPPF_IFINDEX) { 4720 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 4721 ASSERT(tipp->ipp_ifindex != 0); 4722 ip6i->ip6i_flags |= IP6I_IFINDEX; 4723 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 4724 } 4725 4726 if (option_exists & IPPF_RAW_CKSUM) { 4727 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 4728 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 4729 } 4730 4731 if (option_exists & IPPF_NO_CKSUM) { 4732 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 4733 } 4734 4735 if (option_exists & IPPF_ADDR) { 4736 /* 4737 * Enable per-packet source address verification if 4738 * IPV6_PKTINFO specified the source address. 4739 * ip6_src is set in the transport's _wput function. 4740 */ 4741 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 4742 } 4743 4744 if (option_exists & IPPF_DONTFRAG) { 4745 ip6i->ip6i_flags |= IP6I_DONTFRAG; 4746 } 4747 4748 if (option_exists & IPPF_USE_MIN_MTU) { 4749 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 4750 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 4751 } 4752 4753 if (option_exists & IPPF_NEXTHOP) { 4754 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 4755 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 4756 ip6i->ip6i_flags |= IP6I_NEXTHOP; 4757 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 4758 } 4759 4760 /* 4761 * tell IP this is an ip6i_t private header 4762 */ 4763 ip6i->ip6i_nxt = IPPROTO_RAW; 4764 } 4765 4766 /* Initialize IPv6 header */ 4767 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4768 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 4769 4770 /* Set the hoplimit of the outgoing packet. */ 4771 if (option_exists & IPPF_HOPLIMIT) { 4772 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 4773 ip6h->ip6_hops = ipp->ipp_hoplimit; 4774 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4775 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 4776 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 4777 if (option_exists & IPPF_MULTICAST_HOPS) 4778 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4779 } else { 4780 ip6h->ip6_hops = icmp->icmp_ttl; 4781 if (option_exists & IPPF_UNICAST_HOPS) 4782 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4783 } 4784 4785 if (option_exists & IPPF_ADDR) { 4786 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 4787 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 4788 ip6h->ip6_src = tipp->ipp_addr; 4789 } else { 4790 /* 4791 * The source address was not set using IPV6_PKTINFO. 4792 * First look at the bound source. 4793 * If unspecified fallback to __sin6_src_id. 4794 */ 4795 ip6h->ip6_src = icmp->icmp_v6src; 4796 if (sin6->__sin6_src_id != 0 && 4797 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4798 ip_srcid_find_id(sin6->__sin6_src_id, 4799 &ip6h->ip6_src, icmp->icmp_zoneid); 4800 } 4801 } 4802 4803 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 4804 cp = (uint8_t *)&ip6h[1]; 4805 4806 /* 4807 * Here's where we have to start stringing together 4808 * any extension headers in the right order: 4809 * Hop-by-hop, destination, routing, and final destination opts. 4810 */ 4811 if (option_exists & IPPF_HOPOPTS) { 4812 /* Hop-by-hop options */ 4813 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4814 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 4815 4816 *nxthdr_ptr = IPPROTO_HOPOPTS; 4817 nxthdr_ptr = &hbh->ip6h_nxt; 4818 4819 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 4820 cp += tipp->ipp_hopoptslen; 4821 } 4822 /* 4823 * En-route destination options 4824 * Only do them if there's a routing header as well 4825 */ 4826 if (option_exists & IPPF_RTDSTOPTS) { 4827 ip6_dest_t *dst = (ip6_dest_t *)cp; 4828 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 4829 4830 *nxthdr_ptr = IPPROTO_DSTOPTS; 4831 nxthdr_ptr = &dst->ip6d_nxt; 4832 4833 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 4834 cp += tipp->ipp_rtdstoptslen; 4835 } 4836 /* 4837 * Routing header next 4838 */ 4839 if (option_exists & IPPF_RTHDR) { 4840 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 4841 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 4842 4843 *nxthdr_ptr = IPPROTO_ROUTING; 4844 nxthdr_ptr = &rt->ip6r_nxt; 4845 4846 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 4847 cp += tipp->ipp_rthdrlen; 4848 } 4849 /* 4850 * Do ultimate destination options 4851 */ 4852 if (option_exists & IPPF_DSTOPTS) { 4853 ip6_dest_t *dest = (ip6_dest_t *)cp; 4854 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 4855 4856 *nxthdr_ptr = IPPROTO_DSTOPTS; 4857 nxthdr_ptr = &dest->ip6d_nxt; 4858 4859 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 4860 cp += tipp->ipp_dstoptslen; 4861 } 4862 4863 /* 4864 * Now set the last header pointer to the proto passed in 4865 */ 4866 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 4867 *nxthdr_ptr = icmp->icmp_proto; 4868 4869 /* 4870 * Copy in the destination address 4871 */ 4872 ip6h->ip6_dst = ip6_dst; 4873 4874 ip6h->ip6_vcf = 4875 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 4876 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 4877 4878 if (option_exists & IPPF_TCLASS) { 4879 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 4880 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 4881 tipp->ipp_tclass); 4882 } 4883 if (option_exists & IPPF_RTHDR) { 4884 ip6_rthdr_t *rth; 4885 4886 /* 4887 * Perform any processing needed for source routing. 4888 * We know that all extension headers will be in the same mblk 4889 * as the IPv6 header. 4890 */ 4891 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 4892 if (rth != NULL && rth->ip6r_segleft != 0) { 4893 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 4894 /* 4895 * Drop packet - only support Type 0 routing. 4896 * Notify the application as well. 4897 */ 4898 icmp_ud_err(q, mp, EPROTO); 4899 BUMP_MIB(&rawip_mib, rawipOutErrors); 4900 return; 4901 } 4902 /* 4903 * rth->ip6r_len is twice the number of 4904 * addresses in the header 4905 */ 4906 if (rth->ip6r_len & 0x1) { 4907 icmp_ud_err(q, mp, EPROTO); 4908 BUMP_MIB(&rawip_mib, rawipOutErrors); 4909 return; 4910 } 4911 /* 4912 * Shuffle the routing header and ip6_dst 4913 * addresses, and get the checksum difference 4914 * between the first hop (in ip6_dst) and 4915 * the destination (in the last routing hdr entry). 4916 */ 4917 csum = ip_massage_options_v6(ip6h, rth); 4918 /* 4919 * Verify that the first hop isn't a mapped address. 4920 * Routers along the path need to do this verification 4921 * for subsequent hops. 4922 */ 4923 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 4924 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4925 BUMP_MIB(&rawip_mib, rawipOutErrors); 4926 return; 4927 } 4928 } 4929 } 4930 4931 ip_len = mp1->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 4932 if (mp1->b_cont != NULL) 4933 ip_len += msgdsize(mp1->b_cont); 4934 4935 /* 4936 * Set the length into the IP header. 4937 * If the length is greater than the maximum allowed by IP, 4938 * then free the message and return. Do not try and send it 4939 * as this can cause problems in layers below. 4940 */ 4941 if (ip_len > IP_MAXPACKET) { 4942 BUMP_MIB(&rawip_mib, rawipOutErrors); 4943 icmp_ud_err(q, mp, EMSGSIZE); 4944 return; 4945 } 4946 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 4947 uint_t cksum_off; /* From ip6i == mp1->b_rptr */ 4948 uint16_t *cksum_ptr; 4949 uint_t ext_hdrs_len; 4950 4951 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 4952 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 4953 icmp->icmp_checksum_off == 2); 4954 4955 /* 4956 * We make it easy for IP to include our pseudo header 4957 * by putting our length in uh_checksum, modified (if 4958 * we have a routing header) by the checksum difference 4959 * between the ultimate destination and first hop addresses. 4960 * Note: ICMPv6 must always checksum the packet. 4961 */ 4962 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 4963 if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) { 4964 if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) { 4965 BUMP_MIB(&rawip_mib, rawipOutErrors); 4966 freemsg(mp); 4967 return; 4968 } 4969 ip6i = (ip6i_t *)mp1->b_rptr; 4970 if (ip6i->ip6i_nxt == IPPROTO_RAW) 4971 ip6h = (ip6_t *)&ip6i[1]; 4972 else 4973 ip6h = (ip6_t *)ip6i; 4974 } 4975 /* Add payload length to checksum */ 4976 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 4977 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 4978 csum += htons(ip_len - ext_hdrs_len); 4979 4980 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 4981 csum = (csum & 0xFFFF) + (csum >> 16); 4982 *cksum_ptr = (uint16_t)csum; 4983 } 4984 4985 #ifdef _LITTLE_ENDIAN 4986 ip_len = htons(ip_len); 4987 #endif 4988 ip6h->ip6_plen = (uint16_t)ip_len; 4989 4990 freeb(mp); 4991 4992 /* We're done. Pass the packet to IP */ 4993 BUMP_MIB(&rawip_mib, rawipOutDatagrams); 4994 mblk_setcred(mp1, icmp->icmp_credp); 4995 putnext(q, mp1); 4996 } 4997 4998 static void 4999 icmp_wput_other(queue_t *q, mblk_t *mp) 5000 { 5001 uchar_t *rptr = mp->b_rptr; 5002 struct iocblk *iocp; 5003 #define tudr ((struct T_unitdata_req *)rptr) 5004 icmp_t *icmp; 5005 cred_t *cr; 5006 5007 icmp = (icmp_t *)q->q_ptr; 5008 5009 cr = DB_CREDDEF(mp, icmp->icmp_credp); 5010 5011 switch (mp->b_datap->db_type) { 5012 case M_PROTO: 5013 case M_PCPROTO: 5014 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5015 /* 5016 * If the message does not contain a PRIM_type, 5017 * throw it away. 5018 */ 5019 freemsg(mp); 5020 return; 5021 } 5022 switch (((union T_primitives *)rptr)->type) { 5023 case T_ADDR_REQ: 5024 icmp_addr_req(q, mp); 5025 return; 5026 case O_T_BIND_REQ: 5027 case T_BIND_REQ: 5028 qwriter(q, mp, icmp_bind, PERIM_OUTER); 5029 return; 5030 case T_CONN_REQ: 5031 icmp_connect(q, mp); 5032 return; 5033 case T_CAPABILITY_REQ: 5034 icmp_capability_req(q, mp); 5035 return; 5036 case T_INFO_REQ: 5037 icmp_info_req(q, mp); 5038 return; 5039 case T_UNITDATA_REQ: 5040 /* 5041 * If a T_UNITDATA_REQ gets here, the address must 5042 * be bad. Valid T_UNITDATA_REQs are found above 5043 * and break to below this switch. 5044 */ 5045 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5046 return; 5047 case T_UNBIND_REQ: 5048 icmp_unbind(q, mp); 5049 return; 5050 5051 case T_SVR4_OPTMGMT_REQ: 5052 if (!snmpcom_req(q, mp, icmp_snmp_set, icmp_snmp_get, 5053 cr)) 5054 /* Only IP can return anything meaningful */ 5055 (void) svr4_optcom_req(q, mp, cr, 5056 &icmp_opt_obj); 5057 return; 5058 5059 case T_OPTMGMT_REQ: 5060 /* Only IP can return anything meaningful */ 5061 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj); 5062 return; 5063 5064 case T_DISCON_REQ: 5065 icmp_disconnect(q, mp); 5066 return; 5067 5068 /* The following TPI message is not supported by icmp. */ 5069 case O_T_CONN_RES: 5070 case T_CONN_RES: 5071 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5072 return; 5073 5074 /* The following 3 TPI requests are illegal for icmp. */ 5075 case T_DATA_REQ: 5076 case T_EXDATA_REQ: 5077 case T_ORDREL_REQ: 5078 freemsg(mp); 5079 (void) putctl1(RD(q), M_ERROR, EPROTO); 5080 return; 5081 default: 5082 break; 5083 } 5084 break; 5085 case M_IOCTL: 5086 iocp = (struct iocblk *)mp->b_rptr; 5087 switch (iocp->ioc_cmd) { 5088 case TI_GETPEERNAME: 5089 if (icmp->icmp_state != TS_DATA_XFER) { 5090 /* 5091 * If a default destination address has not 5092 * been associated with the stream, then we 5093 * don't know the peer's name. 5094 */ 5095 iocp->ioc_error = ENOTCONN; 5096 err_ret:; 5097 iocp->ioc_count = 0; 5098 mp->b_datap->db_type = M_IOCACK; 5099 qreply(q, mp); 5100 return; 5101 } 5102 /* FALLTHRU */ 5103 case TI_GETMYNAME: 5104 /* 5105 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5106 * need to copyin the user's strbuf structure. 5107 * Processing will continue in the M_IOCDATA case 5108 * below. 5109 */ 5110 mi_copyin(q, mp, NULL, 5111 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5112 return; 5113 case ND_SET: 5114 /* nd_getset performs the necessary error checking */ 5115 case ND_GET: 5116 if (nd_getset(q, icmp_g_nd, mp)) { 5117 qreply(q, mp); 5118 return; 5119 } 5120 break; 5121 default: 5122 break; 5123 } 5124 break; 5125 case M_IOCDATA: 5126 icmp_wput_iocdata(q, mp); 5127 return; 5128 default: 5129 break; 5130 } 5131 putnext(q, mp); 5132 } 5133 5134 /* 5135 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5136 * messages. 5137 */ 5138 static void 5139 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5140 { 5141 mblk_t *mp1; 5142 STRUCT_HANDLE(strbuf, sb); 5143 icmp_t *icmp; 5144 in6_addr_t v6addr; 5145 ipaddr_t v4addr; 5146 uint32_t flowinfo = 0; 5147 int addrlen; 5148 5149 /* Make sure it is one of ours. */ 5150 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5151 case TI_GETMYNAME: 5152 case TI_GETPEERNAME: 5153 break; 5154 default: 5155 putnext(q, mp); 5156 return; 5157 } 5158 switch (mi_copy_state(q, mp, &mp1)) { 5159 case -1: 5160 return; 5161 case MI_COPY_CASE(MI_COPY_IN, 1): 5162 break; 5163 case MI_COPY_CASE(MI_COPY_OUT, 1): 5164 /* 5165 * The address has been copied out, so now 5166 * copyout the strbuf. 5167 */ 5168 mi_copyout(q, mp); 5169 return; 5170 case MI_COPY_CASE(MI_COPY_OUT, 2): 5171 /* 5172 * The address and strbuf have been copied out. 5173 * We're done, so just acknowledge the original 5174 * M_IOCTL. 5175 */ 5176 mi_copy_done(q, mp, 0); 5177 return; 5178 default: 5179 /* 5180 * Something strange has happened, so acknowledge 5181 * the original M_IOCTL with an EPROTO error. 5182 */ 5183 mi_copy_done(q, mp, EPROTO); 5184 return; 5185 } 5186 /* 5187 * Now we have the strbuf structure for TI_GETMYNAME 5188 * and TI_GETPEERNAME. Next we copyout the requested 5189 * address and then we'll copyout the strbuf. 5190 */ 5191 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5192 (void *)mp1->b_rptr); 5193 icmp = (icmp_t *)q->q_ptr; 5194 if (icmp->icmp_family == AF_INET) 5195 addrlen = sizeof (sin_t); 5196 else 5197 addrlen = sizeof (sin6_t); 5198 5199 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5200 mi_copy_done(q, mp, EINVAL); 5201 return; 5202 } 5203 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5204 case TI_GETMYNAME: 5205 if (icmp->icmp_family == AF_INET) { 5206 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5207 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 5208 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5209 v4addr = V4_PART_OF_V6(icmp->icmp_v6src); 5210 } else { 5211 /* 5212 * INADDR_ANY 5213 * icmp_v6src is not set, we might be bound to 5214 * broadcast/multicast. Use icmp_bound_v6src as 5215 * local address instead (that could 5216 * also still be INADDR_ANY) 5217 */ 5218 v4addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 5219 } 5220 } else { 5221 /* icmp->icmp_family == AF_INET6 */ 5222 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5223 v6addr = icmp->icmp_v6src; 5224 } else { 5225 /* 5226 * UNSPECIFIED 5227 * icmp_v6src is not set, we might be bound to 5228 * broadcast/multicast. Use icmp_bound_v6src as 5229 * local address instead (that could 5230 * also still be UNSPECIFIED) 5231 */ 5232 v6addr = icmp->icmp_bound_v6src; 5233 } 5234 } 5235 break; 5236 case TI_GETPEERNAME: 5237 if (icmp->icmp_family == AF_INET) { 5238 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5239 v4addr = V4_PART_OF_V6(icmp->icmp_v6dst); 5240 } else { 5241 /* icmp->icmp_family == AF_INET6) */ 5242 v6addr = icmp->icmp_v6dst; 5243 flowinfo = icmp->icmp_flowinfo; 5244 } 5245 break; 5246 default: 5247 mi_copy_done(q, mp, EPROTO); 5248 return; 5249 } 5250 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5251 if (!mp1) 5252 return; 5253 5254 if (icmp->icmp_family == AF_INET) { 5255 sin_t *sin; 5256 5257 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 5258 sin = (sin_t *)mp1->b_rptr; 5259 mp1->b_wptr = (uchar_t *)&sin[1]; 5260 *sin = sin_null; 5261 sin->sin_family = AF_INET; 5262 sin->sin_addr.s_addr = v4addr; 5263 } else { 5264 /* icmp->icmp_family == AF_INET6 */ 5265 sin6_t *sin6; 5266 5267 ASSERT(icmp->icmp_family == AF_INET6); 5268 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 5269 sin6 = (sin6_t *)mp1->b_rptr; 5270 mp1->b_wptr = (uchar_t *)&sin6[1]; 5271 *sin6 = sin6_null; 5272 sin6->sin6_family = AF_INET6; 5273 sin6->sin6_flowinfo = flowinfo; 5274 sin6->sin6_addr = v6addr; 5275 } 5276 /* Copy out the address */ 5277 mi_copyout(q, mp); 5278 } 5279 5280 /* 5281 * Only allow MIB requests and M_FLUSHes to pass. 5282 * All other messages are nacked or dropped. 5283 */ 5284 static void 5285 icmp_wput_restricted(queue_t *q, mblk_t *mp) 5286 { 5287 cred_t *cr; 5288 icmp_t *icmp; 5289 5290 switch (DB_TYPE(mp)) { 5291 case M_PROTO: 5292 case M_PCPROTO: 5293 if (MBLKL(mp) < sizeof (t_scalar_t)) { 5294 freemsg(mp); 5295 return; 5296 } 5297 icmp = (icmp_t *)q->q_ptr; 5298 cr = DB_CREDDEF(mp, icmp->icmp_credp); 5299 5300 switch (((union T_primitives *)mp->b_rptr)->type) { 5301 case T_SVR4_OPTMGMT_REQ: 5302 if (!snmpcom_req(q, mp, 5303 icmp_snmp_set, icmp_snmp_get, cr)) 5304 (void) svr4_optcom_req(q, mp, cr, 5305 &icmp_opt_obj); 5306 return; 5307 case T_OPTMGMT_REQ: 5308 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj); 5309 return; 5310 default: 5311 icmp_err_ack(q, mp, TSYSERR, ENOTSUP); 5312 return; 5313 } 5314 /* NOTREACHED */ 5315 case M_IOCTL: 5316 miocnak(q, mp, 0, ENOTSUP); 5317 break; 5318 case M_FLUSH: 5319 putnext(q, mp); 5320 break; 5321 default: 5322 freemsg(mp); 5323 break; 5324 } 5325 } 5326 5327 static int 5328 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5329 void *thisdg_attrs) 5330 { 5331 icmp_t *icmp; 5332 struct T_unitdata_req *udreqp; 5333 int is_absreq_failure; 5334 cred_t *cr; 5335 5336 icmp = (icmp_t *)q->q_ptr; 5337 5338 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5339 *errorp = 0; 5340 5341 cr = DB_CREDDEF(mp, icmp->icmp_credp); 5342 5343 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5344 udreqp->OPT_offset, cr, &icmp_opt_obj, 5345 thisdg_attrs, &is_absreq_failure); 5346 5347 if (*errorp != 0) { 5348 /* 5349 * Note: No special action needed in this 5350 * module for "is_absreq_failure" 5351 */ 5352 return (-1); /* failure */ 5353 } 5354 ASSERT(is_absreq_failure == 0); 5355 return (0); /* success */ 5356 } 5357 5358 void 5359 icmp_ddi_init(void) 5360 { 5361 ICMP6_MAJ = ddi_name_to_major(ICMP6); 5362 icmp_max_optsize = 5363 optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5364 icmp_opt_obj.odb_opt_arr_cnt); 5365 5366 (void) icmp_param_register(icmp_param_arr, A_CNT(icmp_param_arr)); 5367 5368 rawip_kstat_init(); 5369 } 5370 5371 void 5372 icmp_ddi_destroy(void) 5373 { 5374 nd_free(&icmp_g_nd); 5375 5376 rawip_kstat_fini(); 5377 } 5378 5379 static void 5380 rawip_kstat_init(void) { 5381 5382 rawip_named_kstat_t template = { 5383 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5384 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5385 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5386 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5387 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5388 }; 5389 5390 rawip_mibkp = kstat_create("icmp", 0, "rawip", "mib2", 5391 KSTAT_TYPE_NAMED, 5392 NUM_OF_FIELDS(rawip_named_kstat_t), 5393 0); 5394 if (rawip_mibkp == NULL) 5395 return; 5396 5397 bcopy(&template, rawip_mibkp->ks_data, sizeof (template)); 5398 5399 rawip_mibkp->ks_update = rawip_kstat_update; 5400 5401 kstat_install(rawip_mibkp); 5402 } 5403 5404 static void 5405 rawip_kstat_fini(void) { 5406 if (rawip_mibkp) { 5407 kstat_delete(rawip_mibkp); 5408 rawip_mibkp = NULL; 5409 } 5410 } 5411 5412 static int 5413 rawip_kstat_update(kstat_t *kp, int rw) { 5414 rawip_named_kstat_t *rawipkp; 5415 5416 if ((kp == NULL) || (kp->ks_data == NULL)) 5417 return (EIO); 5418 5419 if (rw == KSTAT_WRITE) 5420 return (EACCES); 5421 5422 rawipkp = (rawip_named_kstat_t *)kp->ks_data; 5423 5424 rawipkp->inDatagrams.value.ui32 = rawip_mib.rawipInDatagrams; 5425 rawipkp->inCksumErrs.value.ui32 = rawip_mib.rawipInCksumErrs; 5426 rawipkp->inErrors.value.ui32 = rawip_mib.rawipInErrors; 5427 rawipkp->outDatagrams.value.ui32 = rawip_mib.rawipOutDatagrams; 5428 rawipkp->outErrors.value.ui32 = rawip_mib.rawipOutErrors; 5429 5430 return (0); 5431 } 5432