1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/types.h> 31 #include <sys/stream.h> 32 #include <sys/stropts.h> 33 #include <sys/strlog.h> 34 #include <sys/strsun.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/priv.h> 46 #include <sys/zone.h> 47 #include <sys/time.h> 48 49 #include <sys/socket.h> 50 #include <sys/isa_defs.h> 51 #include <sys/suntpi.h> 52 #include <sys/xti_inet.h> 53 54 #include <net/route.h> 55 #include <net/if.h> 56 57 #include <netinet/in.h> 58 #include <netinet/ip6.h> 59 #include <netinet/icmp6.h> 60 #include <inet/common.h> 61 #include <inet/ip.h> 62 #include <inet/ip6.h> 63 #include <inet/mi.h> 64 #include <inet/nd.h> 65 #include <inet/optcom.h> 66 #include <inet/snmpcom.h> 67 #include <inet/kstatcom.h> 68 #include <inet/rawip_impl.h> 69 70 #include <netinet/ip_mroute.h> 71 #include <inet/tcp.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 #include <inet/ipclassifier.h> 75 76 #include <sys/tsol/label.h> 77 #include <sys/tsol/tnet.h> 78 79 #include <inet/ip_ire.h> 80 #include <inet/ip_if.h> 81 82 #include <inet/ip_impl.h> 83 84 #define ICMP6 "icmp6" 85 major_t ICMP6_MAJ; 86 87 /* 88 * Object to represent database of options to search passed to 89 * {sock,tpi}optcom_req() interface routine to take care of option 90 * management and associated methods. 91 * XXX These and other extern's should really move to a icmp header. 92 */ 93 extern optdb_obj_t icmp_opt_obj; 94 extern uint_t icmp_max_optsize; 95 96 /* 97 * Synchronization notes: 98 * 99 * At all points in this code where exclusive access is required, we 100 * pass a message to a subroutine by invoking qwriter(..., PERIM_OUTER) 101 * which will arrange to call the routine only after all threads have 102 * exited the shared resource. 103 */ 104 105 /* Named Dispatch Parameter Management Structure */ 106 typedef struct icmpparam_s { 107 uint_t icmp_param_min; 108 uint_t icmp_param_max; 109 uint_t icmp_param_value; 110 char *icmp_param_name; 111 } icmpparam_t; 112 113 static void icmp_addr_req(queue_t *q, mblk_t *mp); 114 static void icmp_bind(queue_t *q, mblk_t *mp); 115 static void icmp_bind_proto(queue_t *q); 116 static int icmp_build_hdrs(queue_t *q, icmp_t *icmp); 117 static void icmp_capability_req(queue_t *q, mblk_t *mp); 118 static int icmp_close(queue_t *q); 119 static void icmp_connect(queue_t *q, mblk_t *mp); 120 static void icmp_disconnect(queue_t *q, mblk_t *mp); 121 static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 122 int sys_error); 123 static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 124 t_scalar_t t_error, int sys_error); 125 static void icmp_icmp_error(queue_t *q, mblk_t *mp); 126 static void icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 127 static void icmp_info_req(queue_t *q, mblk_t *mp); 128 static mblk_t *icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, 129 t_scalar_t addr_length, in_port_t); 130 static int icmp_open(queue_t *q, dev_t *devp, int flag, 131 int sflag, cred_t *credp); 132 static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, 133 int *errorp, void *thisdg_attrs); 134 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 135 int icmp_opt_set(queue_t *q, uint_t optset_context, 136 int level, int name, uint_t inlen, 137 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 138 void *thisdg_attrs, cred_t *cr, mblk_t *mblk); 139 int icmp_opt_get(queue_t *q, int level, int name, 140 uchar_t *ptr); 141 static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 142 static boolean_t icmp_param_register(icmpparam_t *icmppa, int cnt); 143 static int icmp_param_set(queue_t *q, mblk_t *mp, char *value, 144 caddr_t cp, cred_t *cr); 145 static void icmp_rput(queue_t *q, mblk_t *mp); 146 static void icmp_rput_bind_ack(queue_t *q, mblk_t *mp); 147 static int icmp_snmp_get(queue_t *q, mblk_t *mpctl); 148 static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 149 uchar_t *ptr, int len); 150 static int icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 151 cred_t *cr); 152 static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 153 static void icmp_unbind(queue_t *q, mblk_t *mp); 154 static void icmp_wput(queue_t *q, mblk_t *mp); 155 static void icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, 156 t_scalar_t tudr_optlen); 157 static void icmp_wput_other(queue_t *q, mblk_t *mp); 158 static void icmp_wput_iocdata(queue_t *q, mblk_t *mp); 159 static void icmp_wput_restricted(queue_t *q, mblk_t *mp); 160 161 static void rawip_kstat_init(void); 162 static void rawip_kstat_fini(void); 163 static int rawip_kstat_update(kstat_t *kp, int rw); 164 165 166 static struct module_info info = { 167 5707, "icmp", 1, INFPSZ, 512, 128 168 }; 169 170 static struct qinit rinit = { 171 (pfi_t)icmp_rput, NULL, icmp_open, icmp_close, NULL, &info 172 }; 173 174 static struct qinit winit = { 175 (pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &info 176 }; 177 178 struct streamtab icmpinfo = { 179 &rinit, &winit 180 }; 181 182 static sin_t sin_null; /* Zero address for quick clears */ 183 static sin6_t sin6_null; /* Zero address for quick clears */ 184 static void *icmp_g_head; /* Head for list of open icmp streams. */ 185 static IDP icmp_g_nd; /* Points to table of ICMP ND variables. */ 186 187 /* MIB-2 stuff for SNMP */ 188 static mib2_rawip_t rawip_mib; /* SNMP fixed size info */ 189 static kstat_t *rawip_mibkp; /* kstat exporting rawip_mib data */ 190 191 /* Default structure copied into T_INFO_ACK messages */ 192 static struct T_info_ack icmp_g_t_info_ack = { 193 T_INFO_ACK, 194 IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */ 195 T_INVALID, /* ETSDU_size. icmp does not support expedited data. */ 196 T_INVALID, /* CDATA_size. icmp does not support connect data. */ 197 T_INVALID, /* DDATA_size. icmp does not support disconnect data. */ 198 0, /* ADDR_size - filled in later. */ 199 0, /* OPT_size - not initialized here */ 200 IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */ 201 T_CLTS, /* SERV_type. icmp supports connection-less. */ 202 TS_UNBND, /* CURRENT_state. This is set from icmp_state. */ 203 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 204 }; 205 206 /* 207 * Table of ND variables supported by icmp. These are loaded into icmp_g_nd 208 * in icmp_open. 209 * All of these are alterable, within the min/max values given, at run time. 210 */ 211 static icmpparam_t icmp_param_arr[] = { 212 /* min max value name */ 213 { 0, 128, 32, "icmp_wroff_extra" }, 214 { 1, 255, 255, "icmp_ipv4_ttl" }, 215 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"}, 216 { 0, 1, 1, "icmp_bsd_compat" }, 217 { 4096, 65536, 8192, "icmp_xmit_hiwat"}, 218 { 0, 65536, 1024, "icmp_xmit_lowat"}, 219 { 4096, 65536, 8192, "icmp_recv_hiwat"}, 220 { 65536, 1024*1024*1024, 256*1024, "icmp_max_buf"}, 221 }; 222 #define icmp_wroff_extra icmp_param_arr[0].icmp_param_value 223 #define icmp_ipv4_ttl icmp_param_arr[1].icmp_param_value 224 #define icmp_ipv6_hoplimit icmp_param_arr[2].icmp_param_value 225 #define icmp_bsd_compat icmp_param_arr[3].icmp_param_value 226 #define icmp_xmit_hiwat icmp_param_arr[4].icmp_param_value 227 #define icmp_xmit_lowat icmp_param_arr[5].icmp_param_value 228 #define icmp_recv_hiwat icmp_param_arr[6].icmp_param_value 229 #define icmp_max_buf icmp_param_arr[7].icmp_param_value 230 231 /* 232 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 233 * passed to icmp_wput. 234 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP 235 * protocol type placed in the message following the address. A T_BIND_ACK 236 * message is passed upstream when ip acknowledges the request. 237 * (Called as writer.) 238 */ 239 static void 240 icmp_bind(queue_t *q, mblk_t *mp) 241 { 242 sin_t *sin; 243 sin6_t *sin6; 244 mblk_t *mp1; 245 struct T_bind_req *tbr; 246 icmp_t *icmp; 247 248 icmp = (icmp_t *)q->q_ptr; 249 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 250 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 251 "icmp_bind: bad req, len %u", 252 (uint_t)(mp->b_wptr - mp->b_rptr)); 253 icmp_err_ack(q, mp, TPROTO, 0); 254 return; 255 } 256 if (icmp->icmp_state != TS_UNBND) { 257 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 258 "icmp_bind: bad state, %d", icmp->icmp_state); 259 icmp_err_ack(q, mp, TOUTSTATE, 0); 260 return; 261 } 262 /* 263 * Reallocate the message to make sure we have enough room for an 264 * address and the protocol type. 265 */ 266 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 267 if (!mp1) { 268 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 269 return; 270 } 271 mp = mp1; 272 tbr = (struct T_bind_req *)mp->b_rptr; 273 switch (tbr->ADDR_length) { 274 case 0: /* Generic request */ 275 tbr->ADDR_offset = sizeof (struct T_bind_req); 276 if (icmp->icmp_family == AF_INET) { 277 tbr->ADDR_length = sizeof (sin_t); 278 sin = (sin_t *)&tbr[1]; 279 *sin = sin_null; 280 sin->sin_family = AF_INET; 281 mp->b_wptr = (uchar_t *)&sin[1]; 282 } else { 283 ASSERT(icmp->icmp_family == AF_INET6); 284 tbr->ADDR_length = sizeof (sin6_t); 285 sin6 = (sin6_t *)&tbr[1]; 286 *sin6 = sin6_null; 287 sin6->sin6_family = AF_INET6; 288 mp->b_wptr = (uchar_t *)&sin6[1]; 289 } 290 break; 291 case sizeof (sin_t): /* Complete IP address */ 292 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 293 sizeof (sin_t)); 294 if (sin == NULL || !OK_32PTR((char *)sin)) { 295 icmp_err_ack(q, mp, TSYSERR, EINVAL); 296 return; 297 } 298 if (icmp->icmp_family != AF_INET || 299 sin->sin_family != AF_INET) { 300 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 301 return; 302 } 303 break; 304 case sizeof (sin6_t): /* Complete IP address */ 305 sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset, 306 sizeof (sin6_t)); 307 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 308 icmp_err_ack(q, mp, TSYSERR, EINVAL); 309 return; 310 } 311 if (icmp->icmp_family != AF_INET6 || 312 sin6->sin6_family != AF_INET6) { 313 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 314 return; 315 } 316 /* No support for mapped addresses on raw sockets */ 317 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 318 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 319 return; 320 } 321 break; 322 default: 323 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 324 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 325 icmp_err_ack(q, mp, TBADADDR, 0); 326 return; 327 } 328 /* 329 * Copy the source address into our icmp structure. This address 330 * may still be zero; if so, ip will fill in the correct address 331 * each time an outbound packet is passed to it. 332 * If we are binding to a broadcast or multicast address icmp_rput 333 * will clear the source address when it receives the T_BIND_ACK. 334 */ 335 icmp->icmp_state = TS_IDLE; 336 337 if (icmp->icmp_family == AF_INET) { 338 ASSERT(sin != NULL); 339 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 340 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 341 &icmp->icmp_v6src); 342 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 343 icmp->icmp_ip_snd_options_len; 344 icmp->icmp_bound_v6src = icmp->icmp_v6src; 345 } else { 346 int error; 347 348 ASSERT(sin6 != NULL); 349 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 350 icmp->icmp_v6src = sin6->sin6_addr; 351 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 352 icmp->icmp_bound_v6src = icmp->icmp_v6src; 353 354 /* Rebuild the header template */ 355 error = icmp_build_hdrs(q, icmp); 356 if (error != 0) { 357 icmp_err_ack(q, mp, TSYSERR, error); 358 return; 359 } 360 } 361 /* 362 * Place protocol type in the O_T_BIND_REQ/T_BIND_REQ following 363 * the address. 364 */ 365 *mp->b_wptr++ = icmp->icmp_proto; 366 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 367 /* 368 * Append a request for an IRE if src not 0 (INADDR_ANY) 369 */ 370 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 371 if (!mp->b_cont) { 372 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 373 return; 374 } 375 mp->b_cont->b_wptr += sizeof (ire_t); 376 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 377 } 378 379 /* Pass the O_T_BIND_REQ/T_BIND_REQ to ip. */ 380 putnext(q, mp); 381 } 382 383 /* 384 * Send message to IP to just bind to the protocol. 385 */ 386 static void 387 icmp_bind_proto(queue_t *q) 388 { 389 mblk_t *mp; 390 struct T_bind_req *tbr; 391 icmp_t *icmp; 392 393 icmp = (icmp_t *)q->q_ptr; 394 mp = allocb(sizeof (struct T_bind_req) + sizeof (sin6_t) + 1, 395 BPRI_MED); 396 if (!mp) { 397 return; 398 } 399 mp->b_datap->db_type = M_PROTO; 400 tbr = (struct T_bind_req *)mp->b_rptr; 401 tbr->PRIM_type = O_T_BIND_REQ; /* change to T_BIND_REQ ? */ 402 tbr->ADDR_offset = sizeof (struct T_bind_req); 403 if (icmp->icmp_ipversion == IPV4_VERSION) { 404 sin_t *sin; 405 406 tbr->ADDR_length = sizeof (sin_t); 407 sin = (sin_t *)&tbr[1]; 408 *sin = sin_null; 409 sin->sin_family = AF_INET; 410 mp->b_wptr = (uchar_t *)&sin[1]; 411 } else { 412 sin6_t *sin6; 413 414 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 415 tbr->ADDR_length = sizeof (sin6_t); 416 sin6 = (sin6_t *)&tbr[1]; 417 *sin6 = sin6_null; 418 sin6->sin6_family = AF_INET6; 419 mp->b_wptr = (uchar_t *)&sin6[1]; 420 } 421 422 /* Place protocol type in the O_T_BIND_REQ following the address. */ 423 *mp->b_wptr++ = icmp->icmp_proto; 424 425 /* Pass the O_T_BIND_REQ to ip. */ 426 putnext(q, mp); 427 } 428 429 /* 430 * This routine handles each T_CONN_REQ message passed to icmp. It 431 * associates a default destination address with the stream. 432 * 433 * This routine sends down a T_BIND_REQ to IP with the following mblks: 434 * T_BIND_REQ - specifying local and remote address. 435 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 436 * T_OK_ACK - for the T_CONN_REQ 437 * T_CONN_CON - to keep the TPI user happy 438 * 439 * The connect completes in icmp_rput. 440 * When a T_BIND_ACK is received information is extracted from the IRE 441 * and the two appended messages are sent to the TPI user. 442 * Should icmp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 443 * it to an error ack for the appropriate primitive. 444 */ 445 static void 446 icmp_connect(queue_t *q, mblk_t *mp) 447 { 448 sin_t *sin; 449 sin6_t *sin6; 450 mblk_t *mp1, *mp2; 451 struct T_conn_req *tcr; 452 icmp_t *icmp; 453 ipaddr_t v4dst; 454 in6_addr_t v6dst; 455 uint32_t flowinfo; 456 457 icmp = (icmp_t *)q->q_ptr; 458 tcr = (struct T_conn_req *)mp->b_rptr; 459 /* Sanity checks */ 460 if ((mp->b_wptr - mp->b_rptr < sizeof (struct T_conn_req))) { 461 icmp_err_ack(q, mp, TPROTO, 0); 462 return; 463 } 464 465 if (icmp->icmp_state == TS_DATA_XFER) { 466 /* Already connected - clear out state */ 467 icmp->icmp_v6src = icmp->icmp_bound_v6src; 468 icmp->icmp_state = TS_IDLE; 469 } 470 471 472 if (tcr->OPT_length != 0) { 473 icmp_err_ack(q, mp, TBADOPT, 0); 474 return; 475 } 476 switch (tcr->DEST_length) { 477 default: 478 icmp_err_ack(q, mp, TBADADDR, 0); 479 return; 480 481 case sizeof (sin_t): 482 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 483 sizeof (sin_t)); 484 if (sin == NULL || !OK_32PTR((char *)sin)) { 485 icmp_err_ack(q, mp, TSYSERR, EINVAL); 486 return; 487 } 488 if (icmp->icmp_family != AF_INET || 489 sin->sin_family != AF_INET) { 490 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 491 return; 492 } 493 v4dst = sin->sin_addr.s_addr; 494 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 495 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 496 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 497 icmp->icmp_ip_snd_options_len; 498 break; 499 500 case sizeof (sin6_t): 501 sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 502 sizeof (sin6_t)); 503 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 504 icmp_err_ack(q, mp, TSYSERR, EINVAL); 505 return; 506 } 507 if (icmp->icmp_family != AF_INET6 || 508 sin6->sin6_family != AF_INET6) { 509 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 510 return; 511 } 512 /* No support for mapped addresses on raw sockets */ 513 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 514 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 515 return; 516 } 517 v6dst = sin6->sin6_addr; 518 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 519 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 520 flowinfo = sin6->sin6_flowinfo; 521 break; 522 } 523 if (icmp->icmp_ipversion == IPV4_VERSION) { 524 /* 525 * Interpret a zero destination to mean loopback. 526 * Update the T_CONN_REQ (sin/sin6) since it is used to 527 * generate the T_CONN_CON. 528 */ 529 if (v4dst == INADDR_ANY) { 530 v4dst = htonl(INADDR_LOOPBACK); 531 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 532 if (icmp->icmp_family == AF_INET) { 533 sin->sin_addr.s_addr = v4dst; 534 } else { 535 sin6->sin6_addr = v6dst; 536 } 537 } 538 icmp->icmp_v6dst = v6dst; 539 icmp->icmp_flowinfo = 0; 540 541 /* 542 * If the destination address is multicast and 543 * an outgoing multicast interface has been set, 544 * use the address of that interface as our 545 * source address if no source address has been set. 546 */ 547 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 548 CLASSD(v4dst) && 549 icmp->icmp_multicast_if_addr != INADDR_ANY) { 550 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 551 &icmp->icmp_v6src); 552 } 553 } else { 554 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 555 /* 556 * Interpret a zero destination to mean loopback. 557 * Update the T_CONN_REQ (sin/sin6) since it is used to 558 * generate the T_CONN_CON. 559 */ 560 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 561 v6dst = ipv6_loopback; 562 sin6->sin6_addr = v6dst; 563 } 564 icmp->icmp_v6dst = v6dst; 565 icmp->icmp_flowinfo = flowinfo; 566 /* 567 * If the destination address is multicast and 568 * an outgoing multicast interface has been set, 569 * then the ip bind logic will pick the correct source 570 * address (i.e. matching the outgoing multicast interface). 571 */ 572 } 573 574 /* 575 * Send down bind to IP to verify that there is a route 576 * and to determine the source address. 577 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 578 */ 579 if (icmp->icmp_family == AF_INET) { 580 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa_conn_t), 581 sin->sin_port); 582 } else { 583 ASSERT(icmp->icmp_family == AF_INET6); 584 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa6_conn_t), 585 sin6->sin6_port); 586 } 587 if (mp1 == NULL) { 588 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 589 return; 590 } 591 592 /* 593 * We also have to send a connection confirmation to 594 * keep TLI happy. Prepare it for icmp_rput. 595 */ 596 if (icmp->icmp_family == AF_INET) { 597 mp2 = mi_tpi_conn_con(NULL, (char *)sin, sizeof (*sin), NULL, 598 0); 599 } else { 600 ASSERT(icmp->icmp_family == AF_INET6); 601 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, sizeof (*sin6), NULL, 602 0); 603 } 604 if (mp2 == NULL) { 605 freemsg(mp1); 606 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 607 return; 608 } 609 610 mp = mi_tpi_ok_ack_alloc(mp); 611 if (mp == NULL) { 612 /* Unable to reuse the T_CONN_REQ for the ack. */ 613 freemsg(mp2); 614 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 615 return; 616 } 617 618 icmp->icmp_state = TS_DATA_XFER; 619 620 /* Hang onto the T_OK_ACK and T_CONN_CON for later. */ 621 linkb(mp1, mp); 622 linkb(mp1, mp2); 623 624 mblk_setcred(mp1, icmp->icmp_credp); 625 putnext(q, mp1); 626 } 627 628 static int 629 icmp_close(queue_t *q) 630 { 631 icmp_t *icmp = (icmp_t *)q->q_ptr; 632 int i1; 633 634 /* tell IP that if we're not here, he can't trust labels */ 635 if (is_system_labeled()) 636 putnext(WR(q), icmp->icmp_delabel); 637 638 qprocsoff(q); 639 640 /* If there are any options associated with the stream, free them. */ 641 if (icmp->icmp_ip_snd_options) 642 mi_free((char *)icmp->icmp_ip_snd_options); 643 644 if (icmp->icmp_filter != NULL) 645 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 646 647 /* Free memory associated with sticky options */ 648 if (icmp->icmp_sticky_hdrs_len != 0) { 649 kmem_free(icmp->icmp_sticky_hdrs, 650 icmp->icmp_sticky_hdrs_len); 651 icmp->icmp_sticky_hdrs = NULL; 652 icmp->icmp_sticky_hdrs_len = 0; 653 } 654 655 ip6_pkt_free(&icmp->icmp_sticky_ipp); 656 657 crfree(icmp->icmp_credp); 658 659 /* Free the icmp structure and release the minor device number. */ 660 i1 = mi_close_comm(&icmp_g_head, q); 661 662 return (i1); 663 } 664 665 /* 666 * This routine handles each T_DISCON_REQ message passed to icmp 667 * as an indicating that ICMP is no longer connected. This results 668 * in sending a T_BIND_REQ to IP to restore the binding to just 669 * the local address. 670 * 671 * This routine sends down a T_BIND_REQ to IP with the following mblks: 672 * T_BIND_REQ - specifying just the local address. 673 * T_OK_ACK - for the T_DISCON_REQ 674 * 675 * The disconnect completes in icmp_rput. 676 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 677 * Should icmp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 678 * it to an error ack for the appropriate primitive. 679 */ 680 static void 681 icmp_disconnect(queue_t *q, mblk_t *mp) 682 { 683 icmp_t *icmp; 684 mblk_t *mp1; 685 686 icmp = (icmp_t *)q->q_ptr; 687 688 if (icmp->icmp_state != TS_DATA_XFER) { 689 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 690 "icmp_disconnect: bad state, %d", icmp->icmp_state); 691 icmp_err_ack(q, mp, TOUTSTATE, 0); 692 return; 693 } 694 icmp->icmp_v6src = icmp->icmp_bound_v6src; 695 icmp->icmp_state = TS_IDLE; 696 697 /* 698 * Send down bind to IP to remove the full binding and revert 699 * to the local address binding. 700 */ 701 if (icmp->icmp_family == AF_INET) { 702 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin_t), 0); 703 } else { 704 ASSERT(icmp->icmp_family == AF_INET6); 705 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin6_t), 0); 706 } 707 if (mp1 == NULL) { 708 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 709 return; 710 } 711 mp = mi_tpi_ok_ack_alloc(mp); 712 if (mp == NULL) { 713 /* Unable to reuse the T_DISCON_REQ for the ack. */ 714 icmp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 715 return; 716 } 717 718 if (icmp->icmp_family == AF_INET6) { 719 int error; 720 721 /* Rebuild the header template */ 722 error = icmp_build_hdrs(q, icmp); 723 if (error != 0) { 724 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 725 freemsg(mp1); 726 return; 727 } 728 } 729 icmp->icmp_discon_pending = 1; 730 731 /* Append the T_OK_ACK to the T_BIND_REQ for icmp_rput */ 732 linkb(mp1, mp); 733 putnext(q, mp1); 734 } 735 736 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 737 static void 738 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 739 { 740 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 741 qreply(q, mp); 742 } 743 744 /* Shorthand to generate and send TPI error acks to our client */ 745 static void 746 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 747 t_scalar_t t_error, int sys_error) 748 { 749 struct T_error_ack *teackp; 750 751 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 752 M_PCPROTO, T_ERROR_ACK)) != NULL) { 753 teackp = (struct T_error_ack *)mp->b_rptr; 754 teackp->ERROR_prim = primitive; 755 teackp->TLI_error = t_error; 756 teackp->UNIX_error = sys_error; 757 qreply(q, mp); 758 } 759 } 760 761 /* 762 * icmp_icmp_error is called by icmp_rput to process ICMP 763 * messages passed up by IP. 764 * Generates the appropriate T_UDERROR_IND for permanent 765 * (non-transient) errors. 766 * Assumes that IP has pulled up everything up to and including 767 * the ICMP header. 768 */ 769 static void 770 icmp_icmp_error(queue_t *q, mblk_t *mp) 771 { 772 icmph_t *icmph; 773 ipha_t *ipha; 774 int iph_hdr_length; 775 sin_t sin; 776 sin6_t sin6; 777 mblk_t *mp1; 778 int error = 0; 779 icmp_t *icmp = (icmp_t *)q->q_ptr; 780 781 /* 782 * Deliver T_UDERROR_IND when the application has asked for it. 783 * The socket layer enables this automatically when connected. 784 */ 785 if (!icmp->icmp_dgram_errind) { 786 freemsg(mp); 787 return; 788 } 789 790 ipha = (ipha_t *)mp->b_rptr; 791 792 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 793 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 794 icmp_icmp_error_ipv6(q, mp); 795 return; 796 } 797 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 798 799 iph_hdr_length = IPH_HDR_LENGTH(ipha); 800 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 801 ipha = (ipha_t *)&icmph[1]; 802 iph_hdr_length = IPH_HDR_LENGTH(ipha); 803 804 switch (icmph->icmph_type) { 805 case ICMP_DEST_UNREACHABLE: 806 switch (icmph->icmph_code) { 807 case ICMP_FRAGMENTATION_NEEDED: 808 /* 809 * IP has already adjusted the path MTU. 810 * XXX Somehow pass MTU indication to application? 811 */ 812 break; 813 case ICMP_PORT_UNREACHABLE: 814 case ICMP_PROTOCOL_UNREACHABLE: 815 error = ECONNREFUSED; 816 break; 817 default: 818 /* Transient errors */ 819 break; 820 } 821 break; 822 default: 823 /* Transient errors */ 824 break; 825 } 826 if (error == 0) { 827 freemsg(mp); 828 return; 829 } 830 831 switch (icmp->icmp_family) { 832 case AF_INET: 833 sin = sin_null; 834 sin.sin_family = AF_INET; 835 sin.sin_addr.s_addr = ipha->ipha_dst; 836 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 837 error); 838 break; 839 case AF_INET6: 840 sin6 = sin6_null; 841 sin6.sin6_family = AF_INET6; 842 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 843 844 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 845 NULL, 0, error); 846 break; 847 } 848 if (mp1) 849 putnext(q, mp1); 850 freemsg(mp); 851 } 852 853 /* 854 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 855 * for IPv6 packets. 856 * Send permanent (non-transient) errors upstream. 857 * Assumes that IP has pulled up all the extension headers as well 858 * as the ICMPv6 header. 859 */ 860 static void 861 icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 862 { 863 icmp6_t *icmp6; 864 ip6_t *ip6h, *outer_ip6h; 865 uint16_t iph_hdr_length; 866 uint8_t *nexthdrp; 867 sin6_t sin6; 868 mblk_t *mp1; 869 int error = 0; 870 icmp_t *icmp = (icmp_t *)q->q_ptr; 871 872 outer_ip6h = (ip6_t *)mp->b_rptr; 873 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 874 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 875 else 876 iph_hdr_length = IPV6_HDR_LEN; 877 878 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 879 ip6h = (ip6_t *)&icmp6[1]; 880 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 881 freemsg(mp); 882 return; 883 } 884 if (*nexthdrp != icmp->icmp_proto) { 885 /* 886 * Could have switched icmp_proto after while ip did fanout of 887 * this message 888 */ 889 freemsg(mp); 890 return; 891 } 892 switch (icmp6->icmp6_type) { 893 case ICMP6_DST_UNREACH: 894 switch (icmp6->icmp6_code) { 895 case ICMP6_DST_UNREACH_NOPORT: 896 error = ECONNREFUSED; 897 break; 898 case ICMP6_DST_UNREACH_ADMIN: 899 case ICMP6_DST_UNREACH_NOROUTE: 900 case ICMP6_DST_UNREACH_BEYONDSCOPE: 901 case ICMP6_DST_UNREACH_ADDR: 902 /* Transient errors */ 903 break; 904 default: 905 break; 906 } 907 break; 908 case ICMP6_PACKET_TOO_BIG: { 909 struct T_unitdata_ind *tudi; 910 struct T_opthdr *toh; 911 size_t udi_size; 912 mblk_t *newmp; 913 t_scalar_t opt_length = sizeof (struct T_opthdr) + 914 sizeof (struct ip6_mtuinfo); 915 sin6_t *sin6; 916 struct ip6_mtuinfo *mtuinfo; 917 918 /* 919 * If the application has requested to receive path mtu 920 * information, send up an empty message containing an 921 * IPV6_PATHMTU ancillary data item. 922 */ 923 if (!icmp->icmp_ipv6_recvpathmtu) 924 break; 925 926 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 927 opt_length; 928 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 929 BUMP_MIB(&rawip_mib, rawipInErrors); 930 break; 931 } 932 933 /* 934 * newmp->b_cont is left to NULL on purpose. This is an 935 * empty message containing only ancillary data. 936 */ 937 newmp->b_datap->db_type = M_PROTO; 938 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 939 newmp->b_wptr = (uchar_t *)tudi + udi_size; 940 tudi->PRIM_type = T_UNITDATA_IND; 941 tudi->SRC_length = sizeof (sin6_t); 942 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 943 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 944 tudi->OPT_length = opt_length; 945 946 sin6 = (sin6_t *)&tudi[1]; 947 bzero(sin6, sizeof (sin6_t)); 948 sin6->sin6_family = AF_INET6; 949 sin6->sin6_addr = icmp->icmp_v6dst; 950 951 toh = (struct T_opthdr *)&sin6[1]; 952 toh->level = IPPROTO_IPV6; 953 toh->name = IPV6_PATHMTU; 954 toh->len = opt_length; 955 toh->status = 0; 956 957 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 958 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 959 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 960 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 961 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 962 /* 963 * We've consumed everything we need from the original 964 * message. Free it, then send our empty message. 965 */ 966 freemsg(mp); 967 putnext(q, newmp); 968 return; 969 } 970 case ICMP6_TIME_EXCEEDED: 971 /* Transient errors */ 972 break; 973 case ICMP6_PARAM_PROB: 974 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 975 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 976 (uchar_t *)ip6h + icmp6->icmp6_pptr == 977 (uchar_t *)nexthdrp) { 978 error = ECONNREFUSED; 979 break; 980 } 981 break; 982 } 983 if (error == 0) { 984 freemsg(mp); 985 return; 986 } 987 988 sin6 = sin6_null; 989 sin6.sin6_family = AF_INET6; 990 sin6.sin6_addr = ip6h->ip6_dst; 991 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 992 993 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 994 error); 995 if (mp1) 996 putnext(q, mp1); 997 freemsg(mp); 998 } 999 1000 /* 1001 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1002 * The local address is filled in if endpoint is bound. The remote address 1003 * is filled in if remote address has been precified ("connected endpoint") 1004 * (The concept of connected CLTS sockets is alien to published TPI 1005 * but we support it anyway). 1006 */ 1007 static void 1008 icmp_addr_req(queue_t *q, mblk_t *mp) 1009 { 1010 icmp_t *icmp = (icmp_t *)q->q_ptr; 1011 mblk_t *ackmp; 1012 struct T_addr_ack *taa; 1013 1014 /* Make it large enough for worst case */ 1015 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1016 2 * sizeof (sin6_t), 1); 1017 if (ackmp == NULL) { 1018 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1019 return; 1020 } 1021 taa = (struct T_addr_ack *)ackmp->b_rptr; 1022 1023 bzero(taa, sizeof (struct T_addr_ack)); 1024 ackmp->b_wptr = (uchar_t *)&taa[1]; 1025 1026 taa->PRIM_type = T_ADDR_ACK; 1027 ackmp->b_datap->db_type = M_PCPROTO; 1028 1029 /* 1030 * Note: Following code assumes 32 bit alignment of basic 1031 * data structures like sin_t and struct T_addr_ack. 1032 */ 1033 if (icmp->icmp_state != TS_UNBND) { 1034 /* 1035 * Fill in local address 1036 */ 1037 taa->LOCADDR_offset = sizeof (*taa); 1038 if (icmp->icmp_family == AF_INET) { 1039 sin_t *sin; 1040 1041 taa->LOCADDR_length = sizeof (sin_t); 1042 sin = (sin_t *)&taa[1]; 1043 /* Fill zeroes and then intialize non-zero fields */ 1044 *sin = sin_null; 1045 sin->sin_family = AF_INET; 1046 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1047 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1048 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1049 sin->sin_addr.s_addr); 1050 } else { 1051 /* 1052 * INADDR_ANY 1053 * icmp_v6src is not set, we might be bound to 1054 * broadcast/multicast. Use icmp_bound_v6src as 1055 * local address instead (that could 1056 * also still be INADDR_ANY) 1057 */ 1058 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1059 sin->sin_addr.s_addr); 1060 } 1061 ackmp->b_wptr = (uchar_t *)&sin[1]; 1062 } else { 1063 sin6_t *sin6; 1064 1065 ASSERT(icmp->icmp_family == AF_INET6); 1066 taa->LOCADDR_length = sizeof (sin6_t); 1067 sin6 = (sin6_t *)&taa[1]; 1068 /* Fill zeroes and then intialize non-zero fields */ 1069 *sin6 = sin6_null; 1070 sin6->sin6_family = AF_INET6; 1071 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1072 sin6->sin6_addr = icmp->icmp_v6src; 1073 } else { 1074 /* 1075 * UNSPECIFIED 1076 * icmp_v6src is not set, we might be bound to 1077 * broadcast/multicast. Use icmp_bound_v6src as 1078 * local address instead (that could 1079 * also still be UNSPECIFIED) 1080 */ 1081 sin6->sin6_addr = icmp->icmp_bound_v6src; 1082 } 1083 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1084 } 1085 } 1086 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1087 qreply(q, ackmp); 1088 } 1089 1090 static void 1091 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1092 { 1093 *tap = icmp_g_t_info_ack; 1094 1095 if (icmp->icmp_family == AF_INET6) 1096 tap->ADDR_size = sizeof (sin6_t); 1097 else 1098 tap->ADDR_size = sizeof (sin_t); 1099 tap->CURRENT_state = icmp->icmp_state; 1100 tap->OPT_size = icmp_max_optsize; 1101 } 1102 1103 /* 1104 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1105 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1106 * icmp_g_t_info_ack. The current state of the stream is copied from 1107 * icmp_state. 1108 */ 1109 static void 1110 icmp_capability_req(queue_t *q, mblk_t *mp) 1111 { 1112 icmp_t *icmp = (icmp_t *)q->q_ptr; 1113 t_uscalar_t cap_bits1; 1114 struct T_capability_ack *tcap; 1115 1116 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1117 1118 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1119 mp->b_datap->db_type, T_CAPABILITY_ACK); 1120 if (!mp) 1121 return; 1122 1123 tcap = (struct T_capability_ack *)mp->b_rptr; 1124 tcap->CAP_bits1 = 0; 1125 1126 if (cap_bits1 & TC1_INFO) { 1127 icmp_copy_info(&tcap->INFO_ack, icmp); 1128 tcap->CAP_bits1 |= TC1_INFO; 1129 } 1130 1131 qreply(q, mp); 1132 } 1133 1134 /* 1135 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1136 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1137 * The current state of the stream is copied from icmp_state. 1138 */ 1139 static void 1140 icmp_info_req(queue_t *q, mblk_t *mp) 1141 { 1142 icmp_t *icmp = (icmp_t *)q->q_ptr; 1143 1144 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1145 T_INFO_ACK); 1146 if (!mp) 1147 return; 1148 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1149 qreply(q, mp); 1150 } 1151 1152 /* 1153 * IP recognizes seven kinds of bind requests: 1154 * 1155 * - A zero-length address binds only to the protocol number. 1156 * 1157 * - A 4-byte address is treated as a request to 1158 * validate that the address is a valid local IPv4 1159 * address, appropriate for an application to bind to. 1160 * IP does the verification, but does not make any note 1161 * of the address at this time. 1162 * 1163 * - A 16-byte address contains is treated as a request 1164 * to validate a local IPv6 address, as the 4-byte 1165 * address case above. 1166 * 1167 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1168 * use it for the inbound fanout of packets. 1169 * 1170 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1171 * use it for the inbound fanout of packets. 1172 * 1173 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1174 * information consisting of local and remote addresses 1175 * and ports (unused for raw sockets). In this case, the addresses are both 1176 * validated as appropriate for this operation, and, if 1177 * so, the information is retained for use in the 1178 * inbound fanout. 1179 * 1180 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1181 * fanout information, like the 12-byte case above. 1182 * 1183 * IP will also fill in the IRE request mblk with information 1184 * regarding our peer. In all cases, we notify IP of our protocol 1185 * type by appending a single protocol byte to the bind request. 1186 */ 1187 static mblk_t * 1188 icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length, 1189 in_port_t fport) 1190 { 1191 char *cp; 1192 mblk_t *mp; 1193 struct T_bind_req *tbr; 1194 ipa_conn_t *ac; 1195 ipa6_conn_t *ac6; 1196 sin_t *sin; 1197 sin6_t *sin6; 1198 1199 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1200 1201 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1202 if (mp == NULL) 1203 return (NULL); 1204 mp->b_datap->db_type = M_PROTO; 1205 tbr = (struct T_bind_req *)mp->b_rptr; 1206 tbr->PRIM_type = bind_prim; 1207 tbr->ADDR_offset = sizeof (*tbr); 1208 tbr->CONIND_number = 0; 1209 tbr->ADDR_length = addr_length; 1210 cp = (char *)&tbr[1]; 1211 switch (addr_length) { 1212 case sizeof (ipa_conn_t): 1213 ASSERT(icmp->icmp_family == AF_INET); 1214 /* Append a request for an IRE */ 1215 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1216 if (mp->b_cont == NULL) { 1217 freemsg(mp); 1218 return (NULL); 1219 } 1220 mp->b_cont->b_wptr += sizeof (ire_t); 1221 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1222 1223 /* cp known to be 32 bit aligned */ 1224 ac = (ipa_conn_t *)cp; 1225 ac->ac_laddr = V4_PART_OF_V6(icmp->icmp_v6src); 1226 ac->ac_faddr = V4_PART_OF_V6(icmp->icmp_v6dst); 1227 ac->ac_fport = fport; 1228 ac->ac_lport = 0; 1229 break; 1230 1231 case sizeof (ipa6_conn_t): 1232 ASSERT(icmp->icmp_family == AF_INET6); 1233 /* Append a request for an IRE */ 1234 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1235 if (mp->b_cont == NULL) { 1236 freemsg(mp); 1237 return (NULL); 1238 } 1239 mp->b_cont->b_wptr += sizeof (ire_t); 1240 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1241 1242 /* cp known to be 32 bit aligned */ 1243 ac6 = (ipa6_conn_t *)cp; 1244 ac6->ac6_laddr = icmp->icmp_v6src; 1245 ac6->ac6_faddr = icmp->icmp_v6dst; 1246 ac6->ac6_fport = fport; 1247 ac6->ac6_lport = 0; 1248 break; 1249 1250 case sizeof (sin_t): 1251 ASSERT(icmp->icmp_family == AF_INET); 1252 /* Append a request for an IRE */ 1253 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1254 if (!mp->b_cont) { 1255 freemsg(mp); 1256 return (NULL); 1257 } 1258 mp->b_cont->b_wptr += sizeof (ire_t); 1259 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1260 1261 sin = (sin_t *)cp; 1262 *sin = sin_null; 1263 sin->sin_family = AF_INET; 1264 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 1265 break; 1266 1267 case sizeof (sin6_t): 1268 ASSERT(icmp->icmp_family == AF_INET6); 1269 /* Append a request for an IRE */ 1270 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1271 if (!mp->b_cont) { 1272 freemsg(mp); 1273 return (NULL); 1274 } 1275 mp->b_cont->b_wptr += sizeof (ire_t); 1276 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1277 1278 sin6 = (sin6_t *)cp; 1279 *sin6 = sin6_null; 1280 sin6->sin6_family = AF_INET6; 1281 sin6->sin6_addr = icmp->icmp_bound_v6src; 1282 break; 1283 } 1284 /* Add protocol number to end */ 1285 cp[addr_length] = icmp->icmp_proto; 1286 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1287 return (mp); 1288 } 1289 1290 /* ARGSUSED */ 1291 static void 1292 dummy_func(void *arg) 1293 { 1294 } 1295 1296 static mblk_t * 1297 alloc_wait(queue_t *q, size_t len, int pri, int *errp) 1298 { 1299 mblk_t *mp; 1300 bufcall_id_t id; 1301 int retv; 1302 1303 while ((mp = allocb(len, pri)) == NULL) { 1304 id = qbufcall(q, len, pri, dummy_func, NULL); 1305 if (id == 0) { 1306 *errp = ENOMEM; 1307 break; 1308 } 1309 retv = qwait_sig(q); 1310 qunbufcall(q, id); 1311 if (retv == 0) { 1312 *errp = EINTR; 1313 break; 1314 } 1315 } 1316 if (mp != NULL) 1317 mp->b_wptr += len; 1318 return (mp); 1319 } 1320 1321 /* 1322 * This is the open routine for icmp. It allocates a icmp_t structure for 1323 * the stream and, on the first open of the module, creates an ND table. 1324 */ 1325 static int 1326 icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1327 { 1328 int err; 1329 icmp_t *icmp; 1330 mblk_t *mp; 1331 out_labeled_t *olp; 1332 1333 /* If the stream is already open, return immediately. */ 1334 if (q->q_ptr != NULL) 1335 return (0); 1336 1337 /* If this is not a push of icmp as a module, fail. */ 1338 if (sflag != MODOPEN) 1339 return (EINVAL); 1340 1341 /* 1342 * Defer the qprocson until everything is initialized since 1343 * we are D_MTPERQ and after qprocson the rput routine can 1344 * run. (Could do qprocson earlier since icmp currently 1345 * has an outer perimeter.) 1346 */ 1347 1348 /* 1349 * Create a icmp_t structure for this stream and link into the 1350 * list of open streams. 1351 */ 1352 err = mi_open_comm(&icmp_g_head, sizeof (icmp_t), q, devp, 1353 flag, sflag, credp); 1354 if (err != 0) 1355 return (err); 1356 1357 /* 1358 * The receive hiwat is only looked at on the stream head queue. 1359 * Store in q_hiwat in order to return on SO_RCVBUF getsockopts. 1360 */ 1361 q->q_hiwat = icmp_recv_hiwat; 1362 1363 /* Set the initial state of the stream and the privilege status. */ 1364 icmp = (icmp_t *)q->q_ptr; 1365 icmp->icmp_state = TS_UNBND; 1366 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1367 icmp->icmp_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1368 icmp->icmp_filter = NULL; 1369 1370 icmp->icmp_credp = credp; 1371 crhold(credp); 1372 1373 /* 1374 * If the caller has the process-wide flag set, then default to MAC 1375 * exempt mode. This allows read-down to unlabeled hosts. 1376 */ 1377 if (getpflags(NET_MAC_AWARE, credp) != 0) 1378 icmp->icmp_mac_exempt = B_TRUE; 1379 1380 icmp->icmp_zoneid = getzoneid(); 1381 1382 if (getmajor(*devp) == (major_t)ICMP6_MAJ) { 1383 icmp->icmp_ipversion = IPV6_VERSION; 1384 icmp->icmp_family = AF_INET6; 1385 /* May be changed by a SO_PROTOTYPE socket option. */ 1386 icmp->icmp_proto = IPPROTO_ICMPV6; 1387 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1388 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1389 icmp->icmp_ttl = (uint8_t)icmp_ipv6_hoplimit; 1390 } else { 1391 icmp->icmp_ipversion = IPV4_VERSION; 1392 icmp->icmp_family = AF_INET; 1393 /* May be changed by a SO_PROTOTYPE socket option. */ 1394 icmp->icmp_proto = IPPROTO_ICMP; 1395 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1396 icmp->icmp_ttl = (uint8_t)icmp_ipv4_ttl; 1397 } 1398 qprocson(q); 1399 1400 /* 1401 * Check if icmp is being I_PUSHed by a non-privileged user. 1402 * If so, we set icmp_restricted to indicate that only MIB 1403 * traffic may pass. 1404 */ 1405 if (secpolicy_net_icmpaccess(credp) != 0) { 1406 icmp->icmp_restricted = 1; 1407 } 1408 1409 /* 1410 * The transmit hiwat is only looked at on IP's queue. 1411 * Store in q_hiwat in order to return on SO_SNDBUF 1412 * getsockopts. 1413 */ 1414 WR(q)->q_hiwat = icmp_xmit_hiwat; 1415 WR(q)->q_next->q_hiwat = WR(q)->q_hiwat; 1416 WR(q)->q_lowat = icmp_xmit_lowat; 1417 WR(q)->q_next->q_lowat = WR(q)->q_lowat; 1418 1419 if (icmp->icmp_family == AF_INET6) { 1420 /* Build initial header template for transmit */ 1421 err = icmp_build_hdrs(q, icmp); 1422 if (err != 0) 1423 goto open_error; 1424 } 1425 /* Set the Stream head write offset. */ 1426 (void) mi_set_sth_wroff(q, icmp->icmp_max_hdr_len + icmp_wroff_extra); 1427 (void) mi_set_sth_hiwat(q, q->q_hiwat); 1428 1429 if (is_system_labeled()) { 1430 /* notify IP that we know about labeling */ 1431 mp = alloc_wait(q, sizeof (*olp), BPRI_MED, &err); 1432 if (mp == NULL) 1433 goto open_error; 1434 mp->b_datap->db_type = M_CTL; 1435 olp = (out_labeled_t *)mp->b_rptr; 1436 olp->out_labeled_type = IP_ULP_OUT_LABELED; 1437 olp->out_qnext = WR(q)->q_next; 1438 putnext(WR(q), mp); 1439 1440 /* save off a copy for closing */ 1441 mp = alloc_wait(q, sizeof (*olp), BPRI_MED, &err); 1442 if (mp == NULL) 1443 goto open_error; 1444 mp->b_datap->db_type = M_CTL; 1445 olp = (out_labeled_t *)mp->b_rptr; 1446 olp->out_labeled_type = IP_ULP_OUT_LABELED; 1447 olp->out_qnext = NULL; 1448 icmp->icmp_delabel = mp; 1449 } 1450 1451 return (0); 1452 1453 open_error: 1454 qprocsoff(q); 1455 crfree(credp); 1456 (void) mi_close_comm(&icmp_g_head, q); 1457 return (err); 1458 } 1459 1460 /* 1461 * Which ICMP options OK to set through T_UNITDATA_REQ... 1462 */ 1463 /* ARGSUSED */ 1464 static boolean_t 1465 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1466 { 1467 return (B_TRUE); 1468 } 1469 1470 /* 1471 * This routine gets default values of certain options whose default 1472 * values are maintained by protcol specific code 1473 */ 1474 /* ARGSUSED */ 1475 int 1476 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1477 { 1478 int *i1 = (int *)ptr; 1479 1480 switch (level) { 1481 case IPPROTO_IP: 1482 switch (name) { 1483 case IP_MULTICAST_TTL: 1484 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1485 return (sizeof (uchar_t)); 1486 case IP_MULTICAST_LOOP: 1487 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1488 return (sizeof (uchar_t)); 1489 } 1490 break; 1491 case IPPROTO_IPV6: 1492 switch (name) { 1493 case IPV6_MULTICAST_HOPS: 1494 *i1 = IP_DEFAULT_MULTICAST_TTL; 1495 return (sizeof (int)); 1496 case IPV6_MULTICAST_LOOP: 1497 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1498 return (sizeof (int)); 1499 case IPV6_UNICAST_HOPS: 1500 *i1 = icmp_ipv6_hoplimit; 1501 return (sizeof (int)); 1502 } 1503 break; 1504 case IPPROTO_ICMPV6: 1505 switch (name) { 1506 case ICMP6_FILTER: 1507 /* Make it look like "pass all" */ 1508 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1509 return (sizeof (icmp6_filter_t)); 1510 } 1511 break; 1512 } 1513 return (-1); 1514 } 1515 1516 /* 1517 * This routine retrieves the current status of socket options. 1518 * It returns the size of the option retrieved. 1519 */ 1520 int 1521 icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 1522 { 1523 icmp_t *icmp = (icmp_t *)q->q_ptr; 1524 int *i1 = (int *)ptr; 1525 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1526 1527 switch (level) { 1528 case SOL_SOCKET: 1529 switch (name) { 1530 case SO_DEBUG: 1531 *i1 = icmp->icmp_debug; 1532 break; 1533 case SO_TYPE: 1534 *i1 = SOCK_RAW; 1535 break; 1536 case SO_PROTOTYPE: 1537 *i1 = icmp->icmp_proto; 1538 break; 1539 case SO_REUSEADDR: 1540 *i1 = icmp->icmp_reuseaddr; 1541 break; 1542 1543 /* 1544 * The following three items are available here, 1545 * but are only meaningful to IP. 1546 */ 1547 case SO_DONTROUTE: 1548 *i1 = icmp->icmp_dontroute; 1549 break; 1550 case SO_USELOOPBACK: 1551 *i1 = icmp->icmp_useloopback; 1552 break; 1553 case SO_BROADCAST: 1554 *i1 = icmp->icmp_broadcast; 1555 break; 1556 1557 case SO_SNDBUF: 1558 ASSERT(q->q_hiwat <= INT_MAX); 1559 *i1 = (int)q->q_hiwat; 1560 break; 1561 case SO_RCVBUF: 1562 ASSERT(RD(q)->q_hiwat <= INT_MAX); 1563 *i1 = (int)RD(q)->q_hiwat; 1564 break; 1565 case SO_DGRAM_ERRIND: 1566 *i1 = icmp->icmp_dgram_errind; 1567 break; 1568 case SO_TIMESTAMP: 1569 *i1 = icmp->icmp_timestamp; 1570 break; 1571 case SO_MAC_EXEMPT: 1572 *i1 = icmp->icmp_mac_exempt; 1573 break; 1574 case SO_DOMAIN: 1575 *i1 = icmp->icmp_family; 1576 break; 1577 1578 /* 1579 * Following four not meaningful for icmp 1580 * Action is same as "default" to which we fallthrough 1581 * so we keep them in comments. 1582 * case SO_LINGER: 1583 * case SO_KEEPALIVE: 1584 * case SO_OOBINLINE: 1585 * case SO_ALLZONES: 1586 */ 1587 default: 1588 return (-1); 1589 } 1590 break; 1591 case IPPROTO_IP: 1592 /* 1593 * Only allow IPv4 option processing on IPv4 sockets. 1594 */ 1595 if (icmp->icmp_family != AF_INET) 1596 return (-1); 1597 1598 switch (name) { 1599 case IP_OPTIONS: 1600 case T_IP_OPTIONS: 1601 /* Options are passed up with each packet */ 1602 return (0); 1603 case IP_HDRINCL: 1604 *i1 = (int)icmp->icmp_hdrincl; 1605 break; 1606 case IP_TOS: 1607 case T_IP_TOS: 1608 *i1 = (int)icmp->icmp_type_of_service; 1609 break; 1610 case IP_TTL: 1611 *i1 = (int)icmp->icmp_ttl; 1612 break; 1613 case IP_MULTICAST_IF: 1614 /* 0 address if not set */ 1615 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1616 return (sizeof (ipaddr_t)); 1617 case IP_MULTICAST_TTL: 1618 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1619 return (sizeof (uchar_t)); 1620 case IP_MULTICAST_LOOP: 1621 *ptr = icmp->icmp_multicast_loop; 1622 return (sizeof (uint8_t)); 1623 case IP_BOUND_IF: 1624 /* Zero if not set */ 1625 *i1 = icmp->icmp_bound_if; 1626 break; /* goto sizeof (int) option return */ 1627 case IP_UNSPEC_SRC: 1628 *ptr = icmp->icmp_unspec_source; 1629 break; /* goto sizeof (int) option return */ 1630 case IP_XMIT_IF: 1631 *i1 = icmp->icmp_xmit_if; 1632 break; /* goto sizeof (int) option return */ 1633 case IP_RECVIF: 1634 *ptr = icmp->icmp_recvif; 1635 break; /* goto sizeof (int) option return */ 1636 case IP_RECVPKTINFO: 1637 /* 1638 * This also handles IP_PKTINFO. 1639 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1640 * Differentiation is based on the size of the argument 1641 * passed in. 1642 * This option is handled in IP which will return an 1643 * error for IP_PKTINFO as it's not supported as a 1644 * sticky option. 1645 */ 1646 return (-EINVAL); 1647 /* 1648 * Cannot "get" the value of following options 1649 * at this level. Action is same as "default" to 1650 * which we fallthrough so we keep them in comments. 1651 * 1652 * case IP_ADD_MEMBERSHIP: 1653 * case IP_DROP_MEMBERSHIP: 1654 * case IP_BLOCK_SOURCE: 1655 * case IP_UNBLOCK_SOURCE: 1656 * case IP_ADD_SOURCE_MEMBERSHIP: 1657 * case IP_DROP_SOURCE_MEMBERSHIP: 1658 * case MCAST_JOIN_GROUP: 1659 * case MCAST_LEAVE_GROUP: 1660 * case MCAST_BLOCK_SOURCE: 1661 * case MCAST_UNBLOCK_SOURCE: 1662 * case MCAST_JOIN_SOURCE_GROUP: 1663 * case MCAST_LEAVE_SOURCE_GROUP: 1664 * case MRT_INIT: 1665 * case MRT_DONE: 1666 * case MRT_ADD_VIF: 1667 * case MRT_DEL_VIF: 1668 * case MRT_ADD_MFC: 1669 * case MRT_DEL_MFC: 1670 * case MRT_VERSION: 1671 * case MRT_ASSERT: 1672 * case IP_SEC_OPT: 1673 * case IP_DONTFAILOVER_IF: 1674 * case IP_NEXTHOP: 1675 */ 1676 default: 1677 return (-1); 1678 } 1679 break; 1680 case IPPROTO_IPV6: 1681 /* 1682 * Only allow IPv6 option processing on native IPv6 sockets. 1683 */ 1684 if (icmp->icmp_family != AF_INET6) 1685 return (-1); 1686 switch (name) { 1687 case IPV6_UNICAST_HOPS: 1688 *i1 = (unsigned int)icmp->icmp_ttl; 1689 break; 1690 case IPV6_MULTICAST_IF: 1691 /* 0 index if not set */ 1692 *i1 = icmp->icmp_multicast_if_index; 1693 break; 1694 case IPV6_MULTICAST_HOPS: 1695 *i1 = icmp->icmp_multicast_ttl; 1696 break; 1697 case IPV6_MULTICAST_LOOP: 1698 *i1 = icmp->icmp_multicast_loop; 1699 break; 1700 case IPV6_BOUND_IF: 1701 /* Zero if not set */ 1702 *i1 = icmp->icmp_bound_if; 1703 break; 1704 case IPV6_UNSPEC_SRC: 1705 *i1 = icmp->icmp_unspec_source; 1706 break; 1707 case IPV6_CHECKSUM: 1708 /* 1709 * Return offset or -1 if no checksum offset. 1710 * Does not apply to IPPROTO_ICMPV6 1711 */ 1712 if (icmp->icmp_proto == IPPROTO_ICMPV6) 1713 return (-1); 1714 1715 if (icmp->icmp_raw_checksum) { 1716 *i1 = icmp->icmp_checksum_off; 1717 } else { 1718 *i1 = -1; 1719 } 1720 break; 1721 case IPV6_JOIN_GROUP: 1722 case IPV6_LEAVE_GROUP: 1723 case MCAST_JOIN_GROUP: 1724 case MCAST_LEAVE_GROUP: 1725 case MCAST_BLOCK_SOURCE: 1726 case MCAST_UNBLOCK_SOURCE: 1727 case MCAST_JOIN_SOURCE_GROUP: 1728 case MCAST_LEAVE_SOURCE_GROUP: 1729 /* cannot "get" the value for these */ 1730 return (-1); 1731 case IPV6_RECVPKTINFO: 1732 *i1 = icmp->icmp_ip_recvpktinfo; 1733 break; 1734 case IPV6_RECVTCLASS: 1735 *i1 = icmp->icmp_ipv6_recvtclass; 1736 break; 1737 case IPV6_RECVPATHMTU: 1738 *i1 = icmp->icmp_ipv6_recvpathmtu; 1739 break; 1740 case IPV6_V6ONLY: 1741 *i1 = 1; 1742 break; 1743 case IPV6_RECVHOPLIMIT: 1744 *i1 = icmp->icmp_ipv6_recvhoplimit; 1745 break; 1746 case IPV6_RECVHOPOPTS: 1747 *i1 = icmp->icmp_ipv6_recvhopopts; 1748 break; 1749 case IPV6_RECVDSTOPTS: 1750 *i1 = icmp->icmp_ipv6_recvdstopts; 1751 break; 1752 case _OLD_IPV6_RECVDSTOPTS: 1753 *i1 = icmp->icmp_old_ipv6_recvdstopts; 1754 break; 1755 case IPV6_RECVRTHDRDSTOPTS: 1756 *i1 = icmp->icmp_ipv6_recvrtdstopts; 1757 break; 1758 case IPV6_RECVRTHDR: 1759 *i1 = icmp->icmp_ipv6_recvrthdr; 1760 break; 1761 case IPV6_PKTINFO: { 1762 /* XXX assumes that caller has room for max size! */ 1763 struct in6_pktinfo *pkti; 1764 1765 pkti = (struct in6_pktinfo *)ptr; 1766 if (ipp->ipp_fields & IPPF_IFINDEX) 1767 pkti->ipi6_ifindex = ipp->ipp_ifindex; 1768 else 1769 pkti->ipi6_ifindex = 0; 1770 if (ipp->ipp_fields & IPPF_ADDR) 1771 pkti->ipi6_addr = ipp->ipp_addr; 1772 else 1773 pkti->ipi6_addr = ipv6_all_zeros; 1774 return (sizeof (struct in6_pktinfo)); 1775 } 1776 case IPV6_NEXTHOP: { 1777 sin6_t *sin6 = (sin6_t *)ptr; 1778 1779 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 1780 return (0); 1781 *sin6 = sin6_null; 1782 sin6->sin6_family = AF_INET6; 1783 sin6->sin6_addr = ipp->ipp_nexthop; 1784 return (sizeof (sin6_t)); 1785 } 1786 case IPV6_HOPOPTS: 1787 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 1788 return (0); 1789 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 1790 return (0); 1791 bcopy((char *)ipp->ipp_hopopts + 1792 icmp->icmp_label_len_v6, ptr, 1793 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1794 if (icmp->icmp_label_len_v6 > 0) { 1795 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 1796 ptr[1] = (ipp->ipp_hopoptslen - 1797 icmp->icmp_label_len_v6 + 7) / 8 - 1; 1798 } 1799 return (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1800 case IPV6_RTHDRDSTOPTS: 1801 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 1802 return (0); 1803 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 1804 return (ipp->ipp_rtdstoptslen); 1805 case IPV6_RTHDR: 1806 if (!(ipp->ipp_fields & IPPF_RTHDR)) 1807 return (0); 1808 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 1809 return (ipp->ipp_rthdrlen); 1810 case IPV6_DSTOPTS: 1811 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 1812 return (0); 1813 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 1814 return (ipp->ipp_dstoptslen); 1815 case IPV6_PATHMTU: 1816 if (!(ipp->ipp_fields & IPPF_PATHMTU)) 1817 return (0); 1818 1819 return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0, 1820 (struct ip6_mtuinfo *)ptr)); 1821 case IPV6_TCLASS: 1822 if (ipp->ipp_fields & IPPF_TCLASS) 1823 *i1 = ipp->ipp_tclass; 1824 else 1825 *i1 = IPV6_FLOW_TCLASS( 1826 IPV6_DEFAULT_VERS_AND_FLOW); 1827 break; 1828 default: 1829 return (-1); 1830 } 1831 break; 1832 case IPPROTO_ICMPV6: 1833 /* 1834 * Only allow IPv6 option processing on native IPv6 sockets. 1835 */ 1836 if (icmp->icmp_family != AF_INET6) 1837 return (-1); 1838 1839 if (icmp->icmp_proto != IPPROTO_ICMPV6) 1840 return (-1); 1841 1842 switch (name) { 1843 case ICMP6_FILTER: 1844 if (icmp->icmp_filter == NULL) { 1845 /* Make it look like "pass all" */ 1846 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1847 } else { 1848 (void) bcopy(icmp->icmp_filter, ptr, 1849 sizeof (icmp6_filter_t)); 1850 } 1851 return (sizeof (icmp6_filter_t)); 1852 default: 1853 return (-1); 1854 } 1855 default: 1856 return (-1); 1857 } 1858 return (sizeof (int)); 1859 } 1860 1861 /* This routine sets socket options. */ 1862 /* ARGSUSED */ 1863 int 1864 icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, 1865 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 1866 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 1867 { 1868 icmp_t *icmp = (icmp_t *)q->q_ptr; 1869 int *i1 = (int *)invalp; 1870 boolean_t onoff = (*i1 == 0) ? 0 : 1; 1871 boolean_t checkonly; 1872 int error; 1873 1874 switch (optset_context) { 1875 case SETFN_OPTCOM_CHECKONLY: 1876 checkonly = B_TRUE; 1877 /* 1878 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 1879 * inlen != 0 implies value supplied and 1880 * we have to "pretend" to set it. 1881 * inlen == 0 implies that there is no 1882 * value part in T_CHECK request and just validation 1883 * done elsewhere should be enough, we just return here. 1884 */ 1885 if (inlen == 0) { 1886 *outlenp = 0; 1887 return (0); 1888 } 1889 break; 1890 case SETFN_OPTCOM_NEGOTIATE: 1891 checkonly = B_FALSE; 1892 break; 1893 case SETFN_UD_NEGOTIATE: 1894 case SETFN_CONN_NEGOTIATE: 1895 checkonly = B_FALSE; 1896 /* 1897 * Negotiating local and "association-related" options 1898 * through T_UNITDATA_REQ. 1899 * 1900 * Following routine can filter out ones we do not 1901 * want to be "set" this way. 1902 */ 1903 if (!icmp_opt_allow_udr_set(level, name)) { 1904 *outlenp = 0; 1905 return (EINVAL); 1906 } 1907 break; 1908 default: 1909 /* 1910 * We should never get here 1911 */ 1912 *outlenp = 0; 1913 return (EINVAL); 1914 } 1915 1916 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 1917 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 1918 1919 /* 1920 * For fixed length options, no sanity check 1921 * of passed in length is done. It is assumed *_optcom_req() 1922 * routines do the right thing. 1923 */ 1924 1925 switch (level) { 1926 case SOL_SOCKET: 1927 switch (name) { 1928 case SO_DEBUG: 1929 if (!checkonly) 1930 icmp->icmp_debug = onoff; 1931 break; 1932 case SO_PROTOTYPE: 1933 if ((*i1 & 0xFF) != IPPROTO_ICMP && 1934 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 1935 secpolicy_net_rawaccess(cr) != 0) { 1936 *outlenp = 0; 1937 return (EACCES); 1938 } 1939 /* Can't use IPPROTO_RAW with IPv6 */ 1940 if ((*i1 & 0xFF) == IPPROTO_RAW && 1941 icmp->icmp_family == AF_INET6) { 1942 *outlenp = 0; 1943 return (EPROTONOSUPPORT); 1944 } 1945 if (checkonly) { 1946 /* T_CHECK case */ 1947 *(int *)outvalp = (*i1 & 0xFF); 1948 break; 1949 } 1950 icmp->icmp_proto = *i1 & 0xFF; 1951 if ((icmp->icmp_proto == IPPROTO_RAW || 1952 icmp->icmp_proto == IPPROTO_IGMP) && 1953 icmp->icmp_family == AF_INET) 1954 icmp->icmp_hdrincl = 1; 1955 else 1956 icmp->icmp_hdrincl = 0; 1957 1958 if (icmp->icmp_family == AF_INET6 && 1959 icmp->icmp_proto == IPPROTO_ICMPV6) { 1960 /* Set offset for icmp6_cksum */ 1961 icmp->icmp_raw_checksum = 0; 1962 icmp->icmp_checksum_off = 2; 1963 } 1964 if (icmp->icmp_proto == IPPROTO_UDP || 1965 icmp->icmp_proto == IPPROTO_TCP || 1966 icmp->icmp_proto == IPPROTO_SCTP) { 1967 icmp->icmp_no_tp_cksum = 1; 1968 icmp->icmp_sticky_ipp.ipp_fields |= 1969 IPPF_NO_CKSUM; 1970 } else { 1971 icmp->icmp_no_tp_cksum = 0; 1972 icmp->icmp_sticky_ipp.ipp_fields &= 1973 ~IPPF_NO_CKSUM; 1974 } 1975 1976 if (icmp->icmp_filter != NULL && 1977 icmp->icmp_proto != IPPROTO_ICMPV6) { 1978 kmem_free(icmp->icmp_filter, 1979 sizeof (icmp6_filter_t)); 1980 icmp->icmp_filter = NULL; 1981 } 1982 1983 /* Rebuild the header template */ 1984 error = icmp_build_hdrs(q, icmp); 1985 if (error != 0) { 1986 *outlenp = 0; 1987 return (error); 1988 } 1989 1990 /* 1991 * For SCTP, we don't use icmp_bind_proto() for 1992 * raw socket binding. Note that we do not need 1993 * to set *outlenp. 1994 */ 1995 if (icmp->icmp_proto == IPPROTO_SCTP) 1996 return (0); 1997 1998 icmp_bind_proto(q); 1999 *outlenp = sizeof (int); 2000 *(int *)outvalp = *i1 & 0xFF; 2001 return (0); 2002 case SO_REUSEADDR: 2003 if (!checkonly) 2004 icmp->icmp_reuseaddr = onoff; 2005 break; 2006 2007 /* 2008 * The following three items are available here, 2009 * but are only meaningful to IP. 2010 */ 2011 case SO_DONTROUTE: 2012 if (!checkonly) 2013 icmp->icmp_dontroute = onoff; 2014 break; 2015 case SO_USELOOPBACK: 2016 if (!checkonly) 2017 icmp->icmp_useloopback = onoff; 2018 break; 2019 case SO_BROADCAST: 2020 if (!checkonly) 2021 icmp->icmp_broadcast = onoff; 2022 break; 2023 2024 case SO_SNDBUF: 2025 if (*i1 > icmp_max_buf) { 2026 *outlenp = 0; 2027 return (ENOBUFS); 2028 } 2029 if (!checkonly) { 2030 q->q_hiwat = *i1; 2031 q->q_next->q_hiwat = *i1; 2032 } 2033 break; 2034 case SO_RCVBUF: 2035 if (*i1 > icmp_max_buf) { 2036 *outlenp = 0; 2037 return (ENOBUFS); 2038 } 2039 if (!checkonly) { 2040 RD(q)->q_hiwat = *i1; 2041 (void) mi_set_sth_hiwat(RD(q), *i1); 2042 } 2043 break; 2044 case SO_DGRAM_ERRIND: 2045 if (!checkonly) 2046 icmp->icmp_dgram_errind = onoff; 2047 break; 2048 case SO_ALLZONES: 2049 /* 2050 * "soft" error (negative) 2051 * option not handled at this level 2052 * Note: Do not modify *outlenp 2053 */ 2054 return (-EINVAL); 2055 case SO_TIMESTAMP: 2056 if (!checkonly) { 2057 icmp->icmp_timestamp = onoff; 2058 } 2059 break; 2060 case SO_MAC_EXEMPT: 2061 if (secpolicy_net_mac_aware(cr) != 0 || 2062 icmp->icmp_state != TS_UNBND) 2063 return (EACCES); 2064 if (!checkonly) 2065 icmp->icmp_mac_exempt = onoff; 2066 break; 2067 /* 2068 * Following three not meaningful for icmp 2069 * Action is same as "default" so we keep them 2070 * in comments. 2071 * case SO_LINGER: 2072 * case SO_KEEPALIVE: 2073 * case SO_OOBINLINE: 2074 */ 2075 default: 2076 *outlenp = 0; 2077 return (EINVAL); 2078 } 2079 break; 2080 case IPPROTO_IP: 2081 /* 2082 * Only allow IPv4 option processing on IPv4 sockets. 2083 */ 2084 if (icmp->icmp_family != AF_INET) { 2085 *outlenp = 0; 2086 return (ENOPROTOOPT); 2087 } 2088 switch (name) { 2089 case IP_OPTIONS: 2090 case T_IP_OPTIONS: 2091 /* Save options for use by IP. */ 2092 if ((inlen & 0x3) || 2093 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2094 *outlenp = 0; 2095 return (EINVAL); 2096 } 2097 if (checkonly) 2098 break; 2099 2100 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2101 &icmp->icmp_ip_snd_options_len, 2102 icmp->icmp_label_len, invalp, inlen)) { 2103 *outlenp = 0; 2104 return (ENOMEM); 2105 } 2106 2107 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2108 icmp->icmp_ip_snd_options_len; 2109 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2110 icmp_wroff_extra); 2111 break; 2112 case IP_HDRINCL: 2113 if (!checkonly) 2114 icmp->icmp_hdrincl = onoff; 2115 break; 2116 case IP_TOS: 2117 case T_IP_TOS: 2118 if (!checkonly) { 2119 icmp->icmp_type_of_service = (uint8_t)*i1; 2120 } 2121 break; 2122 case IP_TTL: 2123 if (!checkonly) { 2124 icmp->icmp_ttl = (uint8_t)*i1; 2125 } 2126 break; 2127 case IP_MULTICAST_IF: 2128 /* 2129 * TODO should check OPTMGMT reply and undo this if 2130 * there is an error. 2131 */ 2132 if (!checkonly) 2133 icmp->icmp_multicast_if_addr = *i1; 2134 break; 2135 case IP_MULTICAST_TTL: 2136 if (!checkonly) 2137 icmp->icmp_multicast_ttl = *invalp; 2138 break; 2139 case IP_MULTICAST_LOOP: 2140 if (!checkonly) { 2141 icmp->icmp_multicast_loop = 2142 (*invalp == 0) ? 0 : 1; 2143 } 2144 break; 2145 case IP_BOUND_IF: 2146 if (!checkonly) 2147 icmp->icmp_bound_if = *i1; 2148 break; 2149 case IP_UNSPEC_SRC: 2150 if (!checkonly) 2151 icmp->icmp_unspec_source = onoff; 2152 break; 2153 case IP_XMIT_IF: 2154 if (!checkonly) 2155 icmp->icmp_xmit_if = *i1; 2156 break; 2157 case IP_RECVIF: 2158 if (!checkonly) 2159 icmp->icmp_recvif = onoff; 2160 break; 2161 2162 case IP_PKTINFO: { 2163 /* 2164 * This also handles IP_RECVPKTINFO. 2165 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2166 * Differentiation is based on the size of the argument 2167 * passed in. 2168 */ 2169 struct in_pktinfo *pktinfop; 2170 ip4_pkt_t *attr_pktinfop; 2171 2172 if (checkonly) 2173 break; 2174 2175 if (inlen == sizeof (int)) { 2176 /* 2177 * This is IP_RECVPKTINFO option. 2178 * Keep a local copy of wether this option is 2179 * set or not and pass it down to IP for 2180 * processing. 2181 */ 2182 icmp->icmp_ip_recvpktinfo = onoff; 2183 return (-EINVAL); 2184 } 2185 2186 2187 if (inlen != sizeof (struct in_pktinfo)) 2188 return (EINVAL); 2189 2190 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2191 == NULL) { 2192 /* 2193 * sticky option is not supported 2194 */ 2195 return (EINVAL); 2196 } 2197 2198 pktinfop = (struct in_pktinfo *)invalp; 2199 2200 /* 2201 * Atleast one of the values should be specified 2202 */ 2203 if (pktinfop->ipi_ifindex == 0 && 2204 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2205 return (EINVAL); 2206 } 2207 2208 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2209 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2210 } 2211 break; 2212 case IP_ADD_MEMBERSHIP: 2213 case IP_DROP_MEMBERSHIP: 2214 case IP_BLOCK_SOURCE: 2215 case IP_UNBLOCK_SOURCE: 2216 case IP_ADD_SOURCE_MEMBERSHIP: 2217 case IP_DROP_SOURCE_MEMBERSHIP: 2218 case MCAST_JOIN_GROUP: 2219 case MCAST_LEAVE_GROUP: 2220 case MCAST_BLOCK_SOURCE: 2221 case MCAST_UNBLOCK_SOURCE: 2222 case MCAST_JOIN_SOURCE_GROUP: 2223 case MCAST_LEAVE_SOURCE_GROUP: 2224 case MRT_INIT: 2225 case MRT_DONE: 2226 case MRT_ADD_VIF: 2227 case MRT_DEL_VIF: 2228 case MRT_ADD_MFC: 2229 case MRT_DEL_MFC: 2230 case MRT_VERSION: 2231 case MRT_ASSERT: 2232 case IP_SEC_OPT: 2233 case IP_DONTFAILOVER_IF: 2234 case IP_NEXTHOP: 2235 /* 2236 * "soft" error (negative) 2237 * option not handled at this level 2238 * Note: Do not modify *outlenp 2239 */ 2240 return (-EINVAL); 2241 default: 2242 *outlenp = 0; 2243 return (EINVAL); 2244 } 2245 break; 2246 case IPPROTO_IPV6: { 2247 ip6_pkt_t *ipp; 2248 boolean_t sticky; 2249 2250 if (icmp->icmp_family != AF_INET6) { 2251 *outlenp = 0; 2252 return (ENOPROTOOPT); 2253 } 2254 /* 2255 * Deal with both sticky options and ancillary data 2256 */ 2257 if (thisdg_attrs == NULL) { 2258 /* sticky options, or none */ 2259 ipp = &icmp->icmp_sticky_ipp; 2260 sticky = B_TRUE; 2261 } else { 2262 /* ancillary data */ 2263 ipp = (ip6_pkt_t *)thisdg_attrs; 2264 sticky = B_FALSE; 2265 } 2266 2267 switch (name) { 2268 case IPV6_MULTICAST_IF: 2269 if (!checkonly) 2270 icmp->icmp_multicast_if_index = *i1; 2271 break; 2272 case IPV6_UNICAST_HOPS: 2273 /* -1 means use default */ 2274 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2275 *outlenp = 0; 2276 return (EINVAL); 2277 } 2278 if (!checkonly) { 2279 if (*i1 == -1) { 2280 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2281 icmp_ipv6_hoplimit; 2282 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2283 /* Pass modified value to IP. */ 2284 *i1 = ipp->ipp_hoplimit; 2285 } else { 2286 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2287 (uint8_t)*i1; 2288 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2289 } 2290 /* Rebuild the header template */ 2291 error = icmp_build_hdrs(q, icmp); 2292 if (error != 0) { 2293 *outlenp = 0; 2294 return (error); 2295 } 2296 } 2297 break; 2298 case IPV6_MULTICAST_HOPS: 2299 /* -1 means use default */ 2300 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2301 *outlenp = 0; 2302 return (EINVAL); 2303 } 2304 if (!checkonly) { 2305 if (*i1 == -1) { 2306 icmp->icmp_multicast_ttl = 2307 ipp->ipp_multicast_hops = 2308 IP_DEFAULT_MULTICAST_TTL; 2309 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2310 /* Pass modified value to IP. */ 2311 *i1 = icmp->icmp_multicast_ttl; 2312 } else { 2313 icmp->icmp_multicast_ttl = 2314 ipp->ipp_multicast_hops = 2315 (uint8_t)*i1; 2316 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2317 } 2318 } 2319 break; 2320 case IPV6_MULTICAST_LOOP: 2321 if (*i1 != 0 && *i1 != 1) { 2322 *outlenp = 0; 2323 return (EINVAL); 2324 } 2325 if (!checkonly) 2326 icmp->icmp_multicast_loop = *i1; 2327 break; 2328 case IPV6_CHECKSUM: 2329 /* 2330 * Integer offset into the user data of where the 2331 * checksum is located. 2332 * Offset of -1 disables option. 2333 * Does not apply to IPPROTO_ICMPV6. 2334 */ 2335 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2336 *outlenp = 0; 2337 return (EINVAL); 2338 } 2339 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2340 /* Negative or not 16 bit aligned offset */ 2341 *outlenp = 0; 2342 return (EINVAL); 2343 } 2344 if (checkonly) 2345 break; 2346 2347 if (*i1 == -1) { 2348 icmp->icmp_raw_checksum = 0; 2349 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2350 } else { 2351 icmp->icmp_raw_checksum = 1; 2352 icmp->icmp_checksum_off = *i1; 2353 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2354 } 2355 /* Rebuild the header template */ 2356 error = icmp_build_hdrs(q, icmp); 2357 if (error != 0) { 2358 *outlenp = 0; 2359 return (error); 2360 } 2361 break; 2362 case IPV6_JOIN_GROUP: 2363 case IPV6_LEAVE_GROUP: 2364 case MCAST_JOIN_GROUP: 2365 case MCAST_LEAVE_GROUP: 2366 case MCAST_BLOCK_SOURCE: 2367 case MCAST_UNBLOCK_SOURCE: 2368 case MCAST_JOIN_SOURCE_GROUP: 2369 case MCAST_LEAVE_SOURCE_GROUP: 2370 /* 2371 * "soft" error (negative) 2372 * option not handled at this level 2373 * Note: Do not modify *outlenp 2374 */ 2375 return (-EINVAL); 2376 case IPV6_BOUND_IF: 2377 if (!checkonly) 2378 icmp->icmp_bound_if = *i1; 2379 break; 2380 case IPV6_UNSPEC_SRC: 2381 if (!checkonly) 2382 icmp->icmp_unspec_source = onoff; 2383 break; 2384 case IPV6_RECVTCLASS: 2385 if (!checkonly) 2386 icmp->icmp_ipv6_recvtclass = onoff; 2387 break; 2388 /* 2389 * Set boolean switches for ancillary data delivery 2390 */ 2391 case IPV6_RECVPKTINFO: 2392 if (!checkonly) 2393 icmp->icmp_ip_recvpktinfo = onoff; 2394 break; 2395 case IPV6_RECVPATHMTU: 2396 if (!checkonly) 2397 icmp->icmp_ipv6_recvpathmtu = onoff; 2398 break; 2399 case IPV6_RECVHOPLIMIT: 2400 if (!checkonly) 2401 icmp->icmp_ipv6_recvhoplimit = onoff; 2402 break; 2403 case IPV6_RECVHOPOPTS: 2404 if (!checkonly) 2405 icmp->icmp_ipv6_recvhopopts = onoff; 2406 break; 2407 case IPV6_RECVDSTOPTS: 2408 if (!checkonly) 2409 icmp->icmp_ipv6_recvdstopts = onoff; 2410 break; 2411 case _OLD_IPV6_RECVDSTOPTS: 2412 if (!checkonly) 2413 icmp->icmp_old_ipv6_recvdstopts = onoff; 2414 break; 2415 case IPV6_RECVRTHDRDSTOPTS: 2416 if (!checkonly) 2417 icmp->icmp_ipv6_recvrtdstopts = onoff; 2418 break; 2419 case IPV6_RECVRTHDR: 2420 if (!checkonly) 2421 icmp->icmp_ipv6_recvrthdr = onoff; 2422 break; 2423 /* 2424 * Set sticky options or ancillary data. 2425 * If sticky options, (re)build any extension headers 2426 * that might be needed as a result. 2427 */ 2428 case IPV6_PKTINFO: 2429 /* 2430 * The source address and ifindex are verified 2431 * in ip_opt_set(). For ancillary data the 2432 * source address is checked in ip_wput_v6. 2433 */ 2434 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2435 return (EINVAL); 2436 if (checkonly) 2437 break; 2438 2439 if (inlen == 0) { 2440 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2441 ipp->ipp_sticky_ignored |= 2442 (IPPF_IFINDEX|IPPF_ADDR); 2443 } else { 2444 struct in6_pktinfo *pkti; 2445 2446 pkti = (struct in6_pktinfo *)invalp; 2447 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2448 ipp->ipp_addr = pkti->ipi6_addr; 2449 if (ipp->ipp_ifindex != 0) 2450 ipp->ipp_fields |= IPPF_IFINDEX; 2451 else 2452 ipp->ipp_fields &= ~IPPF_IFINDEX; 2453 if (!IN6_IS_ADDR_UNSPECIFIED( 2454 &ipp->ipp_addr)) 2455 ipp->ipp_fields |= IPPF_ADDR; 2456 else 2457 ipp->ipp_fields &= ~IPPF_ADDR; 2458 } 2459 if (sticky) { 2460 error = icmp_build_hdrs(q, icmp); 2461 if (error != 0) 2462 return (error); 2463 } 2464 break; 2465 case IPV6_HOPLIMIT: 2466 /* This option can only be used as ancillary data. */ 2467 if (sticky) 2468 return (EINVAL); 2469 if (inlen != 0 && inlen != sizeof (int)) 2470 return (EINVAL); 2471 if (checkonly) 2472 break; 2473 2474 if (inlen == 0) { 2475 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2476 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2477 } else { 2478 if (*i1 > 255 || *i1 < -1) 2479 return (EINVAL); 2480 if (*i1 == -1) 2481 ipp->ipp_hoplimit = icmp_ipv6_hoplimit; 2482 else 2483 ipp->ipp_hoplimit = *i1; 2484 ipp->ipp_fields |= IPPF_HOPLIMIT; 2485 } 2486 break; 2487 case IPV6_TCLASS: 2488 /* 2489 * IPV6_RECVTCLASS accepts -1 as use kernel default 2490 * and [0, 255] as the actualy traffic class. 2491 */ 2492 if (inlen != 0 && inlen != sizeof (int)) 2493 return (EINVAL); 2494 if (checkonly) 2495 break; 2496 2497 if (inlen == 0) { 2498 ipp->ipp_fields &= ~IPPF_TCLASS; 2499 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2500 } else { 2501 if (*i1 >= 256 || *i1 < -1) 2502 return (EINVAL); 2503 if (*i1 == -1) { 2504 ipp->ipp_tclass = 2505 IPV6_FLOW_TCLASS( 2506 IPV6_DEFAULT_VERS_AND_FLOW); 2507 } else { 2508 ipp->ipp_tclass = *i1; 2509 } 2510 ipp->ipp_fields |= IPPF_TCLASS; 2511 } 2512 if (sticky) { 2513 error = icmp_build_hdrs(q, icmp); 2514 if (error != 0) 2515 return (error); 2516 } 2517 break; 2518 case IPV6_NEXTHOP: 2519 /* 2520 * IP will verify that the nexthop is reachable 2521 * and fail for sticky options. 2522 */ 2523 if (inlen != 0 && inlen != sizeof (sin6_t)) 2524 return (EINVAL); 2525 if (checkonly) 2526 break; 2527 2528 if (inlen == 0) { 2529 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2530 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2531 } else { 2532 sin6_t *sin6 = (sin6_t *)invalp; 2533 2534 if (sin6->sin6_family != AF_INET6) 2535 return (EAFNOSUPPORT); 2536 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 2537 return (EADDRNOTAVAIL); 2538 ipp->ipp_nexthop = sin6->sin6_addr; 2539 if (!IN6_IS_ADDR_UNSPECIFIED( 2540 &ipp->ipp_nexthop)) 2541 ipp->ipp_fields |= IPPF_NEXTHOP; 2542 else 2543 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2544 } 2545 if (sticky) { 2546 error = icmp_build_hdrs(q, icmp); 2547 if (error != 0) 2548 return (error); 2549 } 2550 break; 2551 case IPV6_HOPOPTS: { 2552 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2553 /* 2554 * Sanity checks - minimum size, size a multiple of 2555 * eight bytes, and matching size passed in. 2556 */ 2557 if (inlen != 0 && 2558 inlen != (8 * (hopts->ip6h_len + 1))) 2559 return (EINVAL); 2560 2561 if (checkonly) 2562 break; 2563 error = optcom_pkt_set(invalp, inlen, sticky, 2564 (uchar_t **)&ipp->ipp_hopopts, 2565 &ipp->ipp_hopoptslen, 2566 sticky ? icmp->icmp_label_len_v6 : 0); 2567 if (error != 0) 2568 return (error); 2569 if (ipp->ipp_hopoptslen == 0) { 2570 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2571 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2572 } else { 2573 ipp->ipp_fields |= IPPF_HOPOPTS; 2574 } 2575 if (sticky) { 2576 error = icmp_build_hdrs(q, icmp); 2577 if (error != 0) 2578 return (error); 2579 } 2580 break; 2581 } 2582 case IPV6_RTHDRDSTOPTS: { 2583 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2584 2585 /* 2586 * Sanity checks - minimum size, size a multiple of 2587 * eight bytes, and matching size passed in. 2588 */ 2589 if (inlen != 0 && 2590 inlen != (8 * (dopts->ip6d_len + 1))) 2591 return (EINVAL); 2592 2593 if (checkonly) 2594 break; 2595 2596 if (inlen == 0) { 2597 if (sticky && 2598 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2599 kmem_free(ipp->ipp_rtdstopts, 2600 ipp->ipp_rtdstoptslen); 2601 ipp->ipp_rtdstopts = NULL; 2602 ipp->ipp_rtdstoptslen = 0; 2603 } 2604 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2605 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2606 } else { 2607 error = optcom_pkt_set(invalp, inlen, sticky, 2608 (uchar_t **)&ipp->ipp_rtdstopts, 2609 &ipp->ipp_rtdstoptslen, 0); 2610 if (error != 0) 2611 return (error); 2612 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2613 } 2614 if (sticky) { 2615 error = icmp_build_hdrs(q, icmp); 2616 if (error != 0) 2617 return (error); 2618 } 2619 break; 2620 } 2621 case IPV6_DSTOPTS: { 2622 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2623 2624 /* 2625 * Sanity checks - minimum size, size a multiple of 2626 * eight bytes, and matching size passed in. 2627 */ 2628 if (inlen != 0 && 2629 inlen != (8 * (dopts->ip6d_len + 1))) 2630 return (EINVAL); 2631 2632 if (checkonly) 2633 break; 2634 2635 if (inlen == 0) { 2636 if (sticky && 2637 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2638 kmem_free(ipp->ipp_dstopts, 2639 ipp->ipp_dstoptslen); 2640 ipp->ipp_dstopts = NULL; 2641 ipp->ipp_dstoptslen = 0; 2642 } 2643 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2644 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2645 } else { 2646 error = optcom_pkt_set(invalp, inlen, sticky, 2647 (uchar_t **)&ipp->ipp_dstopts, 2648 &ipp->ipp_dstoptslen, 0); 2649 if (error != 0) 2650 return (error); 2651 ipp->ipp_fields |= IPPF_DSTOPTS; 2652 } 2653 if (sticky) { 2654 error = icmp_build_hdrs(q, icmp); 2655 if (error != 0) 2656 return (error); 2657 } 2658 break; 2659 } 2660 case IPV6_RTHDR: { 2661 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2662 2663 /* 2664 * Sanity checks - minimum size, size a multiple of 2665 * eight bytes, and matching size passed in. 2666 */ 2667 if (inlen != 0 && 2668 inlen != (8 * (rt->ip6r_len + 1))) 2669 return (EINVAL); 2670 2671 if (checkonly) 2672 break; 2673 2674 if (inlen == 0) { 2675 if (sticky && 2676 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2677 kmem_free(ipp->ipp_rthdr, 2678 ipp->ipp_rthdrlen); 2679 ipp->ipp_rthdr = NULL; 2680 ipp->ipp_rthdrlen = 0; 2681 } 2682 ipp->ipp_fields &= ~IPPF_RTHDR; 2683 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2684 } else { 2685 error = optcom_pkt_set(invalp, inlen, sticky, 2686 (uchar_t **)&ipp->ipp_rthdr, 2687 &ipp->ipp_rthdrlen, 0); 2688 if (error != 0) 2689 return (error); 2690 ipp->ipp_fields |= IPPF_RTHDR; 2691 } 2692 if (sticky) { 2693 error = icmp_build_hdrs(q, icmp); 2694 if (error != 0) 2695 return (error); 2696 } 2697 break; 2698 } 2699 2700 case IPV6_DONTFRAG: 2701 if (checkonly) 2702 break; 2703 2704 if (onoff) { 2705 ipp->ipp_fields |= IPPF_DONTFRAG; 2706 } else { 2707 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2708 } 2709 break; 2710 2711 case IPV6_USE_MIN_MTU: 2712 if (inlen != sizeof (int)) 2713 return (EINVAL); 2714 2715 if (*i1 < -1 || *i1 > 1) 2716 return (EINVAL); 2717 2718 if (checkonly) 2719 break; 2720 2721 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 2722 ipp->ipp_use_min_mtu = *i1; 2723 break; 2724 2725 /* 2726 * This option can't be set. Its only returned via 2727 * getsockopt() or ancillary data. 2728 */ 2729 case IPV6_PATHMTU: 2730 return (EINVAL); 2731 2732 case IPV6_BOUND_PIF: 2733 case IPV6_SEC_OPT: 2734 case IPV6_DONTFAILOVER_IF: 2735 case IPV6_SRC_PREFERENCES: 2736 case IPV6_V6ONLY: 2737 /* Handled at IP level */ 2738 return (-EINVAL); 2739 default: 2740 *outlenp = 0; 2741 return (EINVAL); 2742 } 2743 break; 2744 } /* end IPPROTO_IPV6 */ 2745 2746 case IPPROTO_ICMPV6: 2747 /* 2748 * Only allow IPv6 option processing on IPv6 sockets. 2749 */ 2750 if (icmp->icmp_family != AF_INET6) { 2751 *outlenp = 0; 2752 return (ENOPROTOOPT); 2753 } 2754 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2755 *outlenp = 0; 2756 return (ENOPROTOOPT); 2757 } 2758 switch (name) { 2759 case ICMP6_FILTER: 2760 if (!checkonly) { 2761 if ((inlen != 0) && 2762 (inlen != sizeof (icmp6_filter_t))) 2763 return (EINVAL); 2764 2765 if (inlen == 0) { 2766 if (icmp->icmp_filter != NULL) { 2767 kmem_free(icmp->icmp_filter, 2768 sizeof (icmp6_filter_t)); 2769 icmp->icmp_filter = NULL; 2770 } 2771 } else { 2772 if (icmp->icmp_filter == NULL) { 2773 icmp->icmp_filter = kmem_alloc( 2774 sizeof (icmp6_filter_t), 2775 KM_NOSLEEP); 2776 if (icmp->icmp_filter == NULL) { 2777 *outlenp = 0; 2778 return (ENOBUFS); 2779 } 2780 } 2781 (void) bcopy(invalp, icmp->icmp_filter, 2782 inlen); 2783 } 2784 } 2785 break; 2786 2787 default: 2788 *outlenp = 0; 2789 return (EINVAL); 2790 } 2791 break; 2792 default: 2793 *outlenp = 0; 2794 return (EINVAL); 2795 } 2796 /* 2797 * Common case of OK return with outval same as inval. 2798 */ 2799 if (invalp != outvalp) { 2800 /* don't trust bcopy for identical src/dst */ 2801 (void) bcopy(invalp, outvalp, inlen); 2802 } 2803 *outlenp = inlen; 2804 return (0); 2805 } 2806 2807 /* 2808 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 2809 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 2810 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 2811 * headers. 2812 * Returns failure if can't allocate memory. 2813 */ 2814 static int 2815 icmp_build_hdrs(queue_t *q, icmp_t *icmp) 2816 { 2817 uchar_t *hdrs; 2818 uint_t hdrs_len; 2819 ip6_t *ip6h; 2820 ip6i_t *ip6i; 2821 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 2822 2823 hdrs_len = ip_total_hdrs_len_v6(ipp); 2824 ASSERT(hdrs_len != 0); 2825 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 2826 /* Need to reallocate */ 2827 if (hdrs_len != 0) { 2828 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 2829 if (hdrs == NULL) 2830 return (ENOMEM); 2831 } else { 2832 hdrs = NULL; 2833 } 2834 if (icmp->icmp_sticky_hdrs_len != 0) { 2835 kmem_free(icmp->icmp_sticky_hdrs, 2836 icmp->icmp_sticky_hdrs_len); 2837 } 2838 icmp->icmp_sticky_hdrs = hdrs; 2839 icmp->icmp_sticky_hdrs_len = hdrs_len; 2840 } 2841 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 2842 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 2843 2844 /* Set header fields not in ipp */ 2845 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 2846 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 2847 ip6h = (ip6_t *)&ip6i[1]; 2848 2849 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 2850 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 2851 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 2852 } 2853 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 2854 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 2855 } 2856 } else { 2857 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 2858 } 2859 2860 if (!(ipp->ipp_fields & IPPF_ADDR)) 2861 ip6h->ip6_src = icmp->icmp_v6src; 2862 2863 /* Try to get everything in a single mblk */ 2864 if (hdrs_len > icmp->icmp_max_hdr_len) { 2865 icmp->icmp_max_hdr_len = hdrs_len; 2866 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2867 icmp_wroff_extra); 2868 } 2869 return (0); 2870 } 2871 2872 /* 2873 * This routine retrieves the value of an ND variable in a icmpparam_t 2874 * structure. It is called through nd_getset when a user reads the 2875 * variable. 2876 */ 2877 /* ARGSUSED */ 2878 static int 2879 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2880 { 2881 icmpparam_t *icmppa = (icmpparam_t *)cp; 2882 2883 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 2884 return (0); 2885 } 2886 2887 /* 2888 * Walk through the param array specified registering each element with the 2889 * named dispatch (ND) handler. 2890 */ 2891 static boolean_t 2892 icmp_param_register(icmpparam_t *icmppa, int cnt) 2893 { 2894 for (; cnt-- > 0; icmppa++) { 2895 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 2896 if (!nd_load(&icmp_g_nd, icmppa->icmp_param_name, 2897 icmp_param_get, icmp_param_set, 2898 (caddr_t)icmppa)) { 2899 nd_free(&icmp_g_nd); 2900 return (B_FALSE); 2901 } 2902 } 2903 } 2904 if (!nd_load(&icmp_g_nd, "icmp_status", icmp_status_report, NULL, 2905 NULL)) { 2906 nd_free(&icmp_g_nd); 2907 return (B_FALSE); 2908 } 2909 return (B_TRUE); 2910 } 2911 2912 /* This routine sets an ND variable in a icmpparam_t structure. */ 2913 /* ARGSUSED */ 2914 static int 2915 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 2916 { 2917 long new_value; 2918 icmpparam_t *icmppa = (icmpparam_t *)cp; 2919 2920 /* 2921 * Fail the request if the new value does not lie within the 2922 * required bounds. 2923 */ 2924 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2925 new_value < icmppa->icmp_param_min || 2926 new_value > icmppa->icmp_param_max) { 2927 return (EINVAL); 2928 } 2929 /* Set the new value */ 2930 icmppa->icmp_param_value = new_value; 2931 return (0); 2932 } 2933 2934 static void 2935 icmp_rput(queue_t *q, mblk_t *mp) 2936 { 2937 struct T_unitdata_ind *tudi; 2938 uchar_t *rptr; 2939 struct T_error_ack *tea; 2940 icmp_t *icmp; 2941 sin_t *sin; 2942 sin6_t *sin6; 2943 ip6_t *ip6h; 2944 ip6i_t *ip6i; 2945 mblk_t *mp1; 2946 int hdr_len; 2947 ipha_t *ipha; 2948 int udi_size; /* Size of T_unitdata_ind */ 2949 uint_t ipvers; 2950 ip6_pkt_t ipp; 2951 uint8_t nexthdr; 2952 boolean_t recvif = B_FALSE; 2953 ip_pktinfo_t *pinfo = NULL; 2954 mblk_t *options_mp = NULL; 2955 uint_t icmp_opt = 0; 2956 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 2957 uint_t hopstrip; 2958 2959 icmp = (icmp_t *)q->q_ptr; 2960 if (icmp->icmp_restricted) { 2961 putnext(q, mp); 2962 return; 2963 } 2964 2965 if (mp->b_datap->db_type == M_CTL) { 2966 /* 2967 * IP sends up the IPSEC_IN message for handling IPSEC 2968 * policy at the TCP level. We don't need it here. 2969 */ 2970 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 2971 mp1 = mp->b_cont; 2972 freeb(mp); 2973 mp = mp1; 2974 } else { 2975 pinfo = (ip_pktinfo_t *)mp->b_rptr; 2976 if ((icmp->icmp_recvif != 0 || 2977 icmp->icmp_ip_recvpktinfo) && 2978 (pinfo->ip_pkt_ulp_type == IN_PKTINFO)) { 2979 /* 2980 * IP has passed the options in mp and the 2981 * actual data is in b_cont. 2982 */ 2983 recvif = B_TRUE; 2984 /* 2985 * We are here bcos IP_RECVIF is set so we need 2986 * to extract the options mblk and adjust the 2987 * rptr 2988 */ 2989 options_mp = mp; 2990 mp = mp->b_cont; 2991 } 2992 } 2993 } 2994 2995 rptr = mp->b_rptr; 2996 switch (mp->b_datap->db_type) { 2997 case M_DATA: 2998 /* 2999 * M_DATA messages contain IP packets. They are handled 3000 * following the switch. 3001 */ 3002 break; 3003 case M_PROTO: 3004 case M_PCPROTO: 3005 /* M_PROTO messages contain some type of TPI message. */ 3006 if ((mp->b_wptr - rptr) < sizeof (t_scalar_t)) { 3007 freemsg(mp); 3008 return; 3009 } 3010 tea = (struct T_error_ack *)rptr; 3011 switch (tea->PRIM_type) { 3012 case T_ERROR_ACK: 3013 switch (tea->ERROR_prim) { 3014 case O_T_BIND_REQ: 3015 case T_BIND_REQ: 3016 /* 3017 * If our O_T_BIND_REQ/T_BIND_REQ fails, 3018 * clear out the source address before 3019 * passing the message upstream. 3020 * If this was caused by a T_CONN_REQ 3021 * revert back to bound state. 3022 */ 3023 if (icmp->icmp_state == TS_UNBND) { 3024 /* 3025 * TPI has not yet bound - bind sent by 3026 * icmp_bind_proto. 3027 */ 3028 freemsg(mp); 3029 return; 3030 } 3031 if (icmp->icmp_state == TS_DATA_XFER) { 3032 /* Connect failed */ 3033 tea->ERROR_prim = T_CONN_REQ; 3034 icmp->icmp_v6src = 3035 icmp->icmp_bound_v6src; 3036 icmp->icmp_state = TS_IDLE; 3037 if (icmp->icmp_family == AF_INET6) 3038 (void) icmp_build_hdrs(q, icmp); 3039 break; 3040 } 3041 3042 if (icmp->icmp_discon_pending) { 3043 tea->ERROR_prim = T_DISCON_REQ; 3044 icmp->icmp_discon_pending = 0; 3045 } 3046 V6_SET_ZERO(icmp->icmp_v6src); 3047 V6_SET_ZERO(icmp->icmp_bound_v6src); 3048 icmp->icmp_state = TS_UNBND; 3049 if (icmp->icmp_family == AF_INET6) 3050 (void) icmp_build_hdrs(q, icmp); 3051 break; 3052 default: 3053 break; 3054 } 3055 break; 3056 case T_BIND_ACK: 3057 icmp_rput_bind_ack(q, mp); 3058 return; 3059 3060 case T_OPTMGMT_ACK: 3061 case T_OK_ACK: 3062 if (tea->PRIM_type == T_OK_ACK) { 3063 struct T_ok_ack *toa; 3064 toa = (struct T_ok_ack *)rptr; 3065 if (toa->CORRECT_prim == T_UNBIND_REQ) { 3066 /* 3067 * If somebody sets IPSEC options, IP 3068 * sends some IPSEC info which is used 3069 * by the TCP for detached connections. 3070 * We don't need it here. 3071 */ 3072 if ((mp1 = mp->b_cont) != NULL) { 3073 freemsg(mp1); 3074 mp->b_cont = NULL; 3075 } 3076 } 3077 } 3078 break; 3079 default: 3080 freemsg(mp); 3081 return; 3082 } 3083 putnext(q, mp); 3084 return; 3085 case M_CTL: 3086 if (recvif) { 3087 /* 3088 * IP has passed the options in mp and the actual data 3089 * is in b_cont. Jump to normal data processing. 3090 */ 3091 break; 3092 } 3093 3094 /* Contains ICMP packet from IP */ 3095 icmp_icmp_error(q, mp); 3096 return; 3097 default: 3098 putnext(q, mp); 3099 return; 3100 } 3101 3102 /* 3103 * Discard message if it is misaligned or smaller than the IP header. 3104 */ 3105 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3106 freemsg(mp); 3107 if (options_mp != NULL) 3108 freeb(options_mp); 3109 BUMP_MIB(&rawip_mib, rawipInErrors); 3110 return; 3111 } 3112 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3113 3114 /* Handle M_DATA messages containing IP packets messages */ 3115 if (ipvers == IPV4_VERSION) { 3116 /* 3117 * Special case where IP attaches 3118 * the IRE needs to be handled so that we don't send up 3119 * IRE to the user land. 3120 */ 3121 ipha = (ipha_t *)rptr; 3122 hdr_len = IPH_HDR_LENGTH(ipha); 3123 3124 if (ipha->ipha_protocol == IPPROTO_TCP) { 3125 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3126 3127 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3128 TH_SYN) && mp->b_cont != NULL) { 3129 mp1 = mp->b_cont; 3130 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3131 freeb(mp1); 3132 mp->b_cont = NULL; 3133 } 3134 } 3135 } 3136 if (icmp_bsd_compat) { 3137 ushort_t len; 3138 len = ntohs(ipha->ipha_length); 3139 3140 if (mp->b_datap->db_ref > 1) { 3141 /* 3142 * Allocate a new IP header so that we can 3143 * modify ipha_length. 3144 */ 3145 mblk_t *mp1; 3146 3147 mp1 = allocb(hdr_len, BPRI_MED); 3148 if (!mp1) { 3149 freemsg(mp); 3150 if (options_mp != NULL) 3151 freeb(options_mp); 3152 BUMP_MIB(&rawip_mib, rawipInErrors); 3153 return; 3154 } 3155 bcopy(rptr, mp1->b_rptr, hdr_len); 3156 mp->b_rptr = rptr + hdr_len; 3157 rptr = mp1->b_rptr; 3158 ipha = (ipha_t *)rptr; 3159 mp1->b_cont = mp; 3160 mp1->b_wptr = rptr + hdr_len; 3161 mp = mp1; 3162 } 3163 len -= hdr_len; 3164 ipha->ipha_length = htons(len); 3165 } 3166 } 3167 3168 /* 3169 * This is the inbound data path. Packets are passed upstream as 3170 * T_UNITDATA_IND messages with full IP headers still attached. 3171 */ 3172 if (icmp->icmp_family == AF_INET) { 3173 ASSERT(ipvers == IPV4_VERSION); 3174 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3175 if (icmp->icmp_recvif && recvif && 3176 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3177 udi_size += sizeof (struct T_opthdr) + 3178 sizeof (uint_t); 3179 } 3180 3181 if (icmp->icmp_ip_recvpktinfo && recvif && 3182 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3183 udi_size += sizeof (struct T_opthdr) + 3184 sizeof (struct in_pktinfo); 3185 } 3186 3187 /* 3188 * If SO_TIMESTAMP is set allocate the appropriate sized 3189 * buffer. Since gethrestime() expects a pointer aligned 3190 * argument, we allocate space necessary for extra 3191 * alignment (even though it might not be used). 3192 */ 3193 if (icmp->icmp_timestamp) { 3194 udi_size += sizeof (struct T_opthdr) + 3195 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3196 } 3197 mp1 = allocb(udi_size, BPRI_MED); 3198 if (mp1 == NULL) { 3199 freemsg(mp); 3200 if (options_mp != NULL) 3201 freeb(options_mp); 3202 BUMP_MIB(&rawip_mib, rawipInErrors); 3203 return; 3204 } 3205 mp1->b_cont = mp; 3206 mp = mp1; 3207 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3208 mp->b_datap->db_type = M_PROTO; 3209 mp->b_wptr = (uchar_t *)tudi + udi_size; 3210 tudi->PRIM_type = T_UNITDATA_IND; 3211 tudi->SRC_length = sizeof (sin_t); 3212 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3213 sin = (sin_t *)&tudi[1]; 3214 *sin = sin_null; 3215 sin->sin_family = AF_INET; 3216 sin->sin_addr.s_addr = ipha->ipha_src; 3217 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3218 sizeof (sin_t); 3219 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3220 tudi->OPT_length = udi_size; 3221 3222 /* 3223 * Add options if IP_RECVIF is set 3224 */ 3225 if (udi_size != 0) { 3226 char *dstopt; 3227 3228 dstopt = (char *)&sin[1]; 3229 if (icmp->icmp_recvif && recvif && 3230 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3231 3232 struct T_opthdr *toh; 3233 uint_t *dstptr; 3234 3235 toh = (struct T_opthdr *)dstopt; 3236 toh->level = IPPROTO_IP; 3237 toh->name = IP_RECVIF; 3238 toh->len = sizeof (struct T_opthdr) + 3239 sizeof (uint_t); 3240 toh->status = 0; 3241 dstopt += sizeof (struct T_opthdr); 3242 dstptr = (uint_t *)dstopt; 3243 *dstptr = pinfo->ip_pkt_ifindex; 3244 dstopt += sizeof (uint_t); 3245 freeb(options_mp); 3246 udi_size -= toh->len; 3247 } 3248 if (icmp->icmp_timestamp) { 3249 struct T_opthdr *toh; 3250 3251 toh = (struct T_opthdr *)dstopt; 3252 toh->level = SOL_SOCKET; 3253 toh->name = SCM_TIMESTAMP; 3254 toh->len = sizeof (struct T_opthdr) + 3255 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3256 toh->status = 0; 3257 dstopt += sizeof (struct T_opthdr); 3258 /* Align for gethrestime() */ 3259 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 3260 sizeof (intptr_t)); 3261 gethrestime((timestruc_t *)dstopt); 3262 dstopt = (char *)toh + toh->len; 3263 udi_size -= toh->len; 3264 } 3265 if (icmp->icmp_ip_recvpktinfo && recvif && 3266 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3267 struct T_opthdr *toh; 3268 struct in_pktinfo *pktinfop; 3269 3270 toh = (struct T_opthdr *)dstopt; 3271 toh->level = IPPROTO_IP; 3272 toh->name = IP_PKTINFO; 3273 toh->len = sizeof (struct T_opthdr) + 3274 sizeof (in_pktinfo_t); 3275 toh->status = 0; 3276 dstopt += sizeof (struct T_opthdr); 3277 pktinfop = (struct in_pktinfo *)dstopt; 3278 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3279 pktinfop->ipi_spec_dst = 3280 pinfo->ip_pkt_match_addr; 3281 3282 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3283 3284 dstopt += sizeof (struct in_pktinfo); 3285 udi_size -= toh->len; 3286 } 3287 3288 /* Consumed all of allocated space */ 3289 ASSERT(udi_size == 0); 3290 } 3291 3292 BUMP_MIB(&rawip_mib, rawipInDatagrams); 3293 putnext(q, mp); 3294 return; 3295 } 3296 3297 /* 3298 * We don't need options_mp in the IPv6 path. 3299 */ 3300 if (options_mp != NULL) { 3301 freeb(options_mp); 3302 options_mp = NULL; 3303 } 3304 3305 /* 3306 * Discard message if it is smaller than the IPv6 header 3307 * or if the header is malformed. 3308 */ 3309 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3310 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3311 icmp->icmp_family != AF_INET6) { 3312 freemsg(mp); 3313 BUMP_MIB(&rawip_mib, rawipInErrors); 3314 return; 3315 } 3316 3317 /* Initialize */ 3318 ipp.ipp_fields = 0; 3319 hopstrip = 0; 3320 3321 ip6h = (ip6_t *)rptr; 3322 /* 3323 * Call on ip_find_hdr_v6 which gets the total hdr len 3324 * as well as individual lenghts of ext hdrs (and ptrs to 3325 * them). 3326 */ 3327 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3328 /* Look for ifindex information */ 3329 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3330 ip6i = (ip6i_t *)ip6h; 3331 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3332 ASSERT(ip6i->ip6i_ifindex != 0); 3333 ipp.ipp_fields |= IPPF_IFINDEX; 3334 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3335 } 3336 rptr = (uchar_t *)&ip6i[1]; 3337 mp->b_rptr = rptr; 3338 if (rptr == mp->b_wptr) { 3339 mp1 = mp->b_cont; 3340 freeb(mp); 3341 mp = mp1; 3342 rptr = mp->b_rptr; 3343 } 3344 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3345 ip6h = (ip6_t *)rptr; 3346 } 3347 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3348 3349 /* 3350 * We need to lie a bit to the user because users inside 3351 * labeled compartments should not see their own labels. We 3352 * assume that in all other respects IP has checked the label, 3353 * and that the label is always first among the options. (If 3354 * it's not first, then this code won't see it, and the option 3355 * will be passed along to the user.) 3356 * 3357 * If we had multilevel ICMP sockets, then the following code 3358 * should be skipped for them to allow the user to see the 3359 * label. 3360 * 3361 * Alignment restrictions in the definition of IP options 3362 * (namely, the requirement that the 4-octet DOI goes on a 3363 * 4-octet boundary) mean that we know exactly where the option 3364 * should start, but we're lenient for other hosts. 3365 * 3366 * Note that there are no multilevel ICMP or raw IP sockets 3367 * yet, thus nobody ever sees the IP6OPT_LS option. 3368 */ 3369 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3370 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3371 const uchar_t *ucp = 3372 (const uchar_t *)ipp.ipp_hopopts + 2; 3373 int remlen = ipp.ipp_hopoptslen - 2; 3374 3375 while (remlen > 0) { 3376 if (*ucp == IP6OPT_PAD1) { 3377 remlen--; 3378 ucp++; 3379 } else if (*ucp == IP6OPT_PADN) { 3380 remlen -= ucp[1] + 2; 3381 ucp += ucp[1] + 2; 3382 } else if (*ucp == ip6opt_ls) { 3383 hopstrip = (ucp - 3384 (const uchar_t *)ipp.ipp_hopopts) + 3385 ucp[1] + 2; 3386 hopstrip = (hopstrip + 7) & ~7; 3387 break; 3388 } else { 3389 /* label option must be first */ 3390 break; 3391 } 3392 } 3393 } 3394 } else { 3395 hdr_len = IPV6_HDR_LEN; 3396 ip6i = NULL; 3397 nexthdr = ip6h->ip6_nxt; 3398 } 3399 /* 3400 * One special case where IP attaches the IRE needs to 3401 * be handled so that we don't send up IRE to the user land. 3402 */ 3403 if (nexthdr == IPPROTO_TCP) { 3404 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3405 3406 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3407 mp->b_cont != NULL) { 3408 mp1 = mp->b_cont; 3409 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3410 freeb(mp1); 3411 mp->b_cont = NULL; 3412 } 3413 } 3414 } 3415 /* 3416 * Check a filter for ICMPv6 types if needed. 3417 * Verify raw checksums if needed. 3418 */ 3419 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3420 if (icmp->icmp_filter != NULL) { 3421 int type; 3422 3423 /* Assumes that IP has done the pullupmsg */ 3424 type = mp->b_rptr[hdr_len]; 3425 3426 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3427 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3428 freemsg(mp); 3429 return; 3430 } 3431 } else { 3432 /* Checksum */ 3433 uint16_t *up; 3434 uint32_t sum; 3435 int remlen; 3436 3437 up = (uint16_t *)&ip6h->ip6_src; 3438 3439 remlen = msgdsize(mp) - hdr_len; 3440 sum = htons(icmp->icmp_proto + remlen) 3441 + up[0] + up[1] + up[2] + up[3] 3442 + up[4] + up[5] + up[6] + up[7] 3443 + up[8] + up[9] + up[10] + up[11] 3444 + up[12] + up[13] + up[14] + up[15]; 3445 sum = (sum & 0xffff) + (sum >> 16); 3446 sum = IP_CSUM(mp, hdr_len, sum); 3447 if (sum != 0) { 3448 /* IPv6 RAW checksum failed */ 3449 ip0dbg(("icmp_rput: RAW checksum " 3450 "failed %x\n", sum)); 3451 freemsg(mp); 3452 BUMP_MIB(&rawip_mib, rawipInCksumErrs); 3453 return; 3454 } 3455 } 3456 } 3457 /* Skip all the IPv6 headers per API */ 3458 mp->b_rptr += hdr_len; 3459 3460 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3461 3462 /* 3463 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3464 * maintain state information, instead of relying on icmp_t 3465 * structure, since there arent any locks protecting these members 3466 * and there is a window where there might be a race between a 3467 * thread setting options on the write side and a thread reading 3468 * these options on the read size. 3469 */ 3470 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3471 IPPF_RTHDR|IPPF_IFINDEX)) { 3472 if (icmp->icmp_ipv6_recvhopopts && 3473 (ipp.ipp_fields & IPPF_HOPOPTS) && 3474 ipp.ipp_hopoptslen > hopstrip) { 3475 udi_size += sizeof (struct T_opthdr) + 3476 ipp.ipp_hopoptslen - hopstrip; 3477 icmp_opt |= IPPF_HOPOPTS; 3478 } 3479 if ((icmp->icmp_ipv6_recvdstopts || 3480 icmp->icmp_old_ipv6_recvdstopts) && 3481 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3482 udi_size += sizeof (struct T_opthdr) + 3483 ipp.ipp_dstoptslen; 3484 icmp_opt |= IPPF_DSTOPTS; 3485 } 3486 if (((icmp->icmp_ipv6_recvdstopts && 3487 icmp->icmp_ipv6_recvrthdr && 3488 (ipp.ipp_fields & IPPF_RTHDR)) || 3489 icmp->icmp_ipv6_recvrtdstopts) && 3490 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3491 udi_size += sizeof (struct T_opthdr) + 3492 ipp.ipp_rtdstoptslen; 3493 icmp_opt |= IPPF_RTDSTOPTS; 3494 } 3495 if (icmp->icmp_ipv6_recvrthdr && 3496 (ipp.ipp_fields & IPPF_RTHDR)) { 3497 udi_size += sizeof (struct T_opthdr) + 3498 ipp.ipp_rthdrlen; 3499 icmp_opt |= IPPF_RTHDR; 3500 } 3501 if (icmp->icmp_ip_recvpktinfo && 3502 (ipp.ipp_fields & IPPF_IFINDEX)) { 3503 udi_size += sizeof (struct T_opthdr) + 3504 sizeof (struct in6_pktinfo); 3505 icmp_opt |= IPPF_IFINDEX; 3506 } 3507 } 3508 if (icmp->icmp_ipv6_recvhoplimit) { 3509 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3510 icmp_ipv6_recvhoplimit = B_TRUE; 3511 } 3512 3513 if (icmp->icmp_ipv6_recvtclass) 3514 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3515 3516 mp1 = allocb(udi_size, BPRI_MED); 3517 if (mp1 == NULL) { 3518 freemsg(mp); 3519 BUMP_MIB(&rawip_mib, rawipInErrors); 3520 return; 3521 } 3522 mp1->b_cont = mp; 3523 mp = mp1; 3524 mp->b_datap->db_type = M_PROTO; 3525 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3526 mp->b_wptr = (uchar_t *)tudi + udi_size; 3527 tudi->PRIM_type = T_UNITDATA_IND; 3528 tudi->SRC_length = sizeof (sin6_t); 3529 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3530 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3531 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3532 tudi->OPT_length = udi_size; 3533 sin6 = (sin6_t *)&tudi[1]; 3534 sin6->sin6_port = 0; 3535 sin6->sin6_family = AF_INET6; 3536 3537 sin6->sin6_addr = ip6h->ip6_src; 3538 /* No sin6_flowinfo per API */ 3539 sin6->sin6_flowinfo = 0; 3540 /* For link-scope source pass up scope id */ 3541 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3542 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3543 sin6->sin6_scope_id = ipp.ipp_ifindex; 3544 else 3545 sin6->sin6_scope_id = 0; 3546 3547 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3548 icmp->icmp_zoneid); 3549 3550 if (udi_size != 0) { 3551 uchar_t *dstopt; 3552 3553 dstopt = (uchar_t *)&sin6[1]; 3554 if (icmp_opt & IPPF_IFINDEX) { 3555 struct T_opthdr *toh; 3556 struct in6_pktinfo *pkti; 3557 3558 toh = (struct T_opthdr *)dstopt; 3559 toh->level = IPPROTO_IPV6; 3560 toh->name = IPV6_PKTINFO; 3561 toh->len = sizeof (struct T_opthdr) + 3562 sizeof (*pkti); 3563 toh->status = 0; 3564 dstopt += sizeof (struct T_opthdr); 3565 pkti = (struct in6_pktinfo *)dstopt; 3566 pkti->ipi6_addr = ip6h->ip6_dst; 3567 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3568 dstopt += sizeof (*pkti); 3569 udi_size -= toh->len; 3570 } 3571 if (icmp_ipv6_recvhoplimit) { 3572 struct T_opthdr *toh; 3573 3574 toh = (struct T_opthdr *)dstopt; 3575 toh->level = IPPROTO_IPV6; 3576 toh->name = IPV6_HOPLIMIT; 3577 toh->len = sizeof (struct T_opthdr) + 3578 sizeof (uint_t); 3579 toh->status = 0; 3580 dstopt += sizeof (struct T_opthdr); 3581 *(uint_t *)dstopt = ip6h->ip6_hops; 3582 dstopt += sizeof (uint_t); 3583 udi_size -= toh->len; 3584 } 3585 if (icmp->icmp_ipv6_recvtclass) { 3586 struct T_opthdr *toh; 3587 3588 toh = (struct T_opthdr *)dstopt; 3589 toh->level = IPPROTO_IPV6; 3590 toh->name = IPV6_TCLASS; 3591 toh->len = sizeof (struct T_opthdr) + 3592 sizeof (uint_t); 3593 toh->status = 0; 3594 dstopt += sizeof (struct T_opthdr); 3595 *(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3596 dstopt += sizeof (uint_t); 3597 udi_size -= toh->len; 3598 } 3599 if (icmp_opt & IPPF_HOPOPTS) { 3600 struct T_opthdr *toh; 3601 3602 toh = (struct T_opthdr *)dstopt; 3603 toh->level = IPPROTO_IPV6; 3604 toh->name = IPV6_HOPOPTS; 3605 toh->len = sizeof (struct T_opthdr) + 3606 ipp.ipp_hopoptslen - hopstrip; 3607 toh->status = 0; 3608 dstopt += sizeof (struct T_opthdr); 3609 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 3610 ipp.ipp_hopoptslen - hopstrip); 3611 if (hopstrip > 0) { 3612 /* copy next header value and fake length */ 3613 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 3614 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 3615 hopstrip / 8; 3616 } 3617 dstopt += ipp.ipp_hopoptslen - hopstrip; 3618 udi_size -= toh->len; 3619 } 3620 if (icmp_opt & IPPF_RTDSTOPTS) { 3621 struct T_opthdr *toh; 3622 3623 toh = (struct T_opthdr *)dstopt; 3624 toh->level = IPPROTO_IPV6; 3625 toh->name = IPV6_DSTOPTS; 3626 toh->len = sizeof (struct T_opthdr) + 3627 ipp.ipp_rtdstoptslen; 3628 toh->status = 0; 3629 dstopt += sizeof (struct T_opthdr); 3630 bcopy(ipp.ipp_rtdstopts, dstopt, 3631 ipp.ipp_rtdstoptslen); 3632 dstopt += ipp.ipp_rtdstoptslen; 3633 udi_size -= toh->len; 3634 } 3635 if (icmp_opt & IPPF_RTHDR) { 3636 struct T_opthdr *toh; 3637 3638 toh = (struct T_opthdr *)dstopt; 3639 toh->level = IPPROTO_IPV6; 3640 toh->name = IPV6_RTHDR; 3641 toh->len = sizeof (struct T_opthdr) + 3642 ipp.ipp_rthdrlen; 3643 toh->status = 0; 3644 dstopt += sizeof (struct T_opthdr); 3645 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 3646 dstopt += ipp.ipp_rthdrlen; 3647 udi_size -= toh->len; 3648 } 3649 if (icmp_opt & IPPF_DSTOPTS) { 3650 struct T_opthdr *toh; 3651 3652 toh = (struct T_opthdr *)dstopt; 3653 toh->level = IPPROTO_IPV6; 3654 toh->name = IPV6_DSTOPTS; 3655 toh->len = sizeof (struct T_opthdr) + 3656 ipp.ipp_dstoptslen; 3657 toh->status = 0; 3658 dstopt += sizeof (struct T_opthdr); 3659 bcopy(ipp.ipp_dstopts, dstopt, 3660 ipp.ipp_dstoptslen); 3661 dstopt += ipp.ipp_dstoptslen; 3662 udi_size -= toh->len; 3663 } 3664 /* Consumed all of allocated space */ 3665 ASSERT(udi_size == 0); 3666 } 3667 BUMP_MIB(&rawip_mib, rawipInDatagrams); 3668 putnext(q, mp); 3669 } 3670 3671 /* 3672 * Process a T_BIND_ACK 3673 */ 3674 static void 3675 icmp_rput_bind_ack(queue_t *q, mblk_t *mp) 3676 { 3677 icmp_t *icmp = (icmp_t *)q->q_ptr; 3678 mblk_t *mp1; 3679 ire_t *ire; 3680 struct T_bind_ack *tba; 3681 uchar_t *addrp; 3682 ipa_conn_t *ac; 3683 ipa6_conn_t *ac6; 3684 3685 /* 3686 * We know if headers are included or not so we can 3687 * safely do this. 3688 */ 3689 if (icmp->icmp_state == TS_UNBND) { 3690 /* 3691 * TPI has not yet bound - bind sent by 3692 * icmp_bind_proto. 3693 */ 3694 freemsg(mp); 3695 return; 3696 } 3697 if (icmp->icmp_discon_pending) 3698 icmp->icmp_discon_pending = 0; 3699 3700 /* 3701 * If a broadcast/multicast address was bound set 3702 * the source address to 0. 3703 * This ensures no datagrams with broadcast address 3704 * as source address are emitted (which would violate 3705 * RFC1122 - Hosts requirements) 3706 * 3707 * Note that when connecting the returned IRE is 3708 * for the destination address and we only perform 3709 * the broadcast check for the source address (it 3710 * is OK to connect to a broadcast/multicast address.) 3711 */ 3712 mp1 = mp->b_cont; 3713 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 3714 ire = (ire_t *)mp1->b_rptr; 3715 3716 /* 3717 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 3718 * local address. 3719 */ 3720 if (ire->ire_type == IRE_BROADCAST && 3721 icmp->icmp_state != TS_DATA_XFER) { 3722 /* This was just a local bind to a MC/broadcast addr */ 3723 V6_SET_ZERO(icmp->icmp_v6src); 3724 if (icmp->icmp_family == AF_INET6) 3725 (void) icmp_build_hdrs(q, icmp); 3726 } else if (V6_OR_V4_INADDR_ANY(icmp->icmp_v6src)) { 3727 /* 3728 * Local address not yet set - pick it from the 3729 * T_bind_ack 3730 */ 3731 tba = (struct T_bind_ack *)mp->b_rptr; 3732 addrp = &mp->b_rptr[tba->ADDR_offset]; 3733 switch (icmp->icmp_family) { 3734 case AF_INET: 3735 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 3736 ac = (ipa_conn_t *)addrp; 3737 } else { 3738 ASSERT(tba->ADDR_length == 3739 sizeof (ipa_conn_x_t)); 3740 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 3741 } 3742 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 3743 &icmp->icmp_v6src); 3744 break; 3745 case AF_INET6: 3746 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 3747 ac6 = (ipa6_conn_t *)addrp; 3748 } else { 3749 ASSERT(tba->ADDR_length == 3750 sizeof (ipa6_conn_x_t)); 3751 ac6 = &((ipa6_conn_x_t *) 3752 addrp)->ac6x_conn; 3753 } 3754 icmp->icmp_v6src = ac6->ac6_laddr; 3755 (void) icmp_build_hdrs(q, icmp); 3756 } 3757 } 3758 mp1 = mp1->b_cont; 3759 } 3760 /* 3761 * Look for one or more appended ACK message added by 3762 * icmp_connect or icmp_disconnect. 3763 * If none found just send up the T_BIND_ACK. 3764 * icmp_connect has appended a T_OK_ACK and a 3765 * T_CONN_CON. 3766 * icmp_disconnect has appended a T_OK_ACK. 3767 */ 3768 if (mp1 != NULL) { 3769 if (mp->b_cont == mp1) 3770 mp->b_cont = NULL; 3771 else { 3772 ASSERT(mp->b_cont->b_cont == mp1); 3773 mp->b_cont->b_cont = NULL; 3774 } 3775 freemsg(mp); 3776 mp = mp1; 3777 while (mp != NULL) { 3778 mp1 = mp->b_cont; 3779 mp->b_cont = NULL; 3780 putnext(q, mp); 3781 mp = mp1; 3782 } 3783 return; 3784 } 3785 freemsg(mp->b_cont); 3786 mp->b_cont = NULL; 3787 putnext(q, mp); 3788 } 3789 3790 /* 3791 * return SNMP stuff in buffer in mpdata 3792 */ 3793 static int 3794 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 3795 { 3796 mblk_t *mpdata; 3797 struct opthdr *optp; 3798 3799 if (mpctl == NULL || 3800 (mpdata = mpctl->b_cont) == NULL) { 3801 return (0); 3802 } 3803 3804 /* fixed length structure for IPv4 and IPv6 counters */ 3805 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 3806 optp->level = EXPER_RAWIP; 3807 optp->name = 0; 3808 (void) snmp_append_data(mpdata, (char *)&rawip_mib, sizeof (rawip_mib)); 3809 optp->len = msgdsize(mpdata); 3810 qreply(q, mpctl); 3811 3812 return (1); 3813 } 3814 3815 /* 3816 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 3817 * TODO: If this ever actually tries to set anything, it needs to be 3818 * to do the appropriate locking. 3819 */ 3820 /* ARGSUSED */ 3821 static int 3822 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 3823 uchar_t *ptr, int len) 3824 { 3825 switch (level) { 3826 case EXPER_RAWIP: 3827 return (0); 3828 default: 3829 return (1); 3830 } 3831 } 3832 3833 /* Report for ndd "icmp_status" */ 3834 /* ARGSUSED */ 3835 static int 3836 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3837 { 3838 IDP idp; 3839 icmp_t *icmp; 3840 char *state; 3841 char laddrbuf[INET6_ADDRSTRLEN]; 3842 char faddrbuf[INET6_ADDRSTRLEN]; 3843 3844 (void) mi_mpprintf(mp, 3845 "RAWIP " MI_COL_HDRPAD_STR 3846 /* 01234567[89ABCDEF] */ 3847 " src addr dest addr state"); 3848 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 3849 3850 3851 for (idp = mi_first_ptr(&icmp_g_head); 3852 (icmp = (icmp_t *)idp) != NULL; 3853 idp = mi_next_ptr(&icmp_g_head, idp)) { 3854 if (icmp->icmp_state == TS_UNBND) 3855 state = "UNBOUND"; 3856 else if (icmp->icmp_state == TS_IDLE) 3857 state = "IDLE"; 3858 else if (icmp->icmp_state == TS_DATA_XFER) 3859 state = "CONNECTED"; 3860 else 3861 state = "UnkState"; 3862 3863 (void) mi_mpprintf(mp, 3864 MI_COL_PTRFMT_STR "%s %s %s", 3865 (void *)icmp, 3866 inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf, 3867 sizeof (faddrbuf)), 3868 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 3869 sizeof (laddrbuf)), 3870 state); 3871 } 3872 return (0); 3873 } 3874 3875 /* 3876 * This routine creates a T_UDERROR_IND message and passes it upstream. 3877 * The address and options are copied from the T_UNITDATA_REQ message 3878 * passed in mp. This message is freed. 3879 */ 3880 static void 3881 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 3882 { 3883 mblk_t *mp1; 3884 uchar_t *rptr = mp->b_rptr; 3885 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 3886 3887 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 3888 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 3889 tudr->OPT_length, err); 3890 if (mp1) 3891 qreply(q, mp1); 3892 freemsg(mp); 3893 } 3894 3895 /* 3896 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 3897 * After some error checking, the message is passed downstream to ip. 3898 */ 3899 static void 3900 icmp_unbind(queue_t *q, mblk_t *mp) 3901 { 3902 icmp_t *icmp = (icmp_t *)q->q_ptr; 3903 3904 /* If a bind has not been done, we can't unbind. */ 3905 if (icmp->icmp_state == TS_UNBND) { 3906 icmp_err_ack(q, mp, TOUTSTATE, 0); 3907 return; 3908 } 3909 V6_SET_ZERO(icmp->icmp_v6src); 3910 V6_SET_ZERO(icmp->icmp_bound_v6src); 3911 icmp->icmp_state = TS_UNBND; 3912 3913 if (icmp->icmp_family == AF_INET6) { 3914 int error; 3915 3916 /* Rebuild the header template */ 3917 error = icmp_build_hdrs(q, icmp); 3918 if (error != 0) { 3919 icmp_err_ack(q, mp, TSYSERR, error); 3920 return; 3921 } 3922 } 3923 /* Pass the unbind to IP. */ 3924 putnext(q, mp); 3925 } 3926 3927 /* 3928 * Process IPv4 packets that already include an IP header. 3929 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 3930 * IPPROTO_IGMP). 3931 */ 3932 static void 3933 icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop, 3934 boolean_t use_putnext) 3935 { 3936 ipha_t *ipha; 3937 int ip_hdr_length; 3938 int tp_hdr_len; 3939 mblk_t *mp1; 3940 uint_t pkt_len; 3941 ip_opt_info_t optinfo; 3942 3943 optinfo.ip_opt_flags = 0; 3944 optinfo.ip_opt_ill_index = 0; 3945 ipha = (ipha_t *)mp->b_rptr; 3946 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 3947 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 3948 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 3949 BUMP_MIB(&rawip_mib, rawipOutErrors); 3950 freemsg(mp); 3951 return; 3952 } 3953 ipha = (ipha_t *)mp->b_rptr; 3954 } 3955 ipha->ipha_version_and_hdr_length = 3956 (IP_VERSION<<4) | (ip_hdr_length>>2); 3957 3958 /* 3959 * For the socket of SOCK_RAW type, the checksum is provided in the 3960 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 3961 * tell IP that the application has sent a complete IP header and not 3962 * to compute the transport checksum nor change the DF flag. 3963 */ 3964 ipha->ipha_ident = IP_HDR_INCLUDED; 3965 ipha->ipha_hdr_checksum = 0; 3966 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 3967 /* Insert options if any */ 3968 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 3969 /* 3970 * Put the IP header plus any transport header that is 3971 * checksumed by ip_wput into the first mblk. (ip_wput assumes 3972 * that at least the checksum field is in the first mblk.) 3973 */ 3974 switch (ipha->ipha_protocol) { 3975 case IPPROTO_UDP: 3976 tp_hdr_len = 8; 3977 break; 3978 case IPPROTO_TCP: 3979 tp_hdr_len = 20; 3980 break; 3981 default: 3982 tp_hdr_len = 0; 3983 break; 3984 } 3985 /* 3986 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 3987 * tp_hdr_len bytes will be in a single mblk. 3988 */ 3989 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 3990 tp_hdr_len)) { 3991 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 3992 tp_hdr_len)) { 3993 BUMP_MIB(&rawip_mib, rawipOutErrors); 3994 freemsg(mp); 3995 return; 3996 } 3997 ipha = (ipha_t *)mp->b_rptr; 3998 } 3999 4000 /* 4001 * if the length is larger then the max allowed IP packet, 4002 * then send an error and abort the processing. 4003 */ 4004 pkt_len = ntohs(ipha->ipha_length) 4005 + icmp->icmp_ip_snd_options_len; 4006 if (pkt_len > IP_MAXPACKET) { 4007 icmp_ud_err(q, mp, EMSGSIZE); 4008 return; 4009 } 4010 if (!(mp1 = allocb(ip_hdr_length + icmp_wroff_extra + 4011 tp_hdr_len, BPRI_LO))) { 4012 icmp_ud_err(q, mp, ENOMEM); 4013 return; 4014 } 4015 mp1->b_rptr += icmp_wroff_extra; 4016 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4017 4018 ipha->ipha_length = htons((uint16_t)pkt_len); 4019 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4020 4021 /* Copy transport header if any */ 4022 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4023 mp1->b_wptr += tp_hdr_len; 4024 4025 /* Add options */ 4026 ipha = (ipha_t *)mp1->b_rptr; 4027 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 4028 icmp->icmp_ip_snd_options_len); 4029 4030 /* Drop IP header and transport header from original */ 4031 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4032 4033 mp1->b_cont = mp; 4034 mp = mp1; 4035 /* 4036 * Massage source route putting first source 4037 * route in ipha_dst. 4038 */ 4039 (void) ip_massage_options(ipha); 4040 } 4041 4042 if (pktinfop != NULL) { 4043 /* 4044 * Over write the source address provided in the header 4045 */ 4046 if (pktinfop->ip4_addr != INADDR_ANY) { 4047 ipha->ipha_src = pktinfop->ip4_addr; 4048 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4049 ASSERT(use_putnext == B_FALSE); 4050 } 4051 4052 if (pktinfop->ip4_ill_index != 0) { 4053 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4054 ASSERT(use_putnext == B_FALSE); 4055 } 4056 } 4057 4058 mblk_setcred(mp, icmp->icmp_credp); 4059 if (use_putnext) { 4060 putnext(q, mp); 4061 } else { 4062 ip_output_options(Q_TO_CONN(q->q_next), mp, q->q_next, IP_WPUT, 4063 &optinfo); 4064 } 4065 } 4066 4067 static boolean_t 4068 icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4069 { 4070 int err; 4071 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4072 4073 err = tsol_compute_label(DB_CREDDEF(mp, icmp->icmp_credp), dst, 4074 opt_storage, icmp->icmp_mac_exempt); 4075 if (err == 0) { 4076 err = tsol_update_options(&icmp->icmp_ip_snd_options, 4077 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4078 opt_storage); 4079 } 4080 if (err != 0) { 4081 BUMP_MIB(&rawip_mib, rawipOutErrors); 4082 DTRACE_PROBE4( 4083 tx__ip__log__drop__updatelabel__icmp, 4084 char *, "queue(1) failed to update options(2) on mp(3)", 4085 queue_t *, q, char *, opt_storage, mblk_t *, mp); 4086 icmp_ud_err(q, mp, err); 4087 return (B_FALSE); 4088 } 4089 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4090 return (B_TRUE); 4091 } 4092 4093 /* 4094 * This routine handles all messages passed downstream. It either 4095 * consumes the message or passes it downstream; it never queues a 4096 * a message. 4097 */ 4098 static void 4099 icmp_wput(queue_t *q, mblk_t *mp) 4100 { 4101 uchar_t *rptr = mp->b_rptr; 4102 ipha_t *ipha; 4103 mblk_t *mp1; 4104 int ip_hdr_length; 4105 #define tudr ((struct T_unitdata_req *)rptr) 4106 size_t ip_len; 4107 icmp_t *icmp; 4108 sin6_t *sin6; 4109 sin_t *sin; 4110 ipaddr_t v4dst; 4111 ip4_pkt_t pktinfo; 4112 ip4_pkt_t *pktinfop = &pktinfo; 4113 ip_opt_info_t optinfo; 4114 queue_t *ip_wq; 4115 boolean_t use_putnext = B_TRUE; 4116 4117 icmp = (icmp_t *)q->q_ptr; 4118 if (icmp->icmp_restricted) { 4119 icmp_wput_restricted(q, mp); 4120 return; 4121 } 4122 4123 switch (mp->b_datap->db_type) { 4124 case M_DATA: 4125 if (icmp->icmp_hdrincl) { 4126 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4127 ipha = (ipha_t *)mp->b_rptr; 4128 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4129 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4130 BUMP_MIB(&rawip_mib, rawipOutErrors); 4131 freemsg(mp); 4132 return; 4133 } 4134 ipha = (ipha_t *)mp->b_rptr; 4135 } 4136 /* 4137 * If this connection was used for v6 (inconceivable!) 4138 * or if we have a new destination, then it's time to 4139 * figure a new label. 4140 */ 4141 if (is_system_labeled() && 4142 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4143 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4144 ipha->ipha_dst) && 4145 !icmp_update_label(q, icmp, mp, ipha->ipha_dst)) { 4146 return; 4147 } 4148 icmp_wput_hdrincl(q, mp, icmp, NULL, use_putnext); 4149 return; 4150 } 4151 freemsg(mp); 4152 return; 4153 case M_PROTO: 4154 case M_PCPROTO: 4155 ip_len = mp->b_wptr - rptr; 4156 if (ip_len >= sizeof (struct T_unitdata_req)) { 4157 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4158 if (((union T_primitives *)rptr)->type 4159 == T_UNITDATA_REQ) 4160 break; 4161 } 4162 /* FALLTHRU */ 4163 default: 4164 icmp_wput_other(q, mp); 4165 return; 4166 } 4167 4168 /* Handle T_UNITDATA_REQ messages here. */ 4169 4170 4171 4172 if (icmp->icmp_state == TS_UNBND) { 4173 /* If a port has not been bound to the stream, fail. */ 4174 BUMP_MIB(&rawip_mib, rawipOutErrors); 4175 icmp_ud_err(q, mp, EPROTO); 4176 return; 4177 } 4178 mp1 = mp->b_cont; 4179 if (mp1 == NULL) { 4180 BUMP_MIB(&rawip_mib, rawipOutErrors); 4181 icmp_ud_err(q, mp, EPROTO); 4182 return; 4183 } 4184 4185 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4186 BUMP_MIB(&rawip_mib, rawipOutErrors); 4187 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4188 return; 4189 } 4190 4191 switch (icmp->icmp_family) { 4192 case AF_INET6: 4193 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4194 if (!OK_32PTR((char *)sin6) || 4195 tudr->DEST_length != sizeof (sin6_t) || 4196 sin6->sin6_family != AF_INET6) { 4197 BUMP_MIB(&rawip_mib, rawipOutErrors); 4198 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4199 return; 4200 } 4201 4202 /* No support for mapped addresses on raw sockets */ 4203 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4204 BUMP_MIB(&rawip_mib, rawipOutErrors); 4205 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4206 return; 4207 } 4208 4209 /* 4210 * Destination is a native IPv6 address. 4211 * Send out an IPv6 format packet. 4212 */ 4213 icmp_wput_ipv6(q, mp, sin6, tudr->OPT_length); 4214 return; 4215 4216 case AF_INET: 4217 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4218 if (!OK_32PTR((char *)sin) || 4219 tudr->DEST_length != sizeof (sin_t) || 4220 sin->sin_family != AF_INET) { 4221 BUMP_MIB(&rawip_mib, rawipOutErrors); 4222 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4223 return; 4224 } 4225 /* Extract and ipaddr */ 4226 v4dst = sin->sin_addr.s_addr; 4227 break; 4228 4229 default: 4230 ASSERT(0); 4231 } 4232 4233 pktinfop->ip4_ill_index = 0; 4234 pktinfop->ip4_addr = INADDR_ANY; 4235 optinfo.ip_opt_flags = 0; 4236 optinfo.ip_opt_ill_index = 0; 4237 4238 4239 /* 4240 * If options passed in, feed it for verification and handling 4241 */ 4242 if (tudr->OPT_length != 0) { 4243 int error; 4244 4245 error = 0; 4246 if (icmp_unitdata_opt_process(q, mp, &error, 4247 (void *)pktinfop) < 0) { 4248 /* failure */ 4249 BUMP_MIB(&rawip_mib, rawipOutErrors); 4250 icmp_ud_err(q, mp, error); 4251 return; 4252 } 4253 ASSERT(error == 0); 4254 /* 4255 * Note: Success in processing options. 4256 * mp option buffer represented by 4257 * OPT_length/offset now potentially modified 4258 * and contain option setting results 4259 */ 4260 4261 if (pktinfop->ip4_ill_index != 0 || 4262 pktinfop->ip4_addr != INADDR_ANY) { 4263 /* 4264 * PKTINFO option is supported only when ICMP is 4265 * over IP. 4266 */ 4267 ip_wq = WR(q)->q_next; 4268 if (NOT_OVER_IP(ip_wq)) { 4269 icmp_ud_err(q, mp, EINVAL); 4270 return; 4271 } 4272 use_putnext = B_FALSE; 4273 } 4274 } 4275 4276 if (v4dst == INADDR_ANY) 4277 v4dst = htonl(INADDR_LOOPBACK); 4278 4279 /* Check if our saved options are valid; update if not */ 4280 if (is_system_labeled() && 4281 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4282 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst) && 4283 !icmp_update_label(q, icmp, mp, v4dst)) { 4284 return; 4285 } 4286 4287 /* Protocol 255 contains full IP headers */ 4288 if (icmp->icmp_hdrincl) { 4289 freeb(mp); 4290 icmp_wput_hdrincl(q, mp1, icmp, pktinfop, use_putnext); 4291 return; 4292 } 4293 4294 4295 /* Add an IP header */ 4296 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4297 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 4298 if ((uchar_t *)ipha < mp1->b_datap->db_base || 4299 mp1->b_datap->db_ref != 1 || 4300 !OK_32PTR(ipha)) { 4301 if (!(mp1 = allocb(ip_hdr_length + icmp_wroff_extra, 4302 BPRI_LO))) { 4303 BUMP_MIB(&rawip_mib, rawipOutErrors); 4304 icmp_ud_err(q, mp, ENOMEM); 4305 return; 4306 } 4307 mp1->b_cont = mp->b_cont; 4308 ipha = (ipha_t *)mp1->b_datap->db_lim; 4309 mp1->b_wptr = (uchar_t *)ipha; 4310 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4311 } 4312 #ifdef _BIG_ENDIAN 4313 /* Set version, header length, and tos */ 4314 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4315 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4316 icmp->icmp_type_of_service); 4317 /* Set ttl and protocol */ 4318 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4319 #else 4320 /* Set version, header length, and tos */ 4321 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4322 ((icmp->icmp_type_of_service << 8) | 4323 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4324 /* Set ttl and protocol */ 4325 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4326 #endif 4327 if (pktinfop->ip4_addr != INADDR_ANY) { 4328 ASSERT(use_putnext == B_FALSE); 4329 ipha->ipha_src = pktinfop->ip4_addr; 4330 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4331 } else { 4332 4333 /* 4334 * Copy our address into the packet. If this is zero, 4335 * ip will fill in the real source address. 4336 */ 4337 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4338 } 4339 4340 ipha->ipha_fragment_offset_and_flags = 0; 4341 4342 if (pktinfop->ip4_ill_index != 0) { 4343 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4344 ASSERT(use_putnext == B_FALSE); 4345 } 4346 4347 4348 /* 4349 * For the socket of SOCK_RAW type, the checksum is provided in the 4350 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4351 * tell IP that the application has sent a complete IP header and not 4352 * to compute the transport checksum nor change the DF flag. 4353 */ 4354 ipha->ipha_ident = IP_HDR_INCLUDED; 4355 4356 /* Finish common formatting of the packet. */ 4357 mp1->b_rptr = (uchar_t *)ipha; 4358 4359 ip_len = mp1->b_wptr - (uchar_t *)ipha; 4360 if (mp1->b_cont != NULL) 4361 ip_len += msgdsize(mp1->b_cont); 4362 4363 /* 4364 * Set the length into the IP header. 4365 * If the length is greater than the maximum allowed by IP, 4366 * then free the message and return. Do not try and send it 4367 * as this can cause problems in layers below. 4368 */ 4369 if (ip_len > IP_MAXPACKET) { 4370 BUMP_MIB(&rawip_mib, rawipOutErrors); 4371 icmp_ud_err(q, mp, EMSGSIZE); 4372 return; 4373 } 4374 ipha->ipha_length = htons((uint16_t)ip_len); 4375 /* 4376 * Copy in the destination address from the T_UNITDATA 4377 * request 4378 */ 4379 ipha->ipha_dst = v4dst; 4380 4381 /* 4382 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4383 */ 4384 if (CLASSD(v4dst)) 4385 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4386 4387 /* Copy in options if any */ 4388 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4389 bcopy(icmp->icmp_ip_snd_options, 4390 &ipha[1], icmp->icmp_ip_snd_options_len); 4391 /* 4392 * Massage source route putting first source route in ipha_dst. 4393 * Ignore the destination in the T_unitdata_req. 4394 */ 4395 (void) ip_massage_options(ipha); 4396 } 4397 4398 freeb(mp); 4399 BUMP_MIB(&rawip_mib, rawipOutDatagrams); 4400 mblk_setcred(mp1, icmp->icmp_credp); 4401 if (use_putnext) { 4402 putnext(q, mp1); 4403 } else { 4404 ip_output_options(Q_TO_CONN(q->q_next), mp1, q->q_next, IP_WPUT, 4405 &optinfo); 4406 } 4407 #undef ipha 4408 #undef tudr 4409 } 4410 4411 static boolean_t 4412 icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4413 { 4414 int err; 4415 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4416 4417 err = tsol_compute_label_v6(DB_CREDDEF(mp, icmp->icmp_credp), dst, 4418 opt_storage, icmp->icmp_mac_exempt); 4419 if (err == 0) { 4420 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4421 &icmp->icmp_label_len_v6, opt_storage); 4422 } 4423 if (err != 0) { 4424 BUMP_MIB(&rawip_mib, rawipOutErrors); 4425 DTRACE_PROBE4( 4426 tx__ip__log__drop__updatelabel__icmp6, 4427 char *, "queue(1) failed to update options(2) on mp(3)", 4428 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4429 icmp_ud_err(wq, mp, err); 4430 return (B_FALSE); 4431 } 4432 4433 icmp->icmp_v6lastdst = *dst; 4434 return (B_TRUE); 4435 } 4436 4437 /* 4438 * icmp_wput_ipv6(): 4439 * Assumes that icmp_wput did some sanity checking on the destination 4440 * address, but that the label may not yet be correct. 4441 */ 4442 void 4443 icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) 4444 { 4445 ip6_t *ip6h; 4446 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 4447 mblk_t *mp1; 4448 int ip_hdr_len = IPV6_HDR_LEN; 4449 size_t ip_len; 4450 icmp_t *icmp; 4451 ip6_pkt_t ipp_s; /* For ancillary data options */ 4452 ip6_pkt_t *ipp = &ipp_s; 4453 ip6_pkt_t *tipp; 4454 uint32_t csum = 0; 4455 uint_t ignore = 0; 4456 uint_t option_exists = 0, is_sticky = 0; 4457 uint8_t *cp; 4458 uint8_t *nxthdr_ptr; 4459 in6_addr_t ip6_dst; 4460 4461 icmp = (icmp_t *)q->q_ptr; 4462 4463 /* 4464 * If the local address is a mapped address return 4465 * an error. 4466 * It would be possible to send an IPv6 packet but the 4467 * response would never make it back to the application 4468 * since it is bound to a mapped address. 4469 */ 4470 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4471 BUMP_MIB(&rawip_mib, rawipOutErrors); 4472 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4473 return; 4474 } 4475 4476 ipp->ipp_fields = 0; 4477 ipp->ipp_sticky_ignored = 0; 4478 4479 /* 4480 * If TPI options passed in, feed it for verification and handling 4481 */ 4482 if (tudr_optlen != 0) { 4483 int error; 4484 4485 if (icmp_unitdata_opt_process(q, mp, &error, 4486 (void *)ipp) < 0) { 4487 /* failure */ 4488 BUMP_MIB(&rawip_mib, rawipOutErrors); 4489 icmp_ud_err(q, mp, error); 4490 return; 4491 } 4492 ignore = ipp->ipp_sticky_ignored; 4493 ASSERT(error == 0); 4494 } 4495 4496 if (sin6->sin6_scope_id != 0 && 4497 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4498 /* 4499 * IPPF_SCOPE_ID is special. It's neither a sticky 4500 * option nor ancillary data. It needs to be 4501 * explicitly set in options_exists. 4502 */ 4503 option_exists |= IPPF_SCOPE_ID; 4504 } 4505 4506 /* 4507 * Compute the destination address 4508 */ 4509 ip6_dst = sin6->sin6_addr; 4510 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4511 ip6_dst = ipv6_loopback; 4512 4513 /* 4514 * If we're not going to the same destination as last time, then 4515 * recompute the label required. This is done in a separate routine to 4516 * avoid blowing up our stack here. 4517 */ 4518 if (is_system_labeled() && 4519 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst) && 4520 !icmp_update_label_v6(q, icmp, mp, &ip6_dst)) { 4521 return; 4522 } 4523 4524 /* 4525 * If there's a security label here, then we ignore any options the 4526 * user may try to set. We keep the peer's label as a hidden sticky 4527 * option. 4528 */ 4529 if (icmp->icmp_label_len_v6 > 0) { 4530 ignore &= ~IPPF_HOPOPTS; 4531 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4532 } 4533 4534 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4535 (ipp->ipp_fields == 0)) { 4536 /* No sticky options nor ancillary data. */ 4537 goto no_options; 4538 } 4539 4540 /* 4541 * Go through the options figuring out where each is going to 4542 * come from and build two masks. The first mask indicates if 4543 * the option exists at all. The second mask indicates if the 4544 * option is sticky or ancillary. 4545 */ 4546 if (!(ignore & IPPF_HOPOPTS)) { 4547 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4548 option_exists |= IPPF_HOPOPTS; 4549 ip_hdr_len += ipp->ipp_hopoptslen; 4550 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4551 option_exists |= IPPF_HOPOPTS; 4552 is_sticky |= IPPF_HOPOPTS; 4553 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4554 } 4555 } 4556 4557 if (!(ignore & IPPF_RTHDR)) { 4558 if (ipp->ipp_fields & IPPF_RTHDR) { 4559 option_exists |= IPPF_RTHDR; 4560 ip_hdr_len += ipp->ipp_rthdrlen; 4561 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4562 option_exists |= IPPF_RTHDR; 4563 is_sticky |= IPPF_RTHDR; 4564 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4565 } 4566 } 4567 4568 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4569 /* 4570 * Need to have a router header to use these. 4571 */ 4572 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4573 option_exists |= IPPF_RTDSTOPTS; 4574 ip_hdr_len += ipp->ipp_rtdstoptslen; 4575 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4576 option_exists |= IPPF_RTDSTOPTS; 4577 is_sticky |= IPPF_RTDSTOPTS; 4578 ip_hdr_len += 4579 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4580 } 4581 } 4582 4583 if (!(ignore & IPPF_DSTOPTS)) { 4584 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4585 option_exists |= IPPF_DSTOPTS; 4586 ip_hdr_len += ipp->ipp_dstoptslen; 4587 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4588 option_exists |= IPPF_DSTOPTS; 4589 is_sticky |= IPPF_DSTOPTS; 4590 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4591 } 4592 } 4593 4594 if (!(ignore & IPPF_IFINDEX)) { 4595 if (ipp->ipp_fields & IPPF_IFINDEX) { 4596 option_exists |= IPPF_IFINDEX; 4597 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4598 option_exists |= IPPF_IFINDEX; 4599 is_sticky |= IPPF_IFINDEX; 4600 } 4601 } 4602 4603 if (!(ignore & IPPF_ADDR)) { 4604 if (ipp->ipp_fields & IPPF_ADDR) { 4605 option_exists |= IPPF_ADDR; 4606 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4607 option_exists |= IPPF_ADDR; 4608 is_sticky |= IPPF_ADDR; 4609 } 4610 } 4611 4612 if (!(ignore & IPPF_DONTFRAG)) { 4613 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4614 option_exists |= IPPF_DONTFRAG; 4615 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4616 option_exists |= IPPF_DONTFRAG; 4617 is_sticky |= IPPF_DONTFRAG; 4618 } 4619 } 4620 4621 if (!(ignore & IPPF_USE_MIN_MTU)) { 4622 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4623 option_exists |= IPPF_USE_MIN_MTU; 4624 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4625 IPPF_USE_MIN_MTU) { 4626 option_exists |= IPPF_USE_MIN_MTU; 4627 is_sticky |= IPPF_USE_MIN_MTU; 4628 } 4629 } 4630 4631 if (!(ignore & IPPF_NEXTHOP)) { 4632 if (ipp->ipp_fields & IPPF_NEXTHOP) { 4633 option_exists |= IPPF_NEXTHOP; 4634 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) { 4635 option_exists |= IPPF_NEXTHOP; 4636 is_sticky |= IPPF_NEXTHOP; 4637 } 4638 } 4639 4640 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 4641 option_exists |= IPPF_HOPLIMIT; 4642 /* IPV6_HOPLIMIT can never be sticky */ 4643 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 4644 4645 if (!(ignore & IPPF_UNICAST_HOPS) && 4646 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 4647 option_exists |= IPPF_UNICAST_HOPS; 4648 is_sticky |= IPPF_UNICAST_HOPS; 4649 } 4650 4651 if (!(ignore & IPPF_MULTICAST_HOPS) && 4652 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 4653 option_exists |= IPPF_MULTICAST_HOPS; 4654 is_sticky |= IPPF_MULTICAST_HOPS; 4655 } 4656 4657 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 4658 /* This is a sticky socket option only */ 4659 option_exists |= IPPF_NO_CKSUM; 4660 is_sticky |= IPPF_NO_CKSUM; 4661 } 4662 4663 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 4664 /* This is a sticky socket option only */ 4665 option_exists |= IPPF_RAW_CKSUM; 4666 is_sticky |= IPPF_RAW_CKSUM; 4667 } 4668 4669 if (!(ignore & IPPF_TCLASS)) { 4670 if (ipp->ipp_fields & IPPF_TCLASS) { 4671 option_exists |= IPPF_TCLASS; 4672 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 4673 option_exists |= IPPF_TCLASS; 4674 is_sticky |= IPPF_TCLASS; 4675 } 4676 } 4677 4678 no_options: 4679 4680 /* 4681 * If any options carried in the ip6i_t were specified, we 4682 * need to account for the ip6i_t in the data we'll be sending 4683 * down. 4684 */ 4685 if (option_exists & IPPF_HAS_IP6I) 4686 ip_hdr_len += sizeof (ip6i_t); 4687 4688 /* check/fix buffer config, setup pointers into it */ 4689 mp1 = mp->b_cont; 4690 ip6h = (ip6_t *)&mp1->b_rptr[-ip_hdr_len]; 4691 if ((mp1->b_datap->db_ref != 1) || 4692 ((unsigned char *)ip6h < mp1->b_datap->db_base) || 4693 !OK_32PTR(ip6h)) { 4694 /* Try to get everything in a single mblk next time */ 4695 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 4696 icmp->icmp_max_hdr_len = ip_hdr_len; 4697 (void) mi_set_sth_wroff(RD(q), 4698 icmp->icmp_max_hdr_len + icmp_wroff_extra); 4699 } 4700 mp1 = allocb(ip_hdr_len + icmp_wroff_extra, BPRI_LO); 4701 if (!mp1) { 4702 BUMP_MIB(&rawip_mib, rawipOutErrors); 4703 icmp_ud_err(q, mp, ENOMEM); 4704 return; 4705 } 4706 mp1->b_cont = mp->b_cont; 4707 mp1->b_wptr = mp1->b_datap->db_lim; 4708 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 4709 } 4710 mp1->b_rptr = (unsigned char *)ip6h; 4711 ip6i = (ip6i_t *)ip6h; 4712 4713 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &icmp->icmp_sticky_ipp : ipp) 4714 if (option_exists & IPPF_HAS_IP6I) { 4715 ip6h = (ip6_t *)&ip6i[1]; 4716 ip6i->ip6i_flags = 0; 4717 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4718 4719 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 4720 if (option_exists & IPPF_SCOPE_ID) { 4721 ip6i->ip6i_flags |= IP6I_IFINDEX; 4722 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 4723 } else if (option_exists & IPPF_IFINDEX) { 4724 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 4725 ASSERT(tipp->ipp_ifindex != 0); 4726 ip6i->ip6i_flags |= IP6I_IFINDEX; 4727 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 4728 } 4729 4730 if (option_exists & IPPF_RAW_CKSUM) { 4731 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 4732 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 4733 } 4734 4735 if (option_exists & IPPF_NO_CKSUM) { 4736 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 4737 } 4738 4739 if (option_exists & IPPF_ADDR) { 4740 /* 4741 * Enable per-packet source address verification if 4742 * IPV6_PKTINFO specified the source address. 4743 * ip6_src is set in the transport's _wput function. 4744 */ 4745 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 4746 } 4747 4748 if (option_exists & IPPF_DONTFRAG) { 4749 ip6i->ip6i_flags |= IP6I_DONTFRAG; 4750 } 4751 4752 if (option_exists & IPPF_USE_MIN_MTU) { 4753 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 4754 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 4755 } 4756 4757 if (option_exists & IPPF_NEXTHOP) { 4758 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 4759 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 4760 ip6i->ip6i_flags |= IP6I_NEXTHOP; 4761 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 4762 } 4763 4764 /* 4765 * tell IP this is an ip6i_t private header 4766 */ 4767 ip6i->ip6i_nxt = IPPROTO_RAW; 4768 } 4769 4770 /* Initialize IPv6 header */ 4771 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4772 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 4773 4774 /* Set the hoplimit of the outgoing packet. */ 4775 if (option_exists & IPPF_HOPLIMIT) { 4776 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 4777 ip6h->ip6_hops = ipp->ipp_hoplimit; 4778 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4779 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 4780 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 4781 if (option_exists & IPPF_MULTICAST_HOPS) 4782 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4783 } else { 4784 ip6h->ip6_hops = icmp->icmp_ttl; 4785 if (option_exists & IPPF_UNICAST_HOPS) 4786 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4787 } 4788 4789 if (option_exists & IPPF_ADDR) { 4790 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 4791 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 4792 ip6h->ip6_src = tipp->ipp_addr; 4793 } else { 4794 /* 4795 * The source address was not set using IPV6_PKTINFO. 4796 * First look at the bound source. 4797 * If unspecified fallback to __sin6_src_id. 4798 */ 4799 ip6h->ip6_src = icmp->icmp_v6src; 4800 if (sin6->__sin6_src_id != 0 && 4801 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4802 ip_srcid_find_id(sin6->__sin6_src_id, 4803 &ip6h->ip6_src, icmp->icmp_zoneid); 4804 } 4805 } 4806 4807 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 4808 cp = (uint8_t *)&ip6h[1]; 4809 4810 /* 4811 * Here's where we have to start stringing together 4812 * any extension headers in the right order: 4813 * Hop-by-hop, destination, routing, and final destination opts. 4814 */ 4815 if (option_exists & IPPF_HOPOPTS) { 4816 /* Hop-by-hop options */ 4817 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4818 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 4819 4820 *nxthdr_ptr = IPPROTO_HOPOPTS; 4821 nxthdr_ptr = &hbh->ip6h_nxt; 4822 4823 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 4824 cp += tipp->ipp_hopoptslen; 4825 } 4826 /* 4827 * En-route destination options 4828 * Only do them if there's a routing header as well 4829 */ 4830 if (option_exists & IPPF_RTDSTOPTS) { 4831 ip6_dest_t *dst = (ip6_dest_t *)cp; 4832 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 4833 4834 *nxthdr_ptr = IPPROTO_DSTOPTS; 4835 nxthdr_ptr = &dst->ip6d_nxt; 4836 4837 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 4838 cp += tipp->ipp_rtdstoptslen; 4839 } 4840 /* 4841 * Routing header next 4842 */ 4843 if (option_exists & IPPF_RTHDR) { 4844 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 4845 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 4846 4847 *nxthdr_ptr = IPPROTO_ROUTING; 4848 nxthdr_ptr = &rt->ip6r_nxt; 4849 4850 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 4851 cp += tipp->ipp_rthdrlen; 4852 } 4853 /* 4854 * Do ultimate destination options 4855 */ 4856 if (option_exists & IPPF_DSTOPTS) { 4857 ip6_dest_t *dest = (ip6_dest_t *)cp; 4858 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 4859 4860 *nxthdr_ptr = IPPROTO_DSTOPTS; 4861 nxthdr_ptr = &dest->ip6d_nxt; 4862 4863 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 4864 cp += tipp->ipp_dstoptslen; 4865 } 4866 4867 /* 4868 * Now set the last header pointer to the proto passed in 4869 */ 4870 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 4871 *nxthdr_ptr = icmp->icmp_proto; 4872 4873 /* 4874 * Copy in the destination address 4875 */ 4876 ip6h->ip6_dst = ip6_dst; 4877 4878 ip6h->ip6_vcf = 4879 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 4880 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 4881 4882 if (option_exists & IPPF_TCLASS) { 4883 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 4884 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 4885 tipp->ipp_tclass); 4886 } 4887 if (option_exists & IPPF_RTHDR) { 4888 ip6_rthdr_t *rth; 4889 4890 /* 4891 * Perform any processing needed for source routing. 4892 * We know that all extension headers will be in the same mblk 4893 * as the IPv6 header. 4894 */ 4895 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 4896 if (rth != NULL && rth->ip6r_segleft != 0) { 4897 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 4898 /* 4899 * Drop packet - only support Type 0 routing. 4900 * Notify the application as well. 4901 */ 4902 icmp_ud_err(q, mp, EPROTO); 4903 BUMP_MIB(&rawip_mib, rawipOutErrors); 4904 return; 4905 } 4906 /* 4907 * rth->ip6r_len is twice the number of 4908 * addresses in the header 4909 */ 4910 if (rth->ip6r_len & 0x1) { 4911 icmp_ud_err(q, mp, EPROTO); 4912 BUMP_MIB(&rawip_mib, rawipOutErrors); 4913 return; 4914 } 4915 /* 4916 * Shuffle the routing header and ip6_dst 4917 * addresses, and get the checksum difference 4918 * between the first hop (in ip6_dst) and 4919 * the destination (in the last routing hdr entry). 4920 */ 4921 csum = ip_massage_options_v6(ip6h, rth); 4922 /* 4923 * Verify that the first hop isn't a mapped address. 4924 * Routers along the path need to do this verification 4925 * for subsequent hops. 4926 */ 4927 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 4928 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4929 BUMP_MIB(&rawip_mib, rawipOutErrors); 4930 return; 4931 } 4932 } 4933 } 4934 4935 ip_len = mp1->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 4936 if (mp1->b_cont != NULL) 4937 ip_len += msgdsize(mp1->b_cont); 4938 4939 /* 4940 * Set the length into the IP header. 4941 * If the length is greater than the maximum allowed by IP, 4942 * then free the message and return. Do not try and send it 4943 * as this can cause problems in layers below. 4944 */ 4945 if (ip_len > IP_MAXPACKET) { 4946 BUMP_MIB(&rawip_mib, rawipOutErrors); 4947 icmp_ud_err(q, mp, EMSGSIZE); 4948 return; 4949 } 4950 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 4951 uint_t cksum_off; /* From ip6i == mp1->b_rptr */ 4952 uint16_t *cksum_ptr; 4953 uint_t ext_hdrs_len; 4954 4955 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 4956 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 4957 icmp->icmp_checksum_off == 2); 4958 4959 /* 4960 * We make it easy for IP to include our pseudo header 4961 * by putting our length in uh_checksum, modified (if 4962 * we have a routing header) by the checksum difference 4963 * between the ultimate destination and first hop addresses. 4964 * Note: ICMPv6 must always checksum the packet. 4965 */ 4966 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 4967 if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) { 4968 if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) { 4969 BUMP_MIB(&rawip_mib, rawipOutErrors); 4970 freemsg(mp); 4971 return; 4972 } 4973 ip6i = (ip6i_t *)mp1->b_rptr; 4974 if (ip6i->ip6i_nxt == IPPROTO_RAW) 4975 ip6h = (ip6_t *)&ip6i[1]; 4976 else 4977 ip6h = (ip6_t *)ip6i; 4978 } 4979 /* Add payload length to checksum */ 4980 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 4981 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 4982 csum += htons(ip_len - ext_hdrs_len); 4983 4984 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 4985 csum = (csum & 0xFFFF) + (csum >> 16); 4986 *cksum_ptr = (uint16_t)csum; 4987 } 4988 4989 #ifdef _LITTLE_ENDIAN 4990 ip_len = htons(ip_len); 4991 #endif 4992 ip6h->ip6_plen = (uint16_t)ip_len; 4993 4994 freeb(mp); 4995 4996 /* We're done. Pass the packet to IP */ 4997 BUMP_MIB(&rawip_mib, rawipOutDatagrams); 4998 mblk_setcred(mp1, icmp->icmp_credp); 4999 putnext(q, mp1); 5000 } 5001 5002 static void 5003 icmp_wput_other(queue_t *q, mblk_t *mp) 5004 { 5005 uchar_t *rptr = mp->b_rptr; 5006 struct iocblk *iocp; 5007 #define tudr ((struct T_unitdata_req *)rptr) 5008 icmp_t *icmp; 5009 cred_t *cr; 5010 5011 icmp = (icmp_t *)q->q_ptr; 5012 5013 cr = DB_CREDDEF(mp, icmp->icmp_credp); 5014 5015 switch (mp->b_datap->db_type) { 5016 case M_PROTO: 5017 case M_PCPROTO: 5018 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5019 /* 5020 * If the message does not contain a PRIM_type, 5021 * throw it away. 5022 */ 5023 freemsg(mp); 5024 return; 5025 } 5026 switch (((union T_primitives *)rptr)->type) { 5027 case T_ADDR_REQ: 5028 icmp_addr_req(q, mp); 5029 return; 5030 case O_T_BIND_REQ: 5031 case T_BIND_REQ: 5032 qwriter(q, mp, icmp_bind, PERIM_OUTER); 5033 return; 5034 case T_CONN_REQ: 5035 icmp_connect(q, mp); 5036 return; 5037 case T_CAPABILITY_REQ: 5038 icmp_capability_req(q, mp); 5039 return; 5040 case T_INFO_REQ: 5041 icmp_info_req(q, mp); 5042 return; 5043 case T_UNITDATA_REQ: 5044 /* 5045 * If a T_UNITDATA_REQ gets here, the address must 5046 * be bad. Valid T_UNITDATA_REQs are found above 5047 * and break to below this switch. 5048 */ 5049 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5050 return; 5051 case T_UNBIND_REQ: 5052 icmp_unbind(q, mp); 5053 return; 5054 5055 case T_SVR4_OPTMGMT_REQ: 5056 if (!snmpcom_req(q, mp, icmp_snmp_set, icmp_snmp_get, 5057 cr)) 5058 /* Only IP can return anything meaningful */ 5059 (void) svr4_optcom_req(q, mp, cr, 5060 &icmp_opt_obj); 5061 return; 5062 5063 case T_OPTMGMT_REQ: 5064 /* Only IP can return anything meaningful */ 5065 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj); 5066 return; 5067 5068 case T_DISCON_REQ: 5069 icmp_disconnect(q, mp); 5070 return; 5071 5072 /* The following TPI message is not supported by icmp. */ 5073 case O_T_CONN_RES: 5074 case T_CONN_RES: 5075 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5076 return; 5077 5078 /* The following 3 TPI requests are illegal for icmp. */ 5079 case T_DATA_REQ: 5080 case T_EXDATA_REQ: 5081 case T_ORDREL_REQ: 5082 freemsg(mp); 5083 (void) putctl1(RD(q), M_ERROR, EPROTO); 5084 return; 5085 default: 5086 break; 5087 } 5088 break; 5089 case M_IOCTL: 5090 iocp = (struct iocblk *)mp->b_rptr; 5091 switch (iocp->ioc_cmd) { 5092 case TI_GETPEERNAME: 5093 if (icmp->icmp_state != TS_DATA_XFER) { 5094 /* 5095 * If a default destination address has not 5096 * been associated with the stream, then we 5097 * don't know the peer's name. 5098 */ 5099 iocp->ioc_error = ENOTCONN; 5100 err_ret:; 5101 iocp->ioc_count = 0; 5102 mp->b_datap->db_type = M_IOCACK; 5103 qreply(q, mp); 5104 return; 5105 } 5106 /* FALLTHRU */ 5107 case TI_GETMYNAME: 5108 /* 5109 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5110 * need to copyin the user's strbuf structure. 5111 * Processing will continue in the M_IOCDATA case 5112 * below. 5113 */ 5114 mi_copyin(q, mp, NULL, 5115 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5116 return; 5117 case ND_SET: 5118 /* nd_getset performs the necessary error checking */ 5119 case ND_GET: 5120 if (nd_getset(q, icmp_g_nd, mp)) { 5121 qreply(q, mp); 5122 return; 5123 } 5124 break; 5125 default: 5126 break; 5127 } 5128 break; 5129 case M_IOCDATA: 5130 icmp_wput_iocdata(q, mp); 5131 return; 5132 default: 5133 break; 5134 } 5135 putnext(q, mp); 5136 } 5137 5138 /* 5139 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5140 * messages. 5141 */ 5142 static void 5143 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5144 { 5145 mblk_t *mp1; 5146 STRUCT_HANDLE(strbuf, sb); 5147 icmp_t *icmp; 5148 in6_addr_t v6addr; 5149 ipaddr_t v4addr; 5150 uint32_t flowinfo = 0; 5151 int addrlen; 5152 5153 /* Make sure it is one of ours. */ 5154 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5155 case TI_GETMYNAME: 5156 case TI_GETPEERNAME: 5157 break; 5158 default: 5159 putnext(q, mp); 5160 return; 5161 } 5162 switch (mi_copy_state(q, mp, &mp1)) { 5163 case -1: 5164 return; 5165 case MI_COPY_CASE(MI_COPY_IN, 1): 5166 break; 5167 case MI_COPY_CASE(MI_COPY_OUT, 1): 5168 /* 5169 * The address has been copied out, so now 5170 * copyout the strbuf. 5171 */ 5172 mi_copyout(q, mp); 5173 return; 5174 case MI_COPY_CASE(MI_COPY_OUT, 2): 5175 /* 5176 * The address and strbuf have been copied out. 5177 * We're done, so just acknowledge the original 5178 * M_IOCTL. 5179 */ 5180 mi_copy_done(q, mp, 0); 5181 return; 5182 default: 5183 /* 5184 * Something strange has happened, so acknowledge 5185 * the original M_IOCTL with an EPROTO error. 5186 */ 5187 mi_copy_done(q, mp, EPROTO); 5188 return; 5189 } 5190 /* 5191 * Now we have the strbuf structure for TI_GETMYNAME 5192 * and TI_GETPEERNAME. Next we copyout the requested 5193 * address and then we'll copyout the strbuf. 5194 */ 5195 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5196 (void *)mp1->b_rptr); 5197 icmp = (icmp_t *)q->q_ptr; 5198 if (icmp->icmp_family == AF_INET) 5199 addrlen = sizeof (sin_t); 5200 else 5201 addrlen = sizeof (sin6_t); 5202 5203 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5204 mi_copy_done(q, mp, EINVAL); 5205 return; 5206 } 5207 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5208 case TI_GETMYNAME: 5209 if (icmp->icmp_family == AF_INET) { 5210 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5211 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 5212 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5213 v4addr = V4_PART_OF_V6(icmp->icmp_v6src); 5214 } else { 5215 /* 5216 * INADDR_ANY 5217 * icmp_v6src is not set, we might be bound to 5218 * broadcast/multicast. Use icmp_bound_v6src as 5219 * local address instead (that could 5220 * also still be INADDR_ANY) 5221 */ 5222 v4addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 5223 } 5224 } else { 5225 /* icmp->icmp_family == AF_INET6 */ 5226 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5227 v6addr = icmp->icmp_v6src; 5228 } else { 5229 /* 5230 * UNSPECIFIED 5231 * icmp_v6src is not set, we might be bound to 5232 * broadcast/multicast. Use icmp_bound_v6src as 5233 * local address instead (that could 5234 * also still be UNSPECIFIED) 5235 */ 5236 v6addr = icmp->icmp_bound_v6src; 5237 } 5238 } 5239 break; 5240 case TI_GETPEERNAME: 5241 if (icmp->icmp_family == AF_INET) { 5242 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5243 v4addr = V4_PART_OF_V6(icmp->icmp_v6dst); 5244 } else { 5245 /* icmp->icmp_family == AF_INET6) */ 5246 v6addr = icmp->icmp_v6dst; 5247 flowinfo = icmp->icmp_flowinfo; 5248 } 5249 break; 5250 default: 5251 mi_copy_done(q, mp, EPROTO); 5252 return; 5253 } 5254 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5255 if (!mp1) 5256 return; 5257 5258 if (icmp->icmp_family == AF_INET) { 5259 sin_t *sin; 5260 5261 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 5262 sin = (sin_t *)mp1->b_rptr; 5263 mp1->b_wptr = (uchar_t *)&sin[1]; 5264 *sin = sin_null; 5265 sin->sin_family = AF_INET; 5266 sin->sin_addr.s_addr = v4addr; 5267 } else { 5268 /* icmp->icmp_family == AF_INET6 */ 5269 sin6_t *sin6; 5270 5271 ASSERT(icmp->icmp_family == AF_INET6); 5272 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 5273 sin6 = (sin6_t *)mp1->b_rptr; 5274 mp1->b_wptr = (uchar_t *)&sin6[1]; 5275 *sin6 = sin6_null; 5276 sin6->sin6_family = AF_INET6; 5277 sin6->sin6_flowinfo = flowinfo; 5278 sin6->sin6_addr = v6addr; 5279 } 5280 /* Copy out the address */ 5281 mi_copyout(q, mp); 5282 } 5283 5284 /* 5285 * Only allow MIB requests and M_FLUSHes to pass. 5286 * All other messages are nacked or dropped. 5287 */ 5288 static void 5289 icmp_wput_restricted(queue_t *q, mblk_t *mp) 5290 { 5291 cred_t *cr; 5292 icmp_t *icmp; 5293 5294 switch (DB_TYPE(mp)) { 5295 case M_PROTO: 5296 case M_PCPROTO: 5297 if (MBLKL(mp) < sizeof (t_scalar_t)) { 5298 freemsg(mp); 5299 return; 5300 } 5301 icmp = (icmp_t *)q->q_ptr; 5302 cr = DB_CREDDEF(mp, icmp->icmp_credp); 5303 5304 switch (((union T_primitives *)mp->b_rptr)->type) { 5305 case T_SVR4_OPTMGMT_REQ: 5306 if (!snmpcom_req(q, mp, 5307 icmp_snmp_set, icmp_snmp_get, cr)) 5308 (void) svr4_optcom_req(q, mp, cr, 5309 &icmp_opt_obj); 5310 return; 5311 case T_OPTMGMT_REQ: 5312 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj); 5313 return; 5314 default: 5315 icmp_err_ack(q, mp, TSYSERR, ENOTSUP); 5316 return; 5317 } 5318 /* NOTREACHED */ 5319 case M_IOCTL: 5320 miocnak(q, mp, 0, ENOTSUP); 5321 break; 5322 case M_FLUSH: 5323 putnext(q, mp); 5324 break; 5325 default: 5326 freemsg(mp); 5327 break; 5328 } 5329 } 5330 5331 static int 5332 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5333 void *thisdg_attrs) 5334 { 5335 icmp_t *icmp; 5336 struct T_unitdata_req *udreqp; 5337 int is_absreq_failure; 5338 cred_t *cr; 5339 5340 icmp = (icmp_t *)q->q_ptr; 5341 5342 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5343 *errorp = 0; 5344 5345 cr = DB_CREDDEF(mp, icmp->icmp_credp); 5346 5347 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5348 udreqp->OPT_offset, cr, &icmp_opt_obj, 5349 thisdg_attrs, &is_absreq_failure); 5350 5351 if (*errorp != 0) { 5352 /* 5353 * Note: No special action needed in this 5354 * module for "is_absreq_failure" 5355 */ 5356 return (-1); /* failure */ 5357 } 5358 ASSERT(is_absreq_failure == 0); 5359 return (0); /* success */ 5360 } 5361 5362 void 5363 icmp_ddi_init(void) 5364 { 5365 ICMP6_MAJ = ddi_name_to_major(ICMP6); 5366 icmp_max_optsize = 5367 optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5368 icmp_opt_obj.odb_opt_arr_cnt); 5369 5370 (void) icmp_param_register(icmp_param_arr, A_CNT(icmp_param_arr)); 5371 5372 rawip_kstat_init(); 5373 } 5374 5375 void 5376 icmp_ddi_destroy(void) 5377 { 5378 nd_free(&icmp_g_nd); 5379 5380 rawip_kstat_fini(); 5381 } 5382 5383 static void 5384 rawip_kstat_init(void) { 5385 5386 rawip_named_kstat_t template = { 5387 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5388 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5389 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5390 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5391 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5392 }; 5393 5394 rawip_mibkp = kstat_create("icmp", 0, "rawip", "mib2", 5395 KSTAT_TYPE_NAMED, 5396 NUM_OF_FIELDS(rawip_named_kstat_t), 5397 0); 5398 if (rawip_mibkp == NULL) 5399 return; 5400 5401 bcopy(&template, rawip_mibkp->ks_data, sizeof (template)); 5402 5403 rawip_mibkp->ks_update = rawip_kstat_update; 5404 5405 kstat_install(rawip_mibkp); 5406 } 5407 5408 static void 5409 rawip_kstat_fini(void) { 5410 if (rawip_mibkp) { 5411 kstat_delete(rawip_mibkp); 5412 rawip_mibkp = NULL; 5413 } 5414 } 5415 5416 static int 5417 rawip_kstat_update(kstat_t *kp, int rw) { 5418 rawip_named_kstat_t *rawipkp; 5419 5420 if ((kp == NULL) || (kp->ks_data == NULL)) 5421 return (EIO); 5422 5423 if (rw == KSTAT_WRITE) 5424 return (EACCES); 5425 5426 rawipkp = (rawip_named_kstat_t *)kp->ks_data; 5427 5428 rawipkp->inDatagrams.value.ui32 = rawip_mib.rawipInDatagrams; 5429 rawipkp->inCksumErrs.value.ui32 = rawip_mib.rawipInCksumErrs; 5430 rawipkp->inErrors.value.ui32 = rawip_mib.rawipInErrors; 5431 rawipkp->outDatagrams.value.ui32 = rawip_mib.rawipOutDatagrams; 5432 rawipkp->outErrors.value.ui32 = rawip_mib.rawipOutErrors; 5433 5434 return (0); 5435 } 5436