1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/types.h> 31 #include <sys/stream.h> 32 #include <sys/stropts.h> 33 #include <sys/strlog.h> 34 #include <sys/strsun.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/priv.h> 46 #include <sys/zone.h> 47 #include <sys/time.h> 48 49 #include <sys/socket.h> 50 #include <sys/isa_defs.h> 51 #include <sys/suntpi.h> 52 #include <sys/xti_inet.h> 53 54 #include <net/route.h> 55 #include <net/if.h> 56 57 #include <netinet/in.h> 58 #include <netinet/ip6.h> 59 #include <netinet/icmp6.h> 60 #include <inet/common.h> 61 #include <inet/ip.h> 62 #include <inet/ip6.h> 63 #include <inet/mi.h> 64 #include <inet/nd.h> 65 #include <inet/optcom.h> 66 #include <inet/snmpcom.h> 67 #include <inet/kstatcom.h> 68 #include <inet/rawip_impl.h> 69 70 #include <netinet/ip_mroute.h> 71 #include <inet/tcp.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 #include <inet/ipclassifier.h> 75 76 #include <sys/tsol/label.h> 77 #include <sys/tsol/tnet.h> 78 79 #define ICMP6 "icmp6" 80 major_t ICMP6_MAJ; 81 82 /* 83 * Object to represent database of options to search passed to 84 * {sock,tpi}optcom_req() interface routine to take care of option 85 * management and associated methods. 86 * XXX These and other extern's should really move to a icmp header. 87 */ 88 extern optdb_obj_t icmp_opt_obj; 89 extern uint_t icmp_max_optsize; 90 91 /* 92 * Synchronization notes: 93 * 94 * At all points in this code where exclusive access is required, we 95 * pass a message to a subroutine by invoking qwriter(..., PERIM_OUTER) 96 * which will arrange to call the routine only after all threads have 97 * exited the shared resource. 
98 */ 99 100 /* Named Dispatch Parameter Management Structure */ 101 typedef struct icmpparam_s { 102 uint_t icmp_param_min; 103 uint_t icmp_param_max; 104 uint_t icmp_param_value; 105 char *icmp_param_name; 106 } icmpparam_t; 107 108 static void icmp_addr_req(queue_t *q, mblk_t *mp); 109 static void icmp_bind(queue_t *q, mblk_t *mp); 110 static void icmp_bind_proto(queue_t *q); 111 static int icmp_build_hdrs(queue_t *q, icmp_t *icmp); 112 static void icmp_capability_req(queue_t *q, mblk_t *mp); 113 static int icmp_close(queue_t *q); 114 static void icmp_connect(queue_t *q, mblk_t *mp); 115 static void icmp_disconnect(queue_t *q, mblk_t *mp); 116 static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 117 int sys_error); 118 static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 119 t_scalar_t t_error, int sys_error); 120 static void icmp_icmp_error(queue_t *q, mblk_t *mp); 121 static void icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 122 static void icmp_info_req(queue_t *q, mblk_t *mp); 123 static mblk_t *icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, 124 t_scalar_t addr_length, in_port_t); 125 static int icmp_open(queue_t *q, dev_t *devp, int flag, 126 int sflag, cred_t *credp); 127 static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, 128 int *errorp, void *thisdg_attrs); 129 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 130 int icmp_opt_set(queue_t *q, uint_t optset_context, 131 int level, int name, uint_t inlen, 132 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 133 void *thisdg_attrs, cred_t *cr, mblk_t *mblk); 134 int icmp_opt_get(queue_t *q, int level, int name, 135 uchar_t *ptr); 136 static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 137 static boolean_t icmp_param_register(icmpparam_t *icmppa, int cnt); 138 static int icmp_param_set(queue_t *q, mblk_t *mp, char *value, 139 caddr_t cp, cred_t *cr); 140 static void icmp_rput(queue_t *q, mblk_t *mp); 141 
static void icmp_rput_bind_ack(queue_t *q, mblk_t *mp); 142 static int icmp_snmp_get(queue_t *q, mblk_t *mpctl); 143 static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 144 uchar_t *ptr, int len); 145 static int icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 146 cred_t *cr); 147 static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 148 static void icmp_unbind(queue_t *q, mblk_t *mp); 149 static void icmp_wput(queue_t *q, mblk_t *mp); 150 static void icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, 151 t_scalar_t tudr_optlen); 152 static void icmp_wput_other(queue_t *q, mblk_t *mp); 153 static void icmp_wput_iocdata(queue_t *q, mblk_t *mp); 154 static void icmp_wput_restricted(queue_t *q, mblk_t *mp); 155 156 static void rawip_kstat_init(void); 157 static void rawip_kstat_fini(void); 158 static int rawip_kstat_update(kstat_t *kp, int rw); 159 160 161 static struct module_info info = { 162 5707, "icmp", 1, INFPSZ, 512, 128 163 }; 164 165 static struct qinit rinit = { 166 (pfi_t)icmp_rput, NULL, icmp_open, icmp_close, NULL, &info 167 }; 168 169 static struct qinit winit = { 170 (pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &info 171 }; 172 173 struct streamtab icmpinfo = { 174 &rinit, &winit 175 }; 176 177 static sin_t sin_null; /* Zero address for quick clears */ 178 static sin6_t sin6_null; /* Zero address for quick clears */ 179 static void *icmp_g_head; /* Head for list of open icmp streams. */ 180 static IDP icmp_g_nd; /* Points to table of ICMP ND variables. */ 181 182 /* MIB-2 stuff for SNMP */ 183 static mib2_rawip_t rawip_mib; /* SNMP fixed size info */ 184 static kstat_t *rawip_mibkp; /* kstat exporting rawip_mib data */ 185 186 /* Default structure copied into T_INFO_ACK messages */ 187 static struct T_info_ack icmp_g_t_info_ack = { 188 T_INFO_ACK, 189 IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */ 190 T_INVALID, /* ETSDU_size. icmp does not support expedited data. */ 191 T_INVALID, /* CDATA_size. 
icmp does not support connect data. */ 192 T_INVALID, /* DDATA_size. icmp does not support disconnect data. */ 193 0, /* ADDR_size - filled in later. */ 194 0, /* OPT_size - not initialized here */ 195 IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */ 196 T_CLTS, /* SERV_type. icmp supports connection-less. */ 197 TS_UNBND, /* CURRENT_state. This is set from icmp_state. */ 198 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 199 }; 200 201 /* 202 * Table of ND variables supported by icmp. These are loaded into icmp_g_nd 203 * in icmp_open. 204 * All of these are alterable, within the min/max values given, at run time. 205 */ 206 static icmpparam_t icmp_param_arr[] = { 207 /* min max value name */ 208 { 0, 128, 32, "icmp_wroff_extra" }, 209 { 1, 255, 255, "icmp_ipv4_ttl" }, 210 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"}, 211 { 0, 1, 1, "icmp_bsd_compat" }, 212 { 4096, 65536, 8192, "icmp_xmit_hiwat"}, 213 { 0, 65536, 1024, "icmp_xmit_lowat"}, 214 { 4096, 65536, 8192, "icmp_recv_hiwat"}, 215 { 65536, 1024*1024*1024, 256*1024, "icmp_max_buf"}, 216 }; 217 #define icmp_wroff_extra icmp_param_arr[0].icmp_param_value 218 #define icmp_ipv4_ttl icmp_param_arr[1].icmp_param_value 219 #define icmp_ipv6_hoplimit icmp_param_arr[2].icmp_param_value 220 #define icmp_bsd_compat icmp_param_arr[3].icmp_param_value 221 #define icmp_xmit_hiwat icmp_param_arr[4].icmp_param_value 222 #define icmp_xmit_lowat icmp_param_arr[5].icmp_param_value 223 #define icmp_recv_hiwat icmp_param_arr[6].icmp_param_value 224 #define icmp_max_buf icmp_param_arr[7].icmp_param_value 225 226 /* 227 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 228 * passed to icmp_wput. 229 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP 230 * protocol type placed in the message following the address. A T_BIND_ACK 231 * message is passed upstream when ip acknowledges the request. 232 * (Called as writer.) 
233 */ 234 static void 235 icmp_bind(queue_t *q, mblk_t *mp) 236 { 237 sin_t *sin; 238 sin6_t *sin6; 239 mblk_t *mp1; 240 struct T_bind_req *tbr; 241 icmp_t *icmp; 242 243 icmp = (icmp_t *)q->q_ptr; 244 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 245 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 246 "icmp_bind: bad req, len %u", 247 (uint_t)(mp->b_wptr - mp->b_rptr)); 248 icmp_err_ack(q, mp, TPROTO, 0); 249 return; 250 } 251 if (icmp->icmp_state != TS_UNBND) { 252 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 253 "icmp_bind: bad state, %d", icmp->icmp_state); 254 icmp_err_ack(q, mp, TOUTSTATE, 0); 255 return; 256 } 257 /* 258 * Reallocate the message to make sure we have enough room for an 259 * address and the protocol type. 260 */ 261 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 262 if (!mp1) { 263 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 264 return; 265 } 266 mp = mp1; 267 tbr = (struct T_bind_req *)mp->b_rptr; 268 switch (tbr->ADDR_length) { 269 case 0: /* Generic request */ 270 tbr->ADDR_offset = sizeof (struct T_bind_req); 271 if (icmp->icmp_family == AF_INET) { 272 tbr->ADDR_length = sizeof (sin_t); 273 sin = (sin_t *)&tbr[1]; 274 *sin = sin_null; 275 sin->sin_family = AF_INET; 276 mp->b_wptr = (uchar_t *)&sin[1]; 277 } else { 278 ASSERT(icmp->icmp_family == AF_INET6); 279 tbr->ADDR_length = sizeof (sin6_t); 280 sin6 = (sin6_t *)&tbr[1]; 281 *sin6 = sin6_null; 282 sin6->sin6_family = AF_INET6; 283 mp->b_wptr = (uchar_t *)&sin6[1]; 284 } 285 break; 286 case sizeof (sin_t): /* Complete IP address */ 287 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 288 sizeof (sin_t)); 289 if (sin == NULL || !OK_32PTR((char *)sin)) { 290 icmp_err_ack(q, mp, TSYSERR, EINVAL); 291 return; 292 } 293 if (icmp->icmp_family != AF_INET || 294 sin->sin_family != AF_INET) { 295 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 296 return; 297 } 298 break; 299 case sizeof (sin6_t): /* Complete IP address */ 300 sin6 = (sin6_t *)mi_offset_param(mp, 
tbr->ADDR_offset, 301 sizeof (sin6_t)); 302 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 303 icmp_err_ack(q, mp, TSYSERR, EINVAL); 304 return; 305 } 306 if (icmp->icmp_family != AF_INET6 || 307 sin6->sin6_family != AF_INET6) { 308 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 309 return; 310 } 311 /* No support for mapped addresses on raw sockets */ 312 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 313 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 314 return; 315 } 316 break; 317 default: 318 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 319 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 320 icmp_err_ack(q, mp, TBADADDR, 0); 321 return; 322 } 323 /* 324 * Copy the source address into our icmp structure. This address 325 * may still be zero; if so, ip will fill in the correct address 326 * each time an outbound packet is passed to it. 327 * If we are binding to a broadcast or multicast address icmp_rput 328 * will clear the source address when it receives the T_BIND_ACK. 329 */ 330 icmp->icmp_state = TS_IDLE; 331 332 if (icmp->icmp_family == AF_INET) { 333 ASSERT(sin != NULL); 334 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 335 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 336 &icmp->icmp_v6src); 337 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 338 icmp->icmp_ip_snd_options_len; 339 icmp->icmp_bound_v6src = icmp->icmp_v6src; 340 } else { 341 int error; 342 343 ASSERT(sin6 != NULL); 344 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 345 icmp->icmp_v6src = sin6->sin6_addr; 346 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 347 icmp->icmp_bound_v6src = icmp->icmp_v6src; 348 349 /* Rebuild the header template */ 350 error = icmp_build_hdrs(q, icmp); 351 if (error != 0) { 352 icmp_err_ack(q, mp, TSYSERR, error); 353 return; 354 } 355 } 356 /* 357 * Place protocol type in the O_T_BIND_REQ/T_BIND_REQ following 358 * the address. 
359 */ 360 *mp->b_wptr++ = icmp->icmp_proto; 361 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 362 /* 363 * Append a request for an IRE if src not 0 (INADDR_ANY) 364 */ 365 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 366 if (!mp->b_cont) { 367 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 368 return; 369 } 370 mp->b_cont->b_wptr += sizeof (ire_t); 371 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 372 } 373 374 /* Pass the O_T_BIND_REQ/T_BIND_REQ to ip. */ 375 putnext(q, mp); 376 } 377 378 /* 379 * Send message to IP to just bind to the protocol. 380 */ 381 static void 382 icmp_bind_proto(queue_t *q) 383 { 384 mblk_t *mp; 385 struct T_bind_req *tbr; 386 icmp_t *icmp; 387 388 icmp = (icmp_t *)q->q_ptr; 389 mp = allocb(sizeof (struct T_bind_req) + sizeof (sin6_t) + 1, 390 BPRI_MED); 391 if (!mp) { 392 return; 393 } 394 mp->b_datap->db_type = M_PROTO; 395 tbr = (struct T_bind_req *)mp->b_rptr; 396 tbr->PRIM_type = O_T_BIND_REQ; /* change to T_BIND_REQ ? */ 397 tbr->ADDR_offset = sizeof (struct T_bind_req); 398 if (icmp->icmp_ipversion == IPV4_VERSION) { 399 sin_t *sin; 400 401 tbr->ADDR_length = sizeof (sin_t); 402 sin = (sin_t *)&tbr[1]; 403 *sin = sin_null; 404 sin->sin_family = AF_INET; 405 mp->b_wptr = (uchar_t *)&sin[1]; 406 } else { 407 sin6_t *sin6; 408 409 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 410 tbr->ADDR_length = sizeof (sin6_t); 411 sin6 = (sin6_t *)&tbr[1]; 412 *sin6 = sin6_null; 413 sin6->sin6_family = AF_INET6; 414 mp->b_wptr = (uchar_t *)&sin6[1]; 415 } 416 417 /* Place protocol type in the O_T_BIND_REQ following the address. */ 418 *mp->b_wptr++ = icmp->icmp_proto; 419 420 /* Pass the O_T_BIND_REQ to ip. */ 421 putnext(q, mp); 422 } 423 424 /* 425 * This routine handles each T_CONN_REQ message passed to icmp. It 426 * associates a default destination address with the stream. 427 * 428 * This routine sends down a T_BIND_REQ to IP with the following mblks: 429 * T_BIND_REQ - specifying local and remote address. 
430 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 431 * T_OK_ACK - for the T_CONN_REQ 432 * T_CONN_CON - to keep the TPI user happy 433 * 434 * The connect completes in icmp_rput. 435 * When a T_BIND_ACK is received information is extracted from the IRE 436 * and the two appended messages are sent to the TPI user. 437 * Should icmp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 438 * it to an error ack for the appropriate primitive. 439 */ 440 static void 441 icmp_connect(queue_t *q, mblk_t *mp) 442 { 443 sin_t *sin; 444 sin6_t *sin6; 445 mblk_t *mp1, *mp2; 446 struct T_conn_req *tcr; 447 icmp_t *icmp; 448 ipaddr_t v4dst; 449 in6_addr_t v6dst; 450 uint32_t flowinfo; 451 452 icmp = (icmp_t *)q->q_ptr; 453 tcr = (struct T_conn_req *)mp->b_rptr; 454 /* Sanity checks */ 455 if ((mp->b_wptr - mp->b_rptr < sizeof (struct T_conn_req))) { 456 icmp_err_ack(q, mp, TPROTO, 0); 457 return; 458 } 459 460 if (icmp->icmp_state == TS_DATA_XFER) { 461 /* Already connected - clear out state */ 462 icmp->icmp_v6src = icmp->icmp_bound_v6src; 463 icmp->icmp_state = TS_IDLE; 464 } 465 466 467 if (tcr->OPT_length != 0) { 468 icmp_err_ack(q, mp, TBADOPT, 0); 469 return; 470 } 471 switch (tcr->DEST_length) { 472 default: 473 icmp_err_ack(q, mp, TBADADDR, 0); 474 return; 475 476 case sizeof (sin_t): 477 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 478 sizeof (sin_t)); 479 if (sin == NULL || !OK_32PTR((char *)sin)) { 480 icmp_err_ack(q, mp, TSYSERR, EINVAL); 481 return; 482 } 483 if (icmp->icmp_family != AF_INET || 484 sin->sin_family != AF_INET) { 485 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 486 return; 487 } 488 v4dst = sin->sin_addr.s_addr; 489 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 490 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 491 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 492 icmp->icmp_ip_snd_options_len; 493 break; 494 495 case sizeof (sin6_t): 496 sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 497 sizeof (sin6_t)); 498 if 
(sin6 == NULL || !OK_32PTR((char *)sin6)) { 499 icmp_err_ack(q, mp, TSYSERR, EINVAL); 500 return; 501 } 502 if (icmp->icmp_family != AF_INET6 || 503 sin6->sin6_family != AF_INET6) { 504 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 505 return; 506 } 507 /* No support for mapped addresses on raw sockets */ 508 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 509 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 510 return; 511 } 512 v6dst = sin6->sin6_addr; 513 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 514 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 515 flowinfo = sin6->sin6_flowinfo; 516 break; 517 } 518 if (icmp->icmp_ipversion == IPV4_VERSION) { 519 /* 520 * Interpret a zero destination to mean loopback. 521 * Update the T_CONN_REQ (sin/sin6) since it is used to 522 * generate the T_CONN_CON. 523 */ 524 if (v4dst == INADDR_ANY) { 525 v4dst = htonl(INADDR_LOOPBACK); 526 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 527 if (icmp->icmp_family == AF_INET) { 528 sin->sin_addr.s_addr = v4dst; 529 } else { 530 sin6->sin6_addr = v6dst; 531 } 532 } 533 icmp->icmp_v6dst = v6dst; 534 icmp->icmp_flowinfo = 0; 535 536 /* 537 * If the destination address is multicast and 538 * an outgoing multicast interface has been set, 539 * use the address of that interface as our 540 * source address if no source address has been set. 541 */ 542 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 543 CLASSD(v4dst) && 544 icmp->icmp_multicast_if_addr != INADDR_ANY) { 545 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 546 &icmp->icmp_v6src); 547 } 548 } else { 549 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 550 /* 551 * Interpret a zero destination to mean loopback. 552 * Update the T_CONN_REQ (sin/sin6) since it is used to 553 * generate the T_CONN_CON. 
554 */ 555 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 556 v6dst = ipv6_loopback; 557 sin6->sin6_addr = v6dst; 558 } 559 icmp->icmp_v6dst = v6dst; 560 icmp->icmp_flowinfo = flowinfo; 561 /* 562 * If the destination address is multicast and 563 * an outgoing multicast interface has been set, 564 * then the ip bind logic will pick the correct source 565 * address (i.e. matching the outgoing multicast interface). 566 */ 567 } 568 569 /* 570 * Send down bind to IP to verify that there is a route 571 * and to determine the source address. 572 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 573 */ 574 if (icmp->icmp_family == AF_INET) { 575 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa_conn_t), 576 sin->sin_port); 577 } else { 578 ASSERT(icmp->icmp_family == AF_INET6); 579 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa6_conn_t), 580 sin6->sin6_port); 581 } 582 if (mp1 == NULL) { 583 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 584 return; 585 } 586 587 /* 588 * We also have to send a connection confirmation to 589 * keep TLI happy. Prepare it for icmp_rput. 590 */ 591 if (icmp->icmp_family == AF_INET) { 592 mp2 = mi_tpi_conn_con(NULL, (char *)sin, sizeof (*sin), NULL, 593 0); 594 } else { 595 ASSERT(icmp->icmp_family == AF_INET6); 596 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, sizeof (*sin6), NULL, 597 0); 598 } 599 if (mp2 == NULL) { 600 freemsg(mp1); 601 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 602 return; 603 } 604 605 mp = mi_tpi_ok_ack_alloc(mp); 606 if (mp == NULL) { 607 /* Unable to reuse the T_CONN_REQ for the ack. */ 608 freemsg(mp2); 609 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 610 return; 611 } 612 613 icmp->icmp_state = TS_DATA_XFER; 614 615 /* Hang onto the T_OK_ACK and T_CONN_CON for later. 
*/ 616 linkb(mp1, mp); 617 linkb(mp1, mp2); 618 619 mblk_setcred(mp1, icmp->icmp_credp); 620 putnext(q, mp1); 621 } 622 623 static int 624 icmp_close(queue_t *q) 625 { 626 icmp_t *icmp = (icmp_t *)q->q_ptr; 627 int i1; 628 629 /* tell IP that if we're not here, he can't trust labels */ 630 if (is_system_labeled()) 631 putnext(WR(q), icmp->icmp_delabel); 632 633 qprocsoff(q); 634 635 /* If there are any options associated with the stream, free them. */ 636 if (icmp->icmp_ip_snd_options) 637 mi_free((char *)icmp->icmp_ip_snd_options); 638 639 if (icmp->icmp_filter != NULL) 640 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 641 642 /* Free memory associated with sticky options */ 643 if (icmp->icmp_sticky_hdrs_len != 0) { 644 kmem_free(icmp->icmp_sticky_hdrs, 645 icmp->icmp_sticky_hdrs_len); 646 icmp->icmp_sticky_hdrs = NULL; 647 icmp->icmp_sticky_hdrs_len = 0; 648 } 649 650 ip6_pkt_free(&icmp->icmp_sticky_ipp); 651 652 crfree(icmp->icmp_credp); 653 654 /* Free the icmp structure and release the minor device number. */ 655 i1 = mi_close_comm(&icmp_g_head, q); 656 657 return (i1); 658 } 659 660 /* 661 * This routine handles each T_DISCON_REQ message passed to icmp 662 * as an indicating that ICMP is no longer connected. This results 663 * in sending a T_BIND_REQ to IP to restore the binding to just 664 * the local address. 665 * 666 * This routine sends down a T_BIND_REQ to IP with the following mblks: 667 * T_BIND_REQ - specifying just the local address. 668 * T_OK_ACK - for the T_DISCON_REQ 669 * 670 * The disconnect completes in icmp_rput. 671 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 672 * Should icmp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 673 * it to an error ack for the appropriate primitive. 
674 */ 675 static void 676 icmp_disconnect(queue_t *q, mblk_t *mp) 677 { 678 icmp_t *icmp; 679 mblk_t *mp1; 680 681 icmp = (icmp_t *)q->q_ptr; 682 683 if (icmp->icmp_state != TS_DATA_XFER) { 684 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 685 "icmp_disconnect: bad state, %d", icmp->icmp_state); 686 icmp_err_ack(q, mp, TOUTSTATE, 0); 687 return; 688 } 689 icmp->icmp_v6src = icmp->icmp_bound_v6src; 690 icmp->icmp_state = TS_IDLE; 691 692 /* 693 * Send down bind to IP to remove the full binding and revert 694 * to the local address binding. 695 */ 696 if (icmp->icmp_family == AF_INET) { 697 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin_t), 0); 698 } else { 699 ASSERT(icmp->icmp_family == AF_INET6); 700 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin6_t), 0); 701 } 702 if (mp1 == NULL) { 703 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 704 return; 705 } 706 mp = mi_tpi_ok_ack_alloc(mp); 707 if (mp == NULL) { 708 /* Unable to reuse the T_DISCON_REQ for the ack. */ 709 icmp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 710 return; 711 } 712 713 if (icmp->icmp_family == AF_INET6) { 714 int error; 715 716 /* Rebuild the header template */ 717 error = icmp_build_hdrs(q, icmp); 718 if (error != 0) { 719 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 720 freemsg(mp1); 721 return; 722 } 723 } 724 icmp->icmp_discon_pending = 1; 725 726 /* Append the T_OK_ACK to the T_BIND_REQ for icmp_rput */ 727 linkb(mp1, mp); 728 putnext(q, mp1); 729 } 730 731 /* This routine creates a T_ERROR_ACK message and passes it upstream. 
*/ 732 static void 733 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 734 { 735 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 736 qreply(q, mp); 737 } 738 739 /* Shorthand to generate and send TPI error acks to our client */ 740 static void 741 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 742 t_scalar_t t_error, int sys_error) 743 { 744 struct T_error_ack *teackp; 745 746 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 747 M_PCPROTO, T_ERROR_ACK)) != NULL) { 748 teackp = (struct T_error_ack *)mp->b_rptr; 749 teackp->ERROR_prim = primitive; 750 teackp->TLI_error = t_error; 751 teackp->UNIX_error = sys_error; 752 qreply(q, mp); 753 } 754 } 755 756 /* 757 * icmp_icmp_error is called by icmp_rput to process ICMP 758 * messages passed up by IP. 759 * Generates the appropriate T_UDERROR_IND for permanent 760 * (non-transient) errors. 761 * Assumes that IP has pulled up everything up to and including 762 * the ICMP header. 763 */ 764 static void 765 icmp_icmp_error(queue_t *q, mblk_t *mp) 766 { 767 icmph_t *icmph; 768 ipha_t *ipha; 769 int iph_hdr_length; 770 sin_t sin; 771 sin6_t sin6; 772 mblk_t *mp1; 773 int error = 0; 774 icmp_t *icmp = (icmp_t *)q->q_ptr; 775 776 /* 777 * Deliver T_UDERROR_IND when the application has asked for it. 778 * The socket layer enables this automatically when connected. 
779 */ 780 if (!icmp->icmp_dgram_errind) { 781 freemsg(mp); 782 return; 783 } 784 785 ipha = (ipha_t *)mp->b_rptr; 786 787 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 788 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 789 icmp_icmp_error_ipv6(q, mp); 790 return; 791 } 792 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 793 794 iph_hdr_length = IPH_HDR_LENGTH(ipha); 795 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 796 ipha = (ipha_t *)&icmph[1]; 797 iph_hdr_length = IPH_HDR_LENGTH(ipha); 798 799 switch (icmph->icmph_type) { 800 case ICMP_DEST_UNREACHABLE: 801 switch (icmph->icmph_code) { 802 case ICMP_FRAGMENTATION_NEEDED: 803 /* 804 * IP has already adjusted the path MTU. 805 * XXX Somehow pass MTU indication to application? 806 */ 807 break; 808 case ICMP_PORT_UNREACHABLE: 809 case ICMP_PROTOCOL_UNREACHABLE: 810 error = ECONNREFUSED; 811 break; 812 default: 813 /* Transient errors */ 814 break; 815 } 816 break; 817 default: 818 /* Transient errors */ 819 break; 820 } 821 if (error == 0) { 822 freemsg(mp); 823 return; 824 } 825 826 switch (icmp->icmp_family) { 827 case AF_INET: 828 sin = sin_null; 829 sin.sin_family = AF_INET; 830 sin.sin_addr.s_addr = ipha->ipha_dst; 831 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 832 error); 833 break; 834 case AF_INET6: 835 sin6 = sin6_null; 836 sin6.sin6_family = AF_INET6; 837 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 838 839 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 840 NULL, 0, error); 841 break; 842 } 843 if (mp1) 844 putnext(q, mp1); 845 freemsg(mp); 846 } 847 848 /* 849 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 850 * for IPv6 packets. 851 * Send permanent (non-transient) errors upstream. 852 * Assumes that IP has pulled up all the extension headers as well 853 * as the ICMPv6 header. 
854 */ 855 static void 856 icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 857 { 858 icmp6_t *icmp6; 859 ip6_t *ip6h, *outer_ip6h; 860 uint16_t iph_hdr_length; 861 uint8_t *nexthdrp; 862 sin6_t sin6; 863 mblk_t *mp1; 864 int error = 0; 865 icmp_t *icmp = (icmp_t *)q->q_ptr; 866 867 outer_ip6h = (ip6_t *)mp->b_rptr; 868 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 869 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 870 else 871 iph_hdr_length = IPV6_HDR_LEN; 872 873 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 874 ip6h = (ip6_t *)&icmp6[1]; 875 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 876 freemsg(mp); 877 return; 878 } 879 if (*nexthdrp != icmp->icmp_proto) { 880 /* 881 * Could have switched icmp_proto after while ip did fanout of 882 * this message 883 */ 884 freemsg(mp); 885 return; 886 } 887 switch (icmp6->icmp6_type) { 888 case ICMP6_DST_UNREACH: 889 switch (icmp6->icmp6_code) { 890 case ICMP6_DST_UNREACH_NOPORT: 891 error = ECONNREFUSED; 892 break; 893 case ICMP6_DST_UNREACH_ADMIN: 894 case ICMP6_DST_UNREACH_NOROUTE: 895 case ICMP6_DST_UNREACH_BEYONDSCOPE: 896 case ICMP6_DST_UNREACH_ADDR: 897 /* Transient errors */ 898 break; 899 default: 900 break; 901 } 902 break; 903 case ICMP6_PACKET_TOO_BIG: { 904 struct T_unitdata_ind *tudi; 905 struct T_opthdr *toh; 906 size_t udi_size; 907 mblk_t *newmp; 908 t_scalar_t opt_length = sizeof (struct T_opthdr) + 909 sizeof (struct ip6_mtuinfo); 910 sin6_t *sin6; 911 struct ip6_mtuinfo *mtuinfo; 912 913 /* 914 * If the application has requested to receive path mtu 915 * information, send up an empty message containing an 916 * IPV6_PATHMTU ancillary data item. 917 */ 918 if (!icmp->icmp_ipv6_recvpathmtu) 919 break; 920 921 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 922 opt_length; 923 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 924 BUMP_MIB(&rawip_mib, rawipInErrors); 925 break; 926 } 927 928 /* 929 * newmp->b_cont is left to NULL on purpose. 
This is an 930 * empty message containing only ancillary data. 931 */ 932 newmp->b_datap->db_type = M_PROTO; 933 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 934 newmp->b_wptr = (uchar_t *)tudi + udi_size; 935 tudi->PRIM_type = T_UNITDATA_IND; 936 tudi->SRC_length = sizeof (sin6_t); 937 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 938 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 939 tudi->OPT_length = opt_length; 940 941 sin6 = (sin6_t *)&tudi[1]; 942 bzero(sin6, sizeof (sin6_t)); 943 sin6->sin6_family = AF_INET6; 944 sin6->sin6_addr = icmp->icmp_v6dst; 945 946 toh = (struct T_opthdr *)&sin6[1]; 947 toh->level = IPPROTO_IPV6; 948 toh->name = IPV6_PATHMTU; 949 toh->len = opt_length; 950 toh->status = 0; 951 952 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 953 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 954 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 955 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 956 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 957 /* 958 * We've consumed everything we need from the original 959 * message. Free it, then send our empty message. 960 */ 961 freemsg(mp); 962 putnext(q, newmp); 963 return; 964 } 965 case ICMP6_TIME_EXCEEDED: 966 /* Transient errors */ 967 break; 968 case ICMP6_PARAM_PROB: 969 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 970 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 971 (uchar_t *)ip6h + icmp6->icmp6_pptr == 972 (uchar_t *)nexthdrp) { 973 error = ECONNREFUSED; 974 break; 975 } 976 break; 977 } 978 if (error == 0) { 979 freemsg(mp); 980 return; 981 } 982 983 sin6 = sin6_null; 984 sin6.sin6_family = AF_INET6; 985 sin6.sin6_addr = ip6h->ip6_dst; 986 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 987 988 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 989 error); 990 if (mp1) 991 putnext(q, mp1); 992 freemsg(mp); 993 } 994 995 /* 996 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 
997 * The local address is filled in if endpoint is bound. The remote address 998 * is filled in if remote address has been precified ("connected endpoint") 999 * (The concept of connected CLTS sockets is alien to published TPI 1000 * but we support it anyway). 1001 */ 1002 static void 1003 icmp_addr_req(queue_t *q, mblk_t *mp) 1004 { 1005 icmp_t *icmp = (icmp_t *)q->q_ptr; 1006 mblk_t *ackmp; 1007 struct T_addr_ack *taa; 1008 1009 /* Make it large enough for worst case */ 1010 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1011 2 * sizeof (sin6_t), 1); 1012 if (ackmp == NULL) { 1013 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1014 return; 1015 } 1016 taa = (struct T_addr_ack *)ackmp->b_rptr; 1017 1018 bzero(taa, sizeof (struct T_addr_ack)); 1019 ackmp->b_wptr = (uchar_t *)&taa[1]; 1020 1021 taa->PRIM_type = T_ADDR_ACK; 1022 ackmp->b_datap->db_type = M_PCPROTO; 1023 1024 /* 1025 * Note: Following code assumes 32 bit alignment of basic 1026 * data structures like sin_t and struct T_addr_ack. 1027 */ 1028 if (icmp->icmp_state != TS_UNBND) { 1029 /* 1030 * Fill in local address 1031 */ 1032 taa->LOCADDR_offset = sizeof (*taa); 1033 if (icmp->icmp_family == AF_INET) { 1034 sin_t *sin; 1035 1036 taa->LOCADDR_length = sizeof (sin_t); 1037 sin = (sin_t *)&taa[1]; 1038 /* Fill zeroes and then intialize non-zero fields */ 1039 *sin = sin_null; 1040 sin->sin_family = AF_INET; 1041 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1042 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1043 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1044 sin->sin_addr.s_addr); 1045 } else { 1046 /* 1047 * INADDR_ANY 1048 * icmp_v6src is not set, we might be bound to 1049 * broadcast/multicast. 
Use icmp_bound_v6src as 1050 * local address instead (that could 1051 * also still be INADDR_ANY) 1052 */ 1053 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1054 sin->sin_addr.s_addr); 1055 } 1056 ackmp->b_wptr = (uchar_t *)&sin[1]; 1057 } else { 1058 sin6_t *sin6; 1059 1060 ASSERT(icmp->icmp_family == AF_INET6); 1061 taa->LOCADDR_length = sizeof (sin6_t); 1062 sin6 = (sin6_t *)&taa[1]; 1063 /* Fill zeroes and then intialize non-zero fields */ 1064 *sin6 = sin6_null; 1065 sin6->sin6_family = AF_INET6; 1066 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1067 sin6->sin6_addr = icmp->icmp_v6src; 1068 } else { 1069 /* 1070 * UNSPECIFIED 1071 * icmp_v6src is not set, we might be bound to 1072 * broadcast/multicast. Use icmp_bound_v6src as 1073 * local address instead (that could 1074 * also still be UNSPECIFIED) 1075 */ 1076 sin6->sin6_addr = icmp->icmp_bound_v6src; 1077 } 1078 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1079 } 1080 } 1081 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1082 qreply(q, ackmp); 1083 } 1084 1085 static void 1086 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1087 { 1088 *tap = icmp_g_t_info_ack; 1089 1090 if (icmp->icmp_family == AF_INET6) 1091 tap->ADDR_size = sizeof (sin6_t); 1092 else 1093 tap->ADDR_size = sizeof (sin_t); 1094 tap->CURRENT_state = icmp->icmp_state; 1095 tap->OPT_size = icmp_max_optsize; 1096 } 1097 1098 /* 1099 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1100 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1101 * icmp_g_t_info_ack. The current state of the stream is copied from 1102 * icmp_state. 
1103 */ 1104 static void 1105 icmp_capability_req(queue_t *q, mblk_t *mp) 1106 { 1107 icmp_t *icmp = (icmp_t *)q->q_ptr; 1108 t_uscalar_t cap_bits1; 1109 struct T_capability_ack *tcap; 1110 1111 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1112 1113 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1114 mp->b_datap->db_type, T_CAPABILITY_ACK); 1115 if (!mp) 1116 return; 1117 1118 tcap = (struct T_capability_ack *)mp->b_rptr; 1119 tcap->CAP_bits1 = 0; 1120 1121 if (cap_bits1 & TC1_INFO) { 1122 icmp_copy_info(&tcap->INFO_ack, icmp); 1123 tcap->CAP_bits1 |= TC1_INFO; 1124 } 1125 1126 qreply(q, mp); 1127 } 1128 1129 /* 1130 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1131 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1132 * The current state of the stream is copied from icmp_state. 1133 */ 1134 static void 1135 icmp_info_req(queue_t *q, mblk_t *mp) 1136 { 1137 icmp_t *icmp = (icmp_t *)q->q_ptr; 1138 1139 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1140 T_INFO_ACK); 1141 if (!mp) 1142 return; 1143 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1144 qreply(q, mp); 1145 } 1146 1147 /* 1148 * IP recognizes seven kinds of bind requests: 1149 * 1150 * - A zero-length address binds only to the protocol number. 1151 * 1152 * - A 4-byte address is treated as a request to 1153 * validate that the address is a valid local IPv4 1154 * address, appropriate for an application to bind to. 1155 * IP does the verification, but does not make any note 1156 * of the address at this time. 1157 * 1158 * - A 16-byte address contains is treated as a request 1159 * to validate a local IPv6 address, as the 4-byte 1160 * address case above. 1161 * 1162 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1163 * use it for the inbound fanout of packets. 
1164 * 1165 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1166 * use it for the inbound fanout of packets. 1167 * 1168 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1169 * information consisting of local and remote addresses 1170 * and ports (unused for raw sockets). In this case, the addresses are both 1171 * validated as appropriate for this operation, and, if 1172 * so, the information is retained for use in the 1173 * inbound fanout. 1174 * 1175 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1176 * fanout information, like the 12-byte case above. 1177 * 1178 * IP will also fill in the IRE request mblk with information 1179 * regarding our peer. In all cases, we notify IP of our protocol 1180 * type by appending a single protocol byte to the bind request. 1181 */ 1182 static mblk_t * 1183 icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length, 1184 in_port_t fport) 1185 { 1186 char *cp; 1187 mblk_t *mp; 1188 struct T_bind_req *tbr; 1189 ipa_conn_t *ac; 1190 ipa6_conn_t *ac6; 1191 sin_t *sin; 1192 sin6_t *sin6; 1193 1194 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1195 1196 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1197 if (mp == NULL) 1198 return (NULL); 1199 mp->b_datap->db_type = M_PROTO; 1200 tbr = (struct T_bind_req *)mp->b_rptr; 1201 tbr->PRIM_type = bind_prim; 1202 tbr->ADDR_offset = sizeof (*tbr); 1203 tbr->CONIND_number = 0; 1204 tbr->ADDR_length = addr_length; 1205 cp = (char *)&tbr[1]; 1206 switch (addr_length) { 1207 case sizeof (ipa_conn_t): 1208 ASSERT(icmp->icmp_family == AF_INET); 1209 /* Append a request for an IRE */ 1210 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1211 if (mp->b_cont == NULL) { 1212 freemsg(mp); 1213 return (NULL); 1214 } 1215 mp->b_cont->b_wptr += sizeof (ire_t); 1216 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1217 1218 /* cp known to be 32 bit aligned */ 1219 ac = (ipa_conn_t *)cp; 1220 ac->ac_laddr = 
V4_PART_OF_V6(icmp->icmp_v6src); 1221 ac->ac_faddr = V4_PART_OF_V6(icmp->icmp_v6dst); 1222 ac->ac_fport = fport; 1223 ac->ac_lport = 0; 1224 break; 1225 1226 case sizeof (ipa6_conn_t): 1227 ASSERT(icmp->icmp_family == AF_INET6); 1228 /* Append a request for an IRE */ 1229 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1230 if (mp->b_cont == NULL) { 1231 freemsg(mp); 1232 return (NULL); 1233 } 1234 mp->b_cont->b_wptr += sizeof (ire_t); 1235 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1236 1237 /* cp known to be 32 bit aligned */ 1238 ac6 = (ipa6_conn_t *)cp; 1239 ac6->ac6_laddr = icmp->icmp_v6src; 1240 ac6->ac6_faddr = icmp->icmp_v6dst; 1241 ac6->ac6_fport = fport; 1242 ac6->ac6_lport = 0; 1243 break; 1244 1245 case sizeof (sin_t): 1246 ASSERT(icmp->icmp_family == AF_INET); 1247 /* Append a request for an IRE */ 1248 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1249 if (!mp->b_cont) { 1250 freemsg(mp); 1251 return (NULL); 1252 } 1253 mp->b_cont->b_wptr += sizeof (ire_t); 1254 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1255 1256 sin = (sin_t *)cp; 1257 *sin = sin_null; 1258 sin->sin_family = AF_INET; 1259 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 1260 break; 1261 1262 case sizeof (sin6_t): 1263 ASSERT(icmp->icmp_family == AF_INET6); 1264 /* Append a request for an IRE */ 1265 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1266 if (!mp->b_cont) { 1267 freemsg(mp); 1268 return (NULL); 1269 } 1270 mp->b_cont->b_wptr += sizeof (ire_t); 1271 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1272 1273 sin6 = (sin6_t *)cp; 1274 *sin6 = sin6_null; 1275 sin6->sin6_family = AF_INET6; 1276 sin6->sin6_addr = icmp->icmp_bound_v6src; 1277 break; 1278 } 1279 /* Add protocol number to end */ 1280 cp[addr_length] = icmp->icmp_proto; 1281 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1282 return (mp); 1283 } 1284 1285 /* ARGSUSED */ 1286 static void 1287 dummy_func(void *arg) 1288 { 1289 } 1290 1291 static mblk_t * 1292 alloc_wait(queue_t *q, size_t len, int 
pri, int *errp)
{
	mblk_t *mp;
	bufcall_id_t id;
	int retv;

	/* Keep retrying the allocation until it succeeds or we give up. */
	while ((mp = allocb(len, pri)) == NULL) {
		/* Schedule a (no-op) bufcall callback for this size. */
		id = qbufcall(q, len, pri, dummy_func, NULL);
		if (id == 0) {
			/* Could not even schedule the bufcall. */
			*errp = ENOMEM;
			break;
		}
		retv = qwait_sig(q);
		/* Cancel the bufcall whether or not it has fired. */
		qunbufcall(q, id);
		if (retv == 0) {
			/* qwait_sig() returned 0: report EINTR. */
			*errp = EINTR;
			break;
		}
	}
	/* On success, mark the whole block as containing data. */
	if (mp != NULL)
		mp->b_wptr += len;
	return (mp);
}

/*
 * This is the open routine for icmp.  It allocates a icmp_t structure for
 * the stream and, on the first open of the module, creates an ND table.
 *
 * Returns 0 on success (including when the stream is already open),
 * EINVAL if icmp is not being opened as a module (sflag != MODOPEN), or
 * the error from mi_open_comm(), icmp_build_hdrs() or alloc_wait().
 *
 * NOTE(review): no ND table creation is visible in this routine itself;
 * confirm where that happens.
 */
static int
icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	int	err;
	icmp_t	*icmp;
	mblk_t	*mp;
	out_labeled_t *olp;

	/* If the stream is already open, return immediately. */
	if (q->q_ptr != NULL)
		return (0);

	/* If this is not a push of icmp as a module, fail. */
	if (sflag != MODOPEN)
		return (EINVAL);

	/*
	 * Defer the qprocson until everything is initialized since
	 * we are D_MTPERQ and after qprocson the rput routine can
	 * run. (Could do qprocson earlier since icmp currently
	 * has an outer perimeter.)
	 */

	/*
	 * Create a icmp_t structure for this stream and link into the
	 * list of open streams.
	 */
	err = mi_open_comm(&icmp_g_head, sizeof (icmp_t), q, devp,
	    flag, sflag, credp);
	if (err != 0)
		return (err);

	/*
	 * The receive hiwat is only looked at on the stream head queue.
	 * Store in q_hiwat in order to return on SO_RCVBUF getsockopts.
	 */
	q->q_hiwat = icmp_recv_hiwat;

	/* Set the initial state of the stream and the privilege status. */
	icmp = (icmp_t *)q->q_ptr;
	icmp->icmp_state = TS_UNBND;
	icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
	icmp->icmp_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
	icmp->icmp_filter = NULL;

	/* Keep a held reference to the opener's credentials. */
	icmp->icmp_credp = credp;
	crhold(credp);

	/*
	 * If the caller has the process-wide flag set, then default to MAC
	 * exempt mode.  This allows read-down to unlabeled hosts.
	 */
	if (getpflags(NET_MAC_AWARE, credp) != 0)
		icmp->icmp_mac_exempt = B_TRUE;

	icmp->icmp_zoneid = getzoneid();

	/* The device major number selects IPv6 vs. IPv4 defaults. */
	if (getmajor(*devp) == (major_t)ICMP6_MAJ) {
		icmp->icmp_ipversion = IPV6_VERSION;
		icmp->icmp_family = AF_INET6;
		/* May be changed by a SO_PROTOTYPE socket option. */
		icmp->icmp_proto = IPPROTO_ICMPV6;
		icmp->icmp_checksum_off = 2;	/* Offset for icmp6_cksum */
		icmp->icmp_max_hdr_len = IPV6_HDR_LEN;
		icmp->icmp_ttl = (uint8_t)icmp_ipv6_hoplimit;
	} else {
		icmp->icmp_ipversion = IPV4_VERSION;
		icmp->icmp_family = AF_INET;
		/* May be changed by a SO_PROTOTYPE socket option. */
		icmp->icmp_proto = IPPROTO_ICMP;
		icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH;
		icmp->icmp_ttl = (uint8_t)icmp_ipv4_ttl;
	}
	qprocson(q);

	/*
	 * Check if icmp is being I_PUSHed by a non-privileged user.
	 * If so, we set icmp_restricted to indicate that only MIB
	 * traffic may pass.
	 */
	if (secpolicy_net_icmpaccess(credp) != 0) {
		icmp->icmp_restricted = 1;
	}

	/*
	 * The transmit hiwat is only looked at on IP's queue.
	 * Store in q_hiwat in order to return on SO_SNDBUF
	 * getsockopts.
	 */
	WR(q)->q_hiwat = icmp_xmit_hiwat;
	WR(q)->q_next->q_hiwat = WR(q)->q_hiwat;
	WR(q)->q_lowat = icmp_xmit_lowat;
	WR(q)->q_next->q_lowat = WR(q)->q_lowat;

	if (icmp->icmp_family == AF_INET6) {
		/* Build initial header template for transmit */
		err = icmp_build_hdrs(q, icmp);
		if (err != 0)
			goto open_error;
	}
	/* Set the Stream head write offset. */
	(void) mi_set_sth_wroff(q, icmp->icmp_max_hdr_len + icmp_wroff_extra);
	(void) mi_set_sth_hiwat(q, q->q_hiwat);

	if (is_system_labeled()) {
		/* notify IP that we know about labeling */
		mp = alloc_wait(q, sizeof (*olp), BPRI_MED, &err);
		if (mp == NULL)
			goto open_error;
		mp->b_datap->db_type = M_CTL;
		olp = (out_labeled_t *)mp->b_rptr;
		olp->out_labeled_type = IP_ULP_OUT_LABELED;
		olp->out_qnext = WR(q)->q_next;
		putnext(WR(q), mp);

		/*
		 * save off a copy for closing
		 *
		 * NOTE(review): if this second allocation fails, the
		 * notification above has already been sent downstream and
		 * open_error does not retract it -- confirm IP tolerates
		 * that.
		 */
		mp = alloc_wait(q, sizeof (*olp), BPRI_MED, &err);
		if (mp == NULL)
			goto open_error;
		mp->b_datap->db_type = M_CTL;
		olp = (out_labeled_t *)mp->b_rptr;
		olp->out_labeled_type = IP_ULP_OUT_LABELED;
		olp->out_qnext = NULL;
		icmp->icmp_delabel = mp;
	}

	return (0);

open_error:
	/*
	 * Undo the steps above: turn the queue procedures back off, drop
	 * the credential hold taken by crhold() and release the per-stream
	 * state created by mi_open_comm().
	 */
	qprocsoff(q);
	crfree(credp);
	(void) mi_close_comm(&icmp_g_head, q);
	return (err);
}

/*
 * Which ICMP options OK to set through T_UNITDATA_REQ...
1457 */ 1458 /* ARGSUSED */ 1459 static boolean_t 1460 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1461 { 1462 return (B_TRUE); 1463 } 1464 1465 /* 1466 * This routine gets default values of certain options whose default 1467 * values are maintained by protcol specific code 1468 */ 1469 /* ARGSUSED */ 1470 int 1471 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1472 { 1473 int *i1 = (int *)ptr; 1474 1475 switch (level) { 1476 case IPPROTO_IP: 1477 switch (name) { 1478 case IP_MULTICAST_TTL: 1479 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1480 return (sizeof (uchar_t)); 1481 case IP_MULTICAST_LOOP: 1482 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1483 return (sizeof (uchar_t)); 1484 } 1485 break; 1486 case IPPROTO_IPV6: 1487 switch (name) { 1488 case IPV6_MULTICAST_HOPS: 1489 *i1 = IP_DEFAULT_MULTICAST_TTL; 1490 return (sizeof (int)); 1491 case IPV6_MULTICAST_LOOP: 1492 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1493 return (sizeof (int)); 1494 case IPV6_UNICAST_HOPS: 1495 *i1 = icmp_ipv6_hoplimit; 1496 return (sizeof (int)); 1497 } 1498 break; 1499 case IPPROTO_ICMPV6: 1500 switch (name) { 1501 case ICMP6_FILTER: 1502 /* Make it look like "pass all" */ 1503 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1504 return (sizeof (icmp6_filter_t)); 1505 } 1506 break; 1507 } 1508 return (-1); 1509 } 1510 1511 /* 1512 * This routine retrieves the current status of socket options. 1513 * It returns the size of the option retrieved. 
1514 */ 1515 int 1516 icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 1517 { 1518 icmp_t *icmp = (icmp_t *)q->q_ptr; 1519 int *i1 = (int *)ptr; 1520 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1521 1522 switch (level) { 1523 case SOL_SOCKET: 1524 switch (name) { 1525 case SO_DEBUG: 1526 *i1 = icmp->icmp_debug; 1527 break; 1528 case SO_TYPE: 1529 *i1 = SOCK_RAW; 1530 break; 1531 case SO_PROTOTYPE: 1532 *i1 = icmp->icmp_proto; 1533 break; 1534 case SO_REUSEADDR: 1535 *i1 = icmp->icmp_reuseaddr; 1536 break; 1537 1538 /* 1539 * The following three items are available here, 1540 * but are only meaningful to IP. 1541 */ 1542 case SO_DONTROUTE: 1543 *i1 = icmp->icmp_dontroute; 1544 break; 1545 case SO_USELOOPBACK: 1546 *i1 = icmp->icmp_useloopback; 1547 break; 1548 case SO_BROADCAST: 1549 *i1 = icmp->icmp_broadcast; 1550 break; 1551 1552 case SO_SNDBUF: 1553 ASSERT(q->q_hiwat <= INT_MAX); 1554 *i1 = (int)q->q_hiwat; 1555 break; 1556 case SO_RCVBUF: 1557 ASSERT(RD(q)->q_hiwat <= INT_MAX); 1558 *i1 = (int)RD(q)->q_hiwat; 1559 break; 1560 case SO_DGRAM_ERRIND: 1561 *i1 = icmp->icmp_dgram_errind; 1562 break; 1563 case SO_TIMESTAMP: 1564 *i1 = icmp->icmp_timestamp; 1565 break; 1566 case SO_MAC_EXEMPT: 1567 *i1 = icmp->icmp_mac_exempt; 1568 break; 1569 /* 1570 * Following three not meaningful for icmp 1571 * Action is same as "default" to which we fallthrough 1572 * so we keep them in comments. 1573 * case SO_LINGER: 1574 * case SO_KEEPALIVE: 1575 * case SO_OOBINLINE: 1576 */ 1577 default: 1578 return (-1); 1579 } 1580 break; 1581 case IPPROTO_IP: 1582 /* 1583 * Only allow IPv4 option processing on IPv4 sockets. 
1584 */ 1585 if (icmp->icmp_family != AF_INET) 1586 return (-1); 1587 1588 switch (name) { 1589 case IP_OPTIONS: 1590 case T_IP_OPTIONS: 1591 /* Options are passed up with each packet */ 1592 return (0); 1593 case IP_HDRINCL: 1594 *i1 = (int)icmp->icmp_hdrincl; 1595 break; 1596 case IP_TOS: 1597 case T_IP_TOS: 1598 *i1 = (int)icmp->icmp_type_of_service; 1599 break; 1600 case IP_TTL: 1601 *i1 = (int)icmp->icmp_ttl; 1602 break; 1603 case IP_MULTICAST_IF: 1604 /* 0 address if not set */ 1605 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1606 return (sizeof (ipaddr_t)); 1607 case IP_MULTICAST_TTL: 1608 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1609 return (sizeof (uchar_t)); 1610 case IP_MULTICAST_LOOP: 1611 *ptr = icmp->icmp_multicast_loop; 1612 return (sizeof (uint8_t)); 1613 case IP_BOUND_IF: 1614 /* Zero if not set */ 1615 *i1 = icmp->icmp_bound_if; 1616 break; /* goto sizeof (int) option return */ 1617 case IP_UNSPEC_SRC: 1618 *ptr = icmp->icmp_unspec_source; 1619 break; /* goto sizeof (int) option return */ 1620 case IP_XMIT_IF: 1621 *i1 = icmp->icmp_xmit_if; 1622 break; /* goto sizeof (int) option return */ 1623 case IP_RECVIF: 1624 *ptr = icmp->icmp_recvif; 1625 break; /* goto sizeof (int) option return */ 1626 /* 1627 * Cannot "get" the value of following options 1628 * at this level. Action is same as "default" to 1629 * which we fallthrough so we keep them in comments. 
1630 * 1631 * case IP_ADD_MEMBERSHIP: 1632 * case IP_DROP_MEMBERSHIP: 1633 * case IP_BLOCK_SOURCE: 1634 * case IP_UNBLOCK_SOURCE: 1635 * case IP_ADD_SOURCE_MEMBERSHIP: 1636 * case IP_DROP_SOURCE_MEMBERSHIP: 1637 * case MCAST_JOIN_GROUP: 1638 * case MCAST_LEAVE_GROUP: 1639 * case MCAST_BLOCK_SOURCE: 1640 * case MCAST_UNBLOCK_SOURCE: 1641 * case MCAST_JOIN_SOURCE_GROUP: 1642 * case MCAST_LEAVE_SOURCE_GROUP: 1643 * case MRT_INIT: 1644 * case MRT_DONE: 1645 * case MRT_ADD_VIF: 1646 * case MRT_DEL_VIF: 1647 * case MRT_ADD_MFC: 1648 * case MRT_DEL_MFC: 1649 * case MRT_VERSION: 1650 * case MRT_ASSERT: 1651 * case IP_SEC_OPT: 1652 * case IP_DONTFAILOVER_IF: 1653 * case IP_NEXTHOP: 1654 */ 1655 default: 1656 return (-1); 1657 } 1658 break; 1659 case IPPROTO_IPV6: 1660 /* 1661 * Only allow IPv6 option processing on native IPv6 sockets. 1662 */ 1663 if (icmp->icmp_family != AF_INET6) 1664 return (-1); 1665 switch (name) { 1666 case IPV6_UNICAST_HOPS: 1667 *i1 = (unsigned int)icmp->icmp_ttl; 1668 break; 1669 case IPV6_MULTICAST_IF: 1670 /* 0 index if not set */ 1671 *i1 = icmp->icmp_multicast_if_index; 1672 break; 1673 case IPV6_MULTICAST_HOPS: 1674 *i1 = icmp->icmp_multicast_ttl; 1675 break; 1676 case IPV6_MULTICAST_LOOP: 1677 *i1 = icmp->icmp_multicast_loop; 1678 break; 1679 case IPV6_BOUND_IF: 1680 /* Zero if not set */ 1681 *i1 = icmp->icmp_bound_if; 1682 break; 1683 case IPV6_UNSPEC_SRC: 1684 *i1 = icmp->icmp_unspec_source; 1685 break; 1686 case IPV6_CHECKSUM: 1687 /* 1688 * Return offset or -1 if no checksum offset. 
1689 * Does not apply to IPPROTO_ICMPV6 1690 */ 1691 if (icmp->icmp_proto == IPPROTO_ICMPV6) 1692 return (-1); 1693 1694 if (icmp->icmp_raw_checksum) { 1695 *i1 = icmp->icmp_checksum_off; 1696 } else { 1697 *i1 = -1; 1698 } 1699 break; 1700 case IPV6_JOIN_GROUP: 1701 case IPV6_LEAVE_GROUP: 1702 case MCAST_JOIN_GROUP: 1703 case MCAST_LEAVE_GROUP: 1704 case MCAST_BLOCK_SOURCE: 1705 case MCAST_UNBLOCK_SOURCE: 1706 case MCAST_JOIN_SOURCE_GROUP: 1707 case MCAST_LEAVE_SOURCE_GROUP: 1708 /* cannot "get" the value for these */ 1709 return (-1); 1710 case IPV6_RECVPKTINFO: 1711 *i1 = icmp->icmp_ipv6_recvpktinfo; 1712 break; 1713 case IPV6_RECVTCLASS: 1714 *i1 = icmp->icmp_ipv6_recvtclass; 1715 break; 1716 case IPV6_RECVPATHMTU: 1717 *i1 = icmp->icmp_ipv6_recvpathmtu; 1718 break; 1719 case IPV6_V6ONLY: 1720 *i1 = 1; 1721 break; 1722 case IPV6_RECVHOPLIMIT: 1723 *i1 = icmp->icmp_ipv6_recvhoplimit; 1724 break; 1725 case IPV6_RECVHOPOPTS: 1726 *i1 = icmp->icmp_ipv6_recvhopopts; 1727 break; 1728 case IPV6_RECVDSTOPTS: 1729 *i1 = icmp->icmp_ipv6_recvdstopts; 1730 break; 1731 case _OLD_IPV6_RECVDSTOPTS: 1732 *i1 = icmp->icmp_old_ipv6_recvdstopts; 1733 break; 1734 case IPV6_RECVRTHDRDSTOPTS: 1735 *i1 = icmp->icmp_ipv6_recvrtdstopts; 1736 break; 1737 case IPV6_RECVRTHDR: 1738 *i1 = icmp->icmp_ipv6_recvrthdr; 1739 break; 1740 case IPV6_PKTINFO: { 1741 /* XXX assumes that caller has room for max size! 
*/ 1742 struct in6_pktinfo *pkti; 1743 1744 pkti = (struct in6_pktinfo *)ptr; 1745 if (ipp->ipp_fields & IPPF_IFINDEX) 1746 pkti->ipi6_ifindex = ipp->ipp_ifindex; 1747 else 1748 pkti->ipi6_ifindex = 0; 1749 if (ipp->ipp_fields & IPPF_ADDR) 1750 pkti->ipi6_addr = ipp->ipp_addr; 1751 else 1752 pkti->ipi6_addr = ipv6_all_zeros; 1753 return (sizeof (struct in6_pktinfo)); 1754 } 1755 case IPV6_NEXTHOP: { 1756 sin6_t *sin6 = (sin6_t *)ptr; 1757 1758 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 1759 return (0); 1760 *sin6 = sin6_null; 1761 sin6->sin6_family = AF_INET6; 1762 sin6->sin6_addr = ipp->ipp_nexthop; 1763 return (sizeof (sin6_t)); 1764 } 1765 case IPV6_HOPOPTS: 1766 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 1767 return (0); 1768 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 1769 return (0); 1770 bcopy((char *)ipp->ipp_hopopts + 1771 icmp->icmp_label_len_v6, ptr, 1772 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1773 if (icmp->icmp_label_len_v6 > 0) { 1774 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 1775 ptr[1] = (ipp->ipp_hopoptslen - 1776 icmp->icmp_label_len_v6 + 7) / 8 - 1; 1777 } 1778 return (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1779 case IPV6_RTHDRDSTOPTS: 1780 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 1781 return (0); 1782 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 1783 return (ipp->ipp_rtdstoptslen); 1784 case IPV6_RTHDR: 1785 if (!(ipp->ipp_fields & IPPF_RTHDR)) 1786 return (0); 1787 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 1788 return (ipp->ipp_rthdrlen); 1789 case IPV6_DSTOPTS: 1790 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 1791 return (0); 1792 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 1793 return (ipp->ipp_dstoptslen); 1794 case IPV6_PATHMTU: 1795 if (!(ipp->ipp_fields & IPPF_PATHMTU)) 1796 return (0); 1797 1798 return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0, 1799 (struct ip6_mtuinfo *)ptr)); 1800 case IPV6_TCLASS: 1801 if (ipp->ipp_fields & IPPF_TCLASS) 1802 *i1 = ipp->ipp_tclass; 1803 else 1804 *i1 = IPV6_FLOW_TCLASS( 
1805 IPV6_DEFAULT_VERS_AND_FLOW); 1806 break; 1807 default: 1808 return (-1); 1809 } 1810 break; 1811 case IPPROTO_ICMPV6: 1812 /* 1813 * Only allow IPv6 option processing on native IPv6 sockets. 1814 */ 1815 if (icmp->icmp_family != AF_INET6) 1816 return (-1); 1817 1818 if (icmp->icmp_proto != IPPROTO_ICMPV6) 1819 return (-1); 1820 1821 switch (name) { 1822 case ICMP6_FILTER: 1823 if (icmp->icmp_filter == NULL) { 1824 /* Make it look like "pass all" */ 1825 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1826 } else { 1827 (void) bcopy(icmp->icmp_filter, ptr, 1828 sizeof (icmp6_filter_t)); 1829 } 1830 return (sizeof (icmp6_filter_t)); 1831 default: 1832 return (-1); 1833 } 1834 default: 1835 return (-1); 1836 } 1837 return (sizeof (int)); 1838 } 1839 1840 /* This routine sets socket options. */ 1841 /* ARGSUSED */ 1842 int 1843 icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, 1844 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 1845 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 1846 { 1847 icmp_t *icmp = (icmp_t *)q->q_ptr; 1848 int *i1 = (int *)invalp; 1849 boolean_t onoff = (*i1 == 0) ? 0 : 1; 1850 boolean_t checkonly; 1851 int error; 1852 1853 switch (optset_context) { 1854 case SETFN_OPTCOM_CHECKONLY: 1855 checkonly = B_TRUE; 1856 /* 1857 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 1858 * inlen != 0 implies value supplied and 1859 * we have to "pretend" to set it. 1860 * inlen == 0 implies that there is no 1861 * value part in T_CHECK request and just validation 1862 * done elsewhere should be enough, we just return here. 1863 */ 1864 if (inlen == 0) { 1865 *outlenp = 0; 1866 return (0); 1867 } 1868 break; 1869 case SETFN_OPTCOM_NEGOTIATE: 1870 checkonly = B_FALSE; 1871 break; 1872 case SETFN_UD_NEGOTIATE: 1873 case SETFN_CONN_NEGOTIATE: 1874 checkonly = B_FALSE; 1875 /* 1876 * Negotiating local and "association-related" options 1877 * through T_UNITDATA_REQ. 
1878 * 1879 * Following routine can filter out ones we do not 1880 * want to be "set" this way. 1881 */ 1882 if (!icmp_opt_allow_udr_set(level, name)) { 1883 *outlenp = 0; 1884 return (EINVAL); 1885 } 1886 break; 1887 default: 1888 /* 1889 * We should never get here 1890 */ 1891 *outlenp = 0; 1892 return (EINVAL); 1893 } 1894 1895 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 1896 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 1897 1898 /* 1899 * For fixed length options, no sanity check 1900 * of passed in length is done. It is assumed *_optcom_req() 1901 * routines do the right thing. 1902 */ 1903 1904 switch (level) { 1905 case SOL_SOCKET: 1906 switch (name) { 1907 case SO_DEBUG: 1908 if (!checkonly) 1909 icmp->icmp_debug = onoff; 1910 break; 1911 case SO_PROTOTYPE: 1912 if ((*i1 & 0xFF) != IPPROTO_ICMP && 1913 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 1914 secpolicy_net_rawaccess(cr) != 0) { 1915 *outlenp = 0; 1916 return (EACCES); 1917 } 1918 /* Can't use IPPROTO_RAW with IPv6 */ 1919 if ((*i1 & 0xFF) == IPPROTO_RAW && 1920 icmp->icmp_family == AF_INET6) { 1921 *outlenp = 0; 1922 return (EPROTONOSUPPORT); 1923 } 1924 if (checkonly) { 1925 /* T_CHECK case */ 1926 *(int *)outvalp = (*i1 & 0xFF); 1927 break; 1928 } 1929 icmp->icmp_proto = *i1 & 0xFF; 1930 if ((icmp->icmp_proto == IPPROTO_RAW || 1931 icmp->icmp_proto == IPPROTO_IGMP) && 1932 icmp->icmp_family == AF_INET) 1933 icmp->icmp_hdrincl = 1; 1934 else 1935 icmp->icmp_hdrincl = 0; 1936 1937 if (icmp->icmp_family == AF_INET6 && 1938 icmp->icmp_proto == IPPROTO_ICMPV6) { 1939 /* Set offset for icmp6_cksum */ 1940 icmp->icmp_raw_checksum = 0; 1941 icmp->icmp_checksum_off = 2; 1942 } 1943 if (icmp->icmp_proto == IPPROTO_UDP || 1944 icmp->icmp_proto == IPPROTO_TCP || 1945 icmp->icmp_proto == IPPROTO_SCTP) { 1946 icmp->icmp_no_tp_cksum = 1; 1947 icmp->icmp_sticky_ipp.ipp_fields |= 1948 IPPF_NO_CKSUM; 1949 } else { 1950 icmp->icmp_no_tp_cksum = 0; 1951 icmp->icmp_sticky_ipp.ipp_fields &= 1952 
~IPPF_NO_CKSUM; 1953 } 1954 1955 if (icmp->icmp_filter != NULL && 1956 icmp->icmp_proto != IPPROTO_ICMPV6) { 1957 kmem_free(icmp->icmp_filter, 1958 sizeof (icmp6_filter_t)); 1959 icmp->icmp_filter = NULL; 1960 } 1961 1962 /* Rebuild the header template */ 1963 error = icmp_build_hdrs(q, icmp); 1964 if (error != 0) { 1965 *outlenp = 0; 1966 return (error); 1967 } 1968 1969 /* 1970 * For SCTP, we don't use icmp_bind_proto() for 1971 * raw socket binding. Note that we do not need 1972 * to set *outlenp. 1973 */ 1974 if (icmp->icmp_proto == IPPROTO_SCTP) 1975 return (0); 1976 1977 icmp_bind_proto(q); 1978 *outlenp = sizeof (int); 1979 *(int *)outvalp = *i1 & 0xFF; 1980 return (0); 1981 case SO_REUSEADDR: 1982 if (!checkonly) 1983 icmp->icmp_reuseaddr = onoff; 1984 break; 1985 1986 /* 1987 * The following three items are available here, 1988 * but are only meaningful to IP. 1989 */ 1990 case SO_DONTROUTE: 1991 if (!checkonly) 1992 icmp->icmp_dontroute = onoff; 1993 break; 1994 case SO_USELOOPBACK: 1995 if (!checkonly) 1996 icmp->icmp_useloopback = onoff; 1997 break; 1998 case SO_BROADCAST: 1999 if (!checkonly) 2000 icmp->icmp_broadcast = onoff; 2001 break; 2002 2003 case SO_SNDBUF: 2004 if (*i1 > icmp_max_buf) { 2005 *outlenp = 0; 2006 return (ENOBUFS); 2007 } 2008 if (!checkonly) { 2009 q->q_hiwat = *i1; 2010 q->q_next->q_hiwat = *i1; 2011 } 2012 break; 2013 case SO_RCVBUF: 2014 if (*i1 > icmp_max_buf) { 2015 *outlenp = 0; 2016 return (ENOBUFS); 2017 } 2018 if (!checkonly) { 2019 RD(q)->q_hiwat = *i1; 2020 (void) mi_set_sth_hiwat(RD(q), *i1); 2021 } 2022 break; 2023 case SO_DGRAM_ERRIND: 2024 if (!checkonly) 2025 icmp->icmp_dgram_errind = onoff; 2026 break; 2027 case SO_TIMESTAMP: 2028 if (!checkonly) { 2029 icmp->icmp_timestamp = onoff; 2030 } 2031 break; 2032 case SO_MAC_EXEMPT: 2033 if (secpolicy_net_mac_aware(cr) != 0 || 2034 icmp->icmp_state != TS_UNBND) 2035 return (EACCES); 2036 if (!checkonly) 2037 icmp->icmp_mac_exempt = onoff; 2038 break; 2039 /* 2040 * 
Following three not meaningful for icmp 2041 * Action is same as "default" so we keep them 2042 * in comments. 2043 * case SO_LINGER: 2044 * case SO_KEEPALIVE: 2045 * case SO_OOBINLINE: 2046 */ 2047 default: 2048 *outlenp = 0; 2049 return (EINVAL); 2050 } 2051 break; 2052 case IPPROTO_IP: 2053 /* 2054 * Only allow IPv4 option processing on IPv4 sockets. 2055 */ 2056 if (icmp->icmp_family != AF_INET) { 2057 *outlenp = 0; 2058 return (ENOPROTOOPT); 2059 } 2060 switch (name) { 2061 case IP_OPTIONS: 2062 case T_IP_OPTIONS: 2063 /* Save options for use by IP. */ 2064 if ((inlen & 0x3) || 2065 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2066 *outlenp = 0; 2067 return (EINVAL); 2068 } 2069 if (checkonly) 2070 break; 2071 2072 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2073 &icmp->icmp_ip_snd_options_len, 2074 icmp->icmp_label_len, invalp, inlen)) { 2075 *outlenp = 0; 2076 return (ENOMEM); 2077 } 2078 2079 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2080 icmp->icmp_ip_snd_options_len; 2081 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2082 icmp_wroff_extra); 2083 break; 2084 case IP_HDRINCL: 2085 if (!checkonly) 2086 icmp->icmp_hdrincl = onoff; 2087 break; 2088 case IP_TOS: 2089 case T_IP_TOS: 2090 if (!checkonly) { 2091 icmp->icmp_type_of_service = (uint8_t)*i1; 2092 } 2093 break; 2094 case IP_TTL: 2095 if (!checkonly) { 2096 icmp->icmp_ttl = (uint8_t)*i1; 2097 } 2098 break; 2099 case IP_MULTICAST_IF: 2100 /* 2101 * TODO should check OPTMGMT reply and undo this if 2102 * there is an error. 2103 */ 2104 if (!checkonly) 2105 icmp->icmp_multicast_if_addr = *i1; 2106 break; 2107 case IP_MULTICAST_TTL: 2108 if (!checkonly) 2109 icmp->icmp_multicast_ttl = *invalp; 2110 break; 2111 case IP_MULTICAST_LOOP: 2112 if (!checkonly) { 2113 icmp->icmp_multicast_loop = 2114 (*invalp == 0) ? 
0 : 1; 2115 } 2116 break; 2117 case IP_BOUND_IF: 2118 if (!checkonly) 2119 icmp->icmp_bound_if = *i1; 2120 break; 2121 case IP_UNSPEC_SRC: 2122 if (!checkonly) 2123 icmp->icmp_unspec_source = onoff; 2124 break; 2125 case IP_XMIT_IF: 2126 if (!checkonly) 2127 icmp->icmp_xmit_if = *i1; 2128 break; 2129 case IP_RECVIF: 2130 if (!checkonly) 2131 icmp->icmp_recvif = onoff; 2132 break; 2133 case IP_ADD_MEMBERSHIP: 2134 case IP_DROP_MEMBERSHIP: 2135 case IP_BLOCK_SOURCE: 2136 case IP_UNBLOCK_SOURCE: 2137 case IP_ADD_SOURCE_MEMBERSHIP: 2138 case IP_DROP_SOURCE_MEMBERSHIP: 2139 case MCAST_JOIN_GROUP: 2140 case MCAST_LEAVE_GROUP: 2141 case MCAST_BLOCK_SOURCE: 2142 case MCAST_UNBLOCK_SOURCE: 2143 case MCAST_JOIN_SOURCE_GROUP: 2144 case MCAST_LEAVE_SOURCE_GROUP: 2145 case MRT_INIT: 2146 case MRT_DONE: 2147 case MRT_ADD_VIF: 2148 case MRT_DEL_VIF: 2149 case MRT_ADD_MFC: 2150 case MRT_DEL_MFC: 2151 case MRT_VERSION: 2152 case MRT_ASSERT: 2153 case IP_SEC_OPT: 2154 case IP_DONTFAILOVER_IF: 2155 case IP_NEXTHOP: 2156 /* 2157 * "soft" error (negative) 2158 * option not handled at this level 2159 * Note: Do not modify *outlenp 2160 */ 2161 return (-EINVAL); 2162 default: 2163 *outlenp = 0; 2164 return (EINVAL); 2165 } 2166 break; 2167 case IPPROTO_IPV6: { 2168 ip6_pkt_t *ipp; 2169 boolean_t sticky; 2170 2171 if (icmp->icmp_family != AF_INET6) { 2172 *outlenp = 0; 2173 return (ENOPROTOOPT); 2174 } 2175 /* 2176 * Deal with both sticky options and ancillary data 2177 */ 2178 if (thisdg_attrs == NULL) { 2179 /* sticky options, or none */ 2180 ipp = &icmp->icmp_sticky_ipp; 2181 sticky = B_TRUE; 2182 } else { 2183 /* ancillary data */ 2184 ipp = (ip6_pkt_t *)thisdg_attrs; 2185 sticky = B_FALSE; 2186 } 2187 2188 switch (name) { 2189 case IPV6_MULTICAST_IF: 2190 if (!checkonly) 2191 icmp->icmp_multicast_if_index = *i1; 2192 break; 2193 case IPV6_UNICAST_HOPS: 2194 /* -1 means use default */ 2195 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2196 *outlenp = 0; 2197 return (EINVAL); 2198 } 2199 if 
(!checkonly) { 2200 if (*i1 == -1) { 2201 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2202 icmp_ipv6_hoplimit; 2203 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2204 /* Pass modified value to IP. */ 2205 *i1 = ipp->ipp_hoplimit; 2206 } else { 2207 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2208 (uint8_t)*i1; 2209 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2210 } 2211 /* Rebuild the header template */ 2212 error = icmp_build_hdrs(q, icmp); 2213 if (error != 0) { 2214 *outlenp = 0; 2215 return (error); 2216 } 2217 } 2218 break; 2219 case IPV6_MULTICAST_HOPS: 2220 /* -1 means use default */ 2221 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2222 *outlenp = 0; 2223 return (EINVAL); 2224 } 2225 if (!checkonly) { 2226 if (*i1 == -1) { 2227 icmp->icmp_multicast_ttl = 2228 ipp->ipp_multicast_hops = 2229 IP_DEFAULT_MULTICAST_TTL; 2230 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2231 /* Pass modified value to IP. */ 2232 *i1 = icmp->icmp_multicast_ttl; 2233 } else { 2234 icmp->icmp_multicast_ttl = 2235 ipp->ipp_multicast_hops = 2236 (uint8_t)*i1; 2237 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2238 } 2239 } 2240 break; 2241 case IPV6_MULTICAST_LOOP: 2242 if (*i1 != 0 && *i1 != 1) { 2243 *outlenp = 0; 2244 return (EINVAL); 2245 } 2246 if (!checkonly) 2247 icmp->icmp_multicast_loop = *i1; 2248 break; 2249 case IPV6_CHECKSUM: 2250 /* 2251 * Integer offset into the user data of where the 2252 * checksum is located. 2253 * Offset of -1 disables option. 2254 * Does not apply to IPPROTO_ICMPV6. 
2255 */ 2256 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2257 *outlenp = 0; 2258 return (EINVAL); 2259 } 2260 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2261 /* Negative or not 16 bit aligned offset */ 2262 *outlenp = 0; 2263 return (EINVAL); 2264 } 2265 if (checkonly) 2266 break; 2267 2268 if (*i1 == -1) { 2269 icmp->icmp_raw_checksum = 0; 2270 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2271 } else { 2272 icmp->icmp_raw_checksum = 1; 2273 icmp->icmp_checksum_off = *i1; 2274 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2275 } 2276 /* Rebuild the header template */ 2277 error = icmp_build_hdrs(q, icmp); 2278 if (error != 0) { 2279 *outlenp = 0; 2280 return (error); 2281 } 2282 break; 2283 case IPV6_JOIN_GROUP: 2284 case IPV6_LEAVE_GROUP: 2285 case MCAST_JOIN_GROUP: 2286 case MCAST_LEAVE_GROUP: 2287 case MCAST_BLOCK_SOURCE: 2288 case MCAST_UNBLOCK_SOURCE: 2289 case MCAST_JOIN_SOURCE_GROUP: 2290 case MCAST_LEAVE_SOURCE_GROUP: 2291 /* 2292 * "soft" error (negative) 2293 * option not handled at this level 2294 * Note: Do not modify *outlenp 2295 */ 2296 return (-EINVAL); 2297 case IPV6_BOUND_IF: 2298 if (!checkonly) 2299 icmp->icmp_bound_if = *i1; 2300 break; 2301 case IPV6_UNSPEC_SRC: 2302 if (!checkonly) 2303 icmp->icmp_unspec_source = onoff; 2304 break; 2305 case IPV6_RECVTCLASS: 2306 if (!checkonly) 2307 icmp->icmp_ipv6_recvtclass = onoff; 2308 break; 2309 /* 2310 * Set boolean switches for ancillary data delivery 2311 */ 2312 case IPV6_RECVPKTINFO: 2313 if (!checkonly) 2314 icmp->icmp_ipv6_recvpktinfo = onoff; 2315 break; 2316 case IPV6_RECVPATHMTU: 2317 if (!checkonly) 2318 icmp->icmp_ipv6_recvpathmtu = onoff; 2319 break; 2320 case IPV6_RECVHOPLIMIT: 2321 if (!checkonly) 2322 icmp->icmp_ipv6_recvhoplimit = onoff; 2323 break; 2324 case IPV6_RECVHOPOPTS: 2325 if (!checkonly) 2326 icmp->icmp_ipv6_recvhopopts = onoff; 2327 break; 2328 case IPV6_RECVDSTOPTS: 2329 if (!checkonly) 2330 icmp->icmp_ipv6_recvdstopts = onoff; 2331 break; 2332 case _OLD_IPV6_RECVDSTOPTS: 
2333 if (!checkonly) 2334 icmp->icmp_old_ipv6_recvdstopts = onoff; 2335 break; 2336 case IPV6_RECVRTHDRDSTOPTS: 2337 if (!checkonly) 2338 icmp->icmp_ipv6_recvrtdstopts = onoff; 2339 break; 2340 case IPV6_RECVRTHDR: 2341 if (!checkonly) 2342 icmp->icmp_ipv6_recvrthdr = onoff; 2343 break; 2344 /* 2345 * Set sticky options or ancillary data. 2346 * If sticky options, (re)build any extension headers 2347 * that might be needed as a result. 2348 */ 2349 case IPV6_PKTINFO: 2350 /* 2351 * The source address and ifindex are verified 2352 * in ip_opt_set(). For ancillary data the 2353 * source address is checked in ip_wput_v6. 2354 */ 2355 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2356 return (EINVAL); 2357 if (checkonly) 2358 break; 2359 2360 if (inlen == 0) { 2361 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2362 ipp->ipp_sticky_ignored |= 2363 (IPPF_IFINDEX|IPPF_ADDR); 2364 } else { 2365 struct in6_pktinfo *pkti; 2366 2367 pkti = (struct in6_pktinfo *)invalp; 2368 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2369 ipp->ipp_addr = pkti->ipi6_addr; 2370 if (ipp->ipp_ifindex != 0) 2371 ipp->ipp_fields |= IPPF_IFINDEX; 2372 else 2373 ipp->ipp_fields &= ~IPPF_IFINDEX; 2374 if (!IN6_IS_ADDR_UNSPECIFIED( 2375 &ipp->ipp_addr)) 2376 ipp->ipp_fields |= IPPF_ADDR; 2377 else 2378 ipp->ipp_fields &= ~IPPF_ADDR; 2379 } 2380 if (sticky) { 2381 error = icmp_build_hdrs(q, icmp); 2382 if (error != 0) 2383 return (error); 2384 } 2385 break; 2386 case IPV6_HOPLIMIT: 2387 /* This option can only be used as ancillary data. 
*/ 2388 if (sticky) 2389 return (EINVAL); 2390 if (inlen != 0 && inlen != sizeof (int)) 2391 return (EINVAL); 2392 if (checkonly) 2393 break; 2394 2395 if (inlen == 0) { 2396 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2397 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2398 } else { 2399 if (*i1 > 255 || *i1 < -1) 2400 return (EINVAL); 2401 if (*i1 == -1) 2402 ipp->ipp_hoplimit = icmp_ipv6_hoplimit; 2403 else 2404 ipp->ipp_hoplimit = *i1; 2405 ipp->ipp_fields |= IPPF_HOPLIMIT; 2406 } 2407 break; 2408 case IPV6_TCLASS: 2409 /* 2410 * IPV6_RECVTCLASS accepts -1 as use kernel default 2411 * and [0, 255] as the actualy traffic class. 2412 */ 2413 if (inlen != 0 && inlen != sizeof (int)) 2414 return (EINVAL); 2415 if (checkonly) 2416 break; 2417 2418 if (inlen == 0) { 2419 ipp->ipp_fields &= ~IPPF_TCLASS; 2420 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2421 } else { 2422 if (*i1 >= 256 || *i1 < -1) 2423 return (EINVAL); 2424 if (*i1 == -1) { 2425 ipp->ipp_tclass = 2426 IPV6_FLOW_TCLASS( 2427 IPV6_DEFAULT_VERS_AND_FLOW); 2428 } else { 2429 ipp->ipp_tclass = *i1; 2430 } 2431 ipp->ipp_fields |= IPPF_TCLASS; 2432 } 2433 if (sticky) { 2434 error = icmp_build_hdrs(q, icmp); 2435 if (error != 0) 2436 return (error); 2437 } 2438 break; 2439 case IPV6_NEXTHOP: 2440 /* 2441 * IP will verify that the nexthop is reachable 2442 * and fail for sticky options. 
2443 */ 2444 if (inlen != 0 && inlen != sizeof (sin6_t)) 2445 return (EINVAL); 2446 if (checkonly) 2447 break; 2448 2449 if (inlen == 0) { 2450 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2451 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2452 } else { 2453 sin6_t *sin6 = (sin6_t *)invalp; 2454 2455 if (sin6->sin6_family != AF_INET6) 2456 return (EAFNOSUPPORT); 2457 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 2458 return (EADDRNOTAVAIL); 2459 ipp->ipp_nexthop = sin6->sin6_addr; 2460 if (!IN6_IS_ADDR_UNSPECIFIED( 2461 &ipp->ipp_nexthop)) 2462 ipp->ipp_fields |= IPPF_NEXTHOP; 2463 else 2464 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2465 } 2466 if (sticky) { 2467 error = icmp_build_hdrs(q, icmp); 2468 if (error != 0) 2469 return (error); 2470 } 2471 break; 2472 case IPV6_HOPOPTS: { 2473 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2474 /* 2475 * Sanity checks - minimum size, size a multiple of 2476 * eight bytes, and matching size passed in. 2477 */ 2478 if (inlen != 0 && 2479 inlen != (8 * (hopts->ip6h_len + 1))) 2480 return (EINVAL); 2481 2482 if (checkonly) 2483 break; 2484 error = optcom_pkt_set(invalp, inlen, sticky, 2485 (uchar_t **)&ipp->ipp_hopopts, 2486 &ipp->ipp_hopoptslen, 2487 sticky ? icmp->icmp_label_len_v6 : 0); 2488 if (error != 0) 2489 return (error); 2490 if (ipp->ipp_hopoptslen == 0) { 2491 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2492 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2493 } else { 2494 ipp->ipp_fields |= IPPF_HOPOPTS; 2495 } 2496 if (sticky) { 2497 error = icmp_build_hdrs(q, icmp); 2498 if (error != 0) 2499 return (error); 2500 } 2501 break; 2502 } 2503 case IPV6_RTHDRDSTOPTS: { 2504 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2505 2506 /* 2507 * Sanity checks - minimum size, size a multiple of 2508 * eight bytes, and matching size passed in. 
2509 */ 2510 if (inlen != 0 && 2511 inlen != (8 * (dopts->ip6d_len + 1))) 2512 return (EINVAL); 2513 2514 if (checkonly) 2515 break; 2516 2517 if (inlen == 0) { 2518 if (sticky && 2519 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2520 kmem_free(ipp->ipp_rtdstopts, 2521 ipp->ipp_rtdstoptslen); 2522 ipp->ipp_rtdstopts = NULL; 2523 ipp->ipp_rtdstoptslen = 0; 2524 } 2525 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2526 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2527 } else { 2528 error = optcom_pkt_set(invalp, inlen, sticky, 2529 (uchar_t **)&ipp->ipp_rtdstopts, 2530 &ipp->ipp_rtdstoptslen, 0); 2531 if (error != 0) 2532 return (error); 2533 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2534 } 2535 if (sticky) { 2536 error = icmp_build_hdrs(q, icmp); 2537 if (error != 0) 2538 return (error); 2539 } 2540 break; 2541 } 2542 case IPV6_DSTOPTS: { 2543 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2544 2545 /* 2546 * Sanity checks - minimum size, size a multiple of 2547 * eight bytes, and matching size passed in. 2548 */ 2549 if (inlen != 0 && 2550 inlen != (8 * (dopts->ip6d_len + 1))) 2551 return (EINVAL); 2552 2553 if (checkonly) 2554 break; 2555 2556 if (inlen == 0) { 2557 if (sticky && 2558 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2559 kmem_free(ipp->ipp_dstopts, 2560 ipp->ipp_dstoptslen); 2561 ipp->ipp_dstopts = NULL; 2562 ipp->ipp_dstoptslen = 0; 2563 } 2564 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2565 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2566 } else { 2567 error = optcom_pkt_set(invalp, inlen, sticky, 2568 (uchar_t **)&ipp->ipp_dstopts, 2569 &ipp->ipp_dstoptslen, 0); 2570 if (error != 0) 2571 return (error); 2572 ipp->ipp_fields |= IPPF_DSTOPTS; 2573 } 2574 if (sticky) { 2575 error = icmp_build_hdrs(q, icmp); 2576 if (error != 0) 2577 return (error); 2578 } 2579 break; 2580 } 2581 case IPV6_RTHDR: { 2582 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2583 2584 /* 2585 * Sanity checks - minimum size, size a multiple of 2586 * eight bytes, and matching size passed in. 
2587 */ 2588 if (inlen != 0 && 2589 inlen != (8 * (rt->ip6r_len + 1))) 2590 return (EINVAL); 2591 2592 if (checkonly) 2593 break; 2594 2595 if (inlen == 0) { 2596 if (sticky && 2597 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2598 kmem_free(ipp->ipp_rthdr, 2599 ipp->ipp_rthdrlen); 2600 ipp->ipp_rthdr = NULL; 2601 ipp->ipp_rthdrlen = 0; 2602 } 2603 ipp->ipp_fields &= ~IPPF_RTHDR; 2604 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2605 } else { 2606 error = optcom_pkt_set(invalp, inlen, sticky, 2607 (uchar_t **)&ipp->ipp_rthdr, 2608 &ipp->ipp_rthdrlen, 0); 2609 if (error != 0) 2610 return (error); 2611 ipp->ipp_fields |= IPPF_RTHDR; 2612 } 2613 if (sticky) { 2614 error = icmp_build_hdrs(q, icmp); 2615 if (error != 0) 2616 return (error); 2617 } 2618 break; 2619 } 2620 2621 case IPV6_DONTFRAG: 2622 if (checkonly) 2623 break; 2624 2625 if (onoff) { 2626 ipp->ipp_fields |= IPPF_DONTFRAG; 2627 } else { 2628 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2629 } 2630 break; 2631 2632 case IPV6_USE_MIN_MTU: 2633 if (inlen != sizeof (int)) 2634 return (EINVAL); 2635 2636 if (*i1 < -1 || *i1 > 1) 2637 return (EINVAL); 2638 2639 if (checkonly) 2640 break; 2641 2642 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 2643 ipp->ipp_use_min_mtu = *i1; 2644 break; 2645 2646 /* 2647 * This option can't be set. Its only returned via 2648 * getsockopt() or ancillary data. 2649 */ 2650 case IPV6_PATHMTU: 2651 return (EINVAL); 2652 2653 case IPV6_BOUND_PIF: 2654 case IPV6_SEC_OPT: 2655 case IPV6_DONTFAILOVER_IF: 2656 case IPV6_SRC_PREFERENCES: 2657 case IPV6_V6ONLY: 2658 /* Handled at IP level */ 2659 return (-EINVAL); 2660 default: 2661 *outlenp = 0; 2662 return (EINVAL); 2663 } 2664 break; 2665 } /* end IPPROTO_IPV6 */ 2666 2667 case IPPROTO_ICMPV6: 2668 /* 2669 * Only allow IPv6 option processing on IPv6 sockets. 
2670 */ 2671 if (icmp->icmp_family != AF_INET6) { 2672 *outlenp = 0; 2673 return (ENOPROTOOPT); 2674 } 2675 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2676 *outlenp = 0; 2677 return (ENOPROTOOPT); 2678 } 2679 switch (name) { 2680 case ICMP6_FILTER: 2681 if (!checkonly) { 2682 if ((inlen != 0) && 2683 (inlen != sizeof (icmp6_filter_t))) 2684 return (EINVAL); 2685 2686 if (inlen == 0) { 2687 if (icmp->icmp_filter != NULL) { 2688 kmem_free(icmp->icmp_filter, 2689 sizeof (icmp6_filter_t)); 2690 icmp->icmp_filter = NULL; 2691 } 2692 } else { 2693 if (icmp->icmp_filter == NULL) { 2694 icmp->icmp_filter = kmem_alloc( 2695 sizeof (icmp6_filter_t), 2696 KM_NOSLEEP); 2697 if (icmp->icmp_filter == NULL) { 2698 *outlenp = 0; 2699 return (ENOBUFS); 2700 } 2701 } 2702 (void) bcopy(invalp, icmp->icmp_filter, 2703 inlen); 2704 } 2705 } 2706 break; 2707 2708 default: 2709 *outlenp = 0; 2710 return (EINVAL); 2711 } 2712 break; 2713 default: 2714 *outlenp = 0; 2715 return (EINVAL); 2716 } 2717 /* 2718 * Common case of OK return with outval same as inval. 2719 */ 2720 if (invalp != outvalp) { 2721 /* don't trust bcopy for identical src/dst */ 2722 (void) bcopy(invalp, outvalp, inlen); 2723 } 2724 *outlenp = inlen; 2725 return (0); 2726 } 2727 2728 /* 2729 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 2730 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 2731 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 2732 * headers. 2733 * Returns failure if can't allocate memory. 
2734 */ 2735 static int 2736 icmp_build_hdrs(queue_t *q, icmp_t *icmp) 2737 { 2738 uchar_t *hdrs; 2739 uint_t hdrs_len; 2740 ip6_t *ip6h; 2741 ip6i_t *ip6i; 2742 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 2743 2744 hdrs_len = ip_total_hdrs_len_v6(ipp); 2745 ASSERT(hdrs_len != 0); 2746 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 2747 /* Need to reallocate */ 2748 if (hdrs_len != 0) { 2749 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 2750 if (hdrs == NULL) 2751 return (ENOMEM); 2752 } else { 2753 hdrs = NULL; 2754 } 2755 if (icmp->icmp_sticky_hdrs_len != 0) { 2756 kmem_free(icmp->icmp_sticky_hdrs, 2757 icmp->icmp_sticky_hdrs_len); 2758 } 2759 icmp->icmp_sticky_hdrs = hdrs; 2760 icmp->icmp_sticky_hdrs_len = hdrs_len; 2761 } 2762 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 2763 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 2764 2765 /* Set header fields not in ipp */ 2766 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 2767 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 2768 ip6h = (ip6_t *)&ip6i[1]; 2769 2770 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 2771 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 2772 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 2773 } 2774 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 2775 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 2776 } 2777 } else { 2778 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 2779 } 2780 2781 if (!(ipp->ipp_fields & IPPF_ADDR)) 2782 ip6h->ip6_src = icmp->icmp_v6src; 2783 2784 /* Try to get everything in a single mblk */ 2785 if (hdrs_len > icmp->icmp_max_hdr_len) { 2786 icmp->icmp_max_hdr_len = hdrs_len; 2787 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2788 icmp_wroff_extra); 2789 } 2790 return (0); 2791 } 2792 2793 /* 2794 * This routine retrieves the value of an ND variable in a icmpparam_t 2795 * structure. It is called through nd_getset when a user reads the 2796 * variable. 
2797 */ 2798 /* ARGSUSED */ 2799 static int 2800 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2801 { 2802 icmpparam_t *icmppa = (icmpparam_t *)cp; 2803 2804 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 2805 return (0); 2806 } 2807 2808 /* 2809 * Walk through the param array specified registering each element with the 2810 * named dispatch (ND) handler. 2811 */ 2812 static boolean_t 2813 icmp_param_register(icmpparam_t *icmppa, int cnt) 2814 { 2815 for (; cnt-- > 0; icmppa++) { 2816 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 2817 if (!nd_load(&icmp_g_nd, icmppa->icmp_param_name, 2818 icmp_param_get, icmp_param_set, 2819 (caddr_t)icmppa)) { 2820 nd_free(&icmp_g_nd); 2821 return (B_FALSE); 2822 } 2823 } 2824 } 2825 if (!nd_load(&icmp_g_nd, "icmp_status", icmp_status_report, NULL, 2826 NULL)) { 2827 nd_free(&icmp_g_nd); 2828 return (B_FALSE); 2829 } 2830 return (B_TRUE); 2831 } 2832 2833 /* This routine sets an ND variable in a icmpparam_t structure. */ 2834 /* ARGSUSED */ 2835 static int 2836 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 2837 { 2838 long new_value; 2839 icmpparam_t *icmppa = (icmpparam_t *)cp; 2840 2841 /* 2842 * Fail the request if the new value does not lie within the 2843 * required bounds. 
2844 */ 2845 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2846 new_value < icmppa->icmp_param_min || 2847 new_value > icmppa->icmp_param_max) { 2848 return (EINVAL); 2849 } 2850 /* Set the new value */ 2851 icmppa->icmp_param_value = new_value; 2852 return (0); 2853 } 2854 2855 static void 2856 icmp_rput(queue_t *q, mblk_t *mp) 2857 { 2858 struct T_unitdata_ind *tudi; 2859 uchar_t *rptr; 2860 struct T_error_ack *tea; 2861 icmp_t *icmp; 2862 sin_t *sin; 2863 sin6_t *sin6; 2864 ip6_t *ip6h; 2865 ip6i_t *ip6i; 2866 mblk_t *mp1; 2867 int hdr_len; 2868 ipha_t *ipha; 2869 int udi_size; /* Size of T_unitdata_ind */ 2870 uint_t ipvers; 2871 ip6_pkt_t ipp; 2872 uint8_t nexthdr; 2873 boolean_t recvif = B_FALSE; 2874 in_pktinfo_t *pinfo; 2875 mblk_t *options_mp = NULL; 2876 uint_t icmp_opt = 0; 2877 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 2878 uint_t hopstrip; 2879 2880 icmp = (icmp_t *)q->q_ptr; 2881 if (icmp->icmp_restricted) { 2882 putnext(q, mp); 2883 return; 2884 } 2885 2886 if (mp->b_datap->db_type == M_CTL) { 2887 /* 2888 * IP sends up the IPSEC_IN message for handling IPSEC 2889 * policy at the TCP level. We don't need it here. 2890 */ 2891 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 2892 mp1 = mp->b_cont; 2893 freeb(mp); 2894 mp = mp1; 2895 } else { 2896 pinfo = (in_pktinfo_t *)mp->b_rptr; 2897 if ((icmp->icmp_recvif != 0) && 2898 (pinfo->in_pkt_ulp_type == IN_PKTINFO)) { 2899 /* 2900 * IP has passed the options in mp and the 2901 * actual data is in b_cont. 2902 */ 2903 recvif = B_TRUE; 2904 /* 2905 * We are here bcos IP_RECVIF is set so we need 2906 * to extract the options mblk and adjust the 2907 * rptr 2908 */ 2909 options_mp = mp; 2910 mp = mp->b_cont; 2911 } 2912 } 2913 } 2914 2915 rptr = mp->b_rptr; 2916 switch (mp->b_datap->db_type) { 2917 case M_DATA: 2918 /* 2919 * M_DATA messages contain IP packets. They are handled 2920 * following the switch. 
2921 */ 2922 break; 2923 case M_PROTO: 2924 case M_PCPROTO: 2925 /* M_PROTO messages contain some type of TPI message. */ 2926 if ((mp->b_wptr - rptr) < sizeof (t_scalar_t)) { 2927 freemsg(mp); 2928 return; 2929 } 2930 tea = (struct T_error_ack *)rptr; 2931 switch (tea->PRIM_type) { 2932 case T_ERROR_ACK: 2933 switch (tea->ERROR_prim) { 2934 case O_T_BIND_REQ: 2935 case T_BIND_REQ: 2936 /* 2937 * If our O_T_BIND_REQ/T_BIND_REQ fails, 2938 * clear out the source address before 2939 * passing the message upstream. 2940 * If this was caused by a T_CONN_REQ 2941 * revert back to bound state. 2942 */ 2943 if (icmp->icmp_state == TS_UNBND) { 2944 /* 2945 * TPI has not yet bound - bind sent by 2946 * icmp_bind_proto. 2947 */ 2948 freemsg(mp); 2949 return; 2950 } 2951 if (icmp->icmp_state == TS_DATA_XFER) { 2952 /* Connect failed */ 2953 tea->ERROR_prim = T_CONN_REQ; 2954 icmp->icmp_v6src = 2955 icmp->icmp_bound_v6src; 2956 icmp->icmp_state = TS_IDLE; 2957 if (icmp->icmp_family == AF_INET6) 2958 (void) icmp_build_hdrs(q, icmp); 2959 break; 2960 } 2961 2962 if (icmp->icmp_discon_pending) { 2963 tea->ERROR_prim = T_DISCON_REQ; 2964 icmp->icmp_discon_pending = 0; 2965 } 2966 V6_SET_ZERO(icmp->icmp_v6src); 2967 V6_SET_ZERO(icmp->icmp_bound_v6src); 2968 icmp->icmp_state = TS_UNBND; 2969 if (icmp->icmp_family == AF_INET6) 2970 (void) icmp_build_hdrs(q, icmp); 2971 break; 2972 default: 2973 break; 2974 } 2975 break; 2976 case T_BIND_ACK: 2977 icmp_rput_bind_ack(q, mp); 2978 return; 2979 2980 case T_OPTMGMT_ACK: 2981 case T_OK_ACK: 2982 if (tea->PRIM_type == T_OK_ACK) { 2983 struct T_ok_ack *toa; 2984 toa = (struct T_ok_ack *)rptr; 2985 if (toa->CORRECT_prim == T_UNBIND_REQ) { 2986 /* 2987 * If somebody sets IPSEC options, IP 2988 * sends some IPSEC info which is used 2989 * by the TCP for detached connections. 2990 * We don't need it here. 
2991 */ 2992 if ((mp1 = mp->b_cont) != NULL) { 2993 freemsg(mp1); 2994 mp->b_cont = NULL; 2995 } 2996 } 2997 } 2998 break; 2999 default: 3000 freemsg(mp); 3001 return; 3002 } 3003 putnext(q, mp); 3004 return; 3005 case M_CTL: 3006 if (recvif) { 3007 /* 3008 * IP has passed the options in mp and the actual data 3009 * is in b_cont. Jump to normal data processing. 3010 */ 3011 break; 3012 } 3013 3014 /* Contains ICMP packet from IP */ 3015 icmp_icmp_error(q, mp); 3016 return; 3017 default: 3018 putnext(q, mp); 3019 return; 3020 } 3021 3022 /* 3023 * Discard message if it is misaligned or smaller than the IP header. 3024 */ 3025 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3026 freemsg(mp); 3027 if (options_mp != NULL) 3028 freeb(options_mp); 3029 BUMP_MIB(&rawip_mib, rawipInErrors); 3030 return; 3031 } 3032 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3033 3034 /* Handle M_DATA messages containing IP packets messages */ 3035 if (ipvers == IPV4_VERSION) { 3036 /* 3037 * Special case where IP attaches 3038 * the IRE needs to be handled so that we don't send up 3039 * IRE to the user land. 3040 */ 3041 ipha = (ipha_t *)rptr; 3042 hdr_len = IPH_HDR_LENGTH(ipha); 3043 3044 if (ipha->ipha_protocol == IPPROTO_TCP) { 3045 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3046 3047 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3048 TH_SYN) && mp->b_cont != NULL) { 3049 mp1 = mp->b_cont; 3050 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3051 freeb(mp1); 3052 mp->b_cont = NULL; 3053 } 3054 } 3055 } 3056 if (icmp_bsd_compat) { 3057 ushort_t len; 3058 len = ntohs(ipha->ipha_length); 3059 3060 if (mp->b_datap->db_ref > 1) { 3061 /* 3062 * Allocate a new IP header so that we can 3063 * modify ipha_length. 
3064 */ 3065 mblk_t *mp1; 3066 3067 mp1 = allocb(hdr_len, BPRI_MED); 3068 if (!mp1) { 3069 freemsg(mp); 3070 if (options_mp != NULL) 3071 freeb(options_mp); 3072 BUMP_MIB(&rawip_mib, rawipInErrors); 3073 return; 3074 } 3075 bcopy(rptr, mp1->b_rptr, hdr_len); 3076 mp->b_rptr = rptr + hdr_len; 3077 rptr = mp1->b_rptr; 3078 ipha = (ipha_t *)rptr; 3079 mp1->b_cont = mp; 3080 mp1->b_wptr = rptr + hdr_len; 3081 mp = mp1; 3082 } 3083 len -= hdr_len; 3084 ipha->ipha_length = htons(len); 3085 } 3086 } 3087 3088 /* 3089 * This is the inbound data path. Packets are passed upstream as 3090 * T_UNITDATA_IND messages with full IP headers still attached. 3091 */ 3092 if (icmp->icmp_family == AF_INET) { 3093 ASSERT(ipvers == IPV4_VERSION); 3094 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3095 if (recvif) { 3096 udi_size += sizeof (struct T_opthdr) + 3097 sizeof (uint_t); 3098 } 3099 /* 3100 * If SO_TIMESTAMP is set allocate the appropriate sized 3101 * buffer. Since gethrestime() expects a pointer aligned 3102 * argument, we allocate space necessary for extra 3103 * alignment (even though it might not be used). 
3104 */ 3105 if (icmp->icmp_timestamp) { 3106 udi_size += sizeof (struct T_opthdr) + 3107 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3108 } 3109 mp1 = allocb(udi_size, BPRI_MED); 3110 if (mp1 == NULL) { 3111 freemsg(mp); 3112 if (options_mp != NULL) 3113 freeb(options_mp); 3114 BUMP_MIB(&rawip_mib, rawipInErrors); 3115 return; 3116 } 3117 mp1->b_cont = mp; 3118 mp = mp1; 3119 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3120 mp->b_datap->db_type = M_PROTO; 3121 mp->b_wptr = (uchar_t *)tudi + udi_size; 3122 tudi->PRIM_type = T_UNITDATA_IND; 3123 tudi->SRC_length = sizeof (sin_t); 3124 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3125 sin = (sin_t *)&tudi[1]; 3126 *sin = sin_null; 3127 sin->sin_family = AF_INET; 3128 sin->sin_addr.s_addr = ipha->ipha_src; 3129 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3130 sizeof (sin_t); 3131 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3132 tudi->OPT_length = udi_size; 3133 3134 /* 3135 * Add options if IP_RECVIF is set 3136 */ 3137 if (udi_size != 0) { 3138 char *dstopt; 3139 3140 dstopt = (char *)&sin[1]; 3141 if (recvif) { 3142 3143 struct T_opthdr *toh; 3144 uint_t *dstptr; 3145 3146 toh = (struct T_opthdr *)dstopt; 3147 toh->level = IPPROTO_IP; 3148 toh->name = IP_RECVIF; 3149 toh->len = sizeof (struct T_opthdr) + 3150 sizeof (uint_t); 3151 toh->status = 0; 3152 dstopt += sizeof (struct T_opthdr); 3153 dstptr = (uint_t *)dstopt; 3154 *dstptr = pinfo->in_pkt_ifindex; 3155 dstopt += sizeof (uint_t); 3156 freeb(options_mp); 3157 udi_size -= toh->len; 3158 } 3159 if (icmp->icmp_timestamp) { 3160 struct T_opthdr *toh; 3161 3162 toh = (struct T_opthdr *)dstopt; 3163 toh->level = SOL_SOCKET; 3164 toh->name = SCM_TIMESTAMP; 3165 toh->len = sizeof (struct T_opthdr) + 3166 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3167 toh->status = 0; 3168 dstopt += sizeof (struct T_opthdr); 3169 /* Align for gethrestime() */ 3170 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 3171 sizeof (intptr_t)); 3172 
gethrestime((timestruc_t *)dstopt); 3173 dstopt += sizeof (timestruc_t); 3174 udi_size -= toh->len; 3175 } 3176 3177 /* Consumed all of allocated space */ 3178 ASSERT(udi_size == 0); 3179 } 3180 3181 BUMP_MIB(&rawip_mib, rawipInDatagrams); 3182 putnext(q, mp); 3183 return; 3184 } 3185 3186 /* 3187 * We don't need options_mp in the IPv6 path. 3188 */ 3189 if (options_mp != NULL) { 3190 freeb(options_mp); 3191 options_mp = NULL; 3192 } 3193 3194 /* 3195 * Discard message if it is smaller than the IPv6 header 3196 * or if the header is malformed. 3197 */ 3198 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3199 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3200 icmp->icmp_family != AF_INET6) { 3201 freemsg(mp); 3202 BUMP_MIB(&rawip_mib, rawipInErrors); 3203 return; 3204 } 3205 3206 /* Initialize */ 3207 ipp.ipp_fields = 0; 3208 hopstrip = 0; 3209 3210 ip6h = (ip6_t *)rptr; 3211 /* 3212 * Call on ip_find_hdr_v6 which gets the total hdr len 3213 * as well as individual lenghts of ext hdrs (and ptrs to 3214 * them). 3215 */ 3216 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3217 /* Look for ifindex information */ 3218 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3219 ip6i = (ip6i_t *)ip6h; 3220 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3221 ASSERT(ip6i->ip6i_ifindex != 0); 3222 ipp.ipp_fields |= IPPF_IFINDEX; 3223 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3224 } 3225 rptr = (uchar_t *)&ip6i[1]; 3226 mp->b_rptr = rptr; 3227 if (rptr == mp->b_wptr) { 3228 mp1 = mp->b_cont; 3229 freeb(mp); 3230 mp = mp1; 3231 rptr = mp->b_rptr; 3232 } 3233 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3234 ip6h = (ip6_t *)rptr; 3235 } 3236 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3237 3238 /* 3239 * We need to lie a bit to the user because users inside 3240 * labeled compartments should not see their own labels. We 3241 * assume that in all other respects IP has checked the label, 3242 * and that the label is always first among the options. 
(If 3243 * it's not first, then this code won't see it, and the option 3244 * will be passed along to the user.) 3245 * 3246 * If we had multilevel ICMP sockets, then the following code 3247 * should be skipped for them to allow the user to see the 3248 * label. 3249 * 3250 * Alignment restrictions in the definition of IP options 3251 * (namely, the requirement that the 4-octet DOI goes on a 3252 * 4-octet boundary) mean that we know exactly where the option 3253 * should start, but we're lenient for other hosts. 3254 * 3255 * Note that there are no multilevel ICMP or raw IP sockets 3256 * yet, thus nobody ever sees the IP6OPT_LS option. 3257 */ 3258 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3259 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3260 const uchar_t *ucp = 3261 (const uchar_t *)ipp.ipp_hopopts + 2; 3262 int remlen = ipp.ipp_hopoptslen - 2; 3263 3264 while (remlen > 0) { 3265 if (*ucp == IP6OPT_PAD1) { 3266 remlen--; 3267 ucp++; 3268 } else if (*ucp == IP6OPT_PADN) { 3269 remlen -= ucp[1] + 2; 3270 ucp += ucp[1] + 2; 3271 } else if (*ucp == ip6opt_ls) { 3272 hopstrip = (ucp - 3273 (const uchar_t *)ipp.ipp_hopopts) + 3274 ucp[1] + 2; 3275 hopstrip = (hopstrip + 7) & ~7; 3276 break; 3277 } else { 3278 /* label option must be first */ 3279 break; 3280 } 3281 } 3282 } 3283 } else { 3284 hdr_len = IPV6_HDR_LEN; 3285 ip6i = NULL; 3286 nexthdr = ip6h->ip6_nxt; 3287 } 3288 /* 3289 * One special case where IP attaches the IRE needs to 3290 * be handled so that we don't send up IRE to the user land. 3291 */ 3292 if (nexthdr == IPPROTO_TCP) { 3293 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3294 3295 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3296 mp->b_cont != NULL) { 3297 mp1 = mp->b_cont; 3298 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3299 freeb(mp1); 3300 mp->b_cont = NULL; 3301 } 3302 } 3303 } 3304 /* 3305 * Check a filter for ICMPv6 types if needed. 3306 * Verify raw checksums if needed. 
3307 */ 3308 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3309 if (icmp->icmp_filter != NULL) { 3310 int type; 3311 3312 /* Assumes that IP has done the pullupmsg */ 3313 type = mp->b_rptr[hdr_len]; 3314 3315 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3316 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3317 freemsg(mp); 3318 return; 3319 } 3320 } else { 3321 /* Checksum */ 3322 uint16_t *up; 3323 uint32_t sum; 3324 int remlen; 3325 3326 up = (uint16_t *)&ip6h->ip6_src; 3327 3328 remlen = msgdsize(mp) - hdr_len; 3329 sum = htons(icmp->icmp_proto + remlen) 3330 + up[0] + up[1] + up[2] + up[3] 3331 + up[4] + up[5] + up[6] + up[7] 3332 + up[8] + up[9] + up[10] + up[11] 3333 + up[12] + up[13] + up[14] + up[15]; 3334 sum = (sum & 0xffff) + (sum >> 16); 3335 sum = IP_CSUM(mp, hdr_len, sum); 3336 if (sum != 0) { 3337 /* IPv6 RAW checksum failed */ 3338 ip0dbg(("icmp_rput: RAW checksum " 3339 "failed %x\n", sum)); 3340 freemsg(mp); 3341 BUMP_MIB(&rawip_mib, rawipInCksumErrs); 3342 return; 3343 } 3344 } 3345 } 3346 /* Skip all the IPv6 headers per API */ 3347 mp->b_rptr += hdr_len; 3348 3349 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3350 3351 /* 3352 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3353 * maintain state information, instead of relying on icmp_t 3354 * structure, since there arent any locks protecting these members 3355 * and there is a window where there might be a race between a 3356 * thread setting options on the write side and a thread reading 3357 * these options on the read size. 
3358 */ 3359 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3360 IPPF_RTHDR|IPPF_IFINDEX)) { 3361 if (icmp->icmp_ipv6_recvhopopts && 3362 (ipp.ipp_fields & IPPF_HOPOPTS) && 3363 ipp.ipp_hopoptslen > hopstrip) { 3364 udi_size += sizeof (struct T_opthdr) + 3365 ipp.ipp_hopoptslen - hopstrip; 3366 icmp_opt |= IPPF_HOPOPTS; 3367 } 3368 if ((icmp->icmp_ipv6_recvdstopts || 3369 icmp->icmp_old_ipv6_recvdstopts) && 3370 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3371 udi_size += sizeof (struct T_opthdr) + 3372 ipp.ipp_dstoptslen; 3373 icmp_opt |= IPPF_DSTOPTS; 3374 } 3375 if (((icmp->icmp_ipv6_recvdstopts && 3376 icmp->icmp_ipv6_recvrthdr && 3377 (ipp.ipp_fields & IPPF_RTHDR)) || 3378 icmp->icmp_ipv6_recvrtdstopts) && 3379 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3380 udi_size += sizeof (struct T_opthdr) + 3381 ipp.ipp_rtdstoptslen; 3382 icmp_opt |= IPPF_RTDSTOPTS; 3383 } 3384 if (icmp->icmp_ipv6_recvrthdr && 3385 (ipp.ipp_fields & IPPF_RTHDR)) { 3386 udi_size += sizeof (struct T_opthdr) + 3387 ipp.ipp_rthdrlen; 3388 icmp_opt |= IPPF_RTHDR; 3389 } 3390 if (icmp->icmp_ipv6_recvpktinfo && 3391 (ipp.ipp_fields & IPPF_IFINDEX)) { 3392 udi_size += sizeof (struct T_opthdr) + 3393 sizeof (struct in6_pktinfo); 3394 icmp_opt |= IPPF_IFINDEX; 3395 } 3396 } 3397 if (icmp->icmp_ipv6_recvhoplimit) { 3398 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3399 icmp_ipv6_recvhoplimit = B_TRUE; 3400 } 3401 3402 if (icmp->icmp_ipv6_recvtclass) 3403 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3404 3405 mp1 = allocb(udi_size, BPRI_MED); 3406 if (mp1 == NULL) { 3407 freemsg(mp); 3408 BUMP_MIB(&rawip_mib, rawipInErrors); 3409 return; 3410 } 3411 mp1->b_cont = mp; 3412 mp = mp1; 3413 mp->b_datap->db_type = M_PROTO; 3414 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3415 mp->b_wptr = (uchar_t *)tudi + udi_size; 3416 tudi->PRIM_type = T_UNITDATA_IND; 3417 tudi->SRC_length = sizeof (sin6_t); 3418 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3419 tudi->OPT_offset = sizeof 
(struct T_unitdata_ind) + sizeof (sin6_t); 3420 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3421 tudi->OPT_length = udi_size; 3422 sin6 = (sin6_t *)&tudi[1]; 3423 sin6->sin6_port = 0; 3424 sin6->sin6_family = AF_INET6; 3425 3426 sin6->sin6_addr = ip6h->ip6_src; 3427 /* No sin6_flowinfo per API */ 3428 sin6->sin6_flowinfo = 0; 3429 /* For link-scope source pass up scope id */ 3430 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3431 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3432 sin6->sin6_scope_id = ipp.ipp_ifindex; 3433 else 3434 sin6->sin6_scope_id = 0; 3435 3436 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3437 icmp->icmp_zoneid); 3438 3439 if (udi_size != 0) { 3440 uchar_t *dstopt; 3441 3442 dstopt = (uchar_t *)&sin6[1]; 3443 if (icmp_opt & IPPF_IFINDEX) { 3444 struct T_opthdr *toh; 3445 struct in6_pktinfo *pkti; 3446 3447 toh = (struct T_opthdr *)dstopt; 3448 toh->level = IPPROTO_IPV6; 3449 toh->name = IPV6_PKTINFO; 3450 toh->len = sizeof (struct T_opthdr) + 3451 sizeof (*pkti); 3452 toh->status = 0; 3453 dstopt += sizeof (struct T_opthdr); 3454 pkti = (struct in6_pktinfo *)dstopt; 3455 pkti->ipi6_addr = ip6h->ip6_dst; 3456 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3457 dstopt += sizeof (*pkti); 3458 udi_size -= toh->len; 3459 } 3460 if (icmp_ipv6_recvhoplimit) { 3461 struct T_opthdr *toh; 3462 3463 toh = (struct T_opthdr *)dstopt; 3464 toh->level = IPPROTO_IPV6; 3465 toh->name = IPV6_HOPLIMIT; 3466 toh->len = sizeof (struct T_opthdr) + 3467 sizeof (uint_t); 3468 toh->status = 0; 3469 dstopt += sizeof (struct T_opthdr); 3470 *(uint_t *)dstopt = ip6h->ip6_hops; 3471 dstopt += sizeof (uint_t); 3472 udi_size -= toh->len; 3473 } 3474 if (icmp->icmp_ipv6_recvtclass) { 3475 struct T_opthdr *toh; 3476 3477 toh = (struct T_opthdr *)dstopt; 3478 toh->level = IPPROTO_IPV6; 3479 toh->name = IPV6_TCLASS; 3480 toh->len = sizeof (struct T_opthdr) + 3481 sizeof (uint_t); 3482 toh->status = 0; 3483 dstopt += sizeof (struct T_opthdr); 3484 *(uint_t *)dstopt = 
IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3485 dstopt += sizeof (uint_t); 3486 udi_size -= toh->len; 3487 } 3488 if (icmp_opt & IPPF_HOPOPTS) { 3489 struct T_opthdr *toh; 3490 3491 toh = (struct T_opthdr *)dstopt; 3492 toh->level = IPPROTO_IPV6; 3493 toh->name = IPV6_HOPOPTS; 3494 toh->len = sizeof (struct T_opthdr) + 3495 ipp.ipp_hopoptslen - hopstrip; 3496 toh->status = 0; 3497 dstopt += sizeof (struct T_opthdr); 3498 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 3499 ipp.ipp_hopoptslen - hopstrip); 3500 if (hopstrip > 0) { 3501 /* copy next header value and fake length */ 3502 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 3503 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 3504 hopstrip / 8; 3505 } 3506 dstopt += ipp.ipp_hopoptslen - hopstrip; 3507 udi_size -= toh->len; 3508 } 3509 if (icmp_opt & IPPF_RTDSTOPTS) { 3510 struct T_opthdr *toh; 3511 3512 toh = (struct T_opthdr *)dstopt; 3513 toh->level = IPPROTO_IPV6; 3514 toh->name = IPV6_DSTOPTS; 3515 toh->len = sizeof (struct T_opthdr) + 3516 ipp.ipp_rtdstoptslen; 3517 toh->status = 0; 3518 dstopt += sizeof (struct T_opthdr); 3519 bcopy(ipp.ipp_rtdstopts, dstopt, 3520 ipp.ipp_rtdstoptslen); 3521 dstopt += ipp.ipp_rtdstoptslen; 3522 udi_size -= toh->len; 3523 } 3524 if (icmp_opt & IPPF_RTHDR) { 3525 struct T_opthdr *toh; 3526 3527 toh = (struct T_opthdr *)dstopt; 3528 toh->level = IPPROTO_IPV6; 3529 toh->name = IPV6_RTHDR; 3530 toh->len = sizeof (struct T_opthdr) + 3531 ipp.ipp_rthdrlen; 3532 toh->status = 0; 3533 dstopt += sizeof (struct T_opthdr); 3534 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 3535 dstopt += ipp.ipp_rthdrlen; 3536 udi_size -= toh->len; 3537 } 3538 if (icmp_opt & IPPF_DSTOPTS) { 3539 struct T_opthdr *toh; 3540 3541 toh = (struct T_opthdr *)dstopt; 3542 toh->level = IPPROTO_IPV6; 3543 toh->name = IPV6_DSTOPTS; 3544 toh->len = sizeof (struct T_opthdr) + 3545 ipp.ipp_dstoptslen; 3546 toh->status = 0; 3547 dstopt += sizeof (struct T_opthdr); 3548 bcopy(ipp.ipp_dstopts, dstopt, 3549 
ipp.ipp_dstoptslen); 3550 dstopt += ipp.ipp_dstoptslen; 3551 udi_size -= toh->len; 3552 } 3553 /* Consumed all of allocated space */ 3554 ASSERT(udi_size == 0); 3555 } 3556 BUMP_MIB(&rawip_mib, rawipInDatagrams); 3557 putnext(q, mp); 3558 } 3559 3560 /* 3561 * Process a T_BIND_ACK 3562 */ 3563 static void 3564 icmp_rput_bind_ack(queue_t *q, mblk_t *mp) 3565 { 3566 icmp_t *icmp = (icmp_t *)q->q_ptr; 3567 mblk_t *mp1; 3568 ire_t *ire; 3569 struct T_bind_ack *tba; 3570 uchar_t *addrp; 3571 ipa_conn_t *ac; 3572 ipa6_conn_t *ac6; 3573 3574 /* 3575 * We know if headers are included or not so we can 3576 * safely do this. 3577 */ 3578 if (icmp->icmp_state == TS_UNBND) { 3579 /* 3580 * TPI has not yet bound - bind sent by 3581 * icmp_bind_proto. 3582 */ 3583 freemsg(mp); 3584 return; 3585 } 3586 if (icmp->icmp_discon_pending) 3587 icmp->icmp_discon_pending = 0; 3588 3589 /* 3590 * If a broadcast/multicast address was bound set 3591 * the source address to 0. 3592 * This ensures no datagrams with broadcast address 3593 * as source address are emitted (which would violate 3594 * RFC1122 - Hosts requirements) 3595 * 3596 * Note that when connecting the returned IRE is 3597 * for the destination address and we only perform 3598 * the broadcast check for the source address (it 3599 * is OK to connect to a broadcast/multicast address.) 3600 */ 3601 mp1 = mp->b_cont; 3602 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 3603 ire = (ire_t *)mp1->b_rptr; 3604 3605 /* 3606 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 3607 * local address. 
3608 */ 3609 if (ire->ire_type == IRE_BROADCAST && 3610 icmp->icmp_state != TS_DATA_XFER) { 3611 /* This was just a local bind to a MC/broadcast addr */ 3612 V6_SET_ZERO(icmp->icmp_v6src); 3613 if (icmp->icmp_family == AF_INET6) 3614 (void) icmp_build_hdrs(q, icmp); 3615 } else if (V6_OR_V4_INADDR_ANY(icmp->icmp_v6src)) { 3616 /* 3617 * Local address not yet set - pick it from the 3618 * T_bind_ack 3619 */ 3620 tba = (struct T_bind_ack *)mp->b_rptr; 3621 addrp = &mp->b_rptr[tba->ADDR_offset]; 3622 switch (icmp->icmp_family) { 3623 case AF_INET: 3624 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 3625 ac = (ipa_conn_t *)addrp; 3626 } else { 3627 ASSERT(tba->ADDR_length == 3628 sizeof (ipa_conn_x_t)); 3629 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 3630 } 3631 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 3632 &icmp->icmp_v6src); 3633 break; 3634 case AF_INET6: 3635 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 3636 ac6 = (ipa6_conn_t *)addrp; 3637 } else { 3638 ASSERT(tba->ADDR_length == 3639 sizeof (ipa6_conn_x_t)); 3640 ac6 = &((ipa6_conn_x_t *) 3641 addrp)->ac6x_conn; 3642 } 3643 icmp->icmp_v6src = ac6->ac6_laddr; 3644 (void) icmp_build_hdrs(q, icmp); 3645 } 3646 } 3647 mp1 = mp1->b_cont; 3648 } 3649 /* 3650 * Look for one or more appended ACK message added by 3651 * icmp_connect or icmp_disconnect. 3652 * If none found just send up the T_BIND_ACK. 3653 * icmp_connect has appended a T_OK_ACK and a 3654 * T_CONN_CON. 3655 * icmp_disconnect has appended a T_OK_ACK. 
3656 */ 3657 if (mp1 != NULL) { 3658 if (mp->b_cont == mp1) 3659 mp->b_cont = NULL; 3660 else { 3661 ASSERT(mp->b_cont->b_cont == mp1); 3662 mp->b_cont->b_cont = NULL; 3663 } 3664 freemsg(mp); 3665 mp = mp1; 3666 while (mp != NULL) { 3667 mp1 = mp->b_cont; 3668 mp->b_cont = NULL; 3669 putnext(q, mp); 3670 mp = mp1; 3671 } 3672 return; 3673 } 3674 freemsg(mp->b_cont); 3675 mp->b_cont = NULL; 3676 putnext(q, mp); 3677 } 3678 3679 /* 3680 * return SNMP stuff in buffer in mpdata 3681 */ 3682 static int 3683 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 3684 { 3685 mblk_t *mpdata; 3686 struct opthdr *optp; 3687 3688 if (mpctl == NULL || 3689 (mpdata = mpctl->b_cont) == NULL) { 3690 return (0); 3691 } 3692 3693 /* fixed length structure for IPv4 and IPv6 counters */ 3694 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 3695 optp->level = EXPER_RAWIP; 3696 optp->name = 0; 3697 (void) snmp_append_data(mpdata, (char *)&rawip_mib, sizeof (rawip_mib)); 3698 optp->len = msgdsize(mpdata); 3699 qreply(q, mpctl); 3700 3701 return (1); 3702 } 3703 3704 /* 3705 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 3706 * TODO: If this ever actually tries to set anything, it needs to be 3707 * to do the appropriate locking. 
3708 */ 3709 /* ARGSUSED */ 3710 static int 3711 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 3712 uchar_t *ptr, int len) 3713 { 3714 switch (level) { 3715 case EXPER_RAWIP: 3716 return (0); 3717 default: 3718 return (1); 3719 } 3720 } 3721 3722 /* Report for ndd "icmp_status" */ 3723 /* ARGSUSED */ 3724 static int 3725 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3726 { 3727 IDP idp; 3728 icmp_t *icmp; 3729 char *state; 3730 char laddrbuf[INET6_ADDRSTRLEN]; 3731 char faddrbuf[INET6_ADDRSTRLEN]; 3732 3733 (void) mi_mpprintf(mp, 3734 "RAWIP " MI_COL_HDRPAD_STR 3735 /* 01234567[89ABCDEF] */ 3736 " src addr dest addr state"); 3737 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 3738 3739 3740 for (idp = mi_first_ptr(&icmp_g_head); 3741 (icmp = (icmp_t *)idp) != NULL; 3742 idp = mi_next_ptr(&icmp_g_head, idp)) { 3743 if (icmp->icmp_state == TS_UNBND) 3744 state = "UNBOUND"; 3745 else if (icmp->icmp_state == TS_IDLE) 3746 state = "IDLE"; 3747 else if (icmp->icmp_state == TS_DATA_XFER) 3748 state = "CONNECTED"; 3749 else 3750 state = "UnkState"; 3751 3752 (void) mi_mpprintf(mp, 3753 MI_COL_PTRFMT_STR "%s %s %s", 3754 (void *)icmp, 3755 inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf, 3756 sizeof (faddrbuf)), 3757 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 3758 sizeof (laddrbuf)), 3759 state); 3760 } 3761 return (0); 3762 } 3763 3764 /* 3765 * This routine creates a T_UDERROR_IND message and passes it upstream. 3766 * The address and options are copied from the T_UNITDATA_REQ message 3767 * passed in mp. This message is freed. 
3768 */ 3769 static void 3770 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 3771 { 3772 mblk_t *mp1; 3773 uchar_t *rptr = mp->b_rptr; 3774 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 3775 3776 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 3777 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 3778 tudr->OPT_length, err); 3779 if (mp1) 3780 qreply(q, mp1); 3781 freemsg(mp); 3782 } 3783 3784 /* 3785 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 3786 * After some error checking, the message is passed downstream to ip. 3787 */ 3788 static void 3789 icmp_unbind(queue_t *q, mblk_t *mp) 3790 { 3791 icmp_t *icmp = (icmp_t *)q->q_ptr; 3792 3793 /* If a bind has not been done, we can't unbind. */ 3794 if (icmp->icmp_state == TS_UNBND) { 3795 icmp_err_ack(q, mp, TOUTSTATE, 0); 3796 return; 3797 } 3798 V6_SET_ZERO(icmp->icmp_v6src); 3799 V6_SET_ZERO(icmp->icmp_bound_v6src); 3800 icmp->icmp_state = TS_UNBND; 3801 3802 if (icmp->icmp_family == AF_INET6) { 3803 int error; 3804 3805 /* Rebuild the header template */ 3806 error = icmp_build_hdrs(q, icmp); 3807 if (error != 0) { 3808 icmp_err_ack(q, mp, TSYSERR, error); 3809 return; 3810 } 3811 } 3812 /* Pass the unbind to IP. */ 3813 putnext(q, mp); 3814 } 3815 3816 /* 3817 * Process IPv4 packets that already include an IP header. 3818 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 3819 * IPPROTO_IGMP). 
3820 */ 3821 static void 3822 icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp) 3823 { 3824 ipha_t *ipha; 3825 int ip_hdr_length; 3826 int tp_hdr_len; 3827 mblk_t *mp1; 3828 uint_t pkt_len; 3829 3830 ipha = (ipha_t *)mp->b_rptr; 3831 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 3832 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 3833 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 3834 BUMP_MIB(&rawip_mib, rawipOutErrors); 3835 freemsg(mp); 3836 return; 3837 } 3838 ipha = (ipha_t *)mp->b_rptr; 3839 } 3840 ipha->ipha_version_and_hdr_length = 3841 (IP_VERSION<<4) | (ip_hdr_length>>2); 3842 3843 /* 3844 * For the socket of SOCK_RAW type, the checksum is provided in the 3845 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 3846 * tell IP that the application has sent a complete IP header and not 3847 * to compute the transport checksum nor change the DF flag. 3848 */ 3849 ipha->ipha_ident = IP_HDR_INCLUDED; 3850 ipha->ipha_hdr_checksum = 0; 3851 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 3852 /* Insert options if any */ 3853 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 3854 /* 3855 * Put the IP header plus any transport header that is 3856 * checksumed by ip_wput into the first mblk. (ip_wput assumes 3857 * that at least the checksum field is in the first mblk.) 3858 */ 3859 switch (ipha->ipha_protocol) { 3860 case IPPROTO_UDP: 3861 tp_hdr_len = 8; 3862 break; 3863 case IPPROTO_TCP: 3864 tp_hdr_len = 20; 3865 break; 3866 default: 3867 tp_hdr_len = 0; 3868 break; 3869 } 3870 /* 3871 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 3872 * tp_hdr_len bytes will be in a single mblk. 
3873 */ 3874 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 3875 tp_hdr_len)) { 3876 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 3877 tp_hdr_len)) { 3878 BUMP_MIB(&rawip_mib, rawipOutErrors); 3879 freemsg(mp); 3880 return; 3881 } 3882 ipha = (ipha_t *)mp->b_rptr; 3883 } 3884 3885 /* 3886 * if the length is larger then the max allowed IP packet, 3887 * then send an error and abort the processing. 3888 */ 3889 pkt_len = ntohs(ipha->ipha_length) 3890 + icmp->icmp_ip_snd_options_len; 3891 if (pkt_len > IP_MAXPACKET) { 3892 icmp_ud_err(q, mp, EMSGSIZE); 3893 return; 3894 } 3895 if (!(mp1 = allocb(ip_hdr_length + icmp_wroff_extra + 3896 tp_hdr_len, BPRI_LO))) { 3897 icmp_ud_err(q, mp, ENOMEM); 3898 return; 3899 } 3900 mp1->b_rptr += icmp_wroff_extra; 3901 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 3902 3903 ipha->ipha_length = htons((uint16_t)pkt_len); 3904 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 3905 3906 /* Copy transport header if any */ 3907 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 3908 mp1->b_wptr += tp_hdr_len; 3909 3910 /* Add options */ 3911 ipha = (ipha_t *)mp1->b_rptr; 3912 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 3913 icmp->icmp_ip_snd_options_len); 3914 3915 /* Drop IP header and transport header from original */ 3916 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 3917 3918 mp1->b_cont = mp; 3919 mp = mp1; 3920 /* 3921 * Massage source route putting first source 3922 * route in ipha_dst. 
3923 */ 3924 (void) ip_massage_options(ipha); 3925 } 3926 mblk_setcred(mp, icmp->icmp_credp); 3927 putnext(q, mp); 3928 } 3929 3930 static boolean_t 3931 icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 3932 { 3933 int err; 3934 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 3935 3936 err = tsol_compute_label(DB_CREDDEF(mp, icmp->icmp_credp), dst, 3937 opt_storage, icmp->icmp_mac_exempt); 3938 if (err == 0) { 3939 err = tsol_update_options(&icmp->icmp_ip_snd_options, 3940 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 3941 opt_storage); 3942 } 3943 if (err != 0) { 3944 BUMP_MIB(&rawip_mib, rawipOutErrors); 3945 DTRACE_PROBE4( 3946 tx__ip__log__drop__updatelabel__icmp, 3947 char *, "queue(1) failed to update options(2) on mp(3)", 3948 queue_t *, q, char *, opt_storage, mblk_t *, mp); 3949 icmp_ud_err(q, mp, err); 3950 return (B_FALSE); 3951 } 3952 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 3953 return (B_TRUE); 3954 } 3955 3956 /* 3957 * This routine handles all messages passed downstream. It either 3958 * consumes the message or passes it downstream; it never queues a 3959 * a message. 
3960 */ 3961 static void 3962 icmp_wput(queue_t *q, mblk_t *mp) 3963 { 3964 uchar_t *rptr = mp->b_rptr; 3965 ipha_t *ipha; 3966 mblk_t *mp1; 3967 int ip_hdr_length; 3968 #define tudr ((struct T_unitdata_req *)rptr) 3969 size_t ip_len; 3970 icmp_t *icmp; 3971 sin6_t *sin6; 3972 sin_t *sin; 3973 ipaddr_t v4dst; 3974 3975 icmp = (icmp_t *)q->q_ptr; 3976 if (icmp->icmp_restricted) { 3977 icmp_wput_restricted(q, mp); 3978 return; 3979 } 3980 3981 switch (mp->b_datap->db_type) { 3982 case M_DATA: 3983 if (icmp->icmp_hdrincl) { 3984 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 3985 ipha = (ipha_t *)mp->b_rptr; 3986 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 3987 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 3988 BUMP_MIB(&rawip_mib, rawipOutErrors); 3989 freemsg(mp); 3990 return; 3991 } 3992 ipha = (ipha_t *)mp->b_rptr; 3993 } 3994 /* 3995 * If this connection was used for v6 (inconceivable!) 3996 * or if we have a new destination, then it's time to 3997 * figure a new label. 3998 */ 3999 if (is_system_labeled() && 4000 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4001 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4002 ipha->ipha_dst) && 4003 !icmp_update_label(q, icmp, mp, ipha->ipha_dst)) { 4004 return; 4005 } 4006 icmp_wput_hdrincl(q, mp, icmp); 4007 return; 4008 } 4009 freemsg(mp); 4010 return; 4011 case M_PROTO: 4012 case M_PCPROTO: 4013 ip_len = mp->b_wptr - rptr; 4014 if (ip_len >= sizeof (struct T_unitdata_req)) { 4015 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4016 if (((union T_primitives *)rptr)->type 4017 == T_UNITDATA_REQ) 4018 break; 4019 } 4020 /* FALLTHRU */ 4021 default: 4022 icmp_wput_other(q, mp); 4023 return; 4024 } 4025 4026 /* Handle T_UNITDATA_REQ messages here. */ 4027 4028 if (icmp->icmp_state == TS_UNBND) { 4029 /* If a port has not been bound to the stream, fail. 
*/ 4030 BUMP_MIB(&rawip_mib, rawipOutErrors); 4031 icmp_ud_err(q, mp, EPROTO); 4032 return; 4033 } 4034 mp1 = mp->b_cont; 4035 if (mp1 == NULL) { 4036 BUMP_MIB(&rawip_mib, rawipOutErrors); 4037 icmp_ud_err(q, mp, EPROTO); 4038 return; 4039 } 4040 4041 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4042 BUMP_MIB(&rawip_mib, rawipOutErrors); 4043 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4044 return; 4045 } 4046 4047 switch (icmp->icmp_family) { 4048 case AF_INET6: 4049 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4050 if (!OK_32PTR((char *)sin6) || 4051 tudr->DEST_length != sizeof (sin6_t) || 4052 sin6->sin6_family != AF_INET6) { 4053 BUMP_MIB(&rawip_mib, rawipOutErrors); 4054 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4055 return; 4056 } 4057 4058 /* No support for mapped addresses on raw sockets */ 4059 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4060 BUMP_MIB(&rawip_mib, rawipOutErrors); 4061 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4062 return; 4063 } 4064 4065 /* 4066 * Destination is a native IPv6 address. 4067 * Send out an IPv6 format packet. 4068 */ 4069 icmp_wput_ipv6(q, mp, sin6, tudr->OPT_length); 4070 return; 4071 4072 case AF_INET: 4073 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4074 if (!OK_32PTR((char *)sin) || 4075 tudr->DEST_length != sizeof (sin_t) || 4076 sin->sin_family != AF_INET) { 4077 BUMP_MIB(&rawip_mib, rawipOutErrors); 4078 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4079 return; 4080 } 4081 /* Extract and ipaddr */ 4082 v4dst = sin->sin_addr.s_addr; 4083 break; 4084 4085 default: 4086 ASSERT(0); 4087 } 4088 4089 /* 4090 * If options passed in, feed it for verification and handling 4091 */ 4092 if (tudr->OPT_length != 0) { 4093 int error; 4094 4095 if (icmp_unitdata_opt_process(q, mp, &error, 4096 (uchar_t *)0) < 0) { 4097 /* failure */ 4098 BUMP_MIB(&rawip_mib, rawipOutErrors); 4099 icmp_ud_err(q, mp, error); 4100 return; 4101 } 4102 /* 4103 * Note: Success in processing options. 
4104 * mp option buffer represented by 4105 * OPT_length/offset now potentially modified 4106 * and contain option setting results 4107 */ 4108 } 4109 4110 if (v4dst == INADDR_ANY) 4111 v4dst = htonl(INADDR_LOOPBACK); 4112 4113 /* Check if our saved options are valid; update if not */ 4114 if (is_system_labeled() && 4115 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4116 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst) && 4117 !icmp_update_label(q, icmp, mp, v4dst)) { 4118 return; 4119 } 4120 4121 /* Protocol 255 contains full IP headers */ 4122 if (icmp->icmp_hdrincl) { 4123 freeb(mp); 4124 icmp_wput_hdrincl(q, mp1, icmp); 4125 return; 4126 } 4127 4128 /* Add an IP header */ 4129 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4130 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 4131 if ((uchar_t *)ipha < mp1->b_datap->db_base || 4132 mp1->b_datap->db_ref != 1 || 4133 !OK_32PTR(ipha)) { 4134 if (!(mp1 = allocb(ip_hdr_length + icmp_wroff_extra, 4135 BPRI_LO))) { 4136 BUMP_MIB(&rawip_mib, rawipOutErrors); 4137 icmp_ud_err(q, mp, ENOMEM); 4138 return; 4139 } 4140 mp1->b_cont = mp->b_cont; 4141 ipha = (ipha_t *)mp1->b_datap->db_lim; 4142 mp1->b_wptr = (uchar_t *)ipha; 4143 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4144 } 4145 #ifdef _BIG_ENDIAN 4146 /* Set version, header length, and tos */ 4147 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4148 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4149 icmp->icmp_type_of_service); 4150 /* Set ttl and protocol */ 4151 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4152 #else 4153 /* Set version, header length, and tos */ 4154 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4155 ((icmp->icmp_type_of_service << 8) | 4156 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4157 /* Set ttl and protocol */ 4158 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4159 #endif 4160 /* 4161 * Copy our address into the packet. 
If this is zero, 4162 * ip will fill in the real source address. 4163 */ 4164 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4165 ipha->ipha_fragment_offset_and_flags = 0; 4166 4167 /* 4168 * For the socket of SOCK_RAW type, the checksum is provided in the 4169 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4170 * tell IP that the application has sent a complete IP header and not 4171 * to compute the transport checksum nor change the DF flag. 4172 */ 4173 ipha->ipha_ident = IP_HDR_INCLUDED; 4174 4175 /* Finish common formatting of the packet. */ 4176 mp1->b_rptr = (uchar_t *)ipha; 4177 4178 ip_len = mp1->b_wptr - (uchar_t *)ipha; 4179 if (mp1->b_cont != NULL) 4180 ip_len += msgdsize(mp1->b_cont); 4181 4182 /* 4183 * Set the length into the IP header. 4184 * If the length is greater than the maximum allowed by IP, 4185 * then free the message and return. Do not try and send it 4186 * as this can cause problems in layers below. 4187 */ 4188 if (ip_len > IP_MAXPACKET) { 4189 BUMP_MIB(&rawip_mib, rawipOutErrors); 4190 icmp_ud_err(q, mp, EMSGSIZE); 4191 return; 4192 } 4193 ipha->ipha_length = htons((uint16_t)ip_len); 4194 /* 4195 * Copy in the destination address from the T_UNITDATA 4196 * request 4197 */ 4198 ipha->ipha_dst = v4dst; 4199 4200 /* 4201 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4202 */ 4203 if (CLASSD(v4dst)) 4204 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4205 4206 /* Copy in options if any */ 4207 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4208 bcopy(icmp->icmp_ip_snd_options, 4209 &ipha[1], icmp->icmp_ip_snd_options_len); 4210 /* 4211 * Massage source route putting first source route in ipha_dst. 4212 * Ignore the destination in the T_unitdata_req. 
4213 */ 4214 (void) ip_massage_options(ipha); 4215 } 4216 freeb(mp); 4217 BUMP_MIB(&rawip_mib, rawipOutDatagrams); 4218 mblk_setcred(mp1, icmp->icmp_credp); 4219 putnext(q, mp1); 4220 #undef ipha 4221 #undef tudr 4222 } 4223 4224 static boolean_t 4225 icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4226 { 4227 int err; 4228 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4229 4230 err = tsol_compute_label_v6(DB_CREDDEF(mp, icmp->icmp_credp), dst, 4231 opt_storage, icmp->icmp_mac_exempt); 4232 if (err == 0) { 4233 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4234 &icmp->icmp_label_len_v6, opt_storage); 4235 } 4236 if (err != 0) { 4237 BUMP_MIB(&rawip_mib, rawipOutErrors); 4238 DTRACE_PROBE4( 4239 tx__ip__log__drop__updatelabel__icmp6, 4240 char *, "queue(1) failed to update options(2) on mp(3)", 4241 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4242 icmp_ud_err(wq, mp, err); 4243 return (B_FALSE); 4244 } 4245 4246 icmp->icmp_v6lastdst = *dst; 4247 return (B_TRUE); 4248 } 4249 4250 /* 4251 * icmp_wput_ipv6(): 4252 * Assumes that icmp_wput did some sanity checking on the destination 4253 * address, but that the label may not yet be correct. 4254 */ 4255 void 4256 icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) 4257 { 4258 ip6_t *ip6h; 4259 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 4260 mblk_t *mp1; 4261 int ip_hdr_len = IPV6_HDR_LEN; 4262 size_t ip_len; 4263 icmp_t *icmp; 4264 ip6_pkt_t ipp_s; /* For ancillary data options */ 4265 ip6_pkt_t *ipp = &ipp_s; 4266 ip6_pkt_t *tipp; 4267 uint32_t csum = 0; 4268 uint_t ignore = 0; 4269 uint_t option_exists = 0, is_sticky = 0; 4270 uint8_t *cp; 4271 uint8_t *nxthdr_ptr; 4272 in6_addr_t ip6_dst; 4273 4274 icmp = (icmp_t *)q->q_ptr; 4275 4276 /* 4277 * If the local address is a mapped address return 4278 * an error. 
4279 * It would be possible to send an IPv6 packet but the 4280 * response would never make it back to the application 4281 * since it is bound to a mapped address. 4282 */ 4283 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4284 BUMP_MIB(&rawip_mib, rawipOutErrors); 4285 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4286 return; 4287 } 4288 4289 ipp->ipp_fields = 0; 4290 ipp->ipp_sticky_ignored = 0; 4291 4292 /* 4293 * If TPI options passed in, feed it for verification and handling 4294 */ 4295 if (tudr_optlen != 0) { 4296 int error; 4297 4298 if (icmp_unitdata_opt_process(q, mp, &error, 4299 (void *)ipp) < 0) { 4300 /* failure */ 4301 BUMP_MIB(&rawip_mib, rawipOutErrors); 4302 icmp_ud_err(q, mp, error); 4303 return; 4304 } 4305 ignore = ipp->ipp_sticky_ignored; 4306 ASSERT(error == 0); 4307 } 4308 4309 if (sin6->sin6_scope_id != 0 && 4310 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4311 /* 4312 * IPPF_SCOPE_ID is special. It's neither a sticky 4313 * option nor ancillary data. It needs to be 4314 * explicitly set in options_exists. 4315 */ 4316 option_exists |= IPPF_SCOPE_ID; 4317 } 4318 4319 /* 4320 * Compute the destination address 4321 */ 4322 ip6_dst = sin6->sin6_addr; 4323 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4324 ip6_dst = ipv6_loopback; 4325 4326 /* 4327 * If we're not going to the same destination as last time, then 4328 * recompute the label required. This is done in a separate routine to 4329 * avoid blowing up our stack here. 4330 */ 4331 if (is_system_labeled() && 4332 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst) && 4333 !icmp_update_label_v6(q, icmp, mp, &ip6_dst)) { 4334 return; 4335 } 4336 4337 /* 4338 * If there's a security label here, then we ignore any options the 4339 * user may try to set. We keep the peer's label as a hidden sticky 4340 * option. 
4341 */ 4342 if (icmp->icmp_label_len_v6 > 0) { 4343 ignore &= ~IPPF_HOPOPTS; 4344 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4345 } 4346 4347 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4348 (ipp->ipp_fields == 0)) { 4349 /* No sticky options nor ancillary data. */ 4350 goto no_options; 4351 } 4352 4353 /* 4354 * Go through the options figuring out where each is going to 4355 * come from and build two masks. The first mask indicates if 4356 * the option exists at all. The second mask indicates if the 4357 * option is sticky or ancillary. 4358 */ 4359 if (!(ignore & IPPF_HOPOPTS)) { 4360 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4361 option_exists |= IPPF_HOPOPTS; 4362 ip_hdr_len += ipp->ipp_hopoptslen; 4363 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4364 option_exists |= IPPF_HOPOPTS; 4365 is_sticky |= IPPF_HOPOPTS; 4366 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4367 } 4368 } 4369 4370 if (!(ignore & IPPF_RTHDR)) { 4371 if (ipp->ipp_fields & IPPF_RTHDR) { 4372 option_exists |= IPPF_RTHDR; 4373 ip_hdr_len += ipp->ipp_rthdrlen; 4374 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4375 option_exists |= IPPF_RTHDR; 4376 is_sticky |= IPPF_RTHDR; 4377 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4378 } 4379 } 4380 4381 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4382 /* 4383 * Need to have a router header to use these. 
4384 */ 4385 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4386 option_exists |= IPPF_RTDSTOPTS; 4387 ip_hdr_len += ipp->ipp_rtdstoptslen; 4388 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4389 option_exists |= IPPF_RTDSTOPTS; 4390 is_sticky |= IPPF_RTDSTOPTS; 4391 ip_hdr_len += 4392 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4393 } 4394 } 4395 4396 if (!(ignore & IPPF_DSTOPTS)) { 4397 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4398 option_exists |= IPPF_DSTOPTS; 4399 ip_hdr_len += ipp->ipp_dstoptslen; 4400 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4401 option_exists |= IPPF_DSTOPTS; 4402 is_sticky |= IPPF_DSTOPTS; 4403 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4404 } 4405 } 4406 4407 if (!(ignore & IPPF_IFINDEX)) { 4408 if (ipp->ipp_fields & IPPF_IFINDEX) { 4409 option_exists |= IPPF_IFINDEX; 4410 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4411 option_exists |= IPPF_IFINDEX; 4412 is_sticky |= IPPF_IFINDEX; 4413 } 4414 } 4415 4416 if (!(ignore & IPPF_ADDR)) { 4417 if (ipp->ipp_fields & IPPF_ADDR) { 4418 option_exists |= IPPF_ADDR; 4419 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4420 option_exists |= IPPF_ADDR; 4421 is_sticky |= IPPF_ADDR; 4422 } 4423 } 4424 4425 if (!(ignore & IPPF_DONTFRAG)) { 4426 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4427 option_exists |= IPPF_DONTFRAG; 4428 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4429 option_exists |= IPPF_DONTFRAG; 4430 is_sticky |= IPPF_DONTFRAG; 4431 } 4432 } 4433 4434 if (!(ignore & IPPF_USE_MIN_MTU)) { 4435 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4436 option_exists |= IPPF_USE_MIN_MTU; 4437 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4438 IPPF_USE_MIN_MTU) { 4439 option_exists |= IPPF_USE_MIN_MTU; 4440 is_sticky |= IPPF_USE_MIN_MTU; 4441 } 4442 } 4443 4444 if (!(ignore & IPPF_NEXTHOP)) { 4445 if (ipp->ipp_fields & IPPF_NEXTHOP) { 4446 option_exists |= IPPF_NEXTHOP; 4447 } else if (icmp->icmp_sticky_ipp.ipp_fields & 
IPPF_NEXTHOP) { 4448 option_exists |= IPPF_NEXTHOP; 4449 is_sticky |= IPPF_NEXTHOP; 4450 } 4451 } 4452 4453 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 4454 option_exists |= IPPF_HOPLIMIT; 4455 /* IPV6_HOPLIMIT can never be sticky */ 4456 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 4457 4458 if (!(ignore & IPPF_UNICAST_HOPS) && 4459 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 4460 option_exists |= IPPF_UNICAST_HOPS; 4461 is_sticky |= IPPF_UNICAST_HOPS; 4462 } 4463 4464 if (!(ignore & IPPF_MULTICAST_HOPS) && 4465 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 4466 option_exists |= IPPF_MULTICAST_HOPS; 4467 is_sticky |= IPPF_MULTICAST_HOPS; 4468 } 4469 4470 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 4471 /* This is a sticky socket option only */ 4472 option_exists |= IPPF_NO_CKSUM; 4473 is_sticky |= IPPF_NO_CKSUM; 4474 } 4475 4476 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 4477 /* This is a sticky socket option only */ 4478 option_exists |= IPPF_RAW_CKSUM; 4479 is_sticky |= IPPF_RAW_CKSUM; 4480 } 4481 4482 if (!(ignore & IPPF_TCLASS)) { 4483 if (ipp->ipp_fields & IPPF_TCLASS) { 4484 option_exists |= IPPF_TCLASS; 4485 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 4486 option_exists |= IPPF_TCLASS; 4487 is_sticky |= IPPF_TCLASS; 4488 } 4489 } 4490 4491 no_options: 4492 4493 /* 4494 * If any options carried in the ip6i_t were specified, we 4495 * need to account for the ip6i_t in the data we'll be sending 4496 * down. 
4497 */ 4498 if (option_exists & IPPF_HAS_IP6I) 4499 ip_hdr_len += sizeof (ip6i_t); 4500 4501 /* check/fix buffer config, setup pointers into it */ 4502 mp1 = mp->b_cont; 4503 ip6h = (ip6_t *)&mp1->b_rptr[-ip_hdr_len]; 4504 if ((mp1->b_datap->db_ref != 1) || 4505 ((unsigned char *)ip6h < mp1->b_datap->db_base) || 4506 !OK_32PTR(ip6h)) { 4507 /* Try to get everything in a single mblk next time */ 4508 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 4509 icmp->icmp_max_hdr_len = ip_hdr_len; 4510 (void) mi_set_sth_wroff(RD(q), 4511 icmp->icmp_max_hdr_len + icmp_wroff_extra); 4512 } 4513 mp1 = allocb(ip_hdr_len + icmp_wroff_extra, BPRI_LO); 4514 if (!mp1) { 4515 BUMP_MIB(&rawip_mib, rawipOutErrors); 4516 icmp_ud_err(q, mp, ENOMEM); 4517 return; 4518 } 4519 mp1->b_cont = mp->b_cont; 4520 mp1->b_wptr = mp1->b_datap->db_lim; 4521 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 4522 } 4523 mp1->b_rptr = (unsigned char *)ip6h; 4524 ip6i = (ip6i_t *)ip6h; 4525 4526 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 4527 if (option_exists & IPPF_HAS_IP6I) { 4528 ip6h = (ip6_t *)&ip6i[1]; 4529 ip6i->ip6i_flags = 0; 4530 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4531 4532 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 4533 if (option_exists & IPPF_SCOPE_ID) { 4534 ip6i->ip6i_flags |= IP6I_IFINDEX; 4535 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 4536 } else if (option_exists & IPPF_IFINDEX) { 4537 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 4538 ASSERT(tipp->ipp_ifindex != 0); 4539 ip6i->ip6i_flags |= IP6I_IFINDEX; 4540 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 4541 } 4542 4543 if (option_exists & IPPF_RAW_CKSUM) { 4544 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 4545 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 4546 } 4547 4548 if (option_exists & IPPF_NO_CKSUM) { 4549 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 4550 } 4551 4552 if (option_exists & IPPF_ADDR) { 4553 /* 4554 * Enable per-packet source address verification if 4555 * IPV6_PKTINFO specified the source address. 4556 * ip6_src is set in the transport's _wput function. 4557 */ 4558 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 4559 } 4560 4561 if (option_exists & IPPF_DONTFRAG) { 4562 ip6i->ip6i_flags |= IP6I_DONTFRAG; 4563 } 4564 4565 if (option_exists & IPPF_USE_MIN_MTU) { 4566 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 4567 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 4568 } 4569 4570 if (option_exists & IPPF_NEXTHOP) { 4571 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 4572 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 4573 ip6i->ip6i_flags |= IP6I_NEXTHOP; 4574 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 4575 } 4576 4577 /* 4578 * tell IP this is an ip6i_t private header 4579 */ 4580 ip6i->ip6i_nxt = IPPROTO_RAW; 4581 } 4582 4583 /* Initialize IPv6 header */ 4584 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4585 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 4586 4587 /* Set the hoplimit of the outgoing packet. 
*/ 4588 if (option_exists & IPPF_HOPLIMIT) { 4589 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 4590 ip6h->ip6_hops = ipp->ipp_hoplimit; 4591 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4592 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 4593 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 4594 if (option_exists & IPPF_MULTICAST_HOPS) 4595 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4596 } else { 4597 ip6h->ip6_hops = icmp->icmp_ttl; 4598 if (option_exists & IPPF_UNICAST_HOPS) 4599 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4600 } 4601 4602 if (option_exists & IPPF_ADDR) { 4603 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 4604 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 4605 ip6h->ip6_src = tipp->ipp_addr; 4606 } else { 4607 /* 4608 * The source address was not set using IPV6_PKTINFO. 4609 * First look at the bound source. 4610 * If unspecified fallback to __sin6_src_id. 4611 */ 4612 ip6h->ip6_src = icmp->icmp_v6src; 4613 if (sin6->__sin6_src_id != 0 && 4614 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4615 ip_srcid_find_id(sin6->__sin6_src_id, 4616 &ip6h->ip6_src, icmp->icmp_zoneid); 4617 } 4618 } 4619 4620 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 4621 cp = (uint8_t *)&ip6h[1]; 4622 4623 /* 4624 * Here's where we have to start stringing together 4625 * any extension headers in the right order: 4626 * Hop-by-hop, destination, routing, and final destination opts. 
4627 */ 4628 if (option_exists & IPPF_HOPOPTS) { 4629 /* Hop-by-hop options */ 4630 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4631 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 4632 4633 *nxthdr_ptr = IPPROTO_HOPOPTS; 4634 nxthdr_ptr = &hbh->ip6h_nxt; 4635 4636 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 4637 cp += tipp->ipp_hopoptslen; 4638 } 4639 /* 4640 * En-route destination options 4641 * Only do them if there's a routing header as well 4642 */ 4643 if (option_exists & IPPF_RTDSTOPTS) { 4644 ip6_dest_t *dst = (ip6_dest_t *)cp; 4645 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 4646 4647 *nxthdr_ptr = IPPROTO_DSTOPTS; 4648 nxthdr_ptr = &dst->ip6d_nxt; 4649 4650 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 4651 cp += tipp->ipp_rtdstoptslen; 4652 } 4653 /* 4654 * Routing header next 4655 */ 4656 if (option_exists & IPPF_RTHDR) { 4657 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 4658 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 4659 4660 *nxthdr_ptr = IPPROTO_ROUTING; 4661 nxthdr_ptr = &rt->ip6r_nxt; 4662 4663 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 4664 cp += tipp->ipp_rthdrlen; 4665 } 4666 /* 4667 * Do ultimate destination options 4668 */ 4669 if (option_exists & IPPF_DSTOPTS) { 4670 ip6_dest_t *dest = (ip6_dest_t *)cp; 4671 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 4672 4673 *nxthdr_ptr = IPPROTO_DSTOPTS; 4674 nxthdr_ptr = &dest->ip6d_nxt; 4675 4676 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 4677 cp += tipp->ipp_dstoptslen; 4678 } 4679 4680 /* 4681 * Now set the last header pointer to the proto passed in 4682 */ 4683 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 4684 *nxthdr_ptr = icmp->icmp_proto; 4685 4686 /* 4687 * Copy in the destination address 4688 */ 4689 ip6h->ip6_dst = ip6_dst; 4690 4691 ip6h->ip6_vcf = 4692 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 4693 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 4694 4695 if (option_exists & IPPF_TCLASS) { 4696 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 4697 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 4698 tipp->ipp_tclass); 4699 } 4700 if (option_exists & IPPF_RTHDR) { 4701 ip6_rthdr_t *rth; 4702 4703 /* 4704 * Perform any processing needed for source routing. 4705 * We know that all extension headers will be in the same mblk 4706 * as the IPv6 header. 4707 */ 4708 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 4709 if (rth != NULL && rth->ip6r_segleft != 0) { 4710 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 4711 /* 4712 * Drop packet - only support Type 0 routing. 4713 * Notify the application as well. 4714 */ 4715 icmp_ud_err(q, mp, EPROTO); 4716 BUMP_MIB(&rawip_mib, rawipOutErrors); 4717 return; 4718 } 4719 /* 4720 * rth->ip6r_len is twice the number of 4721 * addresses in the header 4722 */ 4723 if (rth->ip6r_len & 0x1) { 4724 icmp_ud_err(q, mp, EPROTO); 4725 BUMP_MIB(&rawip_mib, rawipOutErrors); 4726 return; 4727 } 4728 /* 4729 * Shuffle the routing header and ip6_dst 4730 * addresses, and get the checksum difference 4731 * between the first hop (in ip6_dst) and 4732 * the destination (in the last routing hdr entry). 4733 */ 4734 csum = ip_massage_options_v6(ip6h, rth); 4735 /* 4736 * Verify that the first hop isn't a mapped address. 4737 * Routers along the path need to do this verification 4738 * for subsequent hops. 4739 */ 4740 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 4741 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4742 BUMP_MIB(&rawip_mib, rawipOutErrors); 4743 return; 4744 } 4745 } 4746 } 4747 4748 ip_len = mp1->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 4749 if (mp1->b_cont != NULL) 4750 ip_len += msgdsize(mp1->b_cont); 4751 4752 /* 4753 * Set the length into the IP header. 4754 * If the length is greater than the maximum allowed by IP, 4755 * then free the message and return. Do not try and send it 4756 * as this can cause problems in layers below. 
4757 */ 4758 if (ip_len > IP_MAXPACKET) { 4759 BUMP_MIB(&rawip_mib, rawipOutErrors); 4760 icmp_ud_err(q, mp, EMSGSIZE); 4761 return; 4762 } 4763 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 4764 uint_t cksum_off; /* From ip6i == mp1->b_rptr */ 4765 uint16_t *cksum_ptr; 4766 uint_t ext_hdrs_len; 4767 4768 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 4769 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 4770 icmp->icmp_checksum_off == 2); 4771 4772 /* 4773 * We make it easy for IP to include our pseudo header 4774 * by putting our length in uh_checksum, modified (if 4775 * we have a routing header) by the checksum difference 4776 * between the ultimate destination and first hop addresses. 4777 * Note: ICMPv6 must always checksum the packet. 4778 */ 4779 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 4780 if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) { 4781 if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) { 4782 BUMP_MIB(&rawip_mib, rawipOutErrors); 4783 freemsg(mp); 4784 return; 4785 } 4786 ip6i = (ip6i_t *)mp1->b_rptr; 4787 if (ip6i->ip6i_nxt == IPPROTO_RAW) 4788 ip6h = (ip6_t *)&ip6i[1]; 4789 else 4790 ip6h = (ip6_t *)ip6i; 4791 } 4792 /* Add payload length to checksum */ 4793 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 4794 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 4795 csum += htons(ip_len - ext_hdrs_len); 4796 4797 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 4798 csum = (csum & 0xFFFF) + (csum >> 16); 4799 *cksum_ptr = (uint16_t)csum; 4800 } 4801 4802 #ifdef _LITTLE_ENDIAN 4803 ip_len = htons(ip_len); 4804 #endif 4805 ip6h->ip6_plen = (uint16_t)ip_len; 4806 4807 freeb(mp); 4808 4809 /* We're done. 
Pass the packet to IP */ 4810 BUMP_MIB(&rawip_mib, rawipOutDatagrams); 4811 mblk_setcred(mp1, icmp->icmp_credp); 4812 putnext(q, mp1); 4813 } 4814 4815 static void 4816 icmp_wput_other(queue_t *q, mblk_t *mp) 4817 { 4818 uchar_t *rptr = mp->b_rptr; 4819 struct iocblk *iocp; 4820 #define tudr ((struct T_unitdata_req *)rptr) 4821 icmp_t *icmp; 4822 cred_t *cr; 4823 4824 icmp = (icmp_t *)q->q_ptr; 4825 4826 cr = DB_CREDDEF(mp, icmp->icmp_credp); 4827 4828 switch (mp->b_datap->db_type) { 4829 case M_PROTO: 4830 case M_PCPROTO: 4831 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 4832 /* 4833 * If the message does not contain a PRIM_type, 4834 * throw it away. 4835 */ 4836 freemsg(mp); 4837 return; 4838 } 4839 switch (((union T_primitives *)rptr)->type) { 4840 case T_ADDR_REQ: 4841 icmp_addr_req(q, mp); 4842 return; 4843 case O_T_BIND_REQ: 4844 case T_BIND_REQ: 4845 qwriter(q, mp, icmp_bind, PERIM_OUTER); 4846 return; 4847 case T_CONN_REQ: 4848 icmp_connect(q, mp); 4849 return; 4850 case T_CAPABILITY_REQ: 4851 icmp_capability_req(q, mp); 4852 return; 4853 case T_INFO_REQ: 4854 icmp_info_req(q, mp); 4855 return; 4856 case T_UNITDATA_REQ: 4857 /* 4858 * If a T_UNITDATA_REQ gets here, the address must 4859 * be bad. Valid T_UNITDATA_REQs are found above 4860 * and break to below this switch. 4861 */ 4862 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4863 return; 4864 case T_UNBIND_REQ: 4865 icmp_unbind(q, mp); 4866 return; 4867 4868 case T_SVR4_OPTMGMT_REQ: 4869 if (!snmpcom_req(q, mp, icmp_snmp_set, icmp_snmp_get, 4870 cr)) 4871 /* Only IP can return anything meaningful */ 4872 (void) svr4_optcom_req(q, mp, cr, 4873 &icmp_opt_obj); 4874 return; 4875 4876 case T_OPTMGMT_REQ: 4877 /* Only IP can return anything meaningful */ 4878 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj); 4879 return; 4880 4881 case T_DISCON_REQ: 4882 icmp_disconnect(q, mp); 4883 return; 4884 4885 /* The following TPI message is not supported by icmp. 
*/ 4886 case O_T_CONN_RES: 4887 case T_CONN_RES: 4888 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 4889 return; 4890 4891 /* The following 3 TPI requests are illegal for icmp. */ 4892 case T_DATA_REQ: 4893 case T_EXDATA_REQ: 4894 case T_ORDREL_REQ: 4895 freemsg(mp); 4896 (void) putctl1(RD(q), M_ERROR, EPROTO); 4897 return; 4898 default: 4899 break; 4900 } 4901 break; 4902 case M_IOCTL: 4903 iocp = (struct iocblk *)mp->b_rptr; 4904 switch (iocp->ioc_cmd) { 4905 case TI_GETPEERNAME: 4906 if (icmp->icmp_state != TS_DATA_XFER) { 4907 /* 4908 * If a default destination address has not 4909 * been associated with the stream, then we 4910 * don't know the peer's name. 4911 */ 4912 iocp->ioc_error = ENOTCONN; 4913 err_ret:; 4914 iocp->ioc_count = 0; 4915 mp->b_datap->db_type = M_IOCACK; 4916 qreply(q, mp); 4917 return; 4918 } 4919 /* FALLTHRU */ 4920 case TI_GETMYNAME: 4921 /* 4922 * For TI_GETPEERNAME and TI_GETMYNAME, we first 4923 * need to copyin the user's strbuf structure. 4924 * Processing will continue in the M_IOCDATA case 4925 * below. 4926 */ 4927 mi_copyin(q, mp, NULL, 4928 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 4929 return; 4930 case ND_SET: 4931 /* nd_getset performs the necessary error checking */ 4932 case ND_GET: 4933 if (nd_getset(q, icmp_g_nd, mp)) { 4934 qreply(q, mp); 4935 return; 4936 } 4937 break; 4938 default: 4939 break; 4940 } 4941 break; 4942 case M_IOCDATA: 4943 icmp_wput_iocdata(q, mp); 4944 return; 4945 default: 4946 break; 4947 } 4948 putnext(q, mp); 4949 } 4950 4951 /* 4952 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 4953 * messages. 4954 */ 4955 static void 4956 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 4957 { 4958 mblk_t *mp1; 4959 STRUCT_HANDLE(strbuf, sb); 4960 icmp_t *icmp; 4961 in6_addr_t v6addr; 4962 ipaddr_t v4addr; 4963 uint32_t flowinfo = 0; 4964 int addrlen; 4965 4966 /* Make sure it is one of ours. 
*/ 4967 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 4968 case TI_GETMYNAME: 4969 case TI_GETPEERNAME: 4970 break; 4971 default: 4972 putnext(q, mp); 4973 return; 4974 } 4975 switch (mi_copy_state(q, mp, &mp1)) { 4976 case -1: 4977 return; 4978 case MI_COPY_CASE(MI_COPY_IN, 1): 4979 break; 4980 case MI_COPY_CASE(MI_COPY_OUT, 1): 4981 /* 4982 * The address has been copied out, so now 4983 * copyout the strbuf. 4984 */ 4985 mi_copyout(q, mp); 4986 return; 4987 case MI_COPY_CASE(MI_COPY_OUT, 2): 4988 /* 4989 * The address and strbuf have been copied out. 4990 * We're done, so just acknowledge the original 4991 * M_IOCTL. 4992 */ 4993 mi_copy_done(q, mp, 0); 4994 return; 4995 default: 4996 /* 4997 * Something strange has happened, so acknowledge 4998 * the original M_IOCTL with an EPROTO error. 4999 */ 5000 mi_copy_done(q, mp, EPROTO); 5001 return; 5002 } 5003 /* 5004 * Now we have the strbuf structure for TI_GETMYNAME 5005 * and TI_GETPEERNAME. Next we copyout the requested 5006 * address and then we'll copyout the strbuf. 5007 */ 5008 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5009 (void *)mp1->b_rptr); 5010 icmp = (icmp_t *)q->q_ptr; 5011 if (icmp->icmp_family == AF_INET) 5012 addrlen = sizeof (sin_t); 5013 else 5014 addrlen = sizeof (sin6_t); 5015 5016 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5017 mi_copy_done(q, mp, EINVAL); 5018 return; 5019 } 5020 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5021 case TI_GETMYNAME: 5022 if (icmp->icmp_family == AF_INET) { 5023 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5024 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 5025 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5026 v4addr = V4_PART_OF_V6(icmp->icmp_v6src); 5027 } else { 5028 /* 5029 * INADDR_ANY 5030 * icmp_v6src is not set, we might be bound to 5031 * broadcast/multicast. 
Use icmp_bound_v6src as 5032 * local address instead (that could 5033 * also still be INADDR_ANY) 5034 */ 5035 v4addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 5036 } 5037 } else { 5038 /* icmp->icmp_family == AF_INET6 */ 5039 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5040 v6addr = icmp->icmp_v6src; 5041 } else { 5042 /* 5043 * UNSPECIFIED 5044 * icmp_v6src is not set, we might be bound to 5045 * broadcast/multicast. Use icmp_bound_v6src as 5046 * local address instead (that could 5047 * also still be UNSPECIFIED) 5048 */ 5049 v6addr = icmp->icmp_bound_v6src; 5050 } 5051 } 5052 break; 5053 case TI_GETPEERNAME: 5054 if (icmp->icmp_family == AF_INET) { 5055 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5056 v4addr = V4_PART_OF_V6(icmp->icmp_v6dst); 5057 } else { 5058 /* icmp->icmp_family == AF_INET6) */ 5059 v6addr = icmp->icmp_v6dst; 5060 flowinfo = icmp->icmp_flowinfo; 5061 } 5062 break; 5063 default: 5064 mi_copy_done(q, mp, EPROTO); 5065 return; 5066 } 5067 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5068 if (!mp1) 5069 return; 5070 5071 if (icmp->icmp_family == AF_INET) { 5072 sin_t *sin; 5073 5074 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 5075 sin = (sin_t *)mp1->b_rptr; 5076 mp1->b_wptr = (uchar_t *)&sin[1]; 5077 *sin = sin_null; 5078 sin->sin_family = AF_INET; 5079 sin->sin_addr.s_addr = v4addr; 5080 } else { 5081 /* icmp->icmp_family == AF_INET6 */ 5082 sin6_t *sin6; 5083 5084 ASSERT(icmp->icmp_family == AF_INET6); 5085 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 5086 sin6 = (sin6_t *)mp1->b_rptr; 5087 mp1->b_wptr = (uchar_t *)&sin6[1]; 5088 *sin6 = sin6_null; 5089 sin6->sin6_family = AF_INET6; 5090 sin6->sin6_flowinfo = flowinfo; 5091 sin6->sin6_addr = v6addr; 5092 } 5093 /* Copy out the address */ 5094 mi_copyout(q, mp); 5095 } 5096 5097 /* 5098 * Only allow MIB requests and M_FLUSHes to pass. 5099 * All other messages are nacked or dropped. 
5100 */ 5101 static void 5102 icmp_wput_restricted(queue_t *q, mblk_t *mp) 5103 { 5104 cred_t *cr; 5105 icmp_t *icmp; 5106 5107 switch (DB_TYPE(mp)) { 5108 case M_PROTO: 5109 case M_PCPROTO: 5110 if (MBLKL(mp) < sizeof (t_scalar_t)) { 5111 freemsg(mp); 5112 return; 5113 } 5114 icmp = (icmp_t *)q->q_ptr; 5115 cr = DB_CREDDEF(mp, icmp->icmp_credp); 5116 5117 switch (((union T_primitives *)mp->b_rptr)->type) { 5118 case T_SVR4_OPTMGMT_REQ: 5119 if (!snmpcom_req(q, mp, 5120 icmp_snmp_set, icmp_snmp_get, cr)) 5121 (void) svr4_optcom_req(q, mp, cr, 5122 &icmp_opt_obj); 5123 return; 5124 case T_OPTMGMT_REQ: 5125 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj); 5126 return; 5127 default: 5128 icmp_err_ack(q, mp, TSYSERR, ENOTSUP); 5129 return; 5130 } 5131 /* NOTREACHED */ 5132 case M_IOCTL: 5133 miocnak(q, mp, 0, ENOTSUP); 5134 break; 5135 case M_FLUSH: 5136 putnext(q, mp); 5137 break; 5138 default: 5139 freemsg(mp); 5140 break; 5141 } 5142 } 5143 5144 static int 5145 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5146 void *thisdg_attrs) 5147 { 5148 icmp_t *icmp; 5149 struct T_unitdata_req *udreqp; 5150 int is_absreq_failure; 5151 cred_t *cr; 5152 5153 icmp = (icmp_t *)q->q_ptr; 5154 5155 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5156 *errorp = 0; 5157 5158 cr = DB_CREDDEF(mp, icmp->icmp_credp); 5159 5160 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5161 udreqp->OPT_offset, cr, &icmp_opt_obj, 5162 thisdg_attrs, &is_absreq_failure); 5163 5164 if (*errorp != 0) { 5165 /* 5166 * Note: No special action needed in this 5167 * module for "is_absreq_failure" 5168 */ 5169 return (-1); /* failure */ 5170 } 5171 ASSERT(is_absreq_failure == 0); 5172 return (0); /* success */ 5173 } 5174 5175 void 5176 icmp_ddi_init(void) 5177 { 5178 ICMP6_MAJ = ddi_name_to_major(ICMP6); 5179 icmp_max_optsize = 5180 optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5181 icmp_opt_obj.odb_opt_arr_cnt); 5182 5183 (void) icmp_param_register(icmp_param_arr, 
A_CNT(icmp_param_arr)); 5184 5185 rawip_kstat_init(); 5186 } 5187 5188 void 5189 icmp_ddi_destroy(void) 5190 { 5191 nd_free(&icmp_g_nd); 5192 5193 rawip_kstat_fini(); 5194 } 5195 5196 static void 5197 rawip_kstat_init(void) { 5198 5199 rawip_named_kstat_t template = { 5200 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5201 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5202 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5203 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5204 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5205 }; 5206 5207 rawip_mibkp = kstat_create("icmp", 0, "rawip", "mib2", 5208 KSTAT_TYPE_NAMED, 5209 NUM_OF_FIELDS(rawip_named_kstat_t), 5210 0); 5211 if (rawip_mibkp == NULL) 5212 return; 5213 5214 bcopy(&template, rawip_mibkp->ks_data, sizeof (template)); 5215 5216 rawip_mibkp->ks_update = rawip_kstat_update; 5217 5218 kstat_install(rawip_mibkp); 5219 } 5220 5221 static void 5222 rawip_kstat_fini(void) { 5223 if (rawip_mibkp) { 5224 kstat_delete(rawip_mibkp); 5225 rawip_mibkp = NULL; 5226 } 5227 } 5228 5229 static int 5230 rawip_kstat_update(kstat_t *kp, int rw) { 5231 rawip_named_kstat_t *rawipkp; 5232 5233 if ((kp == NULL) || (kp->ks_data == NULL)) 5234 return (EIO); 5235 5236 if (rw == KSTAT_WRITE) 5237 return (EACCES); 5238 5239 rawipkp = (rawip_named_kstat_t *)kp->ks_data; 5240 5241 rawipkp->inDatagrams.value.ui32 = rawip_mib.rawipInDatagrams; 5242 rawipkp->inCksumErrs.value.ui32 = rawip_mib.rawipInCksumErrs; 5243 rawipkp->inErrors.value.ui32 = rawip_mib.rawipInErrors; 5244 rawipkp->outDatagrams.value.ui32 = rawip_mib.rawipOutDatagrams; 5245 rawipkp->outErrors.value.ui32 = rawip_mib.rawipOutErrors; 5246 5247 return (0); 5248 } 5249