1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/types.h> 31 #include <sys/stream.h> 32 #include <sys/stropts.h> 33 #include <sys/strlog.h> 34 #include <sys/strsun.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/priv.h> 46 #include <sys/zone.h> 47 #include <sys/time.h> 48 49 #include <sys/socket.h> 50 #include <sys/isa_defs.h> 51 #include <sys/suntpi.h> 52 #include <sys/xti_inet.h> 53 54 #include <net/route.h> 55 #include <net/if.h> 56 57 #include <netinet/in.h> 58 #include <netinet/ip6.h> 59 #include <netinet/icmp6.h> 60 #include <inet/common.h> 61 #include <inet/ip.h> 62 #include <inet/ip6.h> 63 #include <inet/mi.h> 64 #include <inet/nd.h> 65 #include <inet/optcom.h> 66 #include <inet/snmpcom.h> 67 #include <inet/kstatcom.h> 68 #include <inet/rawip_impl.h> 69 70 #include <netinet/ip_mroute.h> 71 #include <inet/tcp.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 #include <inet/ipclassifier.h> 75 76 #include <sys/tsol/label.h> 77 #include <sys/tsol/tnet.h> 78 79 #define ICMP6 "icmp6" 80 major_t ICMP6_MAJ; 81 82 /* 83 * Object to represent database of options to search passed to 84 * {sock,tpi}optcom_req() interface routine to take care of option 85 * management and associated methods. 86 * XXX These and other extern's should really move to a icmp header. 87 */ 88 extern optdb_obj_t icmp_opt_obj; 89 extern uint_t icmp_max_optsize; 90 91 /* 92 * Synchronization notes: 93 * 94 * At all points in this code where exclusive access is required, we 95 * pass a message to a subroutine by invoking qwriter(..., PERIM_OUTER) 96 * which will arrange to call the routine only after all threads have 97 * exited the shared resource. 
 */

/*
 * Named Dispatch Parameter Management Structure.
 * Each entry describes one run-time tunable: the range it may take, its
 * current value, and the name it is known by.
 */
typedef struct icmpparam_s {
	uint_t	icmp_param_min;		/* lowest permitted value */
	uint_t	icmp_param_max;		/* highest permitted value */
	uint_t	icmp_param_value;	/* current value */
	char	*icmp_param_name;	/* name of the tunable */
} icmpparam_t;

/* Forward declarations for routines defined in this file. */
static void	icmp_addr_req(queue_t *q, mblk_t *mp);
static void	icmp_bind(queue_t *q, mblk_t *mp);
static void	icmp_bind_proto(queue_t *q);
static int	icmp_build_hdrs(queue_t *q, icmp_t *icmp);
static void	icmp_capability_req(queue_t *q, mblk_t *mp);
static int	icmp_close(queue_t *q);
static void	icmp_connect(queue_t *q, mblk_t *mp);
static void	icmp_disconnect(queue_t *q, mblk_t *mp);
static void	icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
		    t_scalar_t t_error, int sys_error);
static void	icmp_icmp_error(queue_t *q, mblk_t *mp);
static void	icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
static void	icmp_info_req(queue_t *q, mblk_t *mp);
static mblk_t	*icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim,
		    t_scalar_t addr_length, in_port_t);
static int	icmp_open(queue_t *q, dev_t *devp, int flag,
		    int sflag, cred_t *credp);
static int	icmp_unitdata_opt_process(queue_t *q, mblk_t *mp,
		    int *errorp, void *thisdg_attrs);
static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
/* The two option entry points below are not static (external linkage). */
int		icmp_opt_set(queue_t *q, uint_t optset_context,
		    int level, int name, uint_t inlen,
		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
		    void *thisdg_attrs, cred_t *cr, mblk_t *mblk);
int		icmp_opt_get(queue_t *q, int level, int name,
		    uchar_t *ptr);
static int	icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t icmp_param_register(icmpparam_t *icmppa, int cnt);
static int	icmp_param_set(queue_t *q, mblk_t *mp, char *value,
		    caddr_t cp, cred_t *cr);
static void	icmp_rput(queue_t *q, mblk_t *mp);
static void	icmp_rput_bind_ack(queue_t *q, mblk_t *mp);
static int	icmp_snmp_get(queue_t *q, mblk_t *mpctl);
static int	icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
		    uchar_t *ptr, int len);
static int	icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static void	icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
static void	icmp_unbind(queue_t *q, mblk_t *mp);
static void	icmp_wput(queue_t *q, mblk_t *mp);
static void	icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6,
		    t_scalar_t tudr_optlen);
static void	icmp_wput_other(queue_t *q, mblk_t *mp);
static void	icmp_wput_iocdata(queue_t *q, mblk_t *mp);
static void	icmp_wput_restricted(queue_t *q, mblk_t *mp);

static void	rawip_kstat_init(void);
static void	rawip_kstat_fini(void);
static int	rawip_kstat_update(kstat_t *kp, int rw);


/*
 * STREAMS module description shared by the read and write sides.
 * The initializers are positional.
 * NOTE(review): per module_info(9S) the order should be id, name,
 * min packet size, max packet size, high water mark, low water mark --
 * confirm against <sys/stream.h>.
 */
static struct module_info info = {
	5707, "icmp", 1, INFPSZ, 512, 128
};

/* Read side: icmp_rput handles messages; open/close live here too. */
static struct qinit rinit = {
	(pfi_t)icmp_rput, NULL, icmp_open, icmp_close, NULL, &info
};

/* Write side: only a put procedure (icmp_wput) is supplied. */
static struct qinit winit = {
	(pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &info
};

/* Ties the read and write qinit structures together for the module. */
struct streamtab icmpinfo = {
	&rinit, &winit
};

static sin_t	sin_null;	/* Zero address for quick clears */
static sin6_t	sin6_null;	/* Zero address for quick clears */
static void	*icmp_g_head;	/* Head for list of open icmp streams. */
static IDP	icmp_g_nd;	/* Points to table of ICMP ND variables. */

/* MIB-2 stuff for SNMP */
static mib2_rawip_t	rawip_mib;	/* SNMP fixed size info */
static kstat_t		*rawip_mibkp;	/* kstat exporting rawip_mib data */

/*
 * Default structure copied into T_INFO_ACK messages.
 * ADDR_size, OPT_size and CURRENT_state are placeholders here; they are
 * overwritten per endpoint by icmp_copy_info().
 */
static struct T_info_ack icmp_g_t_info_ack = {
	T_INFO_ACK,
	IP_MAXPACKET,	/* TSDU_size.  icmp allows maximum size messages. */
	T_INVALID,	/* ETSDU_size.  icmp does not support expedited data. */
	T_INVALID,	/* CDATA_size.  icmp does not support connect data. */
	T_INVALID,	/* DDATA_size.  icmp does not support disconnect data. */
	0,		/* ADDR_size - filled in later. */
	0,		/* OPT_size - not initialized here */
	IP_MAXPACKET,	/* TIDU_size.  icmp allows maximum size messages. */
	T_CLTS,		/* SERV_type.  icmp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from icmp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/*
 * Table of ND variables supported by icmp.  These are loaded into icmp_g_nd
 * in icmp_open.
 * All of these are alterable, within the min/max values given, at run time.
 *
 * The accessor macros that follow index this array by position, so the
 * order of the entries and the macro indices must be kept in step.
 */
static icmpparam_t	icmp_param_arr[] = {
	/* min	max	value	name */
	{ 0,	128,	32,	"icmp_wroff_extra" },
	{ 1,	255,	255,	"icmp_ipv4_ttl" },
	{ 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS,	"icmp_ipv6_hoplimit"},
	{ 0,	1,	1,	"icmp_bsd_compat" },
	{ 4096,	65536,	8192,	"icmp_xmit_hiwat"},
	{ 0,	65536,	1024,	"icmp_xmit_lowat"},
	{ 4096,	65536,	8192,	"icmp_recv_hiwat"},
	{ 65536, 1024*1024*1024, 256*1024,	"icmp_max_buf"},
};
/* Current value of each tunable, by position in icmp_param_arr. */
#define	icmp_wroff_extra	icmp_param_arr[0].icmp_param_value
#define	icmp_ipv4_ttl		icmp_param_arr[1].icmp_param_value
#define	icmp_ipv6_hoplimit	icmp_param_arr[2].icmp_param_value
#define	icmp_bsd_compat		icmp_param_arr[3].icmp_param_value
#define	icmp_xmit_hiwat		icmp_param_arr[4].icmp_param_value
#define	icmp_xmit_lowat		icmp_param_arr[5].icmp_param_value
#define	icmp_recv_hiwat		icmp_param_arr[6].icmp_param_value
#define	icmp_max_buf		icmp_param_arr[7].icmp_param_value

/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to icmp_wput.
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP
 * protocol type placed in the message following the address.  A T_BIND_ACK
 * message is passed upstream when ip acknowledges the request.
 * (Called as writer.)
233 */ 234 static void 235 icmp_bind(queue_t *q, mblk_t *mp) 236 { 237 sin_t *sin; 238 sin6_t *sin6; 239 mblk_t *mp1; 240 struct T_bind_req *tbr; 241 icmp_t *icmp; 242 243 icmp = (icmp_t *)q->q_ptr; 244 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 245 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 246 "icmp_bind: bad req, len %u", 247 (uint_t)(mp->b_wptr - mp->b_rptr)); 248 icmp_err_ack(q, mp, TPROTO, 0); 249 return; 250 } 251 if (icmp->icmp_state != TS_UNBND) { 252 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 253 "icmp_bind: bad state, %d", icmp->icmp_state); 254 icmp_err_ack(q, mp, TOUTSTATE, 0); 255 return; 256 } 257 /* 258 * Reallocate the message to make sure we have enough room for an 259 * address and the protocol type. 260 */ 261 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 262 if (!mp1) { 263 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 264 return; 265 } 266 mp = mp1; 267 tbr = (struct T_bind_req *)mp->b_rptr; 268 switch (tbr->ADDR_length) { 269 case 0: /* Generic request */ 270 tbr->ADDR_offset = sizeof (struct T_bind_req); 271 if (icmp->icmp_family == AF_INET) { 272 tbr->ADDR_length = sizeof (sin_t); 273 sin = (sin_t *)&tbr[1]; 274 *sin = sin_null; 275 sin->sin_family = AF_INET; 276 mp->b_wptr = (uchar_t *)&sin[1]; 277 } else { 278 ASSERT(icmp->icmp_family == AF_INET6); 279 tbr->ADDR_length = sizeof (sin6_t); 280 sin6 = (sin6_t *)&tbr[1]; 281 *sin6 = sin6_null; 282 sin6->sin6_family = AF_INET6; 283 mp->b_wptr = (uchar_t *)&sin6[1]; 284 } 285 break; 286 case sizeof (sin_t): /* Complete IP address */ 287 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 288 sizeof (sin_t)); 289 if (sin == NULL || !OK_32PTR((char *)sin)) { 290 icmp_err_ack(q, mp, TSYSERR, EINVAL); 291 return; 292 } 293 if (icmp->icmp_family != AF_INET || 294 sin->sin_family != AF_INET) { 295 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 296 return; 297 } 298 break; 299 case sizeof (sin6_t): /* Complete IP address */ 300 sin6 = (sin6_t *)mi_offset_param(mp, 
tbr->ADDR_offset, 301 sizeof (sin6_t)); 302 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 303 icmp_err_ack(q, mp, TSYSERR, EINVAL); 304 return; 305 } 306 if (icmp->icmp_family != AF_INET6 || 307 sin6->sin6_family != AF_INET6) { 308 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 309 return; 310 } 311 /* No support for mapped addresses on raw sockets */ 312 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 313 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 314 return; 315 } 316 break; 317 default: 318 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 319 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 320 icmp_err_ack(q, mp, TBADADDR, 0); 321 return; 322 } 323 /* 324 * Copy the source address into our icmp structure. This address 325 * may still be zero; if so, ip will fill in the correct address 326 * each time an outbound packet is passed to it. 327 * If we are binding to a broadcast or multicast address icmp_rput 328 * will clear the source address when it receives the T_BIND_ACK. 329 */ 330 icmp->icmp_state = TS_IDLE; 331 332 if (icmp->icmp_family == AF_INET) { 333 ASSERT(sin != NULL); 334 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 335 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 336 &icmp->icmp_v6src); 337 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 338 icmp->icmp_ip_snd_options_len; 339 icmp->icmp_bound_v6src = icmp->icmp_v6src; 340 } else { 341 int error; 342 343 ASSERT(sin6 != NULL); 344 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 345 icmp->icmp_v6src = sin6->sin6_addr; 346 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 347 icmp->icmp_bound_v6src = icmp->icmp_v6src; 348 349 /* Rebuild the header template */ 350 error = icmp_build_hdrs(q, icmp); 351 if (error != 0) { 352 icmp_err_ack(q, mp, TSYSERR, error); 353 return; 354 } 355 } 356 /* 357 * Place protocol type in the O_T_BIND_REQ/T_BIND_REQ following 358 * the address. 
359 */ 360 *mp->b_wptr++ = icmp->icmp_proto; 361 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 362 /* 363 * Append a request for an IRE if src not 0 (INADDR_ANY) 364 */ 365 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 366 if (!mp->b_cont) { 367 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 368 return; 369 } 370 mp->b_cont->b_wptr += sizeof (ire_t); 371 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 372 } 373 374 /* Pass the O_T_BIND_REQ/T_BIND_REQ to ip. */ 375 putnext(q, mp); 376 } 377 378 /* 379 * Send message to IP to just bind to the protocol. 380 */ 381 static void 382 icmp_bind_proto(queue_t *q) 383 { 384 mblk_t *mp; 385 struct T_bind_req *tbr; 386 icmp_t *icmp; 387 388 icmp = (icmp_t *)q->q_ptr; 389 mp = allocb(sizeof (struct T_bind_req) + sizeof (sin6_t) + 1, 390 BPRI_MED); 391 if (!mp) { 392 return; 393 } 394 mp->b_datap->db_type = M_PROTO; 395 tbr = (struct T_bind_req *)mp->b_rptr; 396 tbr->PRIM_type = O_T_BIND_REQ; /* change to T_BIND_REQ ? */ 397 tbr->ADDR_offset = sizeof (struct T_bind_req); 398 if (icmp->icmp_ipversion == IPV4_VERSION) { 399 sin_t *sin; 400 401 tbr->ADDR_length = sizeof (sin_t); 402 sin = (sin_t *)&tbr[1]; 403 *sin = sin_null; 404 sin->sin_family = AF_INET; 405 mp->b_wptr = (uchar_t *)&sin[1]; 406 } else { 407 sin6_t *sin6; 408 409 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 410 tbr->ADDR_length = sizeof (sin6_t); 411 sin6 = (sin6_t *)&tbr[1]; 412 *sin6 = sin6_null; 413 sin6->sin6_family = AF_INET6; 414 mp->b_wptr = (uchar_t *)&sin6[1]; 415 } 416 417 /* Place protocol type in the O_T_BIND_REQ following the address. */ 418 *mp->b_wptr++ = icmp->icmp_proto; 419 420 /* Pass the O_T_BIND_REQ to ip. */ 421 putnext(q, mp); 422 } 423 424 /* 425 * This routine handles each T_CONN_REQ message passed to icmp. It 426 * associates a default destination address with the stream. 427 * 428 * This routine sends down a T_BIND_REQ to IP with the following mblks: 429 * T_BIND_REQ - specifying local and remote address. 
430 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 431 * T_OK_ACK - for the T_CONN_REQ 432 * T_CONN_CON - to keep the TPI user happy 433 * 434 * The connect completes in icmp_rput. 435 * When a T_BIND_ACK is received information is extracted from the IRE 436 * and the two appended messages are sent to the TPI user. 437 * Should icmp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 438 * it to an error ack for the appropriate primitive. 439 */ 440 static void 441 icmp_connect(queue_t *q, mblk_t *mp) 442 { 443 sin_t *sin; 444 sin6_t *sin6; 445 mblk_t *mp1, *mp2; 446 struct T_conn_req *tcr; 447 icmp_t *icmp; 448 ipaddr_t v4dst; 449 in6_addr_t v6dst; 450 uint32_t flowinfo; 451 452 icmp = (icmp_t *)q->q_ptr; 453 tcr = (struct T_conn_req *)mp->b_rptr; 454 /* Sanity checks */ 455 if ((mp->b_wptr - mp->b_rptr < sizeof (struct T_conn_req))) { 456 icmp_err_ack(q, mp, TPROTO, 0); 457 return; 458 } 459 460 if (icmp->icmp_state == TS_DATA_XFER) { 461 /* Already connected - clear out state */ 462 icmp->icmp_v6src = icmp->icmp_bound_v6src; 463 icmp->icmp_state = TS_IDLE; 464 } 465 466 467 if (tcr->OPT_length != 0) { 468 icmp_err_ack(q, mp, TBADOPT, 0); 469 return; 470 } 471 switch (tcr->DEST_length) { 472 default: 473 icmp_err_ack(q, mp, TBADADDR, 0); 474 return; 475 476 case sizeof (sin_t): 477 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 478 sizeof (sin_t)); 479 if (sin == NULL || !OK_32PTR((char *)sin)) { 480 icmp_err_ack(q, mp, TSYSERR, EINVAL); 481 return; 482 } 483 if (icmp->icmp_family != AF_INET || 484 sin->sin_family != AF_INET) { 485 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 486 return; 487 } 488 v4dst = sin->sin_addr.s_addr; 489 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 490 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 491 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 492 icmp->icmp_ip_snd_options_len; 493 break; 494 495 case sizeof (sin6_t): 496 sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 497 sizeof (sin6_t)); 498 if 
(sin6 == NULL || !OK_32PTR((char *)sin6)) { 499 icmp_err_ack(q, mp, TSYSERR, EINVAL); 500 return; 501 } 502 if (icmp->icmp_family != AF_INET6 || 503 sin6->sin6_family != AF_INET6) { 504 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 505 return; 506 } 507 /* No support for mapped addresses on raw sockets */ 508 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 509 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 510 return; 511 } 512 v6dst = sin6->sin6_addr; 513 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 514 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 515 flowinfo = sin6->sin6_flowinfo; 516 break; 517 } 518 if (icmp->icmp_ipversion == IPV4_VERSION) { 519 /* 520 * Interpret a zero destination to mean loopback. 521 * Update the T_CONN_REQ (sin/sin6) since it is used to 522 * generate the T_CONN_CON. 523 */ 524 if (v4dst == INADDR_ANY) { 525 v4dst = htonl(INADDR_LOOPBACK); 526 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 527 if (icmp->icmp_family == AF_INET) { 528 sin->sin_addr.s_addr = v4dst; 529 } else { 530 sin6->sin6_addr = v6dst; 531 } 532 } 533 icmp->icmp_v6dst = v6dst; 534 icmp->icmp_flowinfo = 0; 535 536 /* 537 * If the destination address is multicast and 538 * an outgoing multicast interface has been set, 539 * use the address of that interface as our 540 * source address if no source address has been set. 541 */ 542 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 543 CLASSD(v4dst) && 544 icmp->icmp_multicast_if_addr != INADDR_ANY) { 545 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 546 &icmp->icmp_v6src); 547 } 548 } else { 549 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 550 /* 551 * Interpret a zero destination to mean loopback. 552 * Update the T_CONN_REQ (sin/sin6) since it is used to 553 * generate the T_CONN_CON. 
554 */ 555 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 556 v6dst = ipv6_loopback; 557 sin6->sin6_addr = v6dst; 558 } 559 icmp->icmp_v6dst = v6dst; 560 icmp->icmp_flowinfo = flowinfo; 561 /* 562 * If the destination address is multicast and 563 * an outgoing multicast interface has been set, 564 * then the ip bind logic will pick the correct source 565 * address (i.e. matching the outgoing multicast interface). 566 */ 567 } 568 569 /* 570 * Send down bind to IP to verify that there is a route 571 * and to determine the source address. 572 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 573 */ 574 if (icmp->icmp_family == AF_INET) { 575 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa_conn_t), 576 sin->sin_port); 577 } else { 578 ASSERT(icmp->icmp_family == AF_INET6); 579 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa6_conn_t), 580 sin6->sin6_port); 581 } 582 if (mp1 == NULL) { 583 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 584 return; 585 } 586 587 /* 588 * We also have to send a connection confirmation to 589 * keep TLI happy. Prepare it for icmp_rput. 590 */ 591 if (icmp->icmp_family == AF_INET) { 592 mp2 = mi_tpi_conn_con(NULL, (char *)sin, sizeof (*sin), NULL, 593 0); 594 } else { 595 ASSERT(icmp->icmp_family == AF_INET6); 596 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, sizeof (*sin6), NULL, 597 0); 598 } 599 if (mp2 == NULL) { 600 freemsg(mp1); 601 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 602 return; 603 } 604 605 mp = mi_tpi_ok_ack_alloc(mp); 606 if (mp == NULL) { 607 /* Unable to reuse the T_CONN_REQ for the ack. */ 608 freemsg(mp2); 609 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 610 return; 611 } 612 613 icmp->icmp_state = TS_DATA_XFER; 614 615 /* Hang onto the T_OK_ACK and T_CONN_CON for later. 
*/ 616 linkb(mp1, mp); 617 linkb(mp1, mp2); 618 619 mblk_setcred(mp1, icmp->icmp_credp); 620 putnext(q, mp1); 621 } 622 623 static int 624 icmp_close(queue_t *q) 625 { 626 icmp_t *icmp = (icmp_t *)q->q_ptr; 627 int i1; 628 629 /* tell IP that if we're not here, he can't trust labels */ 630 if (is_system_labeled()) 631 putnext(WR(q), icmp->icmp_delabel); 632 633 qprocsoff(q); 634 635 /* If there are any options associated with the stream, free them. */ 636 if (icmp->icmp_ip_snd_options) 637 mi_free((char *)icmp->icmp_ip_snd_options); 638 639 if (icmp->icmp_filter != NULL) 640 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 641 642 /* Free memory associated with sticky options */ 643 if (icmp->icmp_sticky_hdrs_len != 0) { 644 kmem_free(icmp->icmp_sticky_hdrs, 645 icmp->icmp_sticky_hdrs_len); 646 icmp->icmp_sticky_hdrs = NULL; 647 icmp->icmp_sticky_hdrs_len = 0; 648 } 649 650 ip6_pkt_free(&icmp->icmp_sticky_ipp); 651 652 crfree(icmp->icmp_credp); 653 654 /* Free the icmp structure and release the minor device number. */ 655 i1 = mi_close_comm(&icmp_g_head, q); 656 657 return (i1); 658 } 659 660 /* 661 * This routine handles each T_DISCON_REQ message passed to icmp 662 * as an indicating that ICMP is no longer connected. This results 663 * in sending a T_BIND_REQ to IP to restore the binding to just 664 * the local address. 665 * 666 * This routine sends down a T_BIND_REQ to IP with the following mblks: 667 * T_BIND_REQ - specifying just the local address. 668 * T_OK_ACK - for the T_DISCON_REQ 669 * 670 * The disconnect completes in icmp_rput. 671 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 672 * Should icmp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 673 * it to an error ack for the appropriate primitive. 
674 */ 675 static void 676 icmp_disconnect(queue_t *q, mblk_t *mp) 677 { 678 icmp_t *icmp; 679 mblk_t *mp1; 680 681 icmp = (icmp_t *)q->q_ptr; 682 683 if (icmp->icmp_state != TS_DATA_XFER) { 684 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 685 "icmp_disconnect: bad state, %d", icmp->icmp_state); 686 icmp_err_ack(q, mp, TOUTSTATE, 0); 687 return; 688 } 689 icmp->icmp_v6src = icmp->icmp_bound_v6src; 690 icmp->icmp_state = TS_IDLE; 691 692 /* 693 * Send down bind to IP to remove the full binding and revert 694 * to the local address binding. 695 */ 696 if (icmp->icmp_family == AF_INET) { 697 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin_t), 0); 698 } else { 699 ASSERT(icmp->icmp_family == AF_INET6); 700 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin6_t), 0); 701 } 702 if (mp1 == NULL) { 703 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 704 return; 705 } 706 mp = mi_tpi_ok_ack_alloc(mp); 707 if (mp == NULL) { 708 /* Unable to reuse the T_DISCON_REQ for the ack. */ 709 icmp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 710 return; 711 } 712 713 if (icmp->icmp_family == AF_INET6) { 714 int error; 715 716 /* Rebuild the header template */ 717 error = icmp_build_hdrs(q, icmp); 718 if (error != 0) { 719 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 720 freemsg(mp1); 721 return; 722 } 723 } 724 icmp->icmp_discon_pending = 1; 725 726 /* Append the T_OK_ACK to the T_BIND_REQ for icmp_rput */ 727 linkb(mp1, mp); 728 putnext(q, mp1); 729 } 730 731 /* This routine creates a T_ERROR_ACK message and passes it upstream. 
*/ 732 static void 733 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 734 { 735 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 736 qreply(q, mp); 737 } 738 739 /* Shorthand to generate and send TPI error acks to our client */ 740 static void 741 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 742 t_scalar_t t_error, int sys_error) 743 { 744 struct T_error_ack *teackp; 745 746 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 747 M_PCPROTO, T_ERROR_ACK)) != NULL) { 748 teackp = (struct T_error_ack *)mp->b_rptr; 749 teackp->ERROR_prim = primitive; 750 teackp->TLI_error = t_error; 751 teackp->UNIX_error = sys_error; 752 qreply(q, mp); 753 } 754 } 755 756 /* 757 * icmp_icmp_error is called by icmp_rput to process ICMP 758 * messages passed up by IP. 759 * Generates the appropriate T_UDERROR_IND for permanent 760 * (non-transient) errors. 761 * Assumes that IP has pulled up everything up to and including 762 * the ICMP header. 763 */ 764 static void 765 icmp_icmp_error(queue_t *q, mblk_t *mp) 766 { 767 icmph_t *icmph; 768 ipha_t *ipha; 769 int iph_hdr_length; 770 sin_t sin; 771 sin6_t sin6; 772 mblk_t *mp1; 773 int error = 0; 774 icmp_t *icmp = (icmp_t *)q->q_ptr; 775 776 /* 777 * Deliver T_UDERROR_IND when the application has asked for it. 778 * The socket layer enables this automatically when connected. 
779 */ 780 if (!icmp->icmp_dgram_errind) { 781 freemsg(mp); 782 return; 783 } 784 785 ipha = (ipha_t *)mp->b_rptr; 786 787 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 788 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 789 icmp_icmp_error_ipv6(q, mp); 790 return; 791 } 792 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 793 794 iph_hdr_length = IPH_HDR_LENGTH(ipha); 795 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 796 ipha = (ipha_t *)&icmph[1]; 797 iph_hdr_length = IPH_HDR_LENGTH(ipha); 798 799 switch (icmph->icmph_type) { 800 case ICMP_DEST_UNREACHABLE: 801 switch (icmph->icmph_code) { 802 case ICMP_FRAGMENTATION_NEEDED: 803 /* 804 * IP has already adjusted the path MTU. 805 * XXX Somehow pass MTU indication to application? 806 */ 807 break; 808 case ICMP_PORT_UNREACHABLE: 809 case ICMP_PROTOCOL_UNREACHABLE: 810 error = ECONNREFUSED; 811 break; 812 default: 813 /* Transient errors */ 814 break; 815 } 816 break; 817 default: 818 /* Transient errors */ 819 break; 820 } 821 if (error == 0) { 822 freemsg(mp); 823 return; 824 } 825 826 switch (icmp->icmp_family) { 827 case AF_INET: 828 sin = sin_null; 829 sin.sin_family = AF_INET; 830 sin.sin_addr.s_addr = ipha->ipha_dst; 831 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 832 error); 833 break; 834 case AF_INET6: 835 sin6 = sin6_null; 836 sin6.sin6_family = AF_INET6; 837 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 838 839 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 840 NULL, 0, error); 841 break; 842 } 843 if (mp1) 844 putnext(q, mp1); 845 freemsg(mp); 846 } 847 848 /* 849 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 850 * for IPv6 packets. 851 * Send permanent (non-transient) errors upstream. 852 * Assumes that IP has pulled up all the extension headers as well 853 * as the ICMPv6 header. 
854 */ 855 static void 856 icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 857 { 858 icmp6_t *icmp6; 859 ip6_t *ip6h, *outer_ip6h; 860 uint16_t iph_hdr_length; 861 uint8_t *nexthdrp; 862 sin6_t sin6; 863 mblk_t *mp1; 864 int error = 0; 865 icmp_t *icmp = (icmp_t *)q->q_ptr; 866 867 outer_ip6h = (ip6_t *)mp->b_rptr; 868 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 869 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 870 else 871 iph_hdr_length = IPV6_HDR_LEN; 872 873 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 874 ip6h = (ip6_t *)&icmp6[1]; 875 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 876 freemsg(mp); 877 return; 878 } 879 if (*nexthdrp != icmp->icmp_proto) { 880 /* 881 * Could have switched icmp_proto after while ip did fanout of 882 * this message 883 */ 884 freemsg(mp); 885 return; 886 } 887 switch (icmp6->icmp6_type) { 888 case ICMP6_DST_UNREACH: 889 switch (icmp6->icmp6_code) { 890 case ICMP6_DST_UNREACH_NOPORT: 891 error = ECONNREFUSED; 892 break; 893 case ICMP6_DST_UNREACH_ADMIN: 894 case ICMP6_DST_UNREACH_NOROUTE: 895 case ICMP6_DST_UNREACH_BEYONDSCOPE: 896 case ICMP6_DST_UNREACH_ADDR: 897 /* Transient errors */ 898 break; 899 default: 900 break; 901 } 902 break; 903 case ICMP6_PACKET_TOO_BIG: { 904 struct T_unitdata_ind *tudi; 905 struct T_opthdr *toh; 906 size_t udi_size; 907 mblk_t *newmp; 908 t_scalar_t opt_length = sizeof (struct T_opthdr) + 909 sizeof (struct ip6_mtuinfo); 910 sin6_t *sin6; 911 struct ip6_mtuinfo *mtuinfo; 912 913 /* 914 * If the application has requested to receive path mtu 915 * information, send up an empty message containing an 916 * IPV6_PATHMTU ancillary data item. 917 */ 918 if (!icmp->icmp_ipv6_recvpathmtu) 919 break; 920 921 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 922 opt_length; 923 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 924 BUMP_MIB(&rawip_mib, rawipInErrors); 925 break; 926 } 927 928 /* 929 * newmp->b_cont is left to NULL on purpose. 
This is an 930 * empty message containing only ancillary data. 931 */ 932 newmp->b_datap->db_type = M_PROTO; 933 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 934 newmp->b_wptr = (uchar_t *)tudi + udi_size; 935 tudi->PRIM_type = T_UNITDATA_IND; 936 tudi->SRC_length = sizeof (sin6_t); 937 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 938 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 939 tudi->OPT_length = opt_length; 940 941 sin6 = (sin6_t *)&tudi[1]; 942 bzero(sin6, sizeof (sin6_t)); 943 sin6->sin6_family = AF_INET6; 944 sin6->sin6_addr = icmp->icmp_v6dst; 945 946 toh = (struct T_opthdr *)&sin6[1]; 947 toh->level = IPPROTO_IPV6; 948 toh->name = IPV6_PATHMTU; 949 toh->len = opt_length; 950 toh->status = 0; 951 952 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 953 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 954 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 955 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 956 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 957 /* 958 * We've consumed everything we need from the original 959 * message. Free it, then send our empty message. 960 */ 961 freemsg(mp); 962 putnext(q, newmp); 963 return; 964 } 965 case ICMP6_TIME_EXCEEDED: 966 /* Transient errors */ 967 break; 968 case ICMP6_PARAM_PROB: 969 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 970 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 971 (uchar_t *)ip6h + icmp6->icmp6_pptr == 972 (uchar_t *)nexthdrp) { 973 error = ECONNREFUSED; 974 break; 975 } 976 break; 977 } 978 if (error == 0) { 979 freemsg(mp); 980 return; 981 } 982 983 sin6 = sin6_null; 984 sin6.sin6_family = AF_INET6; 985 sin6.sin6_addr = ip6h->ip6_dst; 986 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 987 988 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 989 error); 990 if (mp1) 991 putnext(q, mp1); 992 freemsg(mp); 993 } 994 995 /* 996 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 
997 * The local address is filled in if endpoint is bound. The remote address 998 * is filled in if remote address has been precified ("connected endpoint") 999 * (The concept of connected CLTS sockets is alien to published TPI 1000 * but we support it anyway). 1001 */ 1002 static void 1003 icmp_addr_req(queue_t *q, mblk_t *mp) 1004 { 1005 icmp_t *icmp = (icmp_t *)q->q_ptr; 1006 mblk_t *ackmp; 1007 struct T_addr_ack *taa; 1008 1009 /* Make it large enough for worst case */ 1010 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1011 2 * sizeof (sin6_t), 1); 1012 if (ackmp == NULL) { 1013 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1014 return; 1015 } 1016 taa = (struct T_addr_ack *)ackmp->b_rptr; 1017 1018 bzero(taa, sizeof (struct T_addr_ack)); 1019 ackmp->b_wptr = (uchar_t *)&taa[1]; 1020 1021 taa->PRIM_type = T_ADDR_ACK; 1022 ackmp->b_datap->db_type = M_PCPROTO; 1023 1024 /* 1025 * Note: Following code assumes 32 bit alignment of basic 1026 * data structures like sin_t and struct T_addr_ack. 1027 */ 1028 if (icmp->icmp_state != TS_UNBND) { 1029 /* 1030 * Fill in local address 1031 */ 1032 taa->LOCADDR_offset = sizeof (*taa); 1033 if (icmp->icmp_family == AF_INET) { 1034 sin_t *sin; 1035 1036 taa->LOCADDR_length = sizeof (sin_t); 1037 sin = (sin_t *)&taa[1]; 1038 /* Fill zeroes and then intialize non-zero fields */ 1039 *sin = sin_null; 1040 sin->sin_family = AF_INET; 1041 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1042 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1043 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1044 sin->sin_addr.s_addr); 1045 } else { 1046 /* 1047 * INADDR_ANY 1048 * icmp_v6src is not set, we might be bound to 1049 * broadcast/multicast. 
Use icmp_bound_v6src as 1050 * local address instead (that could 1051 * also still be INADDR_ANY) 1052 */ 1053 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1054 sin->sin_addr.s_addr); 1055 } 1056 ackmp->b_wptr = (uchar_t *)&sin[1]; 1057 } else { 1058 sin6_t *sin6; 1059 1060 ASSERT(icmp->icmp_family == AF_INET6); 1061 taa->LOCADDR_length = sizeof (sin6_t); 1062 sin6 = (sin6_t *)&taa[1]; 1063 /* Fill zeroes and then intialize non-zero fields */ 1064 *sin6 = sin6_null; 1065 sin6->sin6_family = AF_INET6; 1066 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1067 sin6->sin6_addr = icmp->icmp_v6src; 1068 } else { 1069 /* 1070 * UNSPECIFIED 1071 * icmp_v6src is not set, we might be bound to 1072 * broadcast/multicast. Use icmp_bound_v6src as 1073 * local address instead (that could 1074 * also still be UNSPECIFIED) 1075 */ 1076 sin6->sin6_addr = icmp->icmp_bound_v6src; 1077 } 1078 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1079 } 1080 } 1081 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1082 qreply(q, ackmp); 1083 } 1084 1085 static void 1086 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1087 { 1088 *tap = icmp_g_t_info_ack; 1089 1090 if (icmp->icmp_family == AF_INET6) 1091 tap->ADDR_size = sizeof (sin6_t); 1092 else 1093 tap->ADDR_size = sizeof (sin_t); 1094 tap->CURRENT_state = icmp->icmp_state; 1095 tap->OPT_size = icmp_max_optsize; 1096 } 1097 1098 /* 1099 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1100 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1101 * icmp_g_t_info_ack. The current state of the stream is copied from 1102 * icmp_state. 
1103 */ 1104 static void 1105 icmp_capability_req(queue_t *q, mblk_t *mp) 1106 { 1107 icmp_t *icmp = (icmp_t *)q->q_ptr; 1108 t_uscalar_t cap_bits1; 1109 struct T_capability_ack *tcap; 1110 1111 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1112 1113 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1114 mp->b_datap->db_type, T_CAPABILITY_ACK); 1115 if (!mp) 1116 return; 1117 1118 tcap = (struct T_capability_ack *)mp->b_rptr; 1119 tcap->CAP_bits1 = 0; 1120 1121 if (cap_bits1 & TC1_INFO) { 1122 icmp_copy_info(&tcap->INFO_ack, icmp); 1123 tcap->CAP_bits1 |= TC1_INFO; 1124 } 1125 1126 qreply(q, mp); 1127 } 1128 1129 /* 1130 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1131 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1132 * The current state of the stream is copied from icmp_state. 1133 */ 1134 static void 1135 icmp_info_req(queue_t *q, mblk_t *mp) 1136 { 1137 icmp_t *icmp = (icmp_t *)q->q_ptr; 1138 1139 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1140 T_INFO_ACK); 1141 if (!mp) 1142 return; 1143 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1144 qreply(q, mp); 1145 } 1146 1147 /* 1148 * IP recognizes seven kinds of bind requests: 1149 * 1150 * - A zero-length address binds only to the protocol number. 1151 * 1152 * - A 4-byte address is treated as a request to 1153 * validate that the address is a valid local IPv4 1154 * address, appropriate for an application to bind to. 1155 * IP does the verification, but does not make any note 1156 * of the address at this time. 1157 * 1158 * - A 16-byte address contains is treated as a request 1159 * to validate a local IPv6 address, as the 4-byte 1160 * address case above. 1161 * 1162 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1163 * use it for the inbound fanout of packets. 
1164 * 1165 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1166 * use it for the inbound fanout of packets. 1167 * 1168 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1169 * information consisting of local and remote addresses 1170 * and ports (unused for raw sockets). In this case, the addresses are both 1171 * validated as appropriate for this operation, and, if 1172 * so, the information is retained for use in the 1173 * inbound fanout. 1174 * 1175 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1176 * fanout information, like the 12-byte case above. 1177 * 1178 * IP will also fill in the IRE request mblk with information 1179 * regarding our peer. In all cases, we notify IP of our protocol 1180 * type by appending a single protocol byte to the bind request. 1181 */ 1182 static mblk_t * 1183 icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length, 1184 in_port_t fport) 1185 { 1186 char *cp; 1187 mblk_t *mp; 1188 struct T_bind_req *tbr; 1189 ipa_conn_t *ac; 1190 ipa6_conn_t *ac6; 1191 sin_t *sin; 1192 sin6_t *sin6; 1193 1194 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1195 1196 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1197 if (mp == NULL) 1198 return (NULL); 1199 mp->b_datap->db_type = M_PROTO; 1200 tbr = (struct T_bind_req *)mp->b_rptr; 1201 tbr->PRIM_type = bind_prim; 1202 tbr->ADDR_offset = sizeof (*tbr); 1203 tbr->CONIND_number = 0; 1204 tbr->ADDR_length = addr_length; 1205 cp = (char *)&tbr[1]; 1206 switch (addr_length) { 1207 case sizeof (ipa_conn_t): 1208 ASSERT(icmp->icmp_family == AF_INET); 1209 /* Append a request for an IRE */ 1210 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1211 if (mp->b_cont == NULL) { 1212 freemsg(mp); 1213 return (NULL); 1214 } 1215 mp->b_cont->b_wptr += sizeof (ire_t); 1216 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1217 1218 /* cp known to be 32 bit aligned */ 1219 ac = (ipa_conn_t *)cp; 1220 ac->ac_laddr = 
V4_PART_OF_V6(icmp->icmp_v6src); 1221 ac->ac_faddr = V4_PART_OF_V6(icmp->icmp_v6dst); 1222 ac->ac_fport = fport; 1223 ac->ac_lport = 0; 1224 break; 1225 1226 case sizeof (ipa6_conn_t): 1227 ASSERT(icmp->icmp_family == AF_INET6); 1228 /* Append a request for an IRE */ 1229 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1230 if (mp->b_cont == NULL) { 1231 freemsg(mp); 1232 return (NULL); 1233 } 1234 mp->b_cont->b_wptr += sizeof (ire_t); 1235 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1236 1237 /* cp known to be 32 bit aligned */ 1238 ac6 = (ipa6_conn_t *)cp; 1239 ac6->ac6_laddr = icmp->icmp_v6src; 1240 ac6->ac6_faddr = icmp->icmp_v6dst; 1241 ac6->ac6_fport = fport; 1242 ac6->ac6_lport = 0; 1243 break; 1244 1245 case sizeof (sin_t): 1246 ASSERT(icmp->icmp_family == AF_INET); 1247 /* Append a request for an IRE */ 1248 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1249 if (!mp->b_cont) { 1250 freemsg(mp); 1251 return (NULL); 1252 } 1253 mp->b_cont->b_wptr += sizeof (ire_t); 1254 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1255 1256 sin = (sin_t *)cp; 1257 *sin = sin_null; 1258 sin->sin_family = AF_INET; 1259 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 1260 break; 1261 1262 case sizeof (sin6_t): 1263 ASSERT(icmp->icmp_family == AF_INET6); 1264 /* Append a request for an IRE */ 1265 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1266 if (!mp->b_cont) { 1267 freemsg(mp); 1268 return (NULL); 1269 } 1270 mp->b_cont->b_wptr += sizeof (ire_t); 1271 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1272 1273 sin6 = (sin6_t *)cp; 1274 *sin6 = sin6_null; 1275 sin6->sin6_family = AF_INET6; 1276 sin6->sin6_addr = icmp->icmp_bound_v6src; 1277 break; 1278 } 1279 /* Add protocol number to end */ 1280 cp[addr_length] = icmp->icmp_proto; 1281 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1282 return (mp); 1283 } 1284 1285 /* ARGSUSED */ 1286 static void 1287 dummy_func(void *arg) 1288 { 1289 } 1290 1291 static mblk_t * 1292 alloc_wait(queue_t *q, size_t len, int 
pri, int *errp) 1293 { 1294 mblk_t *mp; 1295 bufcall_id_t id; 1296 int retv; 1297 1298 while ((mp = allocb(len, pri)) == NULL) { 1299 id = qbufcall(q, len, pri, dummy_func, NULL); 1300 if (id == 0) { 1301 *errp = ENOMEM; 1302 break; 1303 } 1304 retv = qwait_sig(q); 1305 qunbufcall(q, id); 1306 if (retv == 0) { 1307 *errp = EINTR; 1308 break; 1309 } 1310 } 1311 if (mp != NULL) 1312 mp->b_wptr += len; 1313 return (mp); 1314 } 1315 1316 /* 1317 * This is the open routine for icmp. It allocates a icmp_t structure for 1318 * the stream and, on the first open of the module, creates an ND table. 1319 */ 1320 static int 1321 icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1322 { 1323 int err; 1324 icmp_t *icmp; 1325 mblk_t *mp; 1326 out_labeled_t *olp; 1327 1328 /* If the stream is already open, return immediately. */ 1329 if (q->q_ptr != NULL) 1330 return (0); 1331 1332 /* If this is not a push of icmp as a module, fail. */ 1333 if (sflag != MODOPEN) 1334 return (EINVAL); 1335 1336 /* 1337 * Defer the qprocson until everything is initialized since 1338 * we are D_MTPERQ and after qprocson the rput routine can 1339 * run. (Could do qprocson earlier since icmp currently 1340 * has an outer perimeter.) 1341 */ 1342 1343 /* 1344 * Create a icmp_t structure for this stream and link into the 1345 * list of open streams. 1346 */ 1347 err = mi_open_comm(&icmp_g_head, sizeof (icmp_t), q, devp, 1348 flag, sflag, credp); 1349 if (err != 0) 1350 return (err); 1351 1352 /* 1353 * The receive hiwat is only looked at on the stream head queue. 1354 * Store in q_hiwat in order to return on SO_RCVBUF getsockopts. 1355 */ 1356 q->q_hiwat = icmp_recv_hiwat; 1357 1358 /* Set the initial state of the stream and the privilege status. 
*/ 1359 icmp = (icmp_t *)q->q_ptr; 1360 icmp->icmp_state = TS_UNBND; 1361 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1362 icmp->icmp_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1363 icmp->icmp_filter = NULL; 1364 1365 icmp->icmp_credp = credp; 1366 crhold(credp); 1367 1368 /* 1369 * If the caller has the process-wide flag set, then default to MAC 1370 * exempt mode. This allows read-down to unlabeled hosts. 1371 */ 1372 if (getpflags(NET_MAC_AWARE, credp) != 0) 1373 icmp->icmp_mac_exempt = B_TRUE; 1374 1375 icmp->icmp_zoneid = getzoneid(); 1376 1377 if (getmajor(*devp) == (major_t)ICMP6_MAJ) { 1378 icmp->icmp_ipversion = IPV6_VERSION; 1379 icmp->icmp_family = AF_INET6; 1380 /* May be changed by a SO_PROTOTYPE socket option. */ 1381 icmp->icmp_proto = IPPROTO_ICMPV6; 1382 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1383 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1384 icmp->icmp_ttl = (uint8_t)icmp_ipv6_hoplimit; 1385 } else { 1386 icmp->icmp_ipversion = IPV4_VERSION; 1387 icmp->icmp_family = AF_INET; 1388 /* May be changed by a SO_PROTOTYPE socket option. */ 1389 icmp->icmp_proto = IPPROTO_ICMP; 1390 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1391 icmp->icmp_ttl = (uint8_t)icmp_ipv4_ttl; 1392 } 1393 qprocson(q); 1394 1395 /* 1396 * Check if icmp is being I_PUSHed by a non-privileged user. 1397 * If so, we set icmp_restricted to indicate that only MIB 1398 * traffic may pass. 1399 */ 1400 if (secpolicy_net_icmpaccess(credp) != 0) { 1401 icmp->icmp_restricted = 1; 1402 } 1403 1404 /* 1405 * The transmit hiwat is only looked at on IP's queue. 1406 * Store in q_hiwat in order to return on SO_SNDBUF 1407 * getsockopts. 
1408 */ 1409 WR(q)->q_hiwat = icmp_xmit_hiwat; 1410 WR(q)->q_next->q_hiwat = WR(q)->q_hiwat; 1411 WR(q)->q_lowat = icmp_xmit_lowat; 1412 WR(q)->q_next->q_lowat = WR(q)->q_lowat; 1413 1414 if (icmp->icmp_family == AF_INET6) { 1415 /* Build initial header template for transmit */ 1416 err = icmp_build_hdrs(q, icmp); 1417 if (err != 0) 1418 goto open_error; 1419 } 1420 /* Set the Stream head write offset. */ 1421 (void) mi_set_sth_wroff(q, icmp->icmp_max_hdr_len + icmp_wroff_extra); 1422 (void) mi_set_sth_hiwat(q, q->q_hiwat); 1423 1424 if (is_system_labeled()) { 1425 /* notify IP that we know about labeling */ 1426 mp = alloc_wait(q, sizeof (*olp), BPRI_MED, &err); 1427 if (mp == NULL) 1428 goto open_error; 1429 mp->b_datap->db_type = M_CTL; 1430 olp = (out_labeled_t *)mp->b_rptr; 1431 olp->out_labeled_type = IP_ULP_OUT_LABELED; 1432 olp->out_qnext = WR(q)->q_next; 1433 putnext(WR(q), mp); 1434 1435 /* save off a copy for closing */ 1436 mp = alloc_wait(q, sizeof (*olp), BPRI_MED, &err); 1437 if (mp == NULL) 1438 goto open_error; 1439 mp->b_datap->db_type = M_CTL; 1440 olp = (out_labeled_t *)mp->b_rptr; 1441 olp->out_labeled_type = IP_ULP_OUT_LABELED; 1442 olp->out_qnext = NULL; 1443 icmp->icmp_delabel = mp; 1444 } 1445 1446 return (0); 1447 1448 open_error: 1449 qprocsoff(q); 1450 crfree(credp); 1451 (void) mi_close_comm(&icmp_g_head, q); 1452 return (err); 1453 } 1454 1455 /* 1456 * Which ICMP options OK to set through T_UNITDATA_REQ... 
1457 */ 1458 /* ARGSUSED */ 1459 static boolean_t 1460 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1461 { 1462 return (B_TRUE); 1463 } 1464 1465 /* 1466 * This routine gets default values of certain options whose default 1467 * values are maintained by protcol specific code 1468 */ 1469 /* ARGSUSED */ 1470 int 1471 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1472 { 1473 int *i1 = (int *)ptr; 1474 1475 switch (level) { 1476 case IPPROTO_IP: 1477 switch (name) { 1478 case IP_MULTICAST_TTL: 1479 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1480 return (sizeof (uchar_t)); 1481 case IP_MULTICAST_LOOP: 1482 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1483 return (sizeof (uchar_t)); 1484 } 1485 break; 1486 case IPPROTO_IPV6: 1487 switch (name) { 1488 case IPV6_MULTICAST_HOPS: 1489 *i1 = IP_DEFAULT_MULTICAST_TTL; 1490 return (sizeof (int)); 1491 case IPV6_MULTICAST_LOOP: 1492 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1493 return (sizeof (int)); 1494 case IPV6_UNICAST_HOPS: 1495 *i1 = icmp_ipv6_hoplimit; 1496 return (sizeof (int)); 1497 } 1498 break; 1499 case IPPROTO_ICMPV6: 1500 switch (name) { 1501 case ICMP6_FILTER: 1502 /* Make it look like "pass all" */ 1503 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1504 return (sizeof (icmp6_filter_t)); 1505 } 1506 break; 1507 } 1508 return (-1); 1509 } 1510 1511 /* 1512 * This routine retrieves the current status of socket options. 1513 * It returns the size of the option retrieved. 
1514 */ 1515 int 1516 icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 1517 { 1518 icmp_t *icmp = (icmp_t *)q->q_ptr; 1519 int *i1 = (int *)ptr; 1520 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1521 1522 switch (level) { 1523 case SOL_SOCKET: 1524 switch (name) { 1525 case SO_DEBUG: 1526 *i1 = icmp->icmp_debug; 1527 break; 1528 case SO_TYPE: 1529 *i1 = SOCK_RAW; 1530 break; 1531 case SO_PROTOTYPE: 1532 *i1 = icmp->icmp_proto; 1533 break; 1534 case SO_REUSEADDR: 1535 *i1 = icmp->icmp_reuseaddr; 1536 break; 1537 1538 /* 1539 * The following three items are available here, 1540 * but are only meaningful to IP. 1541 */ 1542 case SO_DONTROUTE: 1543 *i1 = icmp->icmp_dontroute; 1544 break; 1545 case SO_USELOOPBACK: 1546 *i1 = icmp->icmp_useloopback; 1547 break; 1548 case SO_BROADCAST: 1549 *i1 = icmp->icmp_broadcast; 1550 break; 1551 1552 case SO_SNDBUF: 1553 ASSERT(q->q_hiwat <= INT_MAX); 1554 *i1 = (int)q->q_hiwat; 1555 break; 1556 case SO_RCVBUF: 1557 ASSERT(RD(q)->q_hiwat <= INT_MAX); 1558 *i1 = (int)RD(q)->q_hiwat; 1559 break; 1560 case SO_DGRAM_ERRIND: 1561 *i1 = icmp->icmp_dgram_errind; 1562 break; 1563 case SO_TIMESTAMP: 1564 *i1 = icmp->icmp_timestamp; 1565 break; 1566 case SO_MAC_EXEMPT: 1567 *i1 = icmp->icmp_mac_exempt; 1568 break; 1569 /* 1570 * Following four not meaningful for icmp 1571 * Action is same as "default" to which we fallthrough 1572 * so we keep them in comments. 1573 * case SO_LINGER: 1574 * case SO_KEEPALIVE: 1575 * case SO_OOBINLINE: 1576 * case SO_ALLZONES: 1577 */ 1578 default: 1579 return (-1); 1580 } 1581 break; 1582 case IPPROTO_IP: 1583 /* 1584 * Only allow IPv4 option processing on IPv4 sockets. 
1585 */ 1586 if (icmp->icmp_family != AF_INET) 1587 return (-1); 1588 1589 switch (name) { 1590 case IP_OPTIONS: 1591 case T_IP_OPTIONS: 1592 /* Options are passed up with each packet */ 1593 return (0); 1594 case IP_HDRINCL: 1595 *i1 = (int)icmp->icmp_hdrincl; 1596 break; 1597 case IP_TOS: 1598 case T_IP_TOS: 1599 *i1 = (int)icmp->icmp_type_of_service; 1600 break; 1601 case IP_TTL: 1602 *i1 = (int)icmp->icmp_ttl; 1603 break; 1604 case IP_MULTICAST_IF: 1605 /* 0 address if not set */ 1606 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1607 return (sizeof (ipaddr_t)); 1608 case IP_MULTICAST_TTL: 1609 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1610 return (sizeof (uchar_t)); 1611 case IP_MULTICAST_LOOP: 1612 *ptr = icmp->icmp_multicast_loop; 1613 return (sizeof (uint8_t)); 1614 case IP_BOUND_IF: 1615 /* Zero if not set */ 1616 *i1 = icmp->icmp_bound_if; 1617 break; /* goto sizeof (int) option return */ 1618 case IP_UNSPEC_SRC: 1619 *ptr = icmp->icmp_unspec_source; 1620 break; /* goto sizeof (int) option return */ 1621 case IP_XMIT_IF: 1622 *i1 = icmp->icmp_xmit_if; 1623 break; /* goto sizeof (int) option return */ 1624 case IP_RECVIF: 1625 *ptr = icmp->icmp_recvif; 1626 break; /* goto sizeof (int) option return */ 1627 /* 1628 * Cannot "get" the value of following options 1629 * at this level. Action is same as "default" to 1630 * which we fallthrough so we keep them in comments. 
1631 * 1632 * case IP_ADD_MEMBERSHIP: 1633 * case IP_DROP_MEMBERSHIP: 1634 * case IP_BLOCK_SOURCE: 1635 * case IP_UNBLOCK_SOURCE: 1636 * case IP_ADD_SOURCE_MEMBERSHIP: 1637 * case IP_DROP_SOURCE_MEMBERSHIP: 1638 * case MCAST_JOIN_GROUP: 1639 * case MCAST_LEAVE_GROUP: 1640 * case MCAST_BLOCK_SOURCE: 1641 * case MCAST_UNBLOCK_SOURCE: 1642 * case MCAST_JOIN_SOURCE_GROUP: 1643 * case MCAST_LEAVE_SOURCE_GROUP: 1644 * case MRT_INIT: 1645 * case MRT_DONE: 1646 * case MRT_ADD_VIF: 1647 * case MRT_DEL_VIF: 1648 * case MRT_ADD_MFC: 1649 * case MRT_DEL_MFC: 1650 * case MRT_VERSION: 1651 * case MRT_ASSERT: 1652 * case IP_SEC_OPT: 1653 * case IP_DONTFAILOVER_IF: 1654 * case IP_NEXTHOP: 1655 */ 1656 default: 1657 return (-1); 1658 } 1659 break; 1660 case IPPROTO_IPV6: 1661 /* 1662 * Only allow IPv6 option processing on native IPv6 sockets. 1663 */ 1664 if (icmp->icmp_family != AF_INET6) 1665 return (-1); 1666 switch (name) { 1667 case IPV6_UNICAST_HOPS: 1668 *i1 = (unsigned int)icmp->icmp_ttl; 1669 break; 1670 case IPV6_MULTICAST_IF: 1671 /* 0 index if not set */ 1672 *i1 = icmp->icmp_multicast_if_index; 1673 break; 1674 case IPV6_MULTICAST_HOPS: 1675 *i1 = icmp->icmp_multicast_ttl; 1676 break; 1677 case IPV6_MULTICAST_LOOP: 1678 *i1 = icmp->icmp_multicast_loop; 1679 break; 1680 case IPV6_BOUND_IF: 1681 /* Zero if not set */ 1682 *i1 = icmp->icmp_bound_if; 1683 break; 1684 case IPV6_UNSPEC_SRC: 1685 *i1 = icmp->icmp_unspec_source; 1686 break; 1687 case IPV6_CHECKSUM: 1688 /* 1689 * Return offset or -1 if no checksum offset. 
1690 * Does not apply to IPPROTO_ICMPV6 1691 */ 1692 if (icmp->icmp_proto == IPPROTO_ICMPV6) 1693 return (-1); 1694 1695 if (icmp->icmp_raw_checksum) { 1696 *i1 = icmp->icmp_checksum_off; 1697 } else { 1698 *i1 = -1; 1699 } 1700 break; 1701 case IPV6_JOIN_GROUP: 1702 case IPV6_LEAVE_GROUP: 1703 case MCAST_JOIN_GROUP: 1704 case MCAST_LEAVE_GROUP: 1705 case MCAST_BLOCK_SOURCE: 1706 case MCAST_UNBLOCK_SOURCE: 1707 case MCAST_JOIN_SOURCE_GROUP: 1708 case MCAST_LEAVE_SOURCE_GROUP: 1709 /* cannot "get" the value for these */ 1710 return (-1); 1711 case IPV6_RECVPKTINFO: 1712 *i1 = icmp->icmp_ipv6_recvpktinfo; 1713 break; 1714 case IPV6_RECVTCLASS: 1715 *i1 = icmp->icmp_ipv6_recvtclass; 1716 break; 1717 case IPV6_RECVPATHMTU: 1718 *i1 = icmp->icmp_ipv6_recvpathmtu; 1719 break; 1720 case IPV6_V6ONLY: 1721 *i1 = 1; 1722 break; 1723 case IPV6_RECVHOPLIMIT: 1724 *i1 = icmp->icmp_ipv6_recvhoplimit; 1725 break; 1726 case IPV6_RECVHOPOPTS: 1727 *i1 = icmp->icmp_ipv6_recvhopopts; 1728 break; 1729 case IPV6_RECVDSTOPTS: 1730 *i1 = icmp->icmp_ipv6_recvdstopts; 1731 break; 1732 case _OLD_IPV6_RECVDSTOPTS: 1733 *i1 = icmp->icmp_old_ipv6_recvdstopts; 1734 break; 1735 case IPV6_RECVRTHDRDSTOPTS: 1736 *i1 = icmp->icmp_ipv6_recvrtdstopts; 1737 break; 1738 case IPV6_RECVRTHDR: 1739 *i1 = icmp->icmp_ipv6_recvrthdr; 1740 break; 1741 case IPV6_PKTINFO: { 1742 /* XXX assumes that caller has room for max size! 
*/ 1743 struct in6_pktinfo *pkti; 1744 1745 pkti = (struct in6_pktinfo *)ptr; 1746 if (ipp->ipp_fields & IPPF_IFINDEX) 1747 pkti->ipi6_ifindex = ipp->ipp_ifindex; 1748 else 1749 pkti->ipi6_ifindex = 0; 1750 if (ipp->ipp_fields & IPPF_ADDR) 1751 pkti->ipi6_addr = ipp->ipp_addr; 1752 else 1753 pkti->ipi6_addr = ipv6_all_zeros; 1754 return (sizeof (struct in6_pktinfo)); 1755 } 1756 case IPV6_NEXTHOP: { 1757 sin6_t *sin6 = (sin6_t *)ptr; 1758 1759 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 1760 return (0); 1761 *sin6 = sin6_null; 1762 sin6->sin6_family = AF_INET6; 1763 sin6->sin6_addr = ipp->ipp_nexthop; 1764 return (sizeof (sin6_t)); 1765 } 1766 case IPV6_HOPOPTS: 1767 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 1768 return (0); 1769 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 1770 return (0); 1771 bcopy((char *)ipp->ipp_hopopts + 1772 icmp->icmp_label_len_v6, ptr, 1773 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1774 if (icmp->icmp_label_len_v6 > 0) { 1775 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 1776 ptr[1] = (ipp->ipp_hopoptslen - 1777 icmp->icmp_label_len_v6 + 7) / 8 - 1; 1778 } 1779 return (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1780 case IPV6_RTHDRDSTOPTS: 1781 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 1782 return (0); 1783 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 1784 return (ipp->ipp_rtdstoptslen); 1785 case IPV6_RTHDR: 1786 if (!(ipp->ipp_fields & IPPF_RTHDR)) 1787 return (0); 1788 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 1789 return (ipp->ipp_rthdrlen); 1790 case IPV6_DSTOPTS: 1791 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 1792 return (0); 1793 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 1794 return (ipp->ipp_dstoptslen); 1795 case IPV6_PATHMTU: 1796 if (!(ipp->ipp_fields & IPPF_PATHMTU)) 1797 return (0); 1798 1799 return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0, 1800 (struct ip6_mtuinfo *)ptr)); 1801 case IPV6_TCLASS: 1802 if (ipp->ipp_fields & IPPF_TCLASS) 1803 *i1 = ipp->ipp_tclass; 1804 else 1805 *i1 = IPV6_FLOW_TCLASS( 
1806 IPV6_DEFAULT_VERS_AND_FLOW); 1807 break; 1808 default: 1809 return (-1); 1810 } 1811 break; 1812 case IPPROTO_ICMPV6: 1813 /* 1814 * Only allow IPv6 option processing on native IPv6 sockets. 1815 */ 1816 if (icmp->icmp_family != AF_INET6) 1817 return (-1); 1818 1819 if (icmp->icmp_proto != IPPROTO_ICMPV6) 1820 return (-1); 1821 1822 switch (name) { 1823 case ICMP6_FILTER: 1824 if (icmp->icmp_filter == NULL) { 1825 /* Make it look like "pass all" */ 1826 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1827 } else { 1828 (void) bcopy(icmp->icmp_filter, ptr, 1829 sizeof (icmp6_filter_t)); 1830 } 1831 return (sizeof (icmp6_filter_t)); 1832 default: 1833 return (-1); 1834 } 1835 default: 1836 return (-1); 1837 } 1838 return (sizeof (int)); 1839 } 1840 1841 /* This routine sets socket options. */ 1842 /* ARGSUSED */ 1843 int 1844 icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, 1845 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 1846 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 1847 { 1848 icmp_t *icmp = (icmp_t *)q->q_ptr; 1849 int *i1 = (int *)invalp; 1850 boolean_t onoff = (*i1 == 0) ? 0 : 1; 1851 boolean_t checkonly; 1852 int error; 1853 1854 switch (optset_context) { 1855 case SETFN_OPTCOM_CHECKONLY: 1856 checkonly = B_TRUE; 1857 /* 1858 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 1859 * inlen != 0 implies value supplied and 1860 * we have to "pretend" to set it. 1861 * inlen == 0 implies that there is no 1862 * value part in T_CHECK request and just validation 1863 * done elsewhere should be enough, we just return here. 1864 */ 1865 if (inlen == 0) { 1866 *outlenp = 0; 1867 return (0); 1868 } 1869 break; 1870 case SETFN_OPTCOM_NEGOTIATE: 1871 checkonly = B_FALSE; 1872 break; 1873 case SETFN_UD_NEGOTIATE: 1874 case SETFN_CONN_NEGOTIATE: 1875 checkonly = B_FALSE; 1876 /* 1877 * Negotiating local and "association-related" options 1878 * through T_UNITDATA_REQ. 
1879 * 1880 * Following routine can filter out ones we do not 1881 * want to be "set" this way. 1882 */ 1883 if (!icmp_opt_allow_udr_set(level, name)) { 1884 *outlenp = 0; 1885 return (EINVAL); 1886 } 1887 break; 1888 default: 1889 /* 1890 * We should never get here 1891 */ 1892 *outlenp = 0; 1893 return (EINVAL); 1894 } 1895 1896 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 1897 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 1898 1899 /* 1900 * For fixed length options, no sanity check 1901 * of passed in length is done. It is assumed *_optcom_req() 1902 * routines do the right thing. 1903 */ 1904 1905 switch (level) { 1906 case SOL_SOCKET: 1907 switch (name) { 1908 case SO_DEBUG: 1909 if (!checkonly) 1910 icmp->icmp_debug = onoff; 1911 break; 1912 case SO_PROTOTYPE: 1913 if ((*i1 & 0xFF) != IPPROTO_ICMP && 1914 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 1915 secpolicy_net_rawaccess(cr) != 0) { 1916 *outlenp = 0; 1917 return (EACCES); 1918 } 1919 /* Can't use IPPROTO_RAW with IPv6 */ 1920 if ((*i1 & 0xFF) == IPPROTO_RAW && 1921 icmp->icmp_family == AF_INET6) { 1922 *outlenp = 0; 1923 return (EPROTONOSUPPORT); 1924 } 1925 if (checkonly) { 1926 /* T_CHECK case */ 1927 *(int *)outvalp = (*i1 & 0xFF); 1928 break; 1929 } 1930 icmp->icmp_proto = *i1 & 0xFF; 1931 if ((icmp->icmp_proto == IPPROTO_RAW || 1932 icmp->icmp_proto == IPPROTO_IGMP) && 1933 icmp->icmp_family == AF_INET) 1934 icmp->icmp_hdrincl = 1; 1935 else 1936 icmp->icmp_hdrincl = 0; 1937 1938 if (icmp->icmp_family == AF_INET6 && 1939 icmp->icmp_proto == IPPROTO_ICMPV6) { 1940 /* Set offset for icmp6_cksum */ 1941 icmp->icmp_raw_checksum = 0; 1942 icmp->icmp_checksum_off = 2; 1943 } 1944 if (icmp->icmp_proto == IPPROTO_UDP || 1945 icmp->icmp_proto == IPPROTO_TCP || 1946 icmp->icmp_proto == IPPROTO_SCTP) { 1947 icmp->icmp_no_tp_cksum = 1; 1948 icmp->icmp_sticky_ipp.ipp_fields |= 1949 IPPF_NO_CKSUM; 1950 } else { 1951 icmp->icmp_no_tp_cksum = 0; 1952 icmp->icmp_sticky_ipp.ipp_fields &= 1953 
~IPPF_NO_CKSUM; 1954 } 1955 1956 if (icmp->icmp_filter != NULL && 1957 icmp->icmp_proto != IPPROTO_ICMPV6) { 1958 kmem_free(icmp->icmp_filter, 1959 sizeof (icmp6_filter_t)); 1960 icmp->icmp_filter = NULL; 1961 } 1962 1963 /* Rebuild the header template */ 1964 error = icmp_build_hdrs(q, icmp); 1965 if (error != 0) { 1966 *outlenp = 0; 1967 return (error); 1968 } 1969 1970 /* 1971 * For SCTP, we don't use icmp_bind_proto() for 1972 * raw socket binding. Note that we do not need 1973 * to set *outlenp. 1974 */ 1975 if (icmp->icmp_proto == IPPROTO_SCTP) 1976 return (0); 1977 1978 icmp_bind_proto(q); 1979 *outlenp = sizeof (int); 1980 *(int *)outvalp = *i1 & 0xFF; 1981 return (0); 1982 case SO_REUSEADDR: 1983 if (!checkonly) 1984 icmp->icmp_reuseaddr = onoff; 1985 break; 1986 1987 /* 1988 * The following three items are available here, 1989 * but are only meaningful to IP. 1990 */ 1991 case SO_DONTROUTE: 1992 if (!checkonly) 1993 icmp->icmp_dontroute = onoff; 1994 break; 1995 case SO_USELOOPBACK: 1996 if (!checkonly) 1997 icmp->icmp_useloopback = onoff; 1998 break; 1999 case SO_BROADCAST: 2000 if (!checkonly) 2001 icmp->icmp_broadcast = onoff; 2002 break; 2003 2004 case SO_SNDBUF: 2005 if (*i1 > icmp_max_buf) { 2006 *outlenp = 0; 2007 return (ENOBUFS); 2008 } 2009 if (!checkonly) { 2010 q->q_hiwat = *i1; 2011 q->q_next->q_hiwat = *i1; 2012 } 2013 break; 2014 case SO_RCVBUF: 2015 if (*i1 > icmp_max_buf) { 2016 *outlenp = 0; 2017 return (ENOBUFS); 2018 } 2019 if (!checkonly) { 2020 RD(q)->q_hiwat = *i1; 2021 (void) mi_set_sth_hiwat(RD(q), *i1); 2022 } 2023 break; 2024 case SO_DGRAM_ERRIND: 2025 if (!checkonly) 2026 icmp->icmp_dgram_errind = onoff; 2027 break; 2028 case SO_ALLZONES: 2029 /* 2030 * "soft" error (negative) 2031 * option not handled at this level 2032 * Note: Do not modify *outlenp 2033 */ 2034 return (-EINVAL); 2035 case SO_TIMESTAMP: 2036 if (!checkonly) { 2037 icmp->icmp_timestamp = onoff; 2038 } 2039 break; 2040 case SO_MAC_EXEMPT: 2041 if 
(secpolicy_net_mac_aware(cr) != 0 || 2042 icmp->icmp_state != TS_UNBND) 2043 return (EACCES); 2044 if (!checkonly) 2045 icmp->icmp_mac_exempt = onoff; 2046 break; 2047 /* 2048 * Following three not meaningful for icmp 2049 * Action is same as "default" so we keep them 2050 * in comments. 2051 * case SO_LINGER: 2052 * case SO_KEEPALIVE: 2053 * case SO_OOBINLINE: 2054 */ 2055 default: 2056 *outlenp = 0; 2057 return (EINVAL); 2058 } 2059 break; 2060 case IPPROTO_IP: 2061 /* 2062 * Only allow IPv4 option processing on IPv4 sockets. 2063 */ 2064 if (icmp->icmp_family != AF_INET) { 2065 *outlenp = 0; 2066 return (ENOPROTOOPT); 2067 } 2068 switch (name) { 2069 case IP_OPTIONS: 2070 case T_IP_OPTIONS: 2071 /* Save options for use by IP. */ 2072 if ((inlen & 0x3) || 2073 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2074 *outlenp = 0; 2075 return (EINVAL); 2076 } 2077 if (checkonly) 2078 break; 2079 2080 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2081 &icmp->icmp_ip_snd_options_len, 2082 icmp->icmp_label_len, invalp, inlen)) { 2083 *outlenp = 0; 2084 return (ENOMEM); 2085 } 2086 2087 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2088 icmp->icmp_ip_snd_options_len; 2089 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2090 icmp_wroff_extra); 2091 break; 2092 case IP_HDRINCL: 2093 if (!checkonly) 2094 icmp->icmp_hdrincl = onoff; 2095 break; 2096 case IP_TOS: 2097 case T_IP_TOS: 2098 if (!checkonly) { 2099 icmp->icmp_type_of_service = (uint8_t)*i1; 2100 } 2101 break; 2102 case IP_TTL: 2103 if (!checkonly) { 2104 icmp->icmp_ttl = (uint8_t)*i1; 2105 } 2106 break; 2107 case IP_MULTICAST_IF: 2108 /* 2109 * TODO should check OPTMGMT reply and undo this if 2110 * there is an error. 
2111 */ 2112 if (!checkonly) 2113 icmp->icmp_multicast_if_addr = *i1; 2114 break; 2115 case IP_MULTICAST_TTL: 2116 if (!checkonly) 2117 icmp->icmp_multicast_ttl = *invalp; 2118 break; 2119 case IP_MULTICAST_LOOP: 2120 if (!checkonly) { 2121 icmp->icmp_multicast_loop = 2122 (*invalp == 0) ? 0 : 1; 2123 } 2124 break; 2125 case IP_BOUND_IF: 2126 if (!checkonly) 2127 icmp->icmp_bound_if = *i1; 2128 break; 2129 case IP_UNSPEC_SRC: 2130 if (!checkonly) 2131 icmp->icmp_unspec_source = onoff; 2132 break; 2133 case IP_XMIT_IF: 2134 if (!checkonly) 2135 icmp->icmp_xmit_if = *i1; 2136 break; 2137 case IP_RECVIF: 2138 if (!checkonly) 2139 icmp->icmp_recvif = onoff; 2140 break; 2141 case IP_ADD_MEMBERSHIP: 2142 case IP_DROP_MEMBERSHIP: 2143 case IP_BLOCK_SOURCE: 2144 case IP_UNBLOCK_SOURCE: 2145 case IP_ADD_SOURCE_MEMBERSHIP: 2146 case IP_DROP_SOURCE_MEMBERSHIP: 2147 case MCAST_JOIN_GROUP: 2148 case MCAST_LEAVE_GROUP: 2149 case MCAST_BLOCK_SOURCE: 2150 case MCAST_UNBLOCK_SOURCE: 2151 case MCAST_JOIN_SOURCE_GROUP: 2152 case MCAST_LEAVE_SOURCE_GROUP: 2153 case MRT_INIT: 2154 case MRT_DONE: 2155 case MRT_ADD_VIF: 2156 case MRT_DEL_VIF: 2157 case MRT_ADD_MFC: 2158 case MRT_DEL_MFC: 2159 case MRT_VERSION: 2160 case MRT_ASSERT: 2161 case IP_SEC_OPT: 2162 case IP_DONTFAILOVER_IF: 2163 case IP_NEXTHOP: 2164 /* 2165 * "soft" error (negative) 2166 * option not handled at this level 2167 * Note: Do not modify *outlenp 2168 */ 2169 return (-EINVAL); 2170 default: 2171 *outlenp = 0; 2172 return (EINVAL); 2173 } 2174 break; 2175 case IPPROTO_IPV6: { 2176 ip6_pkt_t *ipp; 2177 boolean_t sticky; 2178 2179 if (icmp->icmp_family != AF_INET6) { 2180 *outlenp = 0; 2181 return (ENOPROTOOPT); 2182 } 2183 /* 2184 * Deal with both sticky options and ancillary data 2185 */ 2186 if (thisdg_attrs == NULL) { 2187 /* sticky options, or none */ 2188 ipp = &icmp->icmp_sticky_ipp; 2189 sticky = B_TRUE; 2190 } else { 2191 /* ancillary data */ 2192 ipp = (ip6_pkt_t *)thisdg_attrs; 2193 sticky = B_FALSE; 2194 } 
2195 2196 switch (name) { 2197 case IPV6_MULTICAST_IF: 2198 if (!checkonly) 2199 icmp->icmp_multicast_if_index = *i1; 2200 break; 2201 case IPV6_UNICAST_HOPS: 2202 /* -1 means use default */ 2203 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2204 *outlenp = 0; 2205 return (EINVAL); 2206 } 2207 if (!checkonly) { 2208 if (*i1 == -1) { 2209 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2210 icmp_ipv6_hoplimit; 2211 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2212 /* Pass modified value to IP. */ 2213 *i1 = ipp->ipp_hoplimit; 2214 } else { 2215 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2216 (uint8_t)*i1; 2217 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2218 } 2219 /* Rebuild the header template */ 2220 error = icmp_build_hdrs(q, icmp); 2221 if (error != 0) { 2222 *outlenp = 0; 2223 return (error); 2224 } 2225 } 2226 break; 2227 case IPV6_MULTICAST_HOPS: 2228 /* -1 means use default */ 2229 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2230 *outlenp = 0; 2231 return (EINVAL); 2232 } 2233 if (!checkonly) { 2234 if (*i1 == -1) { 2235 icmp->icmp_multicast_ttl = 2236 ipp->ipp_multicast_hops = 2237 IP_DEFAULT_MULTICAST_TTL; 2238 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2239 /* Pass modified value to IP. */ 2240 *i1 = icmp->icmp_multicast_ttl; 2241 } else { 2242 icmp->icmp_multicast_ttl = 2243 ipp->ipp_multicast_hops = 2244 (uint8_t)*i1; 2245 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2246 } 2247 } 2248 break; 2249 case IPV6_MULTICAST_LOOP: 2250 if (*i1 != 0 && *i1 != 1) { 2251 *outlenp = 0; 2252 return (EINVAL); 2253 } 2254 if (!checkonly) 2255 icmp->icmp_multicast_loop = *i1; 2256 break; 2257 case IPV6_CHECKSUM: 2258 /* 2259 * Integer offset into the user data of where the 2260 * checksum is located. 2261 * Offset of -1 disables option. 2262 * Does not apply to IPPROTO_ICMPV6. 
2263 */ 2264 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2265 *outlenp = 0; 2266 return (EINVAL); 2267 } 2268 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2269 /* Negative or not 16 bit aligned offset */ 2270 *outlenp = 0; 2271 return (EINVAL); 2272 } 2273 if (checkonly) 2274 break; 2275 2276 if (*i1 == -1) { 2277 icmp->icmp_raw_checksum = 0; 2278 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2279 } else { 2280 icmp->icmp_raw_checksum = 1; 2281 icmp->icmp_checksum_off = *i1; 2282 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2283 } 2284 /* Rebuild the header template */ 2285 error = icmp_build_hdrs(q, icmp); 2286 if (error != 0) { 2287 *outlenp = 0; 2288 return (error); 2289 } 2290 break; 2291 case IPV6_JOIN_GROUP: 2292 case IPV6_LEAVE_GROUP: 2293 case MCAST_JOIN_GROUP: 2294 case MCAST_LEAVE_GROUP: 2295 case MCAST_BLOCK_SOURCE: 2296 case MCAST_UNBLOCK_SOURCE: 2297 case MCAST_JOIN_SOURCE_GROUP: 2298 case MCAST_LEAVE_SOURCE_GROUP: 2299 /* 2300 * "soft" error (negative) 2301 * option not handled at this level 2302 * Note: Do not modify *outlenp 2303 */ 2304 return (-EINVAL); 2305 case IPV6_BOUND_IF: 2306 if (!checkonly) 2307 icmp->icmp_bound_if = *i1; 2308 break; 2309 case IPV6_UNSPEC_SRC: 2310 if (!checkonly) 2311 icmp->icmp_unspec_source = onoff; 2312 break; 2313 case IPV6_RECVTCLASS: 2314 if (!checkonly) 2315 icmp->icmp_ipv6_recvtclass = onoff; 2316 break; 2317 /* 2318 * Set boolean switches for ancillary data delivery 2319 */ 2320 case IPV6_RECVPKTINFO: 2321 if (!checkonly) 2322 icmp->icmp_ipv6_recvpktinfo = onoff; 2323 break; 2324 case IPV6_RECVPATHMTU: 2325 if (!checkonly) 2326 icmp->icmp_ipv6_recvpathmtu = onoff; 2327 break; 2328 case IPV6_RECVHOPLIMIT: 2329 if (!checkonly) 2330 icmp->icmp_ipv6_recvhoplimit = onoff; 2331 break; 2332 case IPV6_RECVHOPOPTS: 2333 if (!checkonly) 2334 icmp->icmp_ipv6_recvhopopts = onoff; 2335 break; 2336 case IPV6_RECVDSTOPTS: 2337 if (!checkonly) 2338 icmp->icmp_ipv6_recvdstopts = onoff; 2339 break; 2340 case _OLD_IPV6_RECVDSTOPTS: 
2341 if (!checkonly) 2342 icmp->icmp_old_ipv6_recvdstopts = onoff; 2343 break; 2344 case IPV6_RECVRTHDRDSTOPTS: 2345 if (!checkonly) 2346 icmp->icmp_ipv6_recvrtdstopts = onoff; 2347 break; 2348 case IPV6_RECVRTHDR: 2349 if (!checkonly) 2350 icmp->icmp_ipv6_recvrthdr = onoff; 2351 break; 2352 /* 2353 * Set sticky options or ancillary data. 2354 * If sticky options, (re)build any extension headers 2355 * that might be needed as a result. 2356 */ 2357 case IPV6_PKTINFO: 2358 /* 2359 * The source address and ifindex are verified 2360 * in ip_opt_set(). For ancillary data the 2361 * source address is checked in ip_wput_v6. 2362 */ 2363 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2364 return (EINVAL); 2365 if (checkonly) 2366 break; 2367 2368 if (inlen == 0) { 2369 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2370 ipp->ipp_sticky_ignored |= 2371 (IPPF_IFINDEX|IPPF_ADDR); 2372 } else { 2373 struct in6_pktinfo *pkti; 2374 2375 pkti = (struct in6_pktinfo *)invalp; 2376 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2377 ipp->ipp_addr = pkti->ipi6_addr; 2378 if (ipp->ipp_ifindex != 0) 2379 ipp->ipp_fields |= IPPF_IFINDEX; 2380 else 2381 ipp->ipp_fields &= ~IPPF_IFINDEX; 2382 if (!IN6_IS_ADDR_UNSPECIFIED( 2383 &ipp->ipp_addr)) 2384 ipp->ipp_fields |= IPPF_ADDR; 2385 else 2386 ipp->ipp_fields &= ~IPPF_ADDR; 2387 } 2388 if (sticky) { 2389 error = icmp_build_hdrs(q, icmp); 2390 if (error != 0) 2391 return (error); 2392 } 2393 break; 2394 case IPV6_HOPLIMIT: 2395 /* This option can only be used as ancillary data. 
*/ 2396 if (sticky) 2397 return (EINVAL); 2398 if (inlen != 0 && inlen != sizeof (int)) 2399 return (EINVAL); 2400 if (checkonly) 2401 break; 2402 2403 if (inlen == 0) { 2404 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2405 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2406 } else { 2407 if (*i1 > 255 || *i1 < -1) 2408 return (EINVAL); 2409 if (*i1 == -1) 2410 ipp->ipp_hoplimit = icmp_ipv6_hoplimit; 2411 else 2412 ipp->ipp_hoplimit = *i1; 2413 ipp->ipp_fields |= IPPF_HOPLIMIT; 2414 } 2415 break; 2416 case IPV6_TCLASS: 2417 /* 2418 * IPV6_RECVTCLASS accepts -1 as use kernel default 2419 * and [0, 255] as the actualy traffic class. 2420 */ 2421 if (inlen != 0 && inlen != sizeof (int)) 2422 return (EINVAL); 2423 if (checkonly) 2424 break; 2425 2426 if (inlen == 0) { 2427 ipp->ipp_fields &= ~IPPF_TCLASS; 2428 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2429 } else { 2430 if (*i1 >= 256 || *i1 < -1) 2431 return (EINVAL); 2432 if (*i1 == -1) { 2433 ipp->ipp_tclass = 2434 IPV6_FLOW_TCLASS( 2435 IPV6_DEFAULT_VERS_AND_FLOW); 2436 } else { 2437 ipp->ipp_tclass = *i1; 2438 } 2439 ipp->ipp_fields |= IPPF_TCLASS; 2440 } 2441 if (sticky) { 2442 error = icmp_build_hdrs(q, icmp); 2443 if (error != 0) 2444 return (error); 2445 } 2446 break; 2447 case IPV6_NEXTHOP: 2448 /* 2449 * IP will verify that the nexthop is reachable 2450 * and fail for sticky options. 
2451 */ 2452 if (inlen != 0 && inlen != sizeof (sin6_t)) 2453 return (EINVAL); 2454 if (checkonly) 2455 break; 2456 2457 if (inlen == 0) { 2458 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2459 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2460 } else { 2461 sin6_t *sin6 = (sin6_t *)invalp; 2462 2463 if (sin6->sin6_family != AF_INET6) 2464 return (EAFNOSUPPORT); 2465 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 2466 return (EADDRNOTAVAIL); 2467 ipp->ipp_nexthop = sin6->sin6_addr; 2468 if (!IN6_IS_ADDR_UNSPECIFIED( 2469 &ipp->ipp_nexthop)) 2470 ipp->ipp_fields |= IPPF_NEXTHOP; 2471 else 2472 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2473 } 2474 if (sticky) { 2475 error = icmp_build_hdrs(q, icmp); 2476 if (error != 0) 2477 return (error); 2478 } 2479 break; 2480 case IPV6_HOPOPTS: { 2481 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2482 /* 2483 * Sanity checks - minimum size, size a multiple of 2484 * eight bytes, and matching size passed in. 2485 */ 2486 if (inlen != 0 && 2487 inlen != (8 * (hopts->ip6h_len + 1))) 2488 return (EINVAL); 2489 2490 if (checkonly) 2491 break; 2492 error = optcom_pkt_set(invalp, inlen, sticky, 2493 (uchar_t **)&ipp->ipp_hopopts, 2494 &ipp->ipp_hopoptslen, 2495 sticky ? icmp->icmp_label_len_v6 : 0); 2496 if (error != 0) 2497 return (error); 2498 if (ipp->ipp_hopoptslen == 0) { 2499 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2500 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2501 } else { 2502 ipp->ipp_fields |= IPPF_HOPOPTS; 2503 } 2504 if (sticky) { 2505 error = icmp_build_hdrs(q, icmp); 2506 if (error != 0) 2507 return (error); 2508 } 2509 break; 2510 } 2511 case IPV6_RTHDRDSTOPTS: { 2512 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2513 2514 /* 2515 * Sanity checks - minimum size, size a multiple of 2516 * eight bytes, and matching size passed in. 
2517 */ 2518 if (inlen != 0 && 2519 inlen != (8 * (dopts->ip6d_len + 1))) 2520 return (EINVAL); 2521 2522 if (checkonly) 2523 break; 2524 2525 if (inlen == 0) { 2526 if (sticky && 2527 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2528 kmem_free(ipp->ipp_rtdstopts, 2529 ipp->ipp_rtdstoptslen); 2530 ipp->ipp_rtdstopts = NULL; 2531 ipp->ipp_rtdstoptslen = 0; 2532 } 2533 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2534 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2535 } else { 2536 error = optcom_pkt_set(invalp, inlen, sticky, 2537 (uchar_t **)&ipp->ipp_rtdstopts, 2538 &ipp->ipp_rtdstoptslen, 0); 2539 if (error != 0) 2540 return (error); 2541 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2542 } 2543 if (sticky) { 2544 error = icmp_build_hdrs(q, icmp); 2545 if (error != 0) 2546 return (error); 2547 } 2548 break; 2549 } 2550 case IPV6_DSTOPTS: { 2551 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2552 2553 /* 2554 * Sanity checks - minimum size, size a multiple of 2555 * eight bytes, and matching size passed in. 2556 */ 2557 if (inlen != 0 && 2558 inlen != (8 * (dopts->ip6d_len + 1))) 2559 return (EINVAL); 2560 2561 if (checkonly) 2562 break; 2563 2564 if (inlen == 0) { 2565 if (sticky && 2566 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2567 kmem_free(ipp->ipp_dstopts, 2568 ipp->ipp_dstoptslen); 2569 ipp->ipp_dstopts = NULL; 2570 ipp->ipp_dstoptslen = 0; 2571 } 2572 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2573 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2574 } else { 2575 error = optcom_pkt_set(invalp, inlen, sticky, 2576 (uchar_t **)&ipp->ipp_dstopts, 2577 &ipp->ipp_dstoptslen, 0); 2578 if (error != 0) 2579 return (error); 2580 ipp->ipp_fields |= IPPF_DSTOPTS; 2581 } 2582 if (sticky) { 2583 error = icmp_build_hdrs(q, icmp); 2584 if (error != 0) 2585 return (error); 2586 } 2587 break; 2588 } 2589 case IPV6_RTHDR: { 2590 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2591 2592 /* 2593 * Sanity checks - minimum size, size a multiple of 2594 * eight bytes, and matching size passed in. 
2595 */ 2596 if (inlen != 0 && 2597 inlen != (8 * (rt->ip6r_len + 1))) 2598 return (EINVAL); 2599 2600 if (checkonly) 2601 break; 2602 2603 if (inlen == 0) { 2604 if (sticky && 2605 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2606 kmem_free(ipp->ipp_rthdr, 2607 ipp->ipp_rthdrlen); 2608 ipp->ipp_rthdr = NULL; 2609 ipp->ipp_rthdrlen = 0; 2610 } 2611 ipp->ipp_fields &= ~IPPF_RTHDR; 2612 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2613 } else { 2614 error = optcom_pkt_set(invalp, inlen, sticky, 2615 (uchar_t **)&ipp->ipp_rthdr, 2616 &ipp->ipp_rthdrlen, 0); 2617 if (error != 0) 2618 return (error); 2619 ipp->ipp_fields |= IPPF_RTHDR; 2620 } 2621 if (sticky) { 2622 error = icmp_build_hdrs(q, icmp); 2623 if (error != 0) 2624 return (error); 2625 } 2626 break; 2627 } 2628 2629 case IPV6_DONTFRAG: 2630 if (checkonly) 2631 break; 2632 2633 if (onoff) { 2634 ipp->ipp_fields |= IPPF_DONTFRAG; 2635 } else { 2636 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2637 } 2638 break; 2639 2640 case IPV6_USE_MIN_MTU: 2641 if (inlen != sizeof (int)) 2642 return (EINVAL); 2643 2644 if (*i1 < -1 || *i1 > 1) 2645 return (EINVAL); 2646 2647 if (checkonly) 2648 break; 2649 2650 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 2651 ipp->ipp_use_min_mtu = *i1; 2652 break; 2653 2654 /* 2655 * This option can't be set. Its only returned via 2656 * getsockopt() or ancillary data. 2657 */ 2658 case IPV6_PATHMTU: 2659 return (EINVAL); 2660 2661 case IPV6_BOUND_PIF: 2662 case IPV6_SEC_OPT: 2663 case IPV6_DONTFAILOVER_IF: 2664 case IPV6_SRC_PREFERENCES: 2665 case IPV6_V6ONLY: 2666 /* Handled at IP level */ 2667 return (-EINVAL); 2668 default: 2669 *outlenp = 0; 2670 return (EINVAL); 2671 } 2672 break; 2673 } /* end IPPROTO_IPV6 */ 2674 2675 case IPPROTO_ICMPV6: 2676 /* 2677 * Only allow IPv6 option processing on IPv6 sockets. 
2678 */ 2679 if (icmp->icmp_family != AF_INET6) { 2680 *outlenp = 0; 2681 return (ENOPROTOOPT); 2682 } 2683 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2684 *outlenp = 0; 2685 return (ENOPROTOOPT); 2686 } 2687 switch (name) { 2688 case ICMP6_FILTER: 2689 if (!checkonly) { 2690 if ((inlen != 0) && 2691 (inlen != sizeof (icmp6_filter_t))) 2692 return (EINVAL); 2693 2694 if (inlen == 0) { 2695 if (icmp->icmp_filter != NULL) { 2696 kmem_free(icmp->icmp_filter, 2697 sizeof (icmp6_filter_t)); 2698 icmp->icmp_filter = NULL; 2699 } 2700 } else { 2701 if (icmp->icmp_filter == NULL) { 2702 icmp->icmp_filter = kmem_alloc( 2703 sizeof (icmp6_filter_t), 2704 KM_NOSLEEP); 2705 if (icmp->icmp_filter == NULL) { 2706 *outlenp = 0; 2707 return (ENOBUFS); 2708 } 2709 } 2710 (void) bcopy(invalp, icmp->icmp_filter, 2711 inlen); 2712 } 2713 } 2714 break; 2715 2716 default: 2717 *outlenp = 0; 2718 return (EINVAL); 2719 } 2720 break; 2721 default: 2722 *outlenp = 0; 2723 return (EINVAL); 2724 } 2725 /* 2726 * Common case of OK return with outval same as inval. 2727 */ 2728 if (invalp != outvalp) { 2729 /* don't trust bcopy for identical src/dst */ 2730 (void) bcopy(invalp, outvalp, inlen); 2731 } 2732 *outlenp = inlen; 2733 return (0); 2734 } 2735 2736 /* 2737 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 2738 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 2739 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 2740 * headers. 2741 * Returns failure if can't allocate memory. 
2742 */ 2743 static int 2744 icmp_build_hdrs(queue_t *q, icmp_t *icmp) 2745 { 2746 uchar_t *hdrs; 2747 uint_t hdrs_len; 2748 ip6_t *ip6h; 2749 ip6i_t *ip6i; 2750 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 2751 2752 hdrs_len = ip_total_hdrs_len_v6(ipp); 2753 ASSERT(hdrs_len != 0); 2754 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 2755 /* Need to reallocate */ 2756 if (hdrs_len != 0) { 2757 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 2758 if (hdrs == NULL) 2759 return (ENOMEM); 2760 } else { 2761 hdrs = NULL; 2762 } 2763 if (icmp->icmp_sticky_hdrs_len != 0) { 2764 kmem_free(icmp->icmp_sticky_hdrs, 2765 icmp->icmp_sticky_hdrs_len); 2766 } 2767 icmp->icmp_sticky_hdrs = hdrs; 2768 icmp->icmp_sticky_hdrs_len = hdrs_len; 2769 } 2770 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 2771 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 2772 2773 /* Set header fields not in ipp */ 2774 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 2775 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 2776 ip6h = (ip6_t *)&ip6i[1]; 2777 2778 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 2779 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 2780 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 2781 } 2782 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 2783 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 2784 } 2785 } else { 2786 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 2787 } 2788 2789 if (!(ipp->ipp_fields & IPPF_ADDR)) 2790 ip6h->ip6_src = icmp->icmp_v6src; 2791 2792 /* Try to get everything in a single mblk */ 2793 if (hdrs_len > icmp->icmp_max_hdr_len) { 2794 icmp->icmp_max_hdr_len = hdrs_len; 2795 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2796 icmp_wroff_extra); 2797 } 2798 return (0); 2799 } 2800 2801 /* 2802 * This routine retrieves the value of an ND variable in a icmpparam_t 2803 * structure. It is called through nd_getset when a user reads the 2804 * variable. 
2805 */ 2806 /* ARGSUSED */ 2807 static int 2808 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2809 { 2810 icmpparam_t *icmppa = (icmpparam_t *)cp; 2811 2812 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 2813 return (0); 2814 } 2815 2816 /* 2817 * Walk through the param array specified registering each element with the 2818 * named dispatch (ND) handler. 2819 */ 2820 static boolean_t 2821 icmp_param_register(icmpparam_t *icmppa, int cnt) 2822 { 2823 for (; cnt-- > 0; icmppa++) { 2824 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 2825 if (!nd_load(&icmp_g_nd, icmppa->icmp_param_name, 2826 icmp_param_get, icmp_param_set, 2827 (caddr_t)icmppa)) { 2828 nd_free(&icmp_g_nd); 2829 return (B_FALSE); 2830 } 2831 } 2832 } 2833 if (!nd_load(&icmp_g_nd, "icmp_status", icmp_status_report, NULL, 2834 NULL)) { 2835 nd_free(&icmp_g_nd); 2836 return (B_FALSE); 2837 } 2838 return (B_TRUE); 2839 } 2840 2841 /* This routine sets an ND variable in a icmpparam_t structure. */ 2842 /* ARGSUSED */ 2843 static int 2844 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 2845 { 2846 long new_value; 2847 icmpparam_t *icmppa = (icmpparam_t *)cp; 2848 2849 /* 2850 * Fail the request if the new value does not lie within the 2851 * required bounds. 
2852 */ 2853 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2854 new_value < icmppa->icmp_param_min || 2855 new_value > icmppa->icmp_param_max) { 2856 return (EINVAL); 2857 } 2858 /* Set the new value */ 2859 icmppa->icmp_param_value = new_value; 2860 return (0); 2861 } 2862 2863 static void 2864 icmp_rput(queue_t *q, mblk_t *mp) 2865 { 2866 struct T_unitdata_ind *tudi; 2867 uchar_t *rptr; 2868 struct T_error_ack *tea; 2869 icmp_t *icmp; 2870 sin_t *sin; 2871 sin6_t *sin6; 2872 ip6_t *ip6h; 2873 ip6i_t *ip6i; 2874 mblk_t *mp1; 2875 int hdr_len; 2876 ipha_t *ipha; 2877 int udi_size; /* Size of T_unitdata_ind */ 2878 uint_t ipvers; 2879 ip6_pkt_t ipp; 2880 uint8_t nexthdr; 2881 boolean_t recvif = B_FALSE; 2882 in_pktinfo_t *pinfo; 2883 mblk_t *options_mp = NULL; 2884 uint_t icmp_opt = 0; 2885 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 2886 uint_t hopstrip; 2887 2888 icmp = (icmp_t *)q->q_ptr; 2889 if (icmp->icmp_restricted) { 2890 putnext(q, mp); 2891 return; 2892 } 2893 2894 if (mp->b_datap->db_type == M_CTL) { 2895 /* 2896 * IP sends up the IPSEC_IN message for handling IPSEC 2897 * policy at the TCP level. We don't need it here. 2898 */ 2899 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 2900 mp1 = mp->b_cont; 2901 freeb(mp); 2902 mp = mp1; 2903 } else { 2904 pinfo = (in_pktinfo_t *)mp->b_rptr; 2905 if ((icmp->icmp_recvif != 0) && 2906 (pinfo->in_pkt_ulp_type == IN_PKTINFO)) { 2907 /* 2908 * IP has passed the options in mp and the 2909 * actual data is in b_cont. 2910 */ 2911 recvif = B_TRUE; 2912 /* 2913 * We are here bcos IP_RECVIF is set so we need 2914 * to extract the options mblk and adjust the 2915 * rptr 2916 */ 2917 options_mp = mp; 2918 mp = mp->b_cont; 2919 } 2920 } 2921 } 2922 2923 rptr = mp->b_rptr; 2924 switch (mp->b_datap->db_type) { 2925 case M_DATA: 2926 /* 2927 * M_DATA messages contain IP packets. They are handled 2928 * following the switch. 
2929 */ 2930 break; 2931 case M_PROTO: 2932 case M_PCPROTO: 2933 /* M_PROTO messages contain some type of TPI message. */ 2934 if ((mp->b_wptr - rptr) < sizeof (t_scalar_t)) { 2935 freemsg(mp); 2936 return; 2937 } 2938 tea = (struct T_error_ack *)rptr; 2939 switch (tea->PRIM_type) { 2940 case T_ERROR_ACK: 2941 switch (tea->ERROR_prim) { 2942 case O_T_BIND_REQ: 2943 case T_BIND_REQ: 2944 /* 2945 * If our O_T_BIND_REQ/T_BIND_REQ fails, 2946 * clear out the source address before 2947 * passing the message upstream. 2948 * If this was caused by a T_CONN_REQ 2949 * revert back to bound state. 2950 */ 2951 if (icmp->icmp_state == TS_UNBND) { 2952 /* 2953 * TPI has not yet bound - bind sent by 2954 * icmp_bind_proto. 2955 */ 2956 freemsg(mp); 2957 return; 2958 } 2959 if (icmp->icmp_state == TS_DATA_XFER) { 2960 /* Connect failed */ 2961 tea->ERROR_prim = T_CONN_REQ; 2962 icmp->icmp_v6src = 2963 icmp->icmp_bound_v6src; 2964 icmp->icmp_state = TS_IDLE; 2965 if (icmp->icmp_family == AF_INET6) 2966 (void) icmp_build_hdrs(q, icmp); 2967 break; 2968 } 2969 2970 if (icmp->icmp_discon_pending) { 2971 tea->ERROR_prim = T_DISCON_REQ; 2972 icmp->icmp_discon_pending = 0; 2973 } 2974 V6_SET_ZERO(icmp->icmp_v6src); 2975 V6_SET_ZERO(icmp->icmp_bound_v6src); 2976 icmp->icmp_state = TS_UNBND; 2977 if (icmp->icmp_family == AF_INET6) 2978 (void) icmp_build_hdrs(q, icmp); 2979 break; 2980 default: 2981 break; 2982 } 2983 break; 2984 case T_BIND_ACK: 2985 icmp_rput_bind_ack(q, mp); 2986 return; 2987 2988 case T_OPTMGMT_ACK: 2989 case T_OK_ACK: 2990 if (tea->PRIM_type == T_OK_ACK) { 2991 struct T_ok_ack *toa; 2992 toa = (struct T_ok_ack *)rptr; 2993 if (toa->CORRECT_prim == T_UNBIND_REQ) { 2994 /* 2995 * If somebody sets IPSEC options, IP 2996 * sends some IPSEC info which is used 2997 * by the TCP for detached connections. 2998 * We don't need it here. 
2999 */ 3000 if ((mp1 = mp->b_cont) != NULL) { 3001 freemsg(mp1); 3002 mp->b_cont = NULL; 3003 } 3004 } 3005 } 3006 break; 3007 default: 3008 freemsg(mp); 3009 return; 3010 } 3011 putnext(q, mp); 3012 return; 3013 case M_CTL: 3014 if (recvif) { 3015 /* 3016 * IP has passed the options in mp and the actual data 3017 * is in b_cont. Jump to normal data processing. 3018 */ 3019 break; 3020 } 3021 3022 /* Contains ICMP packet from IP */ 3023 icmp_icmp_error(q, mp); 3024 return; 3025 default: 3026 putnext(q, mp); 3027 return; 3028 } 3029 3030 /* 3031 * Discard message if it is misaligned or smaller than the IP header. 3032 */ 3033 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3034 freemsg(mp); 3035 if (options_mp != NULL) 3036 freeb(options_mp); 3037 BUMP_MIB(&rawip_mib, rawipInErrors); 3038 return; 3039 } 3040 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3041 3042 /* Handle M_DATA messages containing IP packets messages */ 3043 if (ipvers == IPV4_VERSION) { 3044 /* 3045 * Special case where IP attaches 3046 * the IRE needs to be handled so that we don't send up 3047 * IRE to the user land. 3048 */ 3049 ipha = (ipha_t *)rptr; 3050 hdr_len = IPH_HDR_LENGTH(ipha); 3051 3052 if (ipha->ipha_protocol == IPPROTO_TCP) { 3053 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3054 3055 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3056 TH_SYN) && mp->b_cont != NULL) { 3057 mp1 = mp->b_cont; 3058 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3059 freeb(mp1); 3060 mp->b_cont = NULL; 3061 } 3062 } 3063 } 3064 if (icmp_bsd_compat) { 3065 ushort_t len; 3066 len = ntohs(ipha->ipha_length); 3067 3068 if (mp->b_datap->db_ref > 1) { 3069 /* 3070 * Allocate a new IP header so that we can 3071 * modify ipha_length. 
3072 */ 3073 mblk_t *mp1; 3074 3075 mp1 = allocb(hdr_len, BPRI_MED); 3076 if (!mp1) { 3077 freemsg(mp); 3078 if (options_mp != NULL) 3079 freeb(options_mp); 3080 BUMP_MIB(&rawip_mib, rawipInErrors); 3081 return; 3082 } 3083 bcopy(rptr, mp1->b_rptr, hdr_len); 3084 mp->b_rptr = rptr + hdr_len; 3085 rptr = mp1->b_rptr; 3086 ipha = (ipha_t *)rptr; 3087 mp1->b_cont = mp; 3088 mp1->b_wptr = rptr + hdr_len; 3089 mp = mp1; 3090 } 3091 len -= hdr_len; 3092 ipha->ipha_length = htons(len); 3093 } 3094 } 3095 3096 /* 3097 * This is the inbound data path. Packets are passed upstream as 3098 * T_UNITDATA_IND messages with full IP headers still attached. 3099 */ 3100 if (icmp->icmp_family == AF_INET) { 3101 ASSERT(ipvers == IPV4_VERSION); 3102 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3103 if (recvif) { 3104 udi_size += sizeof (struct T_opthdr) + 3105 sizeof (uint_t); 3106 } 3107 /* 3108 * If SO_TIMESTAMP is set allocate the appropriate sized 3109 * buffer. Since gethrestime() expects a pointer aligned 3110 * argument, we allocate space necessary for extra 3111 * alignment (even though it might not be used). 
3112 */ 3113 if (icmp->icmp_timestamp) { 3114 udi_size += sizeof (struct T_opthdr) + 3115 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3116 } 3117 mp1 = allocb(udi_size, BPRI_MED); 3118 if (mp1 == NULL) { 3119 freemsg(mp); 3120 if (options_mp != NULL) 3121 freeb(options_mp); 3122 BUMP_MIB(&rawip_mib, rawipInErrors); 3123 return; 3124 } 3125 mp1->b_cont = mp; 3126 mp = mp1; 3127 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3128 mp->b_datap->db_type = M_PROTO; 3129 mp->b_wptr = (uchar_t *)tudi + udi_size; 3130 tudi->PRIM_type = T_UNITDATA_IND; 3131 tudi->SRC_length = sizeof (sin_t); 3132 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3133 sin = (sin_t *)&tudi[1]; 3134 *sin = sin_null; 3135 sin->sin_family = AF_INET; 3136 sin->sin_addr.s_addr = ipha->ipha_src; 3137 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3138 sizeof (sin_t); 3139 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3140 tudi->OPT_length = udi_size; 3141 3142 /* 3143 * Add options if IP_RECVIF is set 3144 */ 3145 if (udi_size != 0) { 3146 char *dstopt; 3147 3148 dstopt = (char *)&sin[1]; 3149 if (recvif) { 3150 3151 struct T_opthdr *toh; 3152 uint_t *dstptr; 3153 3154 toh = (struct T_opthdr *)dstopt; 3155 toh->level = IPPROTO_IP; 3156 toh->name = IP_RECVIF; 3157 toh->len = sizeof (struct T_opthdr) + 3158 sizeof (uint_t); 3159 toh->status = 0; 3160 dstopt += sizeof (struct T_opthdr); 3161 dstptr = (uint_t *)dstopt; 3162 *dstptr = pinfo->in_pkt_ifindex; 3163 dstopt += sizeof (uint_t); 3164 freeb(options_mp); 3165 udi_size -= toh->len; 3166 } 3167 if (icmp->icmp_timestamp) { 3168 struct T_opthdr *toh; 3169 3170 toh = (struct T_opthdr *)dstopt; 3171 toh->level = SOL_SOCKET; 3172 toh->name = SCM_TIMESTAMP; 3173 toh->len = sizeof (struct T_opthdr) + 3174 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3175 toh->status = 0; 3176 dstopt += sizeof (struct T_opthdr); 3177 /* Align for gethrestime() */ 3178 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 3179 sizeof (intptr_t)); 3180 
gethrestime((timestruc_t *)dstopt); 3181 dstopt += sizeof (timestruc_t); 3182 udi_size -= toh->len; 3183 } 3184 3185 /* Consumed all of allocated space */ 3186 ASSERT(udi_size == 0); 3187 } 3188 3189 BUMP_MIB(&rawip_mib, rawipInDatagrams); 3190 putnext(q, mp); 3191 return; 3192 } 3193 3194 /* 3195 * We don't need options_mp in the IPv6 path. 3196 */ 3197 if (options_mp != NULL) { 3198 freeb(options_mp); 3199 options_mp = NULL; 3200 } 3201 3202 /* 3203 * Discard message if it is smaller than the IPv6 header 3204 * or if the header is malformed. 3205 */ 3206 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3207 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3208 icmp->icmp_family != AF_INET6) { 3209 freemsg(mp); 3210 BUMP_MIB(&rawip_mib, rawipInErrors); 3211 return; 3212 } 3213 3214 /* Initialize */ 3215 ipp.ipp_fields = 0; 3216 hopstrip = 0; 3217 3218 ip6h = (ip6_t *)rptr; 3219 /* 3220 * Call on ip_find_hdr_v6 which gets the total hdr len 3221 * as well as individual lenghts of ext hdrs (and ptrs to 3222 * them). 3223 */ 3224 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3225 /* Look for ifindex information */ 3226 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3227 ip6i = (ip6i_t *)ip6h; 3228 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3229 ASSERT(ip6i->ip6i_ifindex != 0); 3230 ipp.ipp_fields |= IPPF_IFINDEX; 3231 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3232 } 3233 rptr = (uchar_t *)&ip6i[1]; 3234 mp->b_rptr = rptr; 3235 if (rptr == mp->b_wptr) { 3236 mp1 = mp->b_cont; 3237 freeb(mp); 3238 mp = mp1; 3239 rptr = mp->b_rptr; 3240 } 3241 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3242 ip6h = (ip6_t *)rptr; 3243 } 3244 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3245 3246 /* 3247 * We need to lie a bit to the user because users inside 3248 * labeled compartments should not see their own labels. We 3249 * assume that in all other respects IP has checked the label, 3250 * and that the label is always first among the options. 
(If 3251 * it's not first, then this code won't see it, and the option 3252 * will be passed along to the user.) 3253 * 3254 * If we had multilevel ICMP sockets, then the following code 3255 * should be skipped for them to allow the user to see the 3256 * label. 3257 * 3258 * Alignment restrictions in the definition of IP options 3259 * (namely, the requirement that the 4-octet DOI goes on a 3260 * 4-octet boundary) mean that we know exactly where the option 3261 * should start, but we're lenient for other hosts. 3262 * 3263 * Note that there are no multilevel ICMP or raw IP sockets 3264 * yet, thus nobody ever sees the IP6OPT_LS option. 3265 */ 3266 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3267 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3268 const uchar_t *ucp = 3269 (const uchar_t *)ipp.ipp_hopopts + 2; 3270 int remlen = ipp.ipp_hopoptslen - 2; 3271 3272 while (remlen > 0) { 3273 if (*ucp == IP6OPT_PAD1) { 3274 remlen--; 3275 ucp++; 3276 } else if (*ucp == IP6OPT_PADN) { 3277 remlen -= ucp[1] + 2; 3278 ucp += ucp[1] + 2; 3279 } else if (*ucp == ip6opt_ls) { 3280 hopstrip = (ucp - 3281 (const uchar_t *)ipp.ipp_hopopts) + 3282 ucp[1] + 2; 3283 hopstrip = (hopstrip + 7) & ~7; 3284 break; 3285 } else { 3286 /* label option must be first */ 3287 break; 3288 } 3289 } 3290 } 3291 } else { 3292 hdr_len = IPV6_HDR_LEN; 3293 ip6i = NULL; 3294 nexthdr = ip6h->ip6_nxt; 3295 } 3296 /* 3297 * One special case where IP attaches the IRE needs to 3298 * be handled so that we don't send up IRE to the user land. 3299 */ 3300 if (nexthdr == IPPROTO_TCP) { 3301 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3302 3303 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3304 mp->b_cont != NULL) { 3305 mp1 = mp->b_cont; 3306 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3307 freeb(mp1); 3308 mp->b_cont = NULL; 3309 } 3310 } 3311 } 3312 /* 3313 * Check a filter for ICMPv6 types if needed. 3314 * Verify raw checksums if needed. 
3315 */ 3316 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3317 if (icmp->icmp_filter != NULL) { 3318 int type; 3319 3320 /* Assumes that IP has done the pullupmsg */ 3321 type = mp->b_rptr[hdr_len]; 3322 3323 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3324 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3325 freemsg(mp); 3326 return; 3327 } 3328 } else { 3329 /* Checksum */ 3330 uint16_t *up; 3331 uint32_t sum; 3332 int remlen; 3333 3334 up = (uint16_t *)&ip6h->ip6_src; 3335 3336 remlen = msgdsize(mp) - hdr_len; 3337 sum = htons(icmp->icmp_proto + remlen) 3338 + up[0] + up[1] + up[2] + up[3] 3339 + up[4] + up[5] + up[6] + up[7] 3340 + up[8] + up[9] + up[10] + up[11] 3341 + up[12] + up[13] + up[14] + up[15]; 3342 sum = (sum & 0xffff) + (sum >> 16); 3343 sum = IP_CSUM(mp, hdr_len, sum); 3344 if (sum != 0) { 3345 /* IPv6 RAW checksum failed */ 3346 ip0dbg(("icmp_rput: RAW checksum " 3347 "failed %x\n", sum)); 3348 freemsg(mp); 3349 BUMP_MIB(&rawip_mib, rawipInCksumErrs); 3350 return; 3351 } 3352 } 3353 } 3354 /* Skip all the IPv6 headers per API */ 3355 mp->b_rptr += hdr_len; 3356 3357 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3358 3359 /* 3360 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3361 * maintain state information, instead of relying on icmp_t 3362 * structure, since there arent any locks protecting these members 3363 * and there is a window where there might be a race between a 3364 * thread setting options on the write side and a thread reading 3365 * these options on the read size. 
3366 */ 3367 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3368 IPPF_RTHDR|IPPF_IFINDEX)) { 3369 if (icmp->icmp_ipv6_recvhopopts && 3370 (ipp.ipp_fields & IPPF_HOPOPTS) && 3371 ipp.ipp_hopoptslen > hopstrip) { 3372 udi_size += sizeof (struct T_opthdr) + 3373 ipp.ipp_hopoptslen - hopstrip; 3374 icmp_opt |= IPPF_HOPOPTS; 3375 } 3376 if ((icmp->icmp_ipv6_recvdstopts || 3377 icmp->icmp_old_ipv6_recvdstopts) && 3378 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3379 udi_size += sizeof (struct T_opthdr) + 3380 ipp.ipp_dstoptslen; 3381 icmp_opt |= IPPF_DSTOPTS; 3382 } 3383 if (((icmp->icmp_ipv6_recvdstopts && 3384 icmp->icmp_ipv6_recvrthdr && 3385 (ipp.ipp_fields & IPPF_RTHDR)) || 3386 icmp->icmp_ipv6_recvrtdstopts) && 3387 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3388 udi_size += sizeof (struct T_opthdr) + 3389 ipp.ipp_rtdstoptslen; 3390 icmp_opt |= IPPF_RTDSTOPTS; 3391 } 3392 if (icmp->icmp_ipv6_recvrthdr && 3393 (ipp.ipp_fields & IPPF_RTHDR)) { 3394 udi_size += sizeof (struct T_opthdr) + 3395 ipp.ipp_rthdrlen; 3396 icmp_opt |= IPPF_RTHDR; 3397 } 3398 if (icmp->icmp_ipv6_recvpktinfo && 3399 (ipp.ipp_fields & IPPF_IFINDEX)) { 3400 udi_size += sizeof (struct T_opthdr) + 3401 sizeof (struct in6_pktinfo); 3402 icmp_opt |= IPPF_IFINDEX; 3403 } 3404 } 3405 if (icmp->icmp_ipv6_recvhoplimit) { 3406 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3407 icmp_ipv6_recvhoplimit = B_TRUE; 3408 } 3409 3410 if (icmp->icmp_ipv6_recvtclass) 3411 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3412 3413 mp1 = allocb(udi_size, BPRI_MED); 3414 if (mp1 == NULL) { 3415 freemsg(mp); 3416 BUMP_MIB(&rawip_mib, rawipInErrors); 3417 return; 3418 } 3419 mp1->b_cont = mp; 3420 mp = mp1; 3421 mp->b_datap->db_type = M_PROTO; 3422 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3423 mp->b_wptr = (uchar_t *)tudi + udi_size; 3424 tudi->PRIM_type = T_UNITDATA_IND; 3425 tudi->SRC_length = sizeof (sin6_t); 3426 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3427 tudi->OPT_offset = sizeof 
(struct T_unitdata_ind) + sizeof (sin6_t); 3428 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3429 tudi->OPT_length = udi_size; 3430 sin6 = (sin6_t *)&tudi[1]; 3431 sin6->sin6_port = 0; 3432 sin6->sin6_family = AF_INET6; 3433 3434 sin6->sin6_addr = ip6h->ip6_src; 3435 /* No sin6_flowinfo per API */ 3436 sin6->sin6_flowinfo = 0; 3437 /* For link-scope source pass up scope id */ 3438 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3439 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3440 sin6->sin6_scope_id = ipp.ipp_ifindex; 3441 else 3442 sin6->sin6_scope_id = 0; 3443 3444 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3445 icmp->icmp_zoneid); 3446 3447 if (udi_size != 0) { 3448 uchar_t *dstopt; 3449 3450 dstopt = (uchar_t *)&sin6[1]; 3451 if (icmp_opt & IPPF_IFINDEX) { 3452 struct T_opthdr *toh; 3453 struct in6_pktinfo *pkti; 3454 3455 toh = (struct T_opthdr *)dstopt; 3456 toh->level = IPPROTO_IPV6; 3457 toh->name = IPV6_PKTINFO; 3458 toh->len = sizeof (struct T_opthdr) + 3459 sizeof (*pkti); 3460 toh->status = 0; 3461 dstopt += sizeof (struct T_opthdr); 3462 pkti = (struct in6_pktinfo *)dstopt; 3463 pkti->ipi6_addr = ip6h->ip6_dst; 3464 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3465 dstopt += sizeof (*pkti); 3466 udi_size -= toh->len; 3467 } 3468 if (icmp_ipv6_recvhoplimit) { 3469 struct T_opthdr *toh; 3470 3471 toh = (struct T_opthdr *)dstopt; 3472 toh->level = IPPROTO_IPV6; 3473 toh->name = IPV6_HOPLIMIT; 3474 toh->len = sizeof (struct T_opthdr) + 3475 sizeof (uint_t); 3476 toh->status = 0; 3477 dstopt += sizeof (struct T_opthdr); 3478 *(uint_t *)dstopt = ip6h->ip6_hops; 3479 dstopt += sizeof (uint_t); 3480 udi_size -= toh->len; 3481 } 3482 if (icmp->icmp_ipv6_recvtclass) { 3483 struct T_opthdr *toh; 3484 3485 toh = (struct T_opthdr *)dstopt; 3486 toh->level = IPPROTO_IPV6; 3487 toh->name = IPV6_TCLASS; 3488 toh->len = sizeof (struct T_opthdr) + 3489 sizeof (uint_t); 3490 toh->status = 0; 3491 dstopt += sizeof (struct T_opthdr); 3492 *(uint_t *)dstopt = 
IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3493 dstopt += sizeof (uint_t); 3494 udi_size -= toh->len; 3495 } 3496 if (icmp_opt & IPPF_HOPOPTS) { 3497 struct T_opthdr *toh; 3498 3499 toh = (struct T_opthdr *)dstopt; 3500 toh->level = IPPROTO_IPV6; 3501 toh->name = IPV6_HOPOPTS; 3502 toh->len = sizeof (struct T_opthdr) + 3503 ipp.ipp_hopoptslen - hopstrip; 3504 toh->status = 0; 3505 dstopt += sizeof (struct T_opthdr); 3506 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 3507 ipp.ipp_hopoptslen - hopstrip); 3508 if (hopstrip > 0) { 3509 /* copy next header value and fake length */ 3510 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 3511 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 3512 hopstrip / 8; 3513 } 3514 dstopt += ipp.ipp_hopoptslen - hopstrip; 3515 udi_size -= toh->len; 3516 } 3517 if (icmp_opt & IPPF_RTDSTOPTS) { 3518 struct T_opthdr *toh; 3519 3520 toh = (struct T_opthdr *)dstopt; 3521 toh->level = IPPROTO_IPV6; 3522 toh->name = IPV6_DSTOPTS; 3523 toh->len = sizeof (struct T_opthdr) + 3524 ipp.ipp_rtdstoptslen; 3525 toh->status = 0; 3526 dstopt += sizeof (struct T_opthdr); 3527 bcopy(ipp.ipp_rtdstopts, dstopt, 3528 ipp.ipp_rtdstoptslen); 3529 dstopt += ipp.ipp_rtdstoptslen; 3530 udi_size -= toh->len; 3531 } 3532 if (icmp_opt & IPPF_RTHDR) { 3533 struct T_opthdr *toh; 3534 3535 toh = (struct T_opthdr *)dstopt; 3536 toh->level = IPPROTO_IPV6; 3537 toh->name = IPV6_RTHDR; 3538 toh->len = sizeof (struct T_opthdr) + 3539 ipp.ipp_rthdrlen; 3540 toh->status = 0; 3541 dstopt += sizeof (struct T_opthdr); 3542 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 3543 dstopt += ipp.ipp_rthdrlen; 3544 udi_size -= toh->len; 3545 } 3546 if (icmp_opt & IPPF_DSTOPTS) { 3547 struct T_opthdr *toh; 3548 3549 toh = (struct T_opthdr *)dstopt; 3550 toh->level = IPPROTO_IPV6; 3551 toh->name = IPV6_DSTOPTS; 3552 toh->len = sizeof (struct T_opthdr) + 3553 ipp.ipp_dstoptslen; 3554 toh->status = 0; 3555 dstopt += sizeof (struct T_opthdr); 3556 bcopy(ipp.ipp_dstopts, dstopt, 3557 
ipp.ipp_dstoptslen);
			dstopt += ipp.ipp_dstoptslen;
			udi_size -= toh->len;
		}
		/* Consumed all of allocated space */
		ASSERT(udi_size == 0);
	}
	BUMP_MIB(&rawip_mib, rawipInDatagrams);
	putnext(q, mp);
}

/*
 * Process a T_BIND_ACK
 *
 * q is the queue the T_BIND_ACK arrived on; mp is the T_BIND_ACK, possibly
 * followed (via b_cont) by an IRE_DB_TYPE block and/or further messages
 * that were appended to it.  The message is always consumed: it is either
 * freed or passed on with putnext().
 *
 * Processing steps:
 *  - If the endpoint is still TS_UNBND the ack is simply freed.
 *  - Any pending-disconnect flag is cleared.
 *  - If the first continuation block is an IRE, it is used to either
 *    zero the source address (broadcast/multicast bind) or to pick up
 *    the local address from the T_bind_ack when none is set yet.
 *  - If further blocks follow, the T_BIND_ACK (and IRE) are freed and
 *    each remaining block is sent upstream as a message of its own;
 *    otherwise the T_BIND_ACK itself is sent upstream.
 */
static void
icmp_rput_bind_ack(queue_t *q, mblk_t *mp)
{
	icmp_t	*icmp = (icmp_t *)q->q_ptr;
	mblk_t	*mp1;
	ire_t	*ire;
	struct T_bind_ack *tba;
	uchar_t *addrp;
	ipa_conn_t	*ac;
	ipa6_conn_t	*ac6;

	/*
	 * We know if headers are included or not so we can
	 * safely do this.
	 */
	if (icmp->icmp_state == TS_UNBND) {
		/*
		 * TPI has not yet bound - bind sent by
		 * icmp_bind_proto.
		 */
		freemsg(mp);
		return;
	}
	if (icmp->icmp_discon_pending)
		icmp->icmp_discon_pending = 0;

	/*
	 * If a broadcast/multicast address was bound set
	 * the source address to 0.
	 * This ensures no datagrams with broadcast address
	 * as source address are emitted (which would violate
	 * RFC1122 - Hosts requirements)
	 *
	 * Note that when connecting the returned IRE is
	 * for the destination address and we only perform
	 * the broadcast check for the source address (it
	 * is OK to connect to a broadcast/multicast address.)
	 */
	mp1 = mp->b_cont;
	if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) {
		ire = (ire_t *)mp1->b_rptr;

		/*
		 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast
		 * local address.
		 */
		if (ire->ire_type == IRE_BROADCAST &&
		    icmp->icmp_state != TS_DATA_XFER) {
			/* This was just a local bind to a MC/broadcast addr */
			V6_SET_ZERO(icmp->icmp_v6src);
			/* The return value of the rebuild is ignored here. */
			if (icmp->icmp_family == AF_INET6)
				(void) icmp_build_hdrs(q, icmp);
		} else if (V6_OR_V4_INADDR_ANY(icmp->icmp_v6src)) {
			/*
			 * Local address not yet set - pick it from the
			 * T_bind_ack
			 */
			tba = (struct T_bind_ack *)mp->b_rptr;
			addrp = &mp->b_rptr[tba->ADDR_offset];
			switch (icmp->icmp_family) {
			case AF_INET:
				/*
				 * The address is either a plain ipa_conn_t
				 * or one embedded in an ipa_conn_x_t.
				 */
				if (tba->ADDR_length == sizeof (ipa_conn_t)) {
					ac = (ipa_conn_t *)addrp;
				} else {
					ASSERT(tba->ADDR_length ==
					    sizeof (ipa_conn_x_t));
					ac = &((ipa_conn_x_t *)addrp)->acx_conn;
				}
				IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr,
				    &icmp->icmp_v6src);
				break;
			case AF_INET6:
				/* Same two layouts, IPv6 flavour. */
				if (tba->ADDR_length == sizeof (ipa6_conn_t)) {
					ac6 = (ipa6_conn_t *)addrp;
				} else {
					ASSERT(tba->ADDR_length ==
					    sizeof (ipa6_conn_x_t));
					ac6 = &((ipa6_conn_x_t *)
					    addrp)->ac6x_conn;
				}
				icmp->icmp_v6src = ac6->ac6_laddr;
				(void) icmp_build_hdrs(q, icmp);
			}
		}
		/* Step past the IRE to whatever was appended after it. */
		mp1 = mp1->b_cont;
	}
	/*
	 * Look for one or more appended ACK message added by
	 * icmp_connect or icmp_disconnect.
	 * If none found just send up the T_BIND_ACK.
	 * icmp_connect has appended a T_OK_ACK and a
	 * T_CONN_CON.
	 * icmp_disconnect has appended a T_OK_ACK.
	 */
	if (mp1 != NULL) {
		/*
		 * Unlink the appended messages from the T_BIND_ACK (and
		 * from the IRE block, if there was one) so that freeing
		 * the ack does not free them as well.
		 */
		if (mp->b_cont == mp1)
			mp->b_cont = NULL;
		else {
			ASSERT(mp->b_cont->b_cont == mp1);
			mp->b_cont->b_cont = NULL;
		}
		freemsg(mp);
		mp = mp1;
		/* Send each appended block upstream as its own message. */
		while (mp != NULL) {
			mp1 = mp->b_cont;
			mp->b_cont = NULL;
			putnext(q, mp);
			mp = mp1;
		}
		return;
	}
	/* Nothing appended: drop the continuation and pass up the ack. */
	freemsg(mp->b_cont);
	mp->b_cont = NULL;
	putnext(q, mp);
}

/*
 * return SNMP stuff in buffer in mpdata
 *
 * mpctl is the control block; its b_cont (mpdata) receives a copy of
 * rawip_mib.  Returns 0 without touching anything when either block is
 * missing; otherwise fills in the opthdr that follows the T_optmgmt_ack
 * in mpctl, replies with mpctl on q, and returns 1.
 */
static int
icmp_snmp_get(queue_t *q, mblk_t *mpctl)
{
	mblk_t			*mpdata;
	struct opthdr		*optp;

	if (mpctl == NULL ||
	    (mpdata = mpctl->b_cont) == NULL) {
		return (0);
	}

	/* fixed length structure for IPv4 and IPv6 counters */
	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
	optp->level = EXPER_RAWIP;
	optp->name = 0;
	/* The result of the append is not checked. */
	(void) snmp_append_data(mpdata, (char *)&rawip_mib, sizeof (rawip_mib));
	optp->len = msgdsize(mpdata);
	qreply(q, mpctl);

	return (1);
}

/*
 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests.
 * TODO: If this ever actually tries to set anything, it needs
 * to do the appropriate locking.
3716 */ 3717 /* ARGSUSED */ 3718 static int 3719 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 3720 uchar_t *ptr, int len) 3721 { 3722 switch (level) { 3723 case EXPER_RAWIP: 3724 return (0); 3725 default: 3726 return (1); 3727 } 3728 } 3729 3730 /* Report for ndd "icmp_status" */ 3731 /* ARGSUSED */ 3732 static int 3733 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3734 { 3735 IDP idp; 3736 icmp_t *icmp; 3737 char *state; 3738 char laddrbuf[INET6_ADDRSTRLEN]; 3739 char faddrbuf[INET6_ADDRSTRLEN]; 3740 3741 (void) mi_mpprintf(mp, 3742 "RAWIP " MI_COL_HDRPAD_STR 3743 /* 01234567[89ABCDEF] */ 3744 " src addr dest addr state"); 3745 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 3746 3747 3748 for (idp = mi_first_ptr(&icmp_g_head); 3749 (icmp = (icmp_t *)idp) != NULL; 3750 idp = mi_next_ptr(&icmp_g_head, idp)) { 3751 if (icmp->icmp_state == TS_UNBND) 3752 state = "UNBOUND"; 3753 else if (icmp->icmp_state == TS_IDLE) 3754 state = "IDLE"; 3755 else if (icmp->icmp_state == TS_DATA_XFER) 3756 state = "CONNECTED"; 3757 else 3758 state = "UnkState"; 3759 3760 (void) mi_mpprintf(mp, 3761 MI_COL_PTRFMT_STR "%s %s %s", 3762 (void *)icmp, 3763 inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf, 3764 sizeof (faddrbuf)), 3765 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 3766 sizeof (laddrbuf)), 3767 state); 3768 } 3769 return (0); 3770 } 3771 3772 /* 3773 * This routine creates a T_UDERROR_IND message and passes it upstream. 3774 * The address and options are copied from the T_UNITDATA_REQ message 3775 * passed in mp. This message is freed. 
3776 */ 3777 static void 3778 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 3779 { 3780 mblk_t *mp1; 3781 uchar_t *rptr = mp->b_rptr; 3782 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 3783 3784 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 3785 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 3786 tudr->OPT_length, err); 3787 if (mp1) 3788 qreply(q, mp1); 3789 freemsg(mp); 3790 } 3791 3792 /* 3793 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 3794 * After some error checking, the message is passed downstream to ip. 3795 */ 3796 static void 3797 icmp_unbind(queue_t *q, mblk_t *mp) 3798 { 3799 icmp_t *icmp = (icmp_t *)q->q_ptr; 3800 3801 /* If a bind has not been done, we can't unbind. */ 3802 if (icmp->icmp_state == TS_UNBND) { 3803 icmp_err_ack(q, mp, TOUTSTATE, 0); 3804 return; 3805 } 3806 V6_SET_ZERO(icmp->icmp_v6src); 3807 V6_SET_ZERO(icmp->icmp_bound_v6src); 3808 icmp->icmp_state = TS_UNBND; 3809 3810 if (icmp->icmp_family == AF_INET6) { 3811 int error; 3812 3813 /* Rebuild the header template */ 3814 error = icmp_build_hdrs(q, icmp); 3815 if (error != 0) { 3816 icmp_err_ack(q, mp, TSYSERR, error); 3817 return; 3818 } 3819 } 3820 /* Pass the unbind to IP. */ 3821 putnext(q, mp); 3822 } 3823 3824 /* 3825 * Process IPv4 packets that already include an IP header. 3826 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 3827 * IPPROTO_IGMP). 
3828 */ 3829 static void 3830 icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp) 3831 { 3832 ipha_t *ipha; 3833 int ip_hdr_length; 3834 int tp_hdr_len; 3835 mblk_t *mp1; 3836 uint_t pkt_len; 3837 3838 ipha = (ipha_t *)mp->b_rptr; 3839 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 3840 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 3841 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 3842 BUMP_MIB(&rawip_mib, rawipOutErrors); 3843 freemsg(mp); 3844 return; 3845 } 3846 ipha = (ipha_t *)mp->b_rptr; 3847 } 3848 ipha->ipha_version_and_hdr_length = 3849 (IP_VERSION<<4) | (ip_hdr_length>>2); 3850 3851 /* 3852 * For the socket of SOCK_RAW type, the checksum is provided in the 3853 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 3854 * tell IP that the application has sent a complete IP header and not 3855 * to compute the transport checksum nor change the DF flag. 3856 */ 3857 ipha->ipha_ident = IP_HDR_INCLUDED; 3858 ipha->ipha_hdr_checksum = 0; 3859 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 3860 /* Insert options if any */ 3861 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 3862 /* 3863 * Put the IP header plus any transport header that is 3864 * checksumed by ip_wput into the first mblk. (ip_wput assumes 3865 * that at least the checksum field is in the first mblk.) 3866 */ 3867 switch (ipha->ipha_protocol) { 3868 case IPPROTO_UDP: 3869 tp_hdr_len = 8; 3870 break; 3871 case IPPROTO_TCP: 3872 tp_hdr_len = 20; 3873 break; 3874 default: 3875 tp_hdr_len = 0; 3876 break; 3877 } 3878 /* 3879 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 3880 * tp_hdr_len bytes will be in a single mblk. 
3881 */ 3882 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 3883 tp_hdr_len)) { 3884 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 3885 tp_hdr_len)) { 3886 BUMP_MIB(&rawip_mib, rawipOutErrors); 3887 freemsg(mp); 3888 return; 3889 } 3890 ipha = (ipha_t *)mp->b_rptr; 3891 } 3892 3893 /* 3894 * if the length is larger then the max allowed IP packet, 3895 * then send an error and abort the processing. 3896 */ 3897 pkt_len = ntohs(ipha->ipha_length) 3898 + icmp->icmp_ip_snd_options_len; 3899 if (pkt_len > IP_MAXPACKET) { 3900 icmp_ud_err(q, mp, EMSGSIZE); 3901 return; 3902 } 3903 if (!(mp1 = allocb(ip_hdr_length + icmp_wroff_extra + 3904 tp_hdr_len, BPRI_LO))) { 3905 icmp_ud_err(q, mp, ENOMEM); 3906 return; 3907 } 3908 mp1->b_rptr += icmp_wroff_extra; 3909 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 3910 3911 ipha->ipha_length = htons((uint16_t)pkt_len); 3912 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 3913 3914 /* Copy transport header if any */ 3915 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 3916 mp1->b_wptr += tp_hdr_len; 3917 3918 /* Add options */ 3919 ipha = (ipha_t *)mp1->b_rptr; 3920 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 3921 icmp->icmp_ip_snd_options_len); 3922 3923 /* Drop IP header and transport header from original */ 3924 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 3925 3926 mp1->b_cont = mp; 3927 mp = mp1; 3928 /* 3929 * Massage source route putting first source 3930 * route in ipha_dst. 
3931 */ 3932 (void) ip_massage_options(ipha); 3933 } 3934 mblk_setcred(mp, icmp->icmp_credp); 3935 putnext(q, mp); 3936 } 3937 3938 static boolean_t 3939 icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 3940 { 3941 int err; 3942 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 3943 3944 err = tsol_compute_label(DB_CREDDEF(mp, icmp->icmp_credp), dst, 3945 opt_storage, icmp->icmp_mac_exempt); 3946 if (err == 0) { 3947 err = tsol_update_options(&icmp->icmp_ip_snd_options, 3948 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 3949 opt_storage); 3950 } 3951 if (err != 0) { 3952 BUMP_MIB(&rawip_mib, rawipOutErrors); 3953 DTRACE_PROBE4( 3954 tx__ip__log__drop__updatelabel__icmp, 3955 char *, "queue(1) failed to update options(2) on mp(3)", 3956 queue_t *, q, char *, opt_storage, mblk_t *, mp); 3957 icmp_ud_err(q, mp, err); 3958 return (B_FALSE); 3959 } 3960 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 3961 return (B_TRUE); 3962 } 3963 3964 /* 3965 * This routine handles all messages passed downstream. It either 3966 * consumes the message or passes it downstream; it never queues a 3967 * a message. 
3968 */ 3969 static void 3970 icmp_wput(queue_t *q, mblk_t *mp) 3971 { 3972 uchar_t *rptr = mp->b_rptr; 3973 ipha_t *ipha; 3974 mblk_t *mp1; 3975 int ip_hdr_length; 3976 #define tudr ((struct T_unitdata_req *)rptr) 3977 size_t ip_len; 3978 icmp_t *icmp; 3979 sin6_t *sin6; 3980 sin_t *sin; 3981 ipaddr_t v4dst; 3982 3983 icmp = (icmp_t *)q->q_ptr; 3984 if (icmp->icmp_restricted) { 3985 icmp_wput_restricted(q, mp); 3986 return; 3987 } 3988 3989 switch (mp->b_datap->db_type) { 3990 case M_DATA: 3991 if (icmp->icmp_hdrincl) { 3992 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 3993 ipha = (ipha_t *)mp->b_rptr; 3994 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 3995 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 3996 BUMP_MIB(&rawip_mib, rawipOutErrors); 3997 freemsg(mp); 3998 return; 3999 } 4000 ipha = (ipha_t *)mp->b_rptr; 4001 } 4002 /* 4003 * If this connection was used for v6 (inconceivable!) 4004 * or if we have a new destination, then it's time to 4005 * figure a new label. 4006 */ 4007 if (is_system_labeled() && 4008 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4009 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4010 ipha->ipha_dst) && 4011 !icmp_update_label(q, icmp, mp, ipha->ipha_dst)) { 4012 return; 4013 } 4014 icmp_wput_hdrincl(q, mp, icmp); 4015 return; 4016 } 4017 freemsg(mp); 4018 return; 4019 case M_PROTO: 4020 case M_PCPROTO: 4021 ip_len = mp->b_wptr - rptr; 4022 if (ip_len >= sizeof (struct T_unitdata_req)) { 4023 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4024 if (((union T_primitives *)rptr)->type 4025 == T_UNITDATA_REQ) 4026 break; 4027 } 4028 /* FALLTHRU */ 4029 default: 4030 icmp_wput_other(q, mp); 4031 return; 4032 } 4033 4034 /* Handle T_UNITDATA_REQ messages here. */ 4035 4036 if (icmp->icmp_state == TS_UNBND) { 4037 /* If a port has not been bound to the stream, fail. 
*/ 4038 BUMP_MIB(&rawip_mib, rawipOutErrors); 4039 icmp_ud_err(q, mp, EPROTO); 4040 return; 4041 } 4042 mp1 = mp->b_cont; 4043 if (mp1 == NULL) { 4044 BUMP_MIB(&rawip_mib, rawipOutErrors); 4045 icmp_ud_err(q, mp, EPROTO); 4046 return; 4047 } 4048 4049 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4050 BUMP_MIB(&rawip_mib, rawipOutErrors); 4051 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4052 return; 4053 } 4054 4055 switch (icmp->icmp_family) { 4056 case AF_INET6: 4057 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4058 if (!OK_32PTR((char *)sin6) || 4059 tudr->DEST_length != sizeof (sin6_t) || 4060 sin6->sin6_family != AF_INET6) { 4061 BUMP_MIB(&rawip_mib, rawipOutErrors); 4062 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4063 return; 4064 } 4065 4066 /* No support for mapped addresses on raw sockets */ 4067 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4068 BUMP_MIB(&rawip_mib, rawipOutErrors); 4069 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4070 return; 4071 } 4072 4073 /* 4074 * Destination is a native IPv6 address. 4075 * Send out an IPv6 format packet. 4076 */ 4077 icmp_wput_ipv6(q, mp, sin6, tudr->OPT_length); 4078 return; 4079 4080 case AF_INET: 4081 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4082 if (!OK_32PTR((char *)sin) || 4083 tudr->DEST_length != sizeof (sin_t) || 4084 sin->sin_family != AF_INET) { 4085 BUMP_MIB(&rawip_mib, rawipOutErrors); 4086 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4087 return; 4088 } 4089 /* Extract and ipaddr */ 4090 v4dst = sin->sin_addr.s_addr; 4091 break; 4092 4093 default: 4094 ASSERT(0); 4095 } 4096 4097 /* 4098 * If options passed in, feed it for verification and handling 4099 */ 4100 if (tudr->OPT_length != 0) { 4101 int error; 4102 4103 if (icmp_unitdata_opt_process(q, mp, &error, 4104 (uchar_t *)0) < 0) { 4105 /* failure */ 4106 BUMP_MIB(&rawip_mib, rawipOutErrors); 4107 icmp_ud_err(q, mp, error); 4108 return; 4109 } 4110 /* 4111 * Note: Success in processing options. 
4112 * mp option buffer represented by 4113 * OPT_length/offset now potentially modified 4114 * and contain option setting results 4115 */ 4116 } 4117 4118 if (v4dst == INADDR_ANY) 4119 v4dst = htonl(INADDR_LOOPBACK); 4120 4121 /* Check if our saved options are valid; update if not */ 4122 if (is_system_labeled() && 4123 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4124 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst) && 4125 !icmp_update_label(q, icmp, mp, v4dst)) { 4126 return; 4127 } 4128 4129 /* Protocol 255 contains full IP headers */ 4130 if (icmp->icmp_hdrincl) { 4131 freeb(mp); 4132 icmp_wput_hdrincl(q, mp1, icmp); 4133 return; 4134 } 4135 4136 /* Add an IP header */ 4137 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4138 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 4139 if ((uchar_t *)ipha < mp1->b_datap->db_base || 4140 mp1->b_datap->db_ref != 1 || 4141 !OK_32PTR(ipha)) { 4142 if (!(mp1 = allocb(ip_hdr_length + icmp_wroff_extra, 4143 BPRI_LO))) { 4144 BUMP_MIB(&rawip_mib, rawipOutErrors); 4145 icmp_ud_err(q, mp, ENOMEM); 4146 return; 4147 } 4148 mp1->b_cont = mp->b_cont; 4149 ipha = (ipha_t *)mp1->b_datap->db_lim; 4150 mp1->b_wptr = (uchar_t *)ipha; 4151 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4152 } 4153 #ifdef _BIG_ENDIAN 4154 /* Set version, header length, and tos */ 4155 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4156 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4157 icmp->icmp_type_of_service); 4158 /* Set ttl and protocol */ 4159 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4160 #else 4161 /* Set version, header length, and tos */ 4162 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4163 ((icmp->icmp_type_of_service << 8) | 4164 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4165 /* Set ttl and protocol */ 4166 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4167 #endif 4168 /* 4169 * Copy our address into the packet. 
If this is zero, 4170 * ip will fill in the real source address. 4171 */ 4172 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4173 ipha->ipha_fragment_offset_and_flags = 0; 4174 4175 /* 4176 * For the socket of SOCK_RAW type, the checksum is provided in the 4177 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4178 * tell IP that the application has sent a complete IP header and not 4179 * to compute the transport checksum nor change the DF flag. 4180 */ 4181 ipha->ipha_ident = IP_HDR_INCLUDED; 4182 4183 /* Finish common formatting of the packet. */ 4184 mp1->b_rptr = (uchar_t *)ipha; 4185 4186 ip_len = mp1->b_wptr - (uchar_t *)ipha; 4187 if (mp1->b_cont != NULL) 4188 ip_len += msgdsize(mp1->b_cont); 4189 4190 /* 4191 * Set the length into the IP header. 4192 * If the length is greater than the maximum allowed by IP, 4193 * then free the message and return. Do not try and send it 4194 * as this can cause problems in layers below. 4195 */ 4196 if (ip_len > IP_MAXPACKET) { 4197 BUMP_MIB(&rawip_mib, rawipOutErrors); 4198 icmp_ud_err(q, mp, EMSGSIZE); 4199 return; 4200 } 4201 ipha->ipha_length = htons((uint16_t)ip_len); 4202 /* 4203 * Copy in the destination address from the T_UNITDATA 4204 * request 4205 */ 4206 ipha->ipha_dst = v4dst; 4207 4208 /* 4209 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4210 */ 4211 if (CLASSD(v4dst)) 4212 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4213 4214 /* Copy in options if any */ 4215 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4216 bcopy(icmp->icmp_ip_snd_options, 4217 &ipha[1], icmp->icmp_ip_snd_options_len); 4218 /* 4219 * Massage source route putting first source route in ipha_dst. 4220 * Ignore the destination in the T_unitdata_req. 
4221 */ 4222 (void) ip_massage_options(ipha); 4223 } 4224 freeb(mp); 4225 BUMP_MIB(&rawip_mib, rawipOutDatagrams); 4226 mblk_setcred(mp1, icmp->icmp_credp); 4227 putnext(q, mp1); 4228 #undef ipha 4229 #undef tudr 4230 } 4231 4232 static boolean_t 4233 icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4234 { 4235 int err; 4236 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4237 4238 err = tsol_compute_label_v6(DB_CREDDEF(mp, icmp->icmp_credp), dst, 4239 opt_storage, icmp->icmp_mac_exempt); 4240 if (err == 0) { 4241 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4242 &icmp->icmp_label_len_v6, opt_storage); 4243 } 4244 if (err != 0) { 4245 BUMP_MIB(&rawip_mib, rawipOutErrors); 4246 DTRACE_PROBE4( 4247 tx__ip__log__drop__updatelabel__icmp6, 4248 char *, "queue(1) failed to update options(2) on mp(3)", 4249 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4250 icmp_ud_err(wq, mp, err); 4251 return (B_FALSE); 4252 } 4253 4254 icmp->icmp_v6lastdst = *dst; 4255 return (B_TRUE); 4256 } 4257 4258 /* 4259 * icmp_wput_ipv6(): 4260 * Assumes that icmp_wput did some sanity checking on the destination 4261 * address, but that the label may not yet be correct. 4262 */ 4263 void 4264 icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) 4265 { 4266 ip6_t *ip6h; 4267 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 4268 mblk_t *mp1; 4269 int ip_hdr_len = IPV6_HDR_LEN; 4270 size_t ip_len; 4271 icmp_t *icmp; 4272 ip6_pkt_t ipp_s; /* For ancillary data options */ 4273 ip6_pkt_t *ipp = &ipp_s; 4274 ip6_pkt_t *tipp; 4275 uint32_t csum = 0; 4276 uint_t ignore = 0; 4277 uint_t option_exists = 0, is_sticky = 0; 4278 uint8_t *cp; 4279 uint8_t *nxthdr_ptr; 4280 in6_addr_t ip6_dst; 4281 4282 icmp = (icmp_t *)q->q_ptr; 4283 4284 /* 4285 * If the local address is a mapped address return 4286 * an error. 
4287 * It would be possible to send an IPv6 packet but the 4288 * response would never make it back to the application 4289 * since it is bound to a mapped address. 4290 */ 4291 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4292 BUMP_MIB(&rawip_mib, rawipOutErrors); 4293 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4294 return; 4295 } 4296 4297 ipp->ipp_fields = 0; 4298 ipp->ipp_sticky_ignored = 0; 4299 4300 /* 4301 * If TPI options passed in, feed it for verification and handling 4302 */ 4303 if (tudr_optlen != 0) { 4304 int error; 4305 4306 if (icmp_unitdata_opt_process(q, mp, &error, 4307 (void *)ipp) < 0) { 4308 /* failure */ 4309 BUMP_MIB(&rawip_mib, rawipOutErrors); 4310 icmp_ud_err(q, mp, error); 4311 return; 4312 } 4313 ignore = ipp->ipp_sticky_ignored; 4314 ASSERT(error == 0); 4315 } 4316 4317 if (sin6->sin6_scope_id != 0 && 4318 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4319 /* 4320 * IPPF_SCOPE_ID is special. It's neither a sticky 4321 * option nor ancillary data. It needs to be 4322 * explicitly set in options_exists. 4323 */ 4324 option_exists |= IPPF_SCOPE_ID; 4325 } 4326 4327 /* 4328 * Compute the destination address 4329 */ 4330 ip6_dst = sin6->sin6_addr; 4331 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4332 ip6_dst = ipv6_loopback; 4333 4334 /* 4335 * If we're not going to the same destination as last time, then 4336 * recompute the label required. This is done in a separate routine to 4337 * avoid blowing up our stack here. 4338 */ 4339 if (is_system_labeled() && 4340 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst) && 4341 !icmp_update_label_v6(q, icmp, mp, &ip6_dst)) { 4342 return; 4343 } 4344 4345 /* 4346 * If there's a security label here, then we ignore any options the 4347 * user may try to set. We keep the peer's label as a hidden sticky 4348 * option. 
4349 */ 4350 if (icmp->icmp_label_len_v6 > 0) { 4351 ignore &= ~IPPF_HOPOPTS; 4352 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4353 } 4354 4355 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4356 (ipp->ipp_fields == 0)) { 4357 /* No sticky options nor ancillary data. */ 4358 goto no_options; 4359 } 4360 4361 /* 4362 * Go through the options figuring out where each is going to 4363 * come from and build two masks. The first mask indicates if 4364 * the option exists at all. The second mask indicates if the 4365 * option is sticky or ancillary. 4366 */ 4367 if (!(ignore & IPPF_HOPOPTS)) { 4368 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4369 option_exists |= IPPF_HOPOPTS; 4370 ip_hdr_len += ipp->ipp_hopoptslen; 4371 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4372 option_exists |= IPPF_HOPOPTS; 4373 is_sticky |= IPPF_HOPOPTS; 4374 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4375 } 4376 } 4377 4378 if (!(ignore & IPPF_RTHDR)) { 4379 if (ipp->ipp_fields & IPPF_RTHDR) { 4380 option_exists |= IPPF_RTHDR; 4381 ip_hdr_len += ipp->ipp_rthdrlen; 4382 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4383 option_exists |= IPPF_RTHDR; 4384 is_sticky |= IPPF_RTHDR; 4385 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4386 } 4387 } 4388 4389 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4390 /* 4391 * Need to have a router header to use these. 
4392 */ 4393 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4394 option_exists |= IPPF_RTDSTOPTS; 4395 ip_hdr_len += ipp->ipp_rtdstoptslen; 4396 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4397 option_exists |= IPPF_RTDSTOPTS; 4398 is_sticky |= IPPF_RTDSTOPTS; 4399 ip_hdr_len += 4400 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4401 } 4402 } 4403 4404 if (!(ignore & IPPF_DSTOPTS)) { 4405 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4406 option_exists |= IPPF_DSTOPTS; 4407 ip_hdr_len += ipp->ipp_dstoptslen; 4408 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4409 option_exists |= IPPF_DSTOPTS; 4410 is_sticky |= IPPF_DSTOPTS; 4411 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4412 } 4413 } 4414 4415 if (!(ignore & IPPF_IFINDEX)) { 4416 if (ipp->ipp_fields & IPPF_IFINDEX) { 4417 option_exists |= IPPF_IFINDEX; 4418 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4419 option_exists |= IPPF_IFINDEX; 4420 is_sticky |= IPPF_IFINDEX; 4421 } 4422 } 4423 4424 if (!(ignore & IPPF_ADDR)) { 4425 if (ipp->ipp_fields & IPPF_ADDR) { 4426 option_exists |= IPPF_ADDR; 4427 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4428 option_exists |= IPPF_ADDR; 4429 is_sticky |= IPPF_ADDR; 4430 } 4431 } 4432 4433 if (!(ignore & IPPF_DONTFRAG)) { 4434 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4435 option_exists |= IPPF_DONTFRAG; 4436 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4437 option_exists |= IPPF_DONTFRAG; 4438 is_sticky |= IPPF_DONTFRAG; 4439 } 4440 } 4441 4442 if (!(ignore & IPPF_USE_MIN_MTU)) { 4443 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4444 option_exists |= IPPF_USE_MIN_MTU; 4445 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4446 IPPF_USE_MIN_MTU) { 4447 option_exists |= IPPF_USE_MIN_MTU; 4448 is_sticky |= IPPF_USE_MIN_MTU; 4449 } 4450 } 4451 4452 if (!(ignore & IPPF_NEXTHOP)) { 4453 if (ipp->ipp_fields & IPPF_NEXTHOP) { 4454 option_exists |= IPPF_NEXTHOP; 4455 } else if (icmp->icmp_sticky_ipp.ipp_fields & 
IPPF_NEXTHOP) { 4456 option_exists |= IPPF_NEXTHOP; 4457 is_sticky |= IPPF_NEXTHOP; 4458 } 4459 } 4460 4461 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 4462 option_exists |= IPPF_HOPLIMIT; 4463 /* IPV6_HOPLIMIT can never be sticky */ 4464 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 4465 4466 if (!(ignore & IPPF_UNICAST_HOPS) && 4467 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 4468 option_exists |= IPPF_UNICAST_HOPS; 4469 is_sticky |= IPPF_UNICAST_HOPS; 4470 } 4471 4472 if (!(ignore & IPPF_MULTICAST_HOPS) && 4473 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 4474 option_exists |= IPPF_MULTICAST_HOPS; 4475 is_sticky |= IPPF_MULTICAST_HOPS; 4476 } 4477 4478 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 4479 /* This is a sticky socket option only */ 4480 option_exists |= IPPF_NO_CKSUM; 4481 is_sticky |= IPPF_NO_CKSUM; 4482 } 4483 4484 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 4485 /* This is a sticky socket option only */ 4486 option_exists |= IPPF_RAW_CKSUM; 4487 is_sticky |= IPPF_RAW_CKSUM; 4488 } 4489 4490 if (!(ignore & IPPF_TCLASS)) { 4491 if (ipp->ipp_fields & IPPF_TCLASS) { 4492 option_exists |= IPPF_TCLASS; 4493 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 4494 option_exists |= IPPF_TCLASS; 4495 is_sticky |= IPPF_TCLASS; 4496 } 4497 } 4498 4499 no_options: 4500 4501 /* 4502 * If any options carried in the ip6i_t were specified, we 4503 * need to account for the ip6i_t in the data we'll be sending 4504 * down. 
4505 */ 4506 if (option_exists & IPPF_HAS_IP6I) 4507 ip_hdr_len += sizeof (ip6i_t); 4508 4509 /* check/fix buffer config, setup pointers into it */ 4510 mp1 = mp->b_cont; 4511 ip6h = (ip6_t *)&mp1->b_rptr[-ip_hdr_len]; 4512 if ((mp1->b_datap->db_ref != 1) || 4513 ((unsigned char *)ip6h < mp1->b_datap->db_base) || 4514 !OK_32PTR(ip6h)) { 4515 /* Try to get everything in a single mblk next time */ 4516 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 4517 icmp->icmp_max_hdr_len = ip_hdr_len; 4518 (void) mi_set_sth_wroff(RD(q), 4519 icmp->icmp_max_hdr_len + icmp_wroff_extra); 4520 } 4521 mp1 = allocb(ip_hdr_len + icmp_wroff_extra, BPRI_LO); 4522 if (!mp1) { 4523 BUMP_MIB(&rawip_mib, rawipOutErrors); 4524 icmp_ud_err(q, mp, ENOMEM); 4525 return; 4526 } 4527 mp1->b_cont = mp->b_cont; 4528 mp1->b_wptr = mp1->b_datap->db_lim; 4529 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 4530 } 4531 mp1->b_rptr = (unsigned char *)ip6h; 4532 ip6i = (ip6i_t *)ip6h; 4533 4534 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 4535 if (option_exists & IPPF_HAS_IP6I) { 4536 ip6h = (ip6_t *)&ip6i[1]; 4537 ip6i->ip6i_flags = 0; 4538 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4539 4540 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 4541 if (option_exists & IPPF_SCOPE_ID) { 4542 ip6i->ip6i_flags |= IP6I_IFINDEX; 4543 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 4544 } else if (option_exists & IPPF_IFINDEX) { 4545 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 4546 ASSERT(tipp->ipp_ifindex != 0); 4547 ip6i->ip6i_flags |= IP6I_IFINDEX; 4548 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 4549 } 4550 4551 if (option_exists & IPPF_RAW_CKSUM) { 4552 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 4553 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 4554 } 4555 4556 if (option_exists & IPPF_NO_CKSUM) { 4557 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 4558 } 4559 4560 if (option_exists & IPPF_ADDR) { 4561 /* 4562 * Enable per-packet source address verification if 4563 * IPV6_PKTINFO specified the source address. 4564 * ip6_src is set in the transport's _wput function. 4565 */ 4566 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 4567 } 4568 4569 if (option_exists & IPPF_DONTFRAG) { 4570 ip6i->ip6i_flags |= IP6I_DONTFRAG; 4571 } 4572 4573 if (option_exists & IPPF_USE_MIN_MTU) { 4574 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 4575 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 4576 } 4577 4578 if (option_exists & IPPF_NEXTHOP) { 4579 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 4580 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 4581 ip6i->ip6i_flags |= IP6I_NEXTHOP; 4582 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 4583 } 4584 4585 /* 4586 * tell IP this is an ip6i_t private header 4587 */ 4588 ip6i->ip6i_nxt = IPPROTO_RAW; 4589 } 4590 4591 /* Initialize IPv6 header */ 4592 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4593 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 4594 4595 /* Set the hoplimit of the outgoing packet. 
*/ 4596 if (option_exists & IPPF_HOPLIMIT) { 4597 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 4598 ip6h->ip6_hops = ipp->ipp_hoplimit; 4599 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4600 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 4601 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 4602 if (option_exists & IPPF_MULTICAST_HOPS) 4603 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4604 } else { 4605 ip6h->ip6_hops = icmp->icmp_ttl; 4606 if (option_exists & IPPF_UNICAST_HOPS) 4607 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4608 } 4609 4610 if (option_exists & IPPF_ADDR) { 4611 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 4612 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 4613 ip6h->ip6_src = tipp->ipp_addr; 4614 } else { 4615 /* 4616 * The source address was not set using IPV6_PKTINFO. 4617 * First look at the bound source. 4618 * If unspecified fallback to __sin6_src_id. 4619 */ 4620 ip6h->ip6_src = icmp->icmp_v6src; 4621 if (sin6->__sin6_src_id != 0 && 4622 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 4623 ip_srcid_find_id(sin6->__sin6_src_id, 4624 &ip6h->ip6_src, icmp->icmp_zoneid); 4625 } 4626 } 4627 4628 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 4629 cp = (uint8_t *)&ip6h[1]; 4630 4631 /* 4632 * Here's where we have to start stringing together 4633 * any extension headers in the right order: 4634 * Hop-by-hop, destination, routing, and final destination opts. 
4635 */ 4636 if (option_exists & IPPF_HOPOPTS) { 4637 /* Hop-by-hop options */ 4638 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 4639 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 4640 4641 *nxthdr_ptr = IPPROTO_HOPOPTS; 4642 nxthdr_ptr = &hbh->ip6h_nxt; 4643 4644 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 4645 cp += tipp->ipp_hopoptslen; 4646 } 4647 /* 4648 * En-route destination options 4649 * Only do them if there's a routing header as well 4650 */ 4651 if (option_exists & IPPF_RTDSTOPTS) { 4652 ip6_dest_t *dst = (ip6_dest_t *)cp; 4653 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 4654 4655 *nxthdr_ptr = IPPROTO_DSTOPTS; 4656 nxthdr_ptr = &dst->ip6d_nxt; 4657 4658 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 4659 cp += tipp->ipp_rtdstoptslen; 4660 } 4661 /* 4662 * Routing header next 4663 */ 4664 if (option_exists & IPPF_RTHDR) { 4665 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 4666 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 4667 4668 *nxthdr_ptr = IPPROTO_ROUTING; 4669 nxthdr_ptr = &rt->ip6r_nxt; 4670 4671 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 4672 cp += tipp->ipp_rthdrlen; 4673 } 4674 /* 4675 * Do ultimate destination options 4676 */ 4677 if (option_exists & IPPF_DSTOPTS) { 4678 ip6_dest_t *dest = (ip6_dest_t *)cp; 4679 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 4680 4681 *nxthdr_ptr = IPPROTO_DSTOPTS; 4682 nxthdr_ptr = &dest->ip6d_nxt; 4683 4684 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 4685 cp += tipp->ipp_dstoptslen; 4686 } 4687 4688 /* 4689 * Now set the last header pointer to the proto passed in 4690 */ 4691 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 4692 *nxthdr_ptr = icmp->icmp_proto; 4693 4694 /* 4695 * Copy in the destination address 4696 */ 4697 ip6h->ip6_dst = ip6_dst; 4698 4699 ip6h->ip6_vcf = 4700 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 4701 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 4702 4703 if (option_exists & IPPF_TCLASS) { 4704 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 4705 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 4706 tipp->ipp_tclass); 4707 } 4708 if (option_exists & IPPF_RTHDR) { 4709 ip6_rthdr_t *rth; 4710 4711 /* 4712 * Perform any processing needed for source routing. 4713 * We know that all extension headers will be in the same mblk 4714 * as the IPv6 header. 4715 */ 4716 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 4717 if (rth != NULL && rth->ip6r_segleft != 0) { 4718 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 4719 /* 4720 * Drop packet - only support Type 0 routing. 4721 * Notify the application as well. 4722 */ 4723 icmp_ud_err(q, mp, EPROTO); 4724 BUMP_MIB(&rawip_mib, rawipOutErrors); 4725 return; 4726 } 4727 /* 4728 * rth->ip6r_len is twice the number of 4729 * addresses in the header 4730 */ 4731 if (rth->ip6r_len & 0x1) { 4732 icmp_ud_err(q, mp, EPROTO); 4733 BUMP_MIB(&rawip_mib, rawipOutErrors); 4734 return; 4735 } 4736 /* 4737 * Shuffle the routing header and ip6_dst 4738 * addresses, and get the checksum difference 4739 * between the first hop (in ip6_dst) and 4740 * the destination (in the last routing hdr entry). 4741 */ 4742 csum = ip_massage_options_v6(ip6h, rth); 4743 /* 4744 * Verify that the first hop isn't a mapped address. 4745 * Routers along the path need to do this verification 4746 * for subsequent hops. 4747 */ 4748 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 4749 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4750 BUMP_MIB(&rawip_mib, rawipOutErrors); 4751 return; 4752 } 4753 } 4754 } 4755 4756 ip_len = mp1->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 4757 if (mp1->b_cont != NULL) 4758 ip_len += msgdsize(mp1->b_cont); 4759 4760 /* 4761 * Set the length into the IP header. 4762 * If the length is greater than the maximum allowed by IP, 4763 * then free the message and return. Do not try and send it 4764 * as this can cause problems in layers below. 
4765 */ 4766 if (ip_len > IP_MAXPACKET) { 4767 BUMP_MIB(&rawip_mib, rawipOutErrors); 4768 icmp_ud_err(q, mp, EMSGSIZE); 4769 return; 4770 } 4771 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 4772 uint_t cksum_off; /* From ip6i == mp1->b_rptr */ 4773 uint16_t *cksum_ptr; 4774 uint_t ext_hdrs_len; 4775 4776 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 4777 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 4778 icmp->icmp_checksum_off == 2); 4779 4780 /* 4781 * We make it easy for IP to include our pseudo header 4782 * by putting our length in uh_checksum, modified (if 4783 * we have a routing header) by the checksum difference 4784 * between the ultimate destination and first hop addresses. 4785 * Note: ICMPv6 must always checksum the packet. 4786 */ 4787 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 4788 if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) { 4789 if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) { 4790 BUMP_MIB(&rawip_mib, rawipOutErrors); 4791 freemsg(mp); 4792 return; 4793 } 4794 ip6i = (ip6i_t *)mp1->b_rptr; 4795 if (ip6i->ip6i_nxt == IPPROTO_RAW) 4796 ip6h = (ip6_t *)&ip6i[1]; 4797 else 4798 ip6h = (ip6_t *)ip6i; 4799 } 4800 /* Add payload length to checksum */ 4801 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 4802 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 4803 csum += htons(ip_len - ext_hdrs_len); 4804 4805 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 4806 csum = (csum & 0xFFFF) + (csum >> 16); 4807 *cksum_ptr = (uint16_t)csum; 4808 } 4809 4810 #ifdef _LITTLE_ENDIAN 4811 ip_len = htons(ip_len); 4812 #endif 4813 ip6h->ip6_plen = (uint16_t)ip_len; 4814 4815 freeb(mp); 4816 4817 /* We're done. 
Pass the packet to IP */ 4818 BUMP_MIB(&rawip_mib, rawipOutDatagrams); 4819 mblk_setcred(mp1, icmp->icmp_credp); 4820 putnext(q, mp1); 4821 } 4822 4823 static void 4824 icmp_wput_other(queue_t *q, mblk_t *mp) 4825 { 4826 uchar_t *rptr = mp->b_rptr; 4827 struct iocblk *iocp; 4828 #define tudr ((struct T_unitdata_req *)rptr) 4829 icmp_t *icmp; 4830 cred_t *cr; 4831 4832 icmp = (icmp_t *)q->q_ptr; 4833 4834 cr = DB_CREDDEF(mp, icmp->icmp_credp); 4835 4836 switch (mp->b_datap->db_type) { 4837 case M_PROTO: 4838 case M_PCPROTO: 4839 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 4840 /* 4841 * If the message does not contain a PRIM_type, 4842 * throw it away. 4843 */ 4844 freemsg(mp); 4845 return; 4846 } 4847 switch (((union T_primitives *)rptr)->type) { 4848 case T_ADDR_REQ: 4849 icmp_addr_req(q, mp); 4850 return; 4851 case O_T_BIND_REQ: 4852 case T_BIND_REQ: 4853 qwriter(q, mp, icmp_bind, PERIM_OUTER); 4854 return; 4855 case T_CONN_REQ: 4856 icmp_connect(q, mp); 4857 return; 4858 case T_CAPABILITY_REQ: 4859 icmp_capability_req(q, mp); 4860 return; 4861 case T_INFO_REQ: 4862 icmp_info_req(q, mp); 4863 return; 4864 case T_UNITDATA_REQ: 4865 /* 4866 * If a T_UNITDATA_REQ gets here, the address must 4867 * be bad. Valid T_UNITDATA_REQs are found above 4868 * and break to below this switch. 4869 */ 4870 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4871 return; 4872 case T_UNBIND_REQ: 4873 icmp_unbind(q, mp); 4874 return; 4875 4876 case T_SVR4_OPTMGMT_REQ: 4877 if (!snmpcom_req(q, mp, icmp_snmp_set, icmp_snmp_get, 4878 cr)) 4879 /* Only IP can return anything meaningful */ 4880 (void) svr4_optcom_req(q, mp, cr, 4881 &icmp_opt_obj); 4882 return; 4883 4884 case T_OPTMGMT_REQ: 4885 /* Only IP can return anything meaningful */ 4886 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj); 4887 return; 4888 4889 case T_DISCON_REQ: 4890 icmp_disconnect(q, mp); 4891 return; 4892 4893 /* The following TPI message is not supported by icmp. 
*/ 4894 case O_T_CONN_RES: 4895 case T_CONN_RES: 4896 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 4897 return; 4898 4899 /* The following 3 TPI requests are illegal for icmp. */ 4900 case T_DATA_REQ: 4901 case T_EXDATA_REQ: 4902 case T_ORDREL_REQ: 4903 freemsg(mp); 4904 (void) putctl1(RD(q), M_ERROR, EPROTO); 4905 return; 4906 default: 4907 break; 4908 } 4909 break; 4910 case M_IOCTL: 4911 iocp = (struct iocblk *)mp->b_rptr; 4912 switch (iocp->ioc_cmd) { 4913 case TI_GETPEERNAME: 4914 if (icmp->icmp_state != TS_DATA_XFER) { 4915 /* 4916 * If a default destination address has not 4917 * been associated with the stream, then we 4918 * don't know the peer's name. 4919 */ 4920 iocp->ioc_error = ENOTCONN; 4921 err_ret:; 4922 iocp->ioc_count = 0; 4923 mp->b_datap->db_type = M_IOCACK; 4924 qreply(q, mp); 4925 return; 4926 } 4927 /* FALLTHRU */ 4928 case TI_GETMYNAME: 4929 /* 4930 * For TI_GETPEERNAME and TI_GETMYNAME, we first 4931 * need to copyin the user's strbuf structure. 4932 * Processing will continue in the M_IOCDATA case 4933 * below. 4934 */ 4935 mi_copyin(q, mp, NULL, 4936 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 4937 return; 4938 case ND_SET: 4939 /* nd_getset performs the necessary error checking */ 4940 case ND_GET: 4941 if (nd_getset(q, icmp_g_nd, mp)) { 4942 qreply(q, mp); 4943 return; 4944 } 4945 break; 4946 default: 4947 break; 4948 } 4949 break; 4950 case M_IOCDATA: 4951 icmp_wput_iocdata(q, mp); 4952 return; 4953 default: 4954 break; 4955 } 4956 putnext(q, mp); 4957 } 4958 4959 /* 4960 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 4961 * messages. 4962 */ 4963 static void 4964 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 4965 { 4966 mblk_t *mp1; 4967 STRUCT_HANDLE(strbuf, sb); 4968 icmp_t *icmp; 4969 in6_addr_t v6addr; 4970 ipaddr_t v4addr; 4971 uint32_t flowinfo = 0; 4972 int addrlen; 4973 4974 /* Make sure it is one of ours. 
*/ 4975 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 4976 case TI_GETMYNAME: 4977 case TI_GETPEERNAME: 4978 break; 4979 default: 4980 putnext(q, mp); 4981 return; 4982 } 4983 switch (mi_copy_state(q, mp, &mp1)) { 4984 case -1: 4985 return; 4986 case MI_COPY_CASE(MI_COPY_IN, 1): 4987 break; 4988 case MI_COPY_CASE(MI_COPY_OUT, 1): 4989 /* 4990 * The address has been copied out, so now 4991 * copyout the strbuf. 4992 */ 4993 mi_copyout(q, mp); 4994 return; 4995 case MI_COPY_CASE(MI_COPY_OUT, 2): 4996 /* 4997 * The address and strbuf have been copied out. 4998 * We're done, so just acknowledge the original 4999 * M_IOCTL. 5000 */ 5001 mi_copy_done(q, mp, 0); 5002 return; 5003 default: 5004 /* 5005 * Something strange has happened, so acknowledge 5006 * the original M_IOCTL with an EPROTO error. 5007 */ 5008 mi_copy_done(q, mp, EPROTO); 5009 return; 5010 } 5011 /* 5012 * Now we have the strbuf structure for TI_GETMYNAME 5013 * and TI_GETPEERNAME. Next we copyout the requested 5014 * address and then we'll copyout the strbuf. 5015 */ 5016 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5017 (void *)mp1->b_rptr); 5018 icmp = (icmp_t *)q->q_ptr; 5019 if (icmp->icmp_family == AF_INET) 5020 addrlen = sizeof (sin_t); 5021 else 5022 addrlen = sizeof (sin6_t); 5023 5024 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5025 mi_copy_done(q, mp, EINVAL); 5026 return; 5027 } 5028 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5029 case TI_GETMYNAME: 5030 if (icmp->icmp_family == AF_INET) { 5031 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5032 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 5033 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5034 v4addr = V4_PART_OF_V6(icmp->icmp_v6src); 5035 } else { 5036 /* 5037 * INADDR_ANY 5038 * icmp_v6src is not set, we might be bound to 5039 * broadcast/multicast. 
Use icmp_bound_v6src as 5040 * local address instead (that could 5041 * also still be INADDR_ANY) 5042 */ 5043 v4addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 5044 } 5045 } else { 5046 /* icmp->icmp_family == AF_INET6 */ 5047 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5048 v6addr = icmp->icmp_v6src; 5049 } else { 5050 /* 5051 * UNSPECIFIED 5052 * icmp_v6src is not set, we might be bound to 5053 * broadcast/multicast. Use icmp_bound_v6src as 5054 * local address instead (that could 5055 * also still be UNSPECIFIED) 5056 */ 5057 v6addr = icmp->icmp_bound_v6src; 5058 } 5059 } 5060 break; 5061 case TI_GETPEERNAME: 5062 if (icmp->icmp_family == AF_INET) { 5063 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5064 v4addr = V4_PART_OF_V6(icmp->icmp_v6dst); 5065 } else { 5066 /* icmp->icmp_family == AF_INET6) */ 5067 v6addr = icmp->icmp_v6dst; 5068 flowinfo = icmp->icmp_flowinfo; 5069 } 5070 break; 5071 default: 5072 mi_copy_done(q, mp, EPROTO); 5073 return; 5074 } 5075 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5076 if (!mp1) 5077 return; 5078 5079 if (icmp->icmp_family == AF_INET) { 5080 sin_t *sin; 5081 5082 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 5083 sin = (sin_t *)mp1->b_rptr; 5084 mp1->b_wptr = (uchar_t *)&sin[1]; 5085 *sin = sin_null; 5086 sin->sin_family = AF_INET; 5087 sin->sin_addr.s_addr = v4addr; 5088 } else { 5089 /* icmp->icmp_family == AF_INET6 */ 5090 sin6_t *sin6; 5091 5092 ASSERT(icmp->icmp_family == AF_INET6); 5093 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 5094 sin6 = (sin6_t *)mp1->b_rptr; 5095 mp1->b_wptr = (uchar_t *)&sin6[1]; 5096 *sin6 = sin6_null; 5097 sin6->sin6_family = AF_INET6; 5098 sin6->sin6_flowinfo = flowinfo; 5099 sin6->sin6_addr = v6addr; 5100 } 5101 /* Copy out the address */ 5102 mi_copyout(q, mp); 5103 } 5104 5105 /* 5106 * Only allow MIB requests and M_FLUSHes to pass. 5107 * All other messages are nacked or dropped. 
5108 */ 5109 static void 5110 icmp_wput_restricted(queue_t *q, mblk_t *mp) 5111 { 5112 cred_t *cr; 5113 icmp_t *icmp; 5114 5115 switch (DB_TYPE(mp)) { 5116 case M_PROTO: 5117 case M_PCPROTO: 5118 if (MBLKL(mp) < sizeof (t_scalar_t)) { 5119 freemsg(mp); 5120 return; 5121 } 5122 icmp = (icmp_t *)q->q_ptr; 5123 cr = DB_CREDDEF(mp, icmp->icmp_credp); 5124 5125 switch (((union T_primitives *)mp->b_rptr)->type) { 5126 case T_SVR4_OPTMGMT_REQ: 5127 if (!snmpcom_req(q, mp, 5128 icmp_snmp_set, icmp_snmp_get, cr)) 5129 (void) svr4_optcom_req(q, mp, cr, 5130 &icmp_opt_obj); 5131 return; 5132 case T_OPTMGMT_REQ: 5133 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj); 5134 return; 5135 default: 5136 icmp_err_ack(q, mp, TSYSERR, ENOTSUP); 5137 return; 5138 } 5139 /* NOTREACHED */ 5140 case M_IOCTL: 5141 miocnak(q, mp, 0, ENOTSUP); 5142 break; 5143 case M_FLUSH: 5144 putnext(q, mp); 5145 break; 5146 default: 5147 freemsg(mp); 5148 break; 5149 } 5150 } 5151 5152 static int 5153 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5154 void *thisdg_attrs) 5155 { 5156 icmp_t *icmp; 5157 struct T_unitdata_req *udreqp; 5158 int is_absreq_failure; 5159 cred_t *cr; 5160 5161 icmp = (icmp_t *)q->q_ptr; 5162 5163 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5164 *errorp = 0; 5165 5166 cr = DB_CREDDEF(mp, icmp->icmp_credp); 5167 5168 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5169 udreqp->OPT_offset, cr, &icmp_opt_obj, 5170 thisdg_attrs, &is_absreq_failure); 5171 5172 if (*errorp != 0) { 5173 /* 5174 * Note: No special action needed in this 5175 * module for "is_absreq_failure" 5176 */ 5177 return (-1); /* failure */ 5178 } 5179 ASSERT(is_absreq_failure == 0); 5180 return (0); /* success */ 5181 } 5182 5183 void 5184 icmp_ddi_init(void) 5185 { 5186 ICMP6_MAJ = ddi_name_to_major(ICMP6); 5187 icmp_max_optsize = 5188 optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5189 icmp_opt_obj.odb_opt_arr_cnt); 5190 5191 (void) icmp_param_register(icmp_param_arr, 
A_CNT(icmp_param_arr)); 5192 5193 rawip_kstat_init(); 5194 } 5195 5196 void 5197 icmp_ddi_destroy(void) 5198 { 5199 nd_free(&icmp_g_nd); 5200 5201 rawip_kstat_fini(); 5202 } 5203 5204 static void 5205 rawip_kstat_init(void) { 5206 5207 rawip_named_kstat_t template = { 5208 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5209 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5210 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5211 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5212 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5213 }; 5214 5215 rawip_mibkp = kstat_create("icmp", 0, "rawip", "mib2", 5216 KSTAT_TYPE_NAMED, 5217 NUM_OF_FIELDS(rawip_named_kstat_t), 5218 0); 5219 if (rawip_mibkp == NULL) 5220 return; 5221 5222 bcopy(&template, rawip_mibkp->ks_data, sizeof (template)); 5223 5224 rawip_mibkp->ks_update = rawip_kstat_update; 5225 5226 kstat_install(rawip_mibkp); 5227 } 5228 5229 static void 5230 rawip_kstat_fini(void) { 5231 if (rawip_mibkp) { 5232 kstat_delete(rawip_mibkp); 5233 rawip_mibkp = NULL; 5234 } 5235 } 5236 5237 static int 5238 rawip_kstat_update(kstat_t *kp, int rw) { 5239 rawip_named_kstat_t *rawipkp; 5240 5241 if ((kp == NULL) || (kp->ks_data == NULL)) 5242 return (EIO); 5243 5244 if (rw == KSTAT_WRITE) 5245 return (EACCES); 5246 5247 rawipkp = (rawip_named_kstat_t *)kp->ks_data; 5248 5249 rawipkp->inDatagrams.value.ui32 = rawip_mib.rawipInDatagrams; 5250 rawipkp->inCksumErrs.value.ui32 = rawip_mib.rawipInCksumErrs; 5251 rawipkp->inErrors.value.ui32 = rawip_mib.rawipInErrors; 5252 rawipkp->outDatagrams.value.ui32 = rawip_mib.rawipOutDatagrams; 5253 rawipkp->outErrors.value.ui32 = rawip_mib.rawipOutErrors; 5254 5255 return (0); 5256 } 5257