1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/types.h> 31 #include <sys/stream.h> 32 #include <sys/stropts.h> 33 #include <sys/strlog.h> 34 #include <sys/strsun.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/priv.h> 46 #include <sys/zone.h> 47 #include <sys/time.h> 48 49 #include <sys/socket.h> 50 #include <sys/isa_defs.h> 51 #include <sys/suntpi.h> 52 #include <sys/xti_inet.h> 53 #include <sys/netstack.h> 54 55 #include <net/route.h> 56 #include <net/if.h> 57 58 #include <netinet/in.h> 59 #include <netinet/ip6.h> 60 #include <netinet/icmp6.h> 61 #include <inet/common.h> 62 #include <inet/ip.h> 63 #include <inet/ip6.h> 64 #include <inet/mi.h> 65 #include <inet/nd.h> 66 #include <inet/optcom.h> 67 #include <inet/snmpcom.h> 68 #include <inet/kstatcom.h> 69 #include <inet/rawip_impl.h> 70 71 #include <netinet/ip_mroute.h> 72 #include <inet/tcp.h> 73 #include <net/pfkeyv2.h> 74 #include <inet/ipsec_info.h> 75 #include <inet/ipclassifier.h> 76 77 #include <sys/tsol/label.h> 78 #include <sys/tsol/tnet.h> 79 80 #include <inet/ip_ire.h> 81 #include <inet/ip_if.h> 82 83 #include <inet/ip_impl.h> 84 85 /* 86 * Synchronization notes: 87 * 88 * RAWIP is MT and uses the usual kernel synchronization primitives. There is 89 * one lock, which is icmp_rwlock. We also use conn_lock when updating things 90 * which affect the IP classifier lookup. 91 * The lock order is icmp_rwlock -> conn_lock. 92 * 93 * The icmp_rwlock: 94 * This protects most of the other fields in the icmp_t. The exact list of 95 * fields which are protected by each of the above locks is documented in 96 * the icmp_t structure definition. 97 * 98 * Plumbing notes: 99 * ICMP is always a device driver. 
For compatibility with mibopen() code 100 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough 101 * dummy module. 102 */ 103 104 static void icmp_addr_req(queue_t *q, mblk_t *mp); 105 static void icmp_bind(queue_t *q, mblk_t *mp); 106 static void icmp_bind_proto(queue_t *q); 107 static void icmp_bind_result(conn_t *, mblk_t *); 108 static void icmp_bind_ack(conn_t *, mblk_t *mp); 109 static void icmp_bind_error(conn_t *, mblk_t *mp); 110 static int icmp_build_hdrs(icmp_t *icmp); 111 static void icmp_capability_req(queue_t *q, mblk_t *mp); 112 static int icmp_close(queue_t *q); 113 static void icmp_connect(queue_t *q, mblk_t *mp); 114 static void icmp_disconnect(queue_t *q, mblk_t *mp); 115 static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 116 int sys_error); 117 static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 118 t_scalar_t t_error, int sys_error); 119 static void icmp_icmp_error(queue_t *q, mblk_t *mp); 120 static void icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 121 static void icmp_info_req(queue_t *q, mblk_t *mp); 122 static void icmp_input(void *, mblk_t *, void *); 123 static mblk_t *icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, 124 t_scalar_t addr_length, in_port_t); 125 static int icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, 126 cred_t *credp, boolean_t isv6); 127 static int icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 128 cred_t *credp); 129 static int icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 130 cred_t *credp); 131 static void icmp_output(queue_t *q, mblk_t *mp); 132 static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, 133 int *errorp, void *thisdg_attrs); 134 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 135 int icmp_opt_set(queue_t *q, uint_t optset_context, 136 int level, int name, uint_t inlen, 137 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 138 void *thisdg_attrs, cred_t *cr, mblk_t 
*mblk); 139 int icmp_opt_get(queue_t *q, int level, int name, 140 uchar_t *ptr); 141 static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 142 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt); 143 static int icmp_param_set(queue_t *q, mblk_t *mp, char *value, 144 caddr_t cp, cred_t *cr); 145 static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 146 uchar_t *ptr, int len); 147 static int icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 148 cred_t *cr); 149 static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 150 static void icmp_unbind(queue_t *q, mblk_t *mp); 151 static void icmp_wput(queue_t *q, mblk_t *mp); 152 static void icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, 153 t_scalar_t tudr_optlen); 154 static void icmp_wput_other(queue_t *q, mblk_t *mp); 155 static void icmp_wput_iocdata(queue_t *q, mblk_t *mp); 156 static void icmp_wput_restricted(queue_t *q, mblk_t *mp); 157 158 static void *rawip_stack_init(netstackid_t stackid, netstack_t *ns); 159 static void rawip_stack_fini(netstackid_t stackid, void *arg); 160 161 static void *rawip_kstat_init(netstackid_t stackid); 162 static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp); 163 static int rawip_kstat_update(kstat_t *kp, int rw); 164 165 166 static struct module_info icmp_mod_info = { 167 5707, "icmp", 1, INFPSZ, 512, 128 168 }; 169 170 /* 171 * Entry points for ICMP as a device. 172 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices. 
173 */ 174 static struct qinit icmprinitv4 = { 175 NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info 176 }; 177 178 static struct qinit icmprinitv6 = { 179 NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info 180 }; 181 182 static struct qinit icmpwinit = { 183 (pfi_t)icmp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &icmp_mod_info 184 }; 185 186 /* For AF_INET aka /dev/icmp */ 187 struct streamtab icmpinfov4 = { 188 &icmprinitv4, &icmpwinit 189 }; 190 191 /* For AF_INET6 aka /dev/icmp6 */ 192 struct streamtab icmpinfov6 = { 193 &icmprinitv6, &icmpwinit 194 }; 195 196 static sin_t sin_null; /* Zero address for quick clears */ 197 static sin6_t sin6_null; /* Zero address for quick clears */ 198 199 /* Default structure copied into T_INFO_ACK messages */ 200 static struct T_info_ack icmp_g_t_info_ack = { 201 T_INFO_ACK, 202 IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */ 203 T_INVALID, /* ETSDU_size. icmp does not support expedited data. */ 204 T_INVALID, /* CDATA_size. icmp does not support connect data. */ 205 T_INVALID, /* DDATA_size. icmp does not support disconnect data. */ 206 0, /* ADDR_size - filled in later. */ 207 0, /* OPT_size - not initialized here */ 208 IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */ 209 T_CLTS, /* SERV_type. icmp supports connection-less. */ 210 TS_UNBND, /* CURRENT_state. This is set from icmp_state. */ 211 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 212 }; 213 214 /* 215 * Table of ND variables supported by icmp. These are loaded into is_nd 216 * when the stack instance is created. 217 * All of these are alterable, within the min/max values given, at run time. 
218 */ 219 static icmpparam_t icmp_param_arr[] = { 220 /* min max value name */ 221 { 0, 128, 32, "icmp_wroff_extra" }, 222 { 1, 255, 255, "icmp_ipv4_ttl" }, 223 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"}, 224 { 0, 1, 1, "icmp_bsd_compat" }, 225 { 4096, 65536, 8192, "icmp_xmit_hiwat"}, 226 { 0, 65536, 1024, "icmp_xmit_lowat"}, 227 { 4096, 65536, 8192, "icmp_recv_hiwat"}, 228 { 65536, 1024*1024*1024, 256*1024, "icmp_max_buf"}, 229 }; 230 #define is_wroff_extra is_param_arr[0].icmp_param_value 231 #define is_ipv4_ttl is_param_arr[1].icmp_param_value 232 #define is_ipv6_hoplimit is_param_arr[2].icmp_param_value 233 #define is_bsd_compat is_param_arr[3].icmp_param_value 234 #define is_xmit_hiwat is_param_arr[4].icmp_param_value 235 #define is_xmit_lowat is_param_arr[5].icmp_param_value 236 #define is_recv_hiwat is_param_arr[6].icmp_param_value 237 #define is_max_buf is_param_arr[7].icmp_param_value 238 239 /* 240 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 241 * passed to icmp_wput. 242 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP 243 * protocol type placed in the message following the address. A T_BIND_ACK 244 * message is returned by ip_bind_v4/v6. 
245 */ 246 static void 247 icmp_bind(queue_t *q, mblk_t *mp) 248 { 249 sin_t *sin; 250 sin6_t *sin6; 251 mblk_t *mp1; 252 struct T_bind_req *tbr; 253 icmp_t *icmp; 254 conn_t *connp = Q_TO_CONN(q); 255 256 icmp = connp->conn_icmp; 257 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 258 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 259 "icmp_bind: bad req, len %u", 260 (uint_t)(mp->b_wptr - mp->b_rptr)); 261 icmp_err_ack(q, mp, TPROTO, 0); 262 return; 263 } 264 if (icmp->icmp_state != TS_UNBND) { 265 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 266 "icmp_bind: bad state, %d", icmp->icmp_state); 267 icmp_err_ack(q, mp, TOUTSTATE, 0); 268 return; 269 } 270 /* 271 * Reallocate the message to make sure we have enough room for an 272 * address and the protocol type. 273 */ 274 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 275 if (!mp1) { 276 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 277 return; 278 } 279 mp = mp1; 280 tbr = (struct T_bind_req *)mp->b_rptr; 281 switch (tbr->ADDR_length) { 282 case 0: /* Generic request */ 283 tbr->ADDR_offset = sizeof (struct T_bind_req); 284 if (icmp->icmp_family == AF_INET) { 285 tbr->ADDR_length = sizeof (sin_t); 286 sin = (sin_t *)&tbr[1]; 287 *sin = sin_null; 288 sin->sin_family = AF_INET; 289 mp->b_wptr = (uchar_t *)&sin[1]; 290 } else { 291 ASSERT(icmp->icmp_family == AF_INET6); 292 tbr->ADDR_length = sizeof (sin6_t); 293 sin6 = (sin6_t *)&tbr[1]; 294 *sin6 = sin6_null; 295 sin6->sin6_family = AF_INET6; 296 mp->b_wptr = (uchar_t *)&sin6[1]; 297 } 298 break; 299 case sizeof (sin_t): /* Complete IP address */ 300 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 301 sizeof (sin_t)); 302 if (sin == NULL || !OK_32PTR((char *)sin)) { 303 icmp_err_ack(q, mp, TSYSERR, EINVAL); 304 return; 305 } 306 if (icmp->icmp_family != AF_INET || 307 sin->sin_family != AF_INET) { 308 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 309 return; 310 } 311 break; 312 case sizeof (sin6_t): /* Complete IP address */ 313 sin6 = (sin6_t 
*)mi_offset_param(mp, tbr->ADDR_offset, 314 sizeof (sin6_t)); 315 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 316 icmp_err_ack(q, mp, TSYSERR, EINVAL); 317 return; 318 } 319 if (icmp->icmp_family != AF_INET6 || 320 sin6->sin6_family != AF_INET6) { 321 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 322 return; 323 } 324 /* No support for mapped addresses on raw sockets */ 325 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 326 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 327 return; 328 } 329 break; 330 default: 331 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 332 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 333 icmp_err_ack(q, mp, TBADADDR, 0); 334 return; 335 } 336 337 /* 338 * The state must be TS_UNBND. TPI mandates that users must send 339 * TPI primitives only 1 at a time and wait for the response before 340 * sending the next primitive. 341 */ 342 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 343 if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) { 344 rw_exit(&icmp->icmp_rwlock); 345 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 346 "icmp_bind: bad state, %d", icmp->icmp_state); 347 icmp_err_ack(q, mp, TOUTSTATE, 0); 348 return; 349 } 350 351 icmp->icmp_pending_op = tbr->PRIM_type; 352 353 /* 354 * Copy the source address into our icmp structure. This address 355 * may still be zero; if so, ip will fill in the correct address 356 * each time an outbound packet is passed to it. 357 * If we are binding to a broadcast or multicast address then 358 * icmp_bind_ack will clear the source address when it receives 359 * the T_BIND_ACK. 
360 */ 361 icmp->icmp_state = TS_IDLE; 362 363 if (icmp->icmp_family == AF_INET) { 364 ASSERT(sin != NULL); 365 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 366 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 367 &icmp->icmp_v6src); 368 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 369 icmp->icmp_ip_snd_options_len; 370 icmp->icmp_bound_v6src = icmp->icmp_v6src; 371 } else { 372 int error; 373 374 ASSERT(sin6 != NULL); 375 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 376 icmp->icmp_v6src = sin6->sin6_addr; 377 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 378 icmp->icmp_bound_v6src = icmp->icmp_v6src; 379 380 /* Rebuild the header template */ 381 error = icmp_build_hdrs(icmp); 382 if (error != 0) { 383 icmp->icmp_pending_op = -1; 384 rw_exit(&icmp->icmp_rwlock); 385 icmp_err_ack(q, mp, TSYSERR, error); 386 return; 387 } 388 } 389 /* 390 * Place protocol type in the O_T_BIND_REQ/T_BIND_REQ following 391 * the address. 392 */ 393 *mp->b_wptr++ = icmp->icmp_proto; 394 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 395 /* 396 * Append a request for an IRE if src not 0 (INADDR_ANY) 397 */ 398 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 399 if (!mp->b_cont) { 400 icmp->icmp_pending_op = -1; 401 rw_exit(&icmp->icmp_rwlock); 402 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 403 return; 404 } 405 mp->b_cont->b_wptr += sizeof (ire_t); 406 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 407 } 408 rw_exit(&icmp->icmp_rwlock); 409 410 /* Pass the O_T_BIND_REQ/T_BIND_REQ to ip. */ 411 if (icmp->icmp_family == AF_INET6) 412 mp = ip_bind_v6(q, mp, connp, NULL); 413 else 414 mp = ip_bind_v4(q, mp, connp); 415 416 /* The above return NULL if the bind needs to be deferred */ 417 if (mp != NULL) 418 icmp_bind_result(connp, mp); 419 else 420 CONN_INC_REF(connp); 421 } 422 423 /* 424 * Send message to IP to just bind to the protocol. 
425 */ 426 static void 427 icmp_bind_proto(queue_t *q) 428 { 429 mblk_t *mp; 430 struct T_bind_req *tbr; 431 icmp_t *icmp; 432 conn_t *connp = Q_TO_CONN(q); 433 434 icmp = connp->conn_icmp; 435 436 mp = allocb(sizeof (struct T_bind_req) + sizeof (sin6_t) + 1, 437 BPRI_MED); 438 if (!mp) { 439 return; 440 } 441 mp->b_datap->db_type = M_PROTO; 442 tbr = (struct T_bind_req *)mp->b_rptr; 443 tbr->PRIM_type = O_T_BIND_REQ; /* change to T_BIND_REQ ? */ 444 tbr->ADDR_offset = sizeof (struct T_bind_req); 445 446 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 447 if (icmp->icmp_ipversion == IPV4_VERSION) { 448 sin_t *sin; 449 450 tbr->ADDR_length = sizeof (sin_t); 451 sin = (sin_t *)&tbr[1]; 452 *sin = sin_null; 453 sin->sin_family = AF_INET; 454 mp->b_wptr = (uchar_t *)&sin[1]; 455 } else { 456 sin6_t *sin6; 457 458 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 459 tbr->ADDR_length = sizeof (sin6_t); 460 sin6 = (sin6_t *)&tbr[1]; 461 *sin6 = sin6_null; 462 sin6->sin6_family = AF_INET6; 463 mp->b_wptr = (uchar_t *)&sin6[1]; 464 } 465 466 /* Place protocol type in the O_T_BIND_REQ following the address. */ 467 *mp->b_wptr++ = icmp->icmp_proto; 468 rw_exit(&icmp->icmp_rwlock); 469 470 /* Pass the O_T_BIND_REQ to ip. */ 471 if (icmp->icmp_family == AF_INET6) 472 mp = ip_bind_v6(q, mp, connp, NULL); 473 else 474 mp = ip_bind_v4(q, mp, connp); 475 476 /* The above return NULL if the bind needs to be deferred */ 477 if (mp != NULL) 478 icmp_bind_result(connp, mp); 479 else 480 CONN_INC_REF(connp); 481 } 482 483 /* 484 * This is called from ip_wput_nondata to handle the results of a 485 * deferred RAWIP bind. It is called once the bind has been completed. 486 */ 487 void 488 rawip_resume_bind(conn_t *connp, mblk_t *mp) 489 { 490 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 491 492 icmp_bind_result(connp, mp); 493 494 CONN_OPER_PENDING_DONE(connp); 495 } 496 497 /* 498 * This routine handles each T_CONN_REQ message passed to icmp. 
It 499 * associates a default destination address with the stream. 500 * 501 * This routine sends down a T_BIND_REQ to IP with the following mblks: 502 * T_BIND_REQ - specifying local and remote address. 503 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 504 * T_OK_ACK - for the T_CONN_REQ 505 * T_CONN_CON - to keep the TPI user happy 506 * 507 * The connect completes in icmp_bind_result. 508 * When a T_BIND_ACK is received information is extracted from the IRE 509 * and the two appended messages are sent to the TPI user. 510 * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 511 * convert it to an error ack for the appropriate primitive. 512 */ 513 static void 514 icmp_connect(queue_t *q, mblk_t *mp) 515 { 516 sin_t *sin; 517 sin6_t *sin6; 518 mblk_t *mp1, *mp2; 519 struct T_conn_req *tcr; 520 icmp_t *icmp; 521 ipaddr_t v4dst; 522 in6_addr_t v6dst; 523 uint32_t flowinfo; 524 conn_t *connp = Q_TO_CONN(q); 525 526 icmp = connp->conn_icmp; 527 tcr = (struct T_conn_req *)mp->b_rptr; 528 /* Sanity checks */ 529 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 530 icmp_err_ack(q, mp, TPROTO, 0); 531 return; 532 } 533 534 if (tcr->OPT_length != 0) { 535 icmp_err_ack(q, mp, TBADOPT, 0); 536 return; 537 } 538 539 switch (tcr->DEST_length) { 540 default: 541 icmp_err_ack(q, mp, TBADADDR, 0); 542 return; 543 544 case sizeof (sin_t): 545 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 546 sizeof (sin_t)); 547 if (sin == NULL || !OK_32PTR((char *)sin)) { 548 icmp_err_ack(q, mp, TSYSERR, EINVAL); 549 return; 550 } 551 if (icmp->icmp_family != AF_INET || 552 sin->sin_family != AF_INET) { 553 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 554 return; 555 } 556 v4dst = sin->sin_addr.s_addr; 557 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 558 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 559 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 560 icmp->icmp_ip_snd_options_len; 561 break; 562 563 case sizeof (sin6_t): 564 sin6 = 
(sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 565 sizeof (sin6_t)); 566 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 567 icmp_err_ack(q, mp, TSYSERR, EINVAL); 568 return; 569 } 570 if (icmp->icmp_family != AF_INET6 || 571 sin6->sin6_family != AF_INET6) { 572 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 573 return; 574 } 575 /* No support for mapped addresses on raw sockets */ 576 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 577 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 578 return; 579 } 580 v6dst = sin6->sin6_addr; 581 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 582 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 583 flowinfo = sin6->sin6_flowinfo; 584 break; 585 } 586 if (icmp->icmp_ipversion == IPV4_VERSION) { 587 /* 588 * Interpret a zero destination to mean loopback. 589 * Update the T_CONN_REQ (sin/sin6) since it is used to 590 * generate the T_CONN_CON. 591 */ 592 if (v4dst == INADDR_ANY) { 593 v4dst = htonl(INADDR_LOOPBACK); 594 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 595 if (icmp->icmp_family == AF_INET) { 596 sin->sin_addr.s_addr = v4dst; 597 } else { 598 sin6->sin6_addr = v6dst; 599 } 600 } 601 icmp->icmp_v6dst = v6dst; 602 icmp->icmp_flowinfo = 0; 603 604 /* 605 * If the destination address is multicast and 606 * an outgoing multicast interface has been set, 607 * use the address of that interface as our 608 * source address if no source address has been set. 609 */ 610 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 611 CLASSD(v4dst) && 612 icmp->icmp_multicast_if_addr != INADDR_ANY) { 613 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 614 &icmp->icmp_v6src); 615 } 616 } else { 617 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 618 /* 619 * Interpret a zero destination to mean loopback. 620 * Update the T_CONN_REQ (sin/sin6) since it is used to 621 * generate the T_CONN_CON. 
622 */ 623 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 624 v6dst = ipv6_loopback; 625 sin6->sin6_addr = v6dst; 626 } 627 icmp->icmp_v6dst = v6dst; 628 icmp->icmp_flowinfo = flowinfo; 629 /* 630 * If the destination address is multicast and 631 * an outgoing multicast interface has been set, 632 * then the ip bind logic will pick the correct source 633 * address (i.e. matching the outgoing multicast interface). 634 */ 635 } 636 637 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 638 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 639 rw_exit(&icmp->icmp_rwlock); 640 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 641 "icmp_connect: bad state, %d", icmp->icmp_state); 642 icmp_err_ack(q, mp, TOUTSTATE, 0); 643 return; 644 } 645 icmp->icmp_pending_op = T_CONN_REQ; 646 647 if (icmp->icmp_state == TS_DATA_XFER) { 648 /* Already connected - clear out state */ 649 icmp->icmp_v6src = icmp->icmp_bound_v6src; 650 icmp->icmp_state = TS_IDLE; 651 } 652 653 /* 654 * Send down bind to IP to verify that there is a route 655 * and to determine the source address. 656 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 657 */ 658 if (icmp->icmp_family == AF_INET) { 659 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa_conn_t), 660 sin->sin_port); 661 } else { 662 ASSERT(icmp->icmp_family == AF_INET6); 663 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa6_conn_t), 664 sin6->sin6_port); 665 } 666 if (mp1 == NULL) { 667 icmp->icmp_pending_op = -1; 668 rw_exit(&icmp->icmp_rwlock); 669 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 670 return; 671 } 672 673 /* 674 * We also have to send a connection confirmation to 675 * keep TLI happy. Prepare it for icmp_bind_result. 
676 */ 677 if (icmp->icmp_family == AF_INET) { 678 mp2 = mi_tpi_conn_con(NULL, (char *)sin, sizeof (*sin), NULL, 679 0); 680 } else { 681 ASSERT(icmp->icmp_family == AF_INET6); 682 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, sizeof (*sin6), NULL, 683 0); 684 } 685 if (mp2 == NULL) { 686 freemsg(mp1); 687 icmp->icmp_pending_op = -1; 688 rw_exit(&icmp->icmp_rwlock); 689 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 690 return; 691 } 692 693 mp = mi_tpi_ok_ack_alloc(mp); 694 if (mp == NULL) { 695 /* Unable to reuse the T_CONN_REQ for the ack. */ 696 freemsg(mp2); 697 icmp->icmp_pending_op = -1; 698 rw_exit(&icmp->icmp_rwlock); 699 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 700 return; 701 } 702 703 icmp->icmp_state = TS_DATA_XFER; 704 rw_exit(&icmp->icmp_rwlock); 705 706 /* Hang onto the T_OK_ACK and T_CONN_CON for later. */ 707 linkb(mp1, mp); 708 linkb(mp1, mp2); 709 710 mblk_setcred(mp1, connp->conn_cred); 711 if (icmp->icmp_family == AF_INET) 712 mp1 = ip_bind_v4(q, mp1, connp); 713 else 714 mp1 = ip_bind_v6(q, mp1, connp, NULL); 715 716 /* The above return NULL if the bind needs to be deferred */ 717 if (mp1 != NULL) 718 icmp_bind_result(connp, mp1); 719 else 720 CONN_INC_REF(connp); 721 } 722 723 static void 724 icmp_close_free(conn_t *connp) 725 { 726 icmp_t *icmp = connp->conn_icmp; 727 728 /* If there are any options associated with the stream, free them. 
*/ 729 if (icmp->icmp_ip_snd_options != NULL) { 730 mi_free((char *)icmp->icmp_ip_snd_options); 731 icmp->icmp_ip_snd_options = NULL; 732 icmp->icmp_ip_snd_options_len = 0; 733 } 734 735 if (icmp->icmp_filter != NULL) { 736 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 737 icmp->icmp_filter = NULL; 738 } 739 /* Free memory associated with sticky options */ 740 if (icmp->icmp_sticky_hdrs_len != 0) { 741 kmem_free(icmp->icmp_sticky_hdrs, 742 icmp->icmp_sticky_hdrs_len); 743 icmp->icmp_sticky_hdrs = NULL; 744 icmp->icmp_sticky_hdrs_len = 0; 745 } 746 ip6_pkt_free(&icmp->icmp_sticky_ipp); 747 748 /* 749 * Clear any fields which the kmem_cache constructor clears. 750 * Only icmp_connp needs to be preserved. 751 * TBD: We should make this more efficient to avoid clearing 752 * everything. 753 */ 754 ASSERT(icmp->icmp_connp == connp); 755 bzero(icmp, sizeof (icmp_t)); 756 icmp->icmp_connp = connp; 757 } 758 759 static int 760 icmp_close(queue_t *q) 761 { 762 conn_t *connp = (conn_t *)q->q_ptr; 763 764 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 765 766 ip_quiesce_conn(connp); 767 768 qprocsoff(connp->conn_rq); 769 770 icmp_close_free(connp); 771 772 /* 773 * Now we are truly single threaded on this stream, and can 774 * delete the things hanging off the connp, and finally the connp. 775 * We removed this connp from the fanout list, it cannot be 776 * accessed thru the fanouts, and we already waited for the 777 * conn_ref to drop to 0. We are already in close, so 778 * there cannot be any other thread from the top. qprocsoff 779 * has completed, and service has completed or won't run in 780 * future. 781 */ 782 ASSERT(connp->conn_ref == 1); 783 784 inet_minor_free(ip_minor_arena, connp->conn_dev); 785 786 connp->conn_ref--; 787 ipcl_conn_destroy(connp); 788 789 q->q_ptr = WR(q)->q_ptr = NULL; 790 return (0); 791 } 792 793 /* 794 * This routine handles each T_DISCON_REQ message passed to icmp 795 * as an indicating that ICMP is no longer connected. 
This results 796 * in sending a T_BIND_REQ to IP to restore the binding to just 797 * the local address. 798 * 799 * This routine sends down a T_BIND_REQ to IP with the following mblks: 800 * T_BIND_REQ - specifying just the local address. 801 * T_OK_ACK - for the T_DISCON_REQ 802 * 803 * The disconnect completes in icmp_bind_result. 804 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 805 * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 806 * convert it to an error ack for the appropriate primitive. 807 */ 808 static void 809 icmp_disconnect(queue_t *q, mblk_t *mp) 810 { 811 icmp_t *icmp; 812 mblk_t *mp1; 813 conn_t *connp = Q_TO_CONN(q); 814 815 icmp = connp->conn_icmp; 816 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 817 if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) { 818 rw_exit(&icmp->icmp_rwlock); 819 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 820 "icmp_disconnect: bad state, %d", icmp->icmp_state); 821 icmp_err_ack(q, mp, TOUTSTATE, 0); 822 return; 823 } 824 icmp->icmp_pending_op = T_DISCON_REQ; 825 icmp->icmp_v6src = icmp->icmp_bound_v6src; 826 icmp->icmp_state = TS_IDLE; 827 828 /* 829 * Send down bind to IP to remove the full binding and revert 830 * to the local address binding. 831 */ 832 if (icmp->icmp_family == AF_INET) { 833 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin_t), 0); 834 } else { 835 ASSERT(icmp->icmp_family == AF_INET6); 836 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin6_t), 0); 837 } 838 if (mp1 == NULL) { 839 icmp->icmp_pending_op = -1; 840 rw_exit(&icmp->icmp_rwlock); 841 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 842 return; 843 } 844 mp = mi_tpi_ok_ack_alloc(mp); 845 if (mp == NULL) { 846 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 847 icmp->icmp_pending_op = -1; 848 rw_exit(&icmp->icmp_rwlock); 849 icmp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 850 return; 851 } 852 853 if (icmp->icmp_family == AF_INET6) { 854 int error; 855 856 /* Rebuild the header template */ 857 error = icmp_build_hdrs(icmp); 858 if (error != 0) { 859 icmp->icmp_pending_op = -1; 860 rw_exit(&icmp->icmp_rwlock); 861 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 862 freemsg(mp1); 863 return; 864 } 865 } 866 867 rw_exit(&icmp->icmp_rwlock); 868 /* Append the T_OK_ACK to the T_BIND_REQ for icmp_bind_result */ 869 linkb(mp1, mp); 870 871 if (icmp->icmp_family == AF_INET6) 872 mp1 = ip_bind_v6(q, mp1, connp, NULL); 873 else 874 mp1 = ip_bind_v4(q, mp1, connp); 875 876 /* The above return NULL if the bind needs to be deferred */ 877 if (mp1 != NULL) 878 icmp_bind_result(connp, mp1); 879 else 880 CONN_INC_REF(connp); 881 } 882 883 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 884 static void 885 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 886 { 887 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 888 qreply(q, mp); 889 } 890 891 /* Shorthand to generate and send TPI error acks to our client */ 892 static void 893 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 894 t_scalar_t t_error, int sys_error) 895 { 896 struct T_error_ack *teackp; 897 898 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 899 M_PCPROTO, T_ERROR_ACK)) != NULL) { 900 teackp = (struct T_error_ack *)mp->b_rptr; 901 teackp->ERROR_prim = primitive; 902 teackp->TLI_error = t_error; 903 teackp->UNIX_error = sys_error; 904 qreply(q, mp); 905 } 906 } 907 908 /* 909 * icmp_icmp_error is called by icmp_input to process ICMP 910 * messages passed up by IP. 911 * Generates the appropriate T_UDERROR_IND for permanent 912 * (non-transient) errors. 913 * Assumes that IP has pulled up everything up to and including 914 * the ICMP header. 
915 */ 916 static void 917 icmp_icmp_error(queue_t *q, mblk_t *mp) 918 { 919 icmph_t *icmph; 920 ipha_t *ipha; 921 int iph_hdr_length; 922 sin_t sin; 923 sin6_t sin6; 924 mblk_t *mp1; 925 int error = 0; 926 icmp_t *icmp = Q_TO_ICMP(q); 927 928 ipha = (ipha_t *)mp->b_rptr; 929 930 ASSERT(OK_32PTR(mp->b_rptr)); 931 932 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 933 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 934 icmp_icmp_error_ipv6(q, mp); 935 return; 936 } 937 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 938 939 /* Skip past the outer IP and ICMP headers */ 940 iph_hdr_length = IPH_HDR_LENGTH(ipha); 941 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 942 ipha = (ipha_t *)&icmph[1]; 943 iph_hdr_length = IPH_HDR_LENGTH(ipha); 944 945 switch (icmph->icmph_type) { 946 case ICMP_DEST_UNREACHABLE: 947 switch (icmph->icmph_code) { 948 case ICMP_FRAGMENTATION_NEEDED: 949 /* 950 * IP has already adjusted the path MTU. 951 */ 952 break; 953 case ICMP_PORT_UNREACHABLE: 954 case ICMP_PROTOCOL_UNREACHABLE: 955 error = ECONNREFUSED; 956 break; 957 default: 958 /* Transient errors */ 959 break; 960 } 961 break; 962 default: 963 /* Transient errors */ 964 break; 965 } 966 if (error == 0) { 967 freemsg(mp); 968 return; 969 } 970 971 /* 972 * Deliver T_UDERROR_IND when the application has asked for it. 973 * The socket layer enables this automatically when connected. 
974 */ 975 if (!icmp->icmp_dgram_errind) { 976 freemsg(mp); 977 return; 978 } 979 980 switch (icmp->icmp_family) { 981 case AF_INET: 982 sin = sin_null; 983 sin.sin_family = AF_INET; 984 sin.sin_addr.s_addr = ipha->ipha_dst; 985 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 986 error); 987 break; 988 case AF_INET6: 989 sin6 = sin6_null; 990 sin6.sin6_family = AF_INET6; 991 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 992 993 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 994 NULL, 0, error); 995 break; 996 } 997 if (mp1) 998 putnext(q, mp1); 999 freemsg(mp); 1000 } 1001 1002 /* 1003 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 1004 * for IPv6 packets. 1005 * Send permanent (non-transient) errors upstream. 1006 * Assumes that IP has pulled up all the extension headers as well 1007 * as the ICMPv6 header. 1008 */ 1009 static void 1010 icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 1011 { 1012 icmp6_t *icmp6; 1013 ip6_t *ip6h, *outer_ip6h; 1014 uint16_t iph_hdr_length; 1015 uint8_t *nexthdrp; 1016 sin6_t sin6; 1017 mblk_t *mp1; 1018 int error = 0; 1019 icmp_t *icmp = Q_TO_ICMP(q); 1020 1021 outer_ip6h = (ip6_t *)mp->b_rptr; 1022 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1023 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1024 else 1025 iph_hdr_length = IPV6_HDR_LEN; 1026 1027 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1028 ip6h = (ip6_t *)&icmp6[1]; 1029 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1030 freemsg(mp); 1031 return; 1032 } 1033 1034 switch (icmp6->icmp6_type) { 1035 case ICMP6_DST_UNREACH: 1036 switch (icmp6->icmp6_code) { 1037 case ICMP6_DST_UNREACH_NOPORT: 1038 error = ECONNREFUSED; 1039 break; 1040 case ICMP6_DST_UNREACH_ADMIN: 1041 case ICMP6_DST_UNREACH_NOROUTE: 1042 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1043 case ICMP6_DST_UNREACH_ADDR: 1044 /* Transient errors */ 1045 break; 1046 default: 1047 break; 1048 } 1049 break; 1050 case ICMP6_PACKET_TOO_BIG: { 
1051 struct T_unitdata_ind *tudi; 1052 struct T_opthdr *toh; 1053 size_t udi_size; 1054 mblk_t *newmp; 1055 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1056 sizeof (struct ip6_mtuinfo); 1057 sin6_t *sin6; 1058 struct ip6_mtuinfo *mtuinfo; 1059 1060 /* 1061 * If the application has requested to receive path mtu 1062 * information, send up an empty message containing an 1063 * IPV6_PATHMTU ancillary data item. 1064 */ 1065 if (!icmp->icmp_ipv6_recvpathmtu) 1066 break; 1067 1068 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1069 opt_length; 1070 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1071 BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors); 1072 break; 1073 } 1074 1075 /* 1076 * newmp->b_cont is left to NULL on purpose. This is an 1077 * empty message containing only ancillary data. 1078 */ 1079 newmp->b_datap->db_type = M_PROTO; 1080 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1081 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1082 tudi->PRIM_type = T_UNITDATA_IND; 1083 tudi->SRC_length = sizeof (sin6_t); 1084 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1085 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1086 tudi->OPT_length = opt_length; 1087 1088 sin6 = (sin6_t *)&tudi[1]; 1089 bzero(sin6, sizeof (sin6_t)); 1090 sin6->sin6_family = AF_INET6; 1091 sin6->sin6_addr = icmp->icmp_v6dst; 1092 1093 toh = (struct T_opthdr *)&sin6[1]; 1094 toh->level = IPPROTO_IPV6; 1095 toh->name = IPV6_PATHMTU; 1096 toh->len = opt_length; 1097 toh->status = 0; 1098 1099 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1100 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1101 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1102 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1103 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1104 /* 1105 * We've consumed everything we need from the original 1106 * message. Free it, then send our empty message. 
1107 */ 1108 freemsg(mp); 1109 putnext(q, newmp); 1110 return; 1111 } 1112 case ICMP6_TIME_EXCEEDED: 1113 /* Transient errors */ 1114 break; 1115 case ICMP6_PARAM_PROB: 1116 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1117 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1118 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1119 (uchar_t *)nexthdrp) { 1120 error = ECONNREFUSED; 1121 break; 1122 } 1123 break; 1124 } 1125 if (error == 0) { 1126 freemsg(mp); 1127 return; 1128 } 1129 1130 /* 1131 * Deliver T_UDERROR_IND when the application has asked for it. 1132 * The socket layer enables this automatically when connected. 1133 */ 1134 if (!icmp->icmp_dgram_errind) { 1135 freemsg(mp); 1136 return; 1137 } 1138 1139 sin6 = sin6_null; 1140 sin6.sin6_family = AF_INET6; 1141 sin6.sin6_addr = ip6h->ip6_dst; 1142 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1143 1144 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 1145 error); 1146 if (mp1) 1147 putnext(q, mp1); 1148 freemsg(mp); 1149 } 1150 1151 /* 1152 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1153 * The local address is filled in if endpoint is bound. The remote address 1154 * is filled in if remote address has been precified ("connected endpoint") 1155 * (The concept of connected CLTS sockets is alien to published TPI 1156 * but we support it anyway). 
1157 */ 1158 static void 1159 icmp_addr_req(queue_t *q, mblk_t *mp) 1160 { 1161 icmp_t *icmp = Q_TO_ICMP(q); 1162 mblk_t *ackmp; 1163 struct T_addr_ack *taa; 1164 1165 /* Make it large enough for worst case */ 1166 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1167 2 * sizeof (sin6_t), 1); 1168 if (ackmp == NULL) { 1169 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1170 return; 1171 } 1172 taa = (struct T_addr_ack *)ackmp->b_rptr; 1173 1174 bzero(taa, sizeof (struct T_addr_ack)); 1175 ackmp->b_wptr = (uchar_t *)&taa[1]; 1176 1177 taa->PRIM_type = T_ADDR_ACK; 1178 ackmp->b_datap->db_type = M_PCPROTO; 1179 rw_enter(&icmp->icmp_rwlock, RW_READER); 1180 /* 1181 * Note: Following code assumes 32 bit alignment of basic 1182 * data structures like sin_t and struct T_addr_ack. 1183 */ 1184 if (icmp->icmp_state != TS_UNBND) { 1185 /* 1186 * Fill in local address 1187 */ 1188 taa->LOCADDR_offset = sizeof (*taa); 1189 if (icmp->icmp_family == AF_INET) { 1190 sin_t *sin; 1191 1192 taa->LOCADDR_length = sizeof (sin_t); 1193 sin = (sin_t *)&taa[1]; 1194 /* Fill zeroes and then intialize non-zero fields */ 1195 *sin = sin_null; 1196 sin->sin_family = AF_INET; 1197 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1198 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1199 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1200 sin->sin_addr.s_addr); 1201 } else { 1202 /* 1203 * INADDR_ANY 1204 * icmp_v6src is not set, we might be bound to 1205 * broadcast/multicast. 
Use icmp_bound_v6src as 1206 * local address instead (that could 1207 * also still be INADDR_ANY) 1208 */ 1209 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1210 sin->sin_addr.s_addr); 1211 } 1212 ackmp->b_wptr = (uchar_t *)&sin[1]; 1213 } else { 1214 sin6_t *sin6; 1215 1216 ASSERT(icmp->icmp_family == AF_INET6); 1217 taa->LOCADDR_length = sizeof (sin6_t); 1218 sin6 = (sin6_t *)&taa[1]; 1219 /* Fill zeroes and then intialize non-zero fields */ 1220 *sin6 = sin6_null; 1221 sin6->sin6_family = AF_INET6; 1222 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1223 sin6->sin6_addr = icmp->icmp_v6src; 1224 } else { 1225 /* 1226 * UNSPECIFIED 1227 * icmp_v6src is not set, we might be bound to 1228 * broadcast/multicast. Use icmp_bound_v6src as 1229 * local address instead (that could 1230 * also still be UNSPECIFIED) 1231 */ 1232 sin6->sin6_addr = icmp->icmp_bound_v6src; 1233 } 1234 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1235 } 1236 } 1237 rw_exit(&icmp->icmp_rwlock); 1238 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1239 qreply(q, ackmp); 1240 } 1241 1242 static void 1243 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1244 { 1245 *tap = icmp_g_t_info_ack; 1246 1247 if (icmp->icmp_family == AF_INET6) 1248 tap->ADDR_size = sizeof (sin6_t); 1249 else 1250 tap->ADDR_size = sizeof (sin_t); 1251 tap->CURRENT_state = icmp->icmp_state; 1252 tap->OPT_size = icmp_max_optsize; 1253 } 1254 1255 /* 1256 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1257 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1258 * icmp_g_t_info_ack. The current state of the stream is copied from 1259 * icmp_state. 
1260 */ 1261 static void 1262 icmp_capability_req(queue_t *q, mblk_t *mp) 1263 { 1264 icmp_t *icmp = Q_TO_ICMP(q); 1265 t_uscalar_t cap_bits1; 1266 struct T_capability_ack *tcap; 1267 1268 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1269 1270 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1271 mp->b_datap->db_type, T_CAPABILITY_ACK); 1272 if (!mp) 1273 return; 1274 1275 tcap = (struct T_capability_ack *)mp->b_rptr; 1276 tcap->CAP_bits1 = 0; 1277 1278 if (cap_bits1 & TC1_INFO) { 1279 icmp_copy_info(&tcap->INFO_ack, icmp); 1280 tcap->CAP_bits1 |= TC1_INFO; 1281 } 1282 1283 qreply(q, mp); 1284 } 1285 1286 /* 1287 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1288 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1289 * The current state of the stream is copied from icmp_state. 1290 */ 1291 static void 1292 icmp_info_req(queue_t *q, mblk_t *mp) 1293 { 1294 icmp_t *icmp = Q_TO_ICMP(q); 1295 1296 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1297 T_INFO_ACK); 1298 if (!mp) 1299 return; 1300 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1301 qreply(q, mp); 1302 } 1303 1304 /* 1305 * IP recognizes seven kinds of bind requests: 1306 * 1307 * - A zero-length address binds only to the protocol number. 1308 * 1309 * - A 4-byte address is treated as a request to 1310 * validate that the address is a valid local IPv4 1311 * address, appropriate for an application to bind to. 1312 * IP does the verification, but does not make any note 1313 * of the address at this time. 1314 * 1315 * - A 16-byte address contains is treated as a request 1316 * to validate a local IPv6 address, as the 4-byte 1317 * address case above. 1318 * 1319 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1320 * use it for the inbound fanout of packets. 1321 * 1322 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1323 * use it for the inbound fanout of packets. 
1324 * 1325 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1326 * information consisting of local and remote addresses 1327 * and ports (unused for raw sockets). In this case, the addresses are both 1328 * validated as appropriate for this operation, and, if 1329 * so, the information is retained for use in the 1330 * inbound fanout. 1331 * 1332 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1333 * fanout information, like the 12-byte case above. 1334 * 1335 * IP will also fill in the IRE request mblk with information 1336 * regarding our peer. In all cases, we notify IP of our protocol 1337 * type by appending a single protocol byte to the bind request. 1338 */ 1339 static mblk_t * 1340 icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length, 1341 in_port_t fport) 1342 { 1343 char *cp; 1344 mblk_t *mp; 1345 struct T_bind_req *tbr; 1346 ipa_conn_t *ac; 1347 ipa6_conn_t *ac6; 1348 sin_t *sin; 1349 sin6_t *sin6; 1350 1351 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1352 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 1353 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1354 if (mp == NULL) 1355 return (NULL); 1356 mp->b_datap->db_type = M_PROTO; 1357 tbr = (struct T_bind_req *)mp->b_rptr; 1358 tbr->PRIM_type = bind_prim; 1359 tbr->ADDR_offset = sizeof (*tbr); 1360 tbr->CONIND_number = 0; 1361 tbr->ADDR_length = addr_length; 1362 cp = (char *)&tbr[1]; 1363 switch (addr_length) { 1364 case sizeof (ipa_conn_t): 1365 ASSERT(icmp->icmp_family == AF_INET); 1366 /* Append a request for an IRE */ 1367 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1368 if (mp->b_cont == NULL) { 1369 freemsg(mp); 1370 return (NULL); 1371 } 1372 mp->b_cont->b_wptr += sizeof (ire_t); 1373 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1374 1375 /* cp known to be 32 bit aligned */ 1376 ac = (ipa_conn_t *)cp; 1377 ac->ac_laddr = V4_PART_OF_V6(icmp->icmp_v6src); 1378 ac->ac_faddr = V4_PART_OF_V6(icmp->icmp_v6dst); 1379 
ac->ac_fport = fport; 1380 ac->ac_lport = 0; 1381 break; 1382 1383 case sizeof (ipa6_conn_t): 1384 ASSERT(icmp->icmp_family == AF_INET6); 1385 /* Append a request for an IRE */ 1386 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1387 if (mp->b_cont == NULL) { 1388 freemsg(mp); 1389 return (NULL); 1390 } 1391 mp->b_cont->b_wptr += sizeof (ire_t); 1392 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1393 1394 /* cp known to be 32 bit aligned */ 1395 ac6 = (ipa6_conn_t *)cp; 1396 ac6->ac6_laddr = icmp->icmp_v6src; 1397 ac6->ac6_faddr = icmp->icmp_v6dst; 1398 ac6->ac6_fport = fport; 1399 ac6->ac6_lport = 0; 1400 break; 1401 1402 case sizeof (sin_t): 1403 ASSERT(icmp->icmp_family == AF_INET); 1404 /* Append a request for an IRE */ 1405 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1406 if (!mp->b_cont) { 1407 freemsg(mp); 1408 return (NULL); 1409 } 1410 mp->b_cont->b_wptr += sizeof (ire_t); 1411 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1412 1413 sin = (sin_t *)cp; 1414 *sin = sin_null; 1415 sin->sin_family = AF_INET; 1416 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 1417 break; 1418 1419 case sizeof (sin6_t): 1420 ASSERT(icmp->icmp_family == AF_INET6); 1421 /* Append a request for an IRE */ 1422 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1423 if (!mp->b_cont) { 1424 freemsg(mp); 1425 return (NULL); 1426 } 1427 mp->b_cont->b_wptr += sizeof (ire_t); 1428 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1429 1430 sin6 = (sin6_t *)cp; 1431 *sin6 = sin6_null; 1432 sin6->sin6_family = AF_INET6; 1433 sin6->sin6_addr = icmp->icmp_bound_v6src; 1434 break; 1435 } 1436 /* Add protocol number to end */ 1437 cp[addr_length] = icmp->icmp_proto; 1438 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1439 return (mp); 1440 } 1441 1442 /* For /dev/icmp aka AF_INET open */ 1443 static int 1444 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1445 { 1446 return (icmp_open(q, devp, flag, sflag, credp, B_FALSE)); 1447 } 1448 1449 /* For 
/dev/icmp6 aka AF_INET6 open */ 1450 static int 1451 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1452 { 1453 return (icmp_open(q, devp, flag, sflag, credp, B_TRUE)); 1454 } 1455 1456 /* 1457 * This is the open routine for icmp. It allocates a icmp_t structure for 1458 * the stream and, on the first open of the module, creates an ND table. 1459 */ 1460 /*ARGSUSED2*/ 1461 static int 1462 icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1463 boolean_t isv6) 1464 { 1465 int err; 1466 icmp_t *icmp; 1467 conn_t *connp; 1468 dev_t conn_dev; 1469 zoneid_t zoneid; 1470 netstack_t *ns; 1471 icmp_stack_t *is; 1472 1473 /* If the stream is already open, return immediately. */ 1474 if (q->q_ptr != NULL) 1475 return (0); 1476 1477 if (sflag == MODOPEN) 1478 return (EINVAL); 1479 1480 ns = netstack_find_by_cred(credp); 1481 ASSERT(ns != NULL); 1482 is = ns->netstack_icmp; 1483 ASSERT(is != NULL); 1484 1485 /* 1486 * For exclusive stacks we set the zoneid to zero 1487 * to make ICMP operate as if in the global zone. 1488 */ 1489 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 1490 zoneid = GLOBAL_ZONEID; 1491 else 1492 zoneid = crgetzoneid(credp); 1493 1494 if ((conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) { 1495 netstack_rele(ns); 1496 return (EBUSY); 1497 } 1498 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1499 1500 connp = ipcl_conn_create(IPCL_RAWIPCONN, KM_SLEEP, ns); 1501 connp->conn_dev = conn_dev; 1502 icmp = connp->conn_icmp; 1503 1504 /* 1505 * ipcl_conn_create did a netstack_hold. Undo the hold that was 1506 * done by netstack_find_by_cred() 1507 */ 1508 netstack_rele(ns); 1509 1510 /* 1511 * Initialize the icmp_t structure for this stream. 
1512 */ 1513 q->q_ptr = connp; 1514 WR(q)->q_ptr = connp; 1515 connp->conn_rq = q; 1516 connp->conn_wq = WR(q); 1517 1518 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1519 ASSERT(connp->conn_ulp == IPPROTO_ICMP); 1520 ASSERT(connp->conn_icmp == icmp); 1521 ASSERT(icmp->icmp_connp == connp); 1522 1523 /* Set the initial state of the stream and the privilege status. */ 1524 icmp->icmp_state = TS_UNBND; 1525 if (isv6) { 1526 icmp->icmp_ipversion = IPV6_VERSION; 1527 icmp->icmp_family = AF_INET6; 1528 connp->conn_ulp = IPPROTO_ICMPV6; 1529 /* May be changed by a SO_PROTOTYPE socket option. */ 1530 icmp->icmp_proto = IPPROTO_ICMPV6; 1531 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1532 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1533 icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; 1534 connp->conn_af_isv6 = B_TRUE; 1535 connp->conn_flags |= IPCL_ISV6; 1536 } else { 1537 icmp->icmp_ipversion = IPV4_VERSION; 1538 icmp->icmp_family = AF_INET; 1539 /* May be changed by a SO_PROTOTYPE socket option. */ 1540 icmp->icmp_proto = IPPROTO_ICMP; 1541 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1542 icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; 1543 connp->conn_af_isv6 = B_FALSE; 1544 connp->conn_flags &= ~IPCL_ISV6; 1545 } 1546 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1547 icmp->icmp_pending_op = -1; 1548 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1549 connp->conn_zoneid = zoneid; 1550 1551 /* 1552 * If the caller has the process-wide flag set, then default to MAC 1553 * exempt mode. This allows read-down to unlabeled hosts. 
1554 */ 1555 if (getpflags(NET_MAC_AWARE, credp) != 0) 1556 icmp->icmp_mac_exempt = B_TRUE; 1557 1558 connp->conn_ulp_labeled = is_system_labeled(); 1559 1560 icmp->icmp_is = is; 1561 1562 q->q_hiwat = is->is_recv_hiwat; 1563 WR(q)->q_hiwat = is->is_xmit_hiwat; 1564 WR(q)->q_lowat = is->is_xmit_lowat; 1565 1566 connp->conn_recv = icmp_input; 1567 crhold(credp); 1568 connp->conn_cred = credp; 1569 1570 mutex_enter(&connp->conn_lock); 1571 connp->conn_state_flags &= ~CONN_INCIPIENT; 1572 mutex_exit(&connp->conn_lock); 1573 1574 qprocson(q); 1575 1576 if (icmp->icmp_family == AF_INET6) { 1577 /* Build initial header template for transmit */ 1578 if ((err = icmp_build_hdrs(icmp)) != 0) { 1579 rw_exit(&icmp->icmp_rwlock); 1580 qprocsoff(q); 1581 ipcl_conn_destroy(connp); 1582 return (err); 1583 } 1584 } 1585 rw_exit(&icmp->icmp_rwlock); 1586 1587 /* Set the Stream head write offset. */ 1588 (void) mi_set_sth_wroff(q, 1589 icmp->icmp_max_hdr_len + is->is_wroff_extra); 1590 (void) mi_set_sth_hiwat(q, q->q_hiwat); 1591 1592 return (0); 1593 } 1594 1595 /* 1596 * Which ICMP options OK to set through T_UNITDATA_REQ... 
1597 */ 1598 /* ARGSUSED */ 1599 static boolean_t 1600 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1601 { 1602 return (B_TRUE); 1603 } 1604 1605 /* 1606 * This routine gets default values of certain options whose default 1607 * values are maintained by protcol specific code 1608 */ 1609 /* ARGSUSED */ 1610 int 1611 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1612 { 1613 icmp_t *icmp = Q_TO_ICMP(q); 1614 icmp_stack_t *is = icmp->icmp_is; 1615 int *i1 = (int *)ptr; 1616 1617 switch (level) { 1618 case IPPROTO_IP: 1619 switch (name) { 1620 case IP_MULTICAST_TTL: 1621 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1622 return (sizeof (uchar_t)); 1623 case IP_MULTICAST_LOOP: 1624 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1625 return (sizeof (uchar_t)); 1626 } 1627 break; 1628 case IPPROTO_IPV6: 1629 switch (name) { 1630 case IPV6_MULTICAST_HOPS: 1631 *i1 = IP_DEFAULT_MULTICAST_TTL; 1632 return (sizeof (int)); 1633 case IPV6_MULTICAST_LOOP: 1634 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1635 return (sizeof (int)); 1636 case IPV6_UNICAST_HOPS: 1637 *i1 = is->is_ipv6_hoplimit; 1638 return (sizeof (int)); 1639 } 1640 break; 1641 case IPPROTO_ICMPV6: 1642 switch (name) { 1643 case ICMP6_FILTER: 1644 /* Make it look like "pass all" */ 1645 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1646 return (sizeof (icmp6_filter_t)); 1647 } 1648 break; 1649 } 1650 return (-1); 1651 } 1652 1653 /* 1654 * This routine retrieves the current status of socket options. 1655 * It returns the size of the option retrieved. 
1656 */ 1657 int 1658 icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr) 1659 { 1660 conn_t *connp = Q_TO_CONN(q); 1661 icmp_t *icmp = connp->conn_icmp; 1662 icmp_stack_t *is = icmp->icmp_is; 1663 int *i1 = (int *)ptr; 1664 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1665 1666 switch (level) { 1667 case SOL_SOCKET: 1668 switch (name) { 1669 case SO_DEBUG: 1670 *i1 = icmp->icmp_debug; 1671 break; 1672 case SO_TYPE: 1673 *i1 = SOCK_RAW; 1674 break; 1675 case SO_PROTOTYPE: 1676 *i1 = icmp->icmp_proto; 1677 break; 1678 case SO_REUSEADDR: 1679 *i1 = icmp->icmp_reuseaddr; 1680 break; 1681 1682 /* 1683 * The following three items are available here, 1684 * but are only meaningful to IP. 1685 */ 1686 case SO_DONTROUTE: 1687 *i1 = icmp->icmp_dontroute; 1688 break; 1689 case SO_USELOOPBACK: 1690 *i1 = icmp->icmp_useloopback; 1691 break; 1692 case SO_BROADCAST: 1693 *i1 = icmp->icmp_broadcast; 1694 break; 1695 1696 case SO_SNDBUF: 1697 ASSERT(q->q_hiwat <= INT_MAX); 1698 *i1 = (int)q->q_hiwat; 1699 break; 1700 case SO_RCVBUF: 1701 ASSERT(RD(q)->q_hiwat <= INT_MAX); 1702 *i1 = (int)RD(q)->q_hiwat; 1703 break; 1704 case SO_DGRAM_ERRIND: 1705 *i1 = icmp->icmp_dgram_errind; 1706 break; 1707 case SO_TIMESTAMP: 1708 *i1 = icmp->icmp_timestamp; 1709 break; 1710 case SO_MAC_EXEMPT: 1711 *i1 = icmp->icmp_mac_exempt; 1712 break; 1713 case SO_DOMAIN: 1714 *i1 = icmp->icmp_family; 1715 break; 1716 1717 /* 1718 * Following four not meaningful for icmp 1719 * Action is same as "default" to which we fallthrough 1720 * so we keep them in comments. 1721 * case SO_LINGER: 1722 * case SO_KEEPALIVE: 1723 * case SO_OOBINLINE: 1724 * case SO_ALLZONES: 1725 */ 1726 default: 1727 return (-1); 1728 } 1729 break; 1730 case IPPROTO_IP: 1731 /* 1732 * Only allow IPv4 option processing on IPv4 sockets. 
1733 */ 1734 if (icmp->icmp_family != AF_INET) 1735 return (-1); 1736 1737 switch (name) { 1738 case IP_OPTIONS: 1739 case T_IP_OPTIONS: 1740 /* Options are passed up with each packet */ 1741 return (0); 1742 case IP_HDRINCL: 1743 *i1 = (int)icmp->icmp_hdrincl; 1744 break; 1745 case IP_TOS: 1746 case T_IP_TOS: 1747 *i1 = (int)icmp->icmp_type_of_service; 1748 break; 1749 case IP_TTL: 1750 *i1 = (int)icmp->icmp_ttl; 1751 break; 1752 case IP_MULTICAST_IF: 1753 /* 0 address if not set */ 1754 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1755 return (sizeof (ipaddr_t)); 1756 case IP_MULTICAST_TTL: 1757 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1758 return (sizeof (uchar_t)); 1759 case IP_MULTICAST_LOOP: 1760 *ptr = connp->conn_multicast_loop; 1761 return (sizeof (uint8_t)); 1762 case IP_BOUND_IF: 1763 /* Zero if not set */ 1764 *i1 = icmp->icmp_bound_if; 1765 break; /* goto sizeof (int) option return */ 1766 case IP_UNSPEC_SRC: 1767 *ptr = icmp->icmp_unspec_source; 1768 break; /* goto sizeof (int) option return */ 1769 case IP_RECVIF: 1770 *ptr = icmp->icmp_recvif; 1771 break; /* goto sizeof (int) option return */ 1772 case IP_RECVPKTINFO: 1773 /* 1774 * This also handles IP_PKTINFO. 1775 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1776 * Differentiation is based on the size of the argument 1777 * passed in. 1778 * This option is handled in IP which will return an 1779 * error for IP_PKTINFO as it's not supported as a 1780 * sticky option. 1781 */ 1782 return (-EINVAL); 1783 /* 1784 * Cannot "get" the value of following options 1785 * at this level. Action is same as "default" to 1786 * which we fallthrough so we keep them in comments. 
1787 * 1788 * case IP_ADD_MEMBERSHIP: 1789 * case IP_DROP_MEMBERSHIP: 1790 * case IP_BLOCK_SOURCE: 1791 * case IP_UNBLOCK_SOURCE: 1792 * case IP_ADD_SOURCE_MEMBERSHIP: 1793 * case IP_DROP_SOURCE_MEMBERSHIP: 1794 * case MCAST_JOIN_GROUP: 1795 * case MCAST_LEAVE_GROUP: 1796 * case MCAST_BLOCK_SOURCE: 1797 * case MCAST_UNBLOCK_SOURCE: 1798 * case MCAST_JOIN_SOURCE_GROUP: 1799 * case MCAST_LEAVE_SOURCE_GROUP: 1800 * case MRT_INIT: 1801 * case MRT_DONE: 1802 * case MRT_ADD_VIF: 1803 * case MRT_DEL_VIF: 1804 * case MRT_ADD_MFC: 1805 * case MRT_DEL_MFC: 1806 * case MRT_VERSION: 1807 * case MRT_ASSERT: 1808 * case IP_SEC_OPT: 1809 * case IP_DONTFAILOVER_IF: 1810 * case IP_NEXTHOP: 1811 */ 1812 default: 1813 return (-1); 1814 } 1815 break; 1816 case IPPROTO_IPV6: 1817 /* 1818 * Only allow IPv6 option processing on native IPv6 sockets. 1819 */ 1820 if (icmp->icmp_family != AF_INET6) 1821 return (-1); 1822 switch (name) { 1823 case IPV6_UNICAST_HOPS: 1824 *i1 = (unsigned int)icmp->icmp_ttl; 1825 break; 1826 case IPV6_MULTICAST_IF: 1827 /* 0 index if not set */ 1828 *i1 = icmp->icmp_multicast_if_index; 1829 break; 1830 case IPV6_MULTICAST_HOPS: 1831 *i1 = icmp->icmp_multicast_ttl; 1832 break; 1833 case IPV6_MULTICAST_LOOP: 1834 *i1 = connp->conn_multicast_loop; 1835 break; 1836 case IPV6_BOUND_IF: 1837 /* Zero if not set */ 1838 *i1 = icmp->icmp_bound_if; 1839 break; 1840 case IPV6_UNSPEC_SRC: 1841 *i1 = icmp->icmp_unspec_source; 1842 break; 1843 case IPV6_CHECKSUM: 1844 /* 1845 * Return offset or -1 if no checksum offset. 
1846 * Does not apply to IPPROTO_ICMPV6 1847 */ 1848 if (icmp->icmp_proto == IPPROTO_ICMPV6) 1849 return (-1); 1850 1851 if (icmp->icmp_raw_checksum) { 1852 *i1 = icmp->icmp_checksum_off; 1853 } else { 1854 *i1 = -1; 1855 } 1856 break; 1857 case IPV6_JOIN_GROUP: 1858 case IPV6_LEAVE_GROUP: 1859 case MCAST_JOIN_GROUP: 1860 case MCAST_LEAVE_GROUP: 1861 case MCAST_BLOCK_SOURCE: 1862 case MCAST_UNBLOCK_SOURCE: 1863 case MCAST_JOIN_SOURCE_GROUP: 1864 case MCAST_LEAVE_SOURCE_GROUP: 1865 /* cannot "get" the value for these */ 1866 return (-1); 1867 case IPV6_RECVPKTINFO: 1868 *i1 = icmp->icmp_ip_recvpktinfo; 1869 break; 1870 case IPV6_RECVTCLASS: 1871 *i1 = icmp->icmp_ipv6_recvtclass; 1872 break; 1873 case IPV6_RECVPATHMTU: 1874 *i1 = icmp->icmp_ipv6_recvpathmtu; 1875 break; 1876 case IPV6_V6ONLY: 1877 *i1 = 1; 1878 break; 1879 case IPV6_RECVHOPLIMIT: 1880 *i1 = icmp->icmp_ipv6_recvhoplimit; 1881 break; 1882 case IPV6_RECVHOPOPTS: 1883 *i1 = icmp->icmp_ipv6_recvhopopts; 1884 break; 1885 case IPV6_RECVDSTOPTS: 1886 *i1 = icmp->icmp_ipv6_recvdstopts; 1887 break; 1888 case _OLD_IPV6_RECVDSTOPTS: 1889 *i1 = icmp->icmp_old_ipv6_recvdstopts; 1890 break; 1891 case IPV6_RECVRTHDRDSTOPTS: 1892 *i1 = icmp->icmp_ipv6_recvrtdstopts; 1893 break; 1894 case IPV6_RECVRTHDR: 1895 *i1 = icmp->icmp_ipv6_recvrthdr; 1896 break; 1897 case IPV6_PKTINFO: { 1898 /* XXX assumes that caller has room for max size! 
*/ 1899 struct in6_pktinfo *pkti; 1900 1901 pkti = (struct in6_pktinfo *)ptr; 1902 if (ipp->ipp_fields & IPPF_IFINDEX) 1903 pkti->ipi6_ifindex = ipp->ipp_ifindex; 1904 else 1905 pkti->ipi6_ifindex = 0; 1906 if (ipp->ipp_fields & IPPF_ADDR) 1907 pkti->ipi6_addr = ipp->ipp_addr; 1908 else 1909 pkti->ipi6_addr = ipv6_all_zeros; 1910 return (sizeof (struct in6_pktinfo)); 1911 } 1912 case IPV6_NEXTHOP: { 1913 sin6_t *sin6 = (sin6_t *)ptr; 1914 1915 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 1916 return (0); 1917 *sin6 = sin6_null; 1918 sin6->sin6_family = AF_INET6; 1919 sin6->sin6_addr = ipp->ipp_nexthop; 1920 return (sizeof (sin6_t)); 1921 } 1922 case IPV6_HOPOPTS: 1923 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 1924 return (0); 1925 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 1926 return (0); 1927 bcopy((char *)ipp->ipp_hopopts + 1928 icmp->icmp_label_len_v6, ptr, 1929 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1930 if (icmp->icmp_label_len_v6 > 0) { 1931 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 1932 ptr[1] = (ipp->ipp_hopoptslen - 1933 icmp->icmp_label_len_v6 + 7) / 8 - 1; 1934 } 1935 return (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1936 case IPV6_RTHDRDSTOPTS: 1937 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 1938 return (0); 1939 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 1940 return (ipp->ipp_rtdstoptslen); 1941 case IPV6_RTHDR: 1942 if (!(ipp->ipp_fields & IPPF_RTHDR)) 1943 return (0); 1944 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 1945 return (ipp->ipp_rthdrlen); 1946 case IPV6_DSTOPTS: 1947 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 1948 return (0); 1949 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 1950 return (ipp->ipp_dstoptslen); 1951 case IPV6_PATHMTU: 1952 if (!(ipp->ipp_fields & IPPF_PATHMTU)) 1953 return (0); 1954 1955 return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0, 1956 (struct ip6_mtuinfo *)ptr, is->is_netstack)); 1957 case IPV6_TCLASS: 1958 if (ipp->ipp_fields & IPPF_TCLASS) 1959 *i1 = ipp->ipp_tclass; 1960 else 1961 *i1 = 
IPV6_FLOW_TCLASS( 1962 IPV6_DEFAULT_VERS_AND_FLOW); 1963 break; 1964 default: 1965 return (-1); 1966 } 1967 break; 1968 case IPPROTO_ICMPV6: 1969 /* 1970 * Only allow IPv6 option processing on native IPv6 sockets. 1971 */ 1972 if (icmp->icmp_family != AF_INET6) 1973 return (-1); 1974 1975 if (icmp->icmp_proto != IPPROTO_ICMPV6) 1976 return (-1); 1977 1978 switch (name) { 1979 case ICMP6_FILTER: 1980 if (icmp->icmp_filter == NULL) { 1981 /* Make it look like "pass all" */ 1982 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1983 } else { 1984 (void) bcopy(icmp->icmp_filter, ptr, 1985 sizeof (icmp6_filter_t)); 1986 } 1987 return (sizeof (icmp6_filter_t)); 1988 default: 1989 return (-1); 1990 } 1991 default: 1992 return (-1); 1993 } 1994 return (sizeof (int)); 1995 } 1996 1997 /* 1998 * This routine retrieves the current status of socket options. 1999 * It returns the size of the option retrieved. 2000 */ 2001 int 2002 icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 2003 { 2004 icmp_t *icmp = Q_TO_ICMP(q); 2005 int err; 2006 2007 rw_enter(&icmp->icmp_rwlock, RW_READER); 2008 err = icmp_opt_get_locked(q, level, name, ptr); 2009 rw_exit(&icmp->icmp_rwlock); 2010 return (err); 2011 } 2012 2013 2014 /* This routine sets socket options. */ 2015 /* ARGSUSED */ 2016 int 2017 icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name, 2018 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2019 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2020 { 2021 conn_t *connp = Q_TO_CONN(q); 2022 icmp_t *icmp = connp->conn_icmp; 2023 icmp_stack_t *is = icmp->icmp_is; 2024 int *i1 = (int *)invalp; 2025 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2026 boolean_t checkonly; 2027 int error; 2028 2029 switch (optset_context) { 2030 case SETFN_OPTCOM_CHECKONLY: 2031 checkonly = B_TRUE; 2032 /* 2033 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 2034 * inlen != 0 implies value supplied and 2035 * we have to "pretend" to set it. 
2036 * inlen == 0 implies that there is no 2037 * value part in T_CHECK request and just validation 2038 * done elsewhere should be enough, we just return here. 2039 */ 2040 if (inlen == 0) { 2041 *outlenp = 0; 2042 return (0); 2043 } 2044 break; 2045 case SETFN_OPTCOM_NEGOTIATE: 2046 checkonly = B_FALSE; 2047 break; 2048 case SETFN_UD_NEGOTIATE: 2049 case SETFN_CONN_NEGOTIATE: 2050 checkonly = B_FALSE; 2051 /* 2052 * Negotiating local and "association-related" options 2053 * through T_UNITDATA_REQ. 2054 * 2055 * Following routine can filter out ones we do not 2056 * want to be "set" this way. 2057 */ 2058 if (!icmp_opt_allow_udr_set(level, name)) { 2059 *outlenp = 0; 2060 return (EINVAL); 2061 } 2062 break; 2063 default: 2064 /* 2065 * We should never get here 2066 */ 2067 *outlenp = 0; 2068 return (EINVAL); 2069 } 2070 2071 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 2072 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 2073 2074 /* 2075 * For fixed length options, no sanity check 2076 * of passed in length is done. It is assumed *_optcom_req() 2077 * routines do the right thing. 
2078 */ 2079 2080 switch (level) { 2081 case SOL_SOCKET: 2082 switch (name) { 2083 case SO_DEBUG: 2084 if (!checkonly) 2085 icmp->icmp_debug = onoff; 2086 break; 2087 case SO_PROTOTYPE: 2088 if ((*i1 & 0xFF) != IPPROTO_ICMP && 2089 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 2090 secpolicy_net_rawaccess(cr) != 0) { 2091 *outlenp = 0; 2092 return (EACCES); 2093 } 2094 /* Can't use IPPROTO_RAW with IPv6 */ 2095 if ((*i1 & 0xFF) == IPPROTO_RAW && 2096 icmp->icmp_family == AF_INET6) { 2097 *outlenp = 0; 2098 return (EPROTONOSUPPORT); 2099 } 2100 if (checkonly) { 2101 /* T_CHECK case */ 2102 *(int *)outvalp = (*i1 & 0xFF); 2103 break; 2104 } 2105 icmp->icmp_proto = *i1 & 0xFF; 2106 if ((icmp->icmp_proto == IPPROTO_RAW || 2107 icmp->icmp_proto == IPPROTO_IGMP) && 2108 icmp->icmp_family == AF_INET) 2109 icmp->icmp_hdrincl = 1; 2110 else 2111 icmp->icmp_hdrincl = 0; 2112 2113 if (icmp->icmp_family == AF_INET6 && 2114 icmp->icmp_proto == IPPROTO_ICMPV6) { 2115 /* Set offset for icmp6_cksum */ 2116 icmp->icmp_raw_checksum = 0; 2117 icmp->icmp_checksum_off = 2; 2118 } 2119 if (icmp->icmp_proto == IPPROTO_UDP || 2120 icmp->icmp_proto == IPPROTO_TCP || 2121 icmp->icmp_proto == IPPROTO_SCTP) { 2122 icmp->icmp_no_tp_cksum = 1; 2123 icmp->icmp_sticky_ipp.ipp_fields |= 2124 IPPF_NO_CKSUM; 2125 } else { 2126 icmp->icmp_no_tp_cksum = 0; 2127 icmp->icmp_sticky_ipp.ipp_fields &= 2128 ~IPPF_NO_CKSUM; 2129 } 2130 2131 if (icmp->icmp_filter != NULL && 2132 icmp->icmp_proto != IPPROTO_ICMPV6) { 2133 kmem_free(icmp->icmp_filter, 2134 sizeof (icmp6_filter_t)); 2135 icmp->icmp_filter = NULL; 2136 } 2137 2138 /* Rebuild the header template */ 2139 error = icmp_build_hdrs(icmp); 2140 if (error != 0) { 2141 *outlenp = 0; 2142 return (error); 2143 } 2144 2145 /* 2146 * For SCTP, we don't use icmp_bind_proto() for 2147 * raw socket binding. Note that we do not need 2148 * to set *outlenp. 2149 * FIXME: how does SCTP work? 
2150 */ 2151 if (icmp->icmp_proto == IPPROTO_SCTP) 2152 return (0); 2153 2154 *outlenp = sizeof (int); 2155 *(int *)outvalp = *i1 & 0xFF; 2156 2157 /* Drop lock across the bind operation */ 2158 rw_exit(&icmp->icmp_rwlock); 2159 icmp_bind_proto(q); 2160 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2161 return (0); 2162 case SO_REUSEADDR: 2163 if (!checkonly) 2164 icmp->icmp_reuseaddr = onoff; 2165 break; 2166 2167 /* 2168 * The following three items are available here, 2169 * but are only meaningful to IP. 2170 */ 2171 case SO_DONTROUTE: 2172 if (!checkonly) 2173 icmp->icmp_dontroute = onoff; 2174 break; 2175 case SO_USELOOPBACK: 2176 if (!checkonly) 2177 icmp->icmp_useloopback = onoff; 2178 break; 2179 case SO_BROADCAST: 2180 if (!checkonly) 2181 icmp->icmp_broadcast = onoff; 2182 break; 2183 2184 case SO_SNDBUF: 2185 if (*i1 > is->is_max_buf) { 2186 *outlenp = 0; 2187 return (ENOBUFS); 2188 } 2189 if (!checkonly) { 2190 q->q_hiwat = *i1; 2191 } 2192 break; 2193 case SO_RCVBUF: 2194 if (*i1 > is->is_max_buf) { 2195 *outlenp = 0; 2196 return (ENOBUFS); 2197 } 2198 if (!checkonly) { 2199 RD(q)->q_hiwat = *i1; 2200 rw_exit(&icmp->icmp_rwlock); 2201 (void) mi_set_sth_hiwat(RD(q), *i1); 2202 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2203 } 2204 break; 2205 case SO_DGRAM_ERRIND: 2206 if (!checkonly) 2207 icmp->icmp_dgram_errind = onoff; 2208 break; 2209 case SO_ALLZONES: 2210 /* 2211 * "soft" error (negative) 2212 * option not handled at this level 2213 * Note: Do not modify *outlenp 2214 */ 2215 return (-EINVAL); 2216 case SO_TIMESTAMP: 2217 if (!checkonly) { 2218 icmp->icmp_timestamp = onoff; 2219 } 2220 break; 2221 case SO_MAC_EXEMPT: 2222 if (secpolicy_net_mac_aware(cr) != 0 || 2223 icmp->icmp_state != TS_UNBND) 2224 return (EACCES); 2225 if (!checkonly) 2226 icmp->icmp_mac_exempt = onoff; 2227 break; 2228 /* 2229 * Following three not meaningful for icmp 2230 * Action is same as "default" so we keep them 2231 * in comments. 
2232 * case SO_LINGER: 2233 * case SO_KEEPALIVE: 2234 * case SO_OOBINLINE: 2235 */ 2236 default: 2237 *outlenp = 0; 2238 return (EINVAL); 2239 } 2240 break; 2241 case IPPROTO_IP: 2242 /* 2243 * Only allow IPv4 option processing on IPv4 sockets. 2244 */ 2245 if (icmp->icmp_family != AF_INET) { 2246 *outlenp = 0; 2247 return (ENOPROTOOPT); 2248 } 2249 switch (name) { 2250 case IP_OPTIONS: 2251 case T_IP_OPTIONS: 2252 /* Save options for use by IP. */ 2253 if ((inlen & 0x3) || 2254 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2255 *outlenp = 0; 2256 return (EINVAL); 2257 } 2258 if (checkonly) 2259 break; 2260 2261 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2262 &icmp->icmp_ip_snd_options_len, 2263 icmp->icmp_label_len, invalp, inlen)) { 2264 *outlenp = 0; 2265 return (ENOMEM); 2266 } 2267 2268 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2269 icmp->icmp_ip_snd_options_len; 2270 rw_exit(&icmp->icmp_rwlock); 2271 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2272 is->is_wroff_extra); 2273 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2274 break; 2275 case IP_HDRINCL: 2276 if (!checkonly) 2277 icmp->icmp_hdrincl = onoff; 2278 break; 2279 case IP_TOS: 2280 case T_IP_TOS: 2281 if (!checkonly) { 2282 icmp->icmp_type_of_service = (uint8_t)*i1; 2283 } 2284 break; 2285 case IP_TTL: 2286 if (!checkonly) { 2287 icmp->icmp_ttl = (uint8_t)*i1; 2288 } 2289 break; 2290 case IP_MULTICAST_IF: 2291 /* 2292 * TODO should check OPTMGMT reply and undo this if 2293 * there is an error. 2294 */ 2295 if (!checkonly) 2296 icmp->icmp_multicast_if_addr = *i1; 2297 break; 2298 case IP_MULTICAST_TTL: 2299 if (!checkonly) 2300 icmp->icmp_multicast_ttl = *invalp; 2301 break; 2302 case IP_MULTICAST_LOOP: 2303 if (!checkonly) { 2304 connp->conn_multicast_loop = 2305 (*invalp == 0) ? 
0 : 1; 2306 } 2307 break; 2308 case IP_BOUND_IF: 2309 if (!checkonly) 2310 icmp->icmp_bound_if = *i1; 2311 break; 2312 case IP_UNSPEC_SRC: 2313 if (!checkonly) 2314 icmp->icmp_unspec_source = onoff; 2315 break; 2316 case IP_RECVIF: 2317 if (!checkonly) 2318 icmp->icmp_recvif = onoff; 2319 /* 2320 * pass to ip 2321 */ 2322 return (-EINVAL); 2323 case IP_PKTINFO: { 2324 /* 2325 * This also handles IP_RECVPKTINFO. 2326 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2327 * Differentiation is based on the size of the argument 2328 * passed in. 2329 */ 2330 struct in_pktinfo *pktinfop; 2331 ip4_pkt_t *attr_pktinfop; 2332 2333 if (checkonly) 2334 break; 2335 2336 if (inlen == sizeof (int)) { 2337 /* 2338 * This is IP_RECVPKTINFO option. 2339 * Keep a local copy of wether this option is 2340 * set or not and pass it down to IP for 2341 * processing. 2342 */ 2343 icmp->icmp_ip_recvpktinfo = onoff; 2344 return (-EINVAL); 2345 } 2346 2347 2348 if (inlen != sizeof (struct in_pktinfo)) 2349 return (EINVAL); 2350 2351 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2352 == NULL) { 2353 /* 2354 * sticky option is not supported 2355 */ 2356 return (EINVAL); 2357 } 2358 2359 pktinfop = (struct in_pktinfo *)invalp; 2360 2361 /* 2362 * Atleast one of the values should be specified 2363 */ 2364 if (pktinfop->ipi_ifindex == 0 && 2365 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2366 return (EINVAL); 2367 } 2368 2369 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2370 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2371 } 2372 break; 2373 case IP_ADD_MEMBERSHIP: 2374 case IP_DROP_MEMBERSHIP: 2375 case IP_BLOCK_SOURCE: 2376 case IP_UNBLOCK_SOURCE: 2377 case IP_ADD_SOURCE_MEMBERSHIP: 2378 case IP_DROP_SOURCE_MEMBERSHIP: 2379 case MCAST_JOIN_GROUP: 2380 case MCAST_LEAVE_GROUP: 2381 case MCAST_BLOCK_SOURCE: 2382 case MCAST_UNBLOCK_SOURCE: 2383 case MCAST_JOIN_SOURCE_GROUP: 2384 case MCAST_LEAVE_SOURCE_GROUP: 2385 case MRT_INIT: 2386 case MRT_DONE: 2387 case 
MRT_ADD_VIF: 2388 case MRT_DEL_VIF: 2389 case MRT_ADD_MFC: 2390 case MRT_DEL_MFC: 2391 case MRT_VERSION: 2392 case MRT_ASSERT: 2393 case IP_SEC_OPT: 2394 case IP_DONTFAILOVER_IF: 2395 case IP_NEXTHOP: 2396 /* 2397 * "soft" error (negative) 2398 * option not handled at this level 2399 * Note: Do not modify *outlenp 2400 */ 2401 return (-EINVAL); 2402 default: 2403 *outlenp = 0; 2404 return (EINVAL); 2405 } 2406 break; 2407 case IPPROTO_IPV6: { 2408 ip6_pkt_t *ipp; 2409 boolean_t sticky; 2410 2411 if (icmp->icmp_family != AF_INET6) { 2412 *outlenp = 0; 2413 return (ENOPROTOOPT); 2414 } 2415 /* 2416 * Deal with both sticky options and ancillary data 2417 */ 2418 if (thisdg_attrs == NULL) { 2419 /* sticky options, or none */ 2420 ipp = &icmp->icmp_sticky_ipp; 2421 sticky = B_TRUE; 2422 } else { 2423 /* ancillary data */ 2424 ipp = (ip6_pkt_t *)thisdg_attrs; 2425 sticky = B_FALSE; 2426 } 2427 2428 switch (name) { 2429 case IPV6_MULTICAST_IF: 2430 if (!checkonly) 2431 icmp->icmp_multicast_if_index = *i1; 2432 break; 2433 case IPV6_UNICAST_HOPS: 2434 /* -1 means use default */ 2435 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2436 *outlenp = 0; 2437 return (EINVAL); 2438 } 2439 if (!checkonly) { 2440 if (*i1 == -1) { 2441 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2442 is->is_ipv6_hoplimit; 2443 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2444 /* Pass modified value to IP. 
*/ 2445 *i1 = ipp->ipp_hoplimit; 2446 } else { 2447 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2448 (uint8_t)*i1; 2449 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2450 } 2451 /* Rebuild the header template */ 2452 error = icmp_build_hdrs(icmp); 2453 if (error != 0) { 2454 *outlenp = 0; 2455 return (error); 2456 } 2457 } 2458 break; 2459 case IPV6_MULTICAST_HOPS: 2460 /* -1 means use default */ 2461 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2462 *outlenp = 0; 2463 return (EINVAL); 2464 } 2465 if (!checkonly) { 2466 if (*i1 == -1) { 2467 icmp->icmp_multicast_ttl = 2468 ipp->ipp_multicast_hops = 2469 IP_DEFAULT_MULTICAST_TTL; 2470 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2471 /* Pass modified value to IP. */ 2472 *i1 = icmp->icmp_multicast_ttl; 2473 } else { 2474 icmp->icmp_multicast_ttl = 2475 ipp->ipp_multicast_hops = 2476 (uint8_t)*i1; 2477 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2478 } 2479 } 2480 break; 2481 case IPV6_MULTICAST_LOOP: 2482 if (*i1 != 0 && *i1 != 1) { 2483 *outlenp = 0; 2484 return (EINVAL); 2485 } 2486 if (!checkonly) 2487 connp->conn_multicast_loop = *i1; 2488 break; 2489 case IPV6_CHECKSUM: 2490 /* 2491 * Integer offset into the user data of where the 2492 * checksum is located. 2493 * Offset of -1 disables option. 2494 * Does not apply to IPPROTO_ICMPV6. 
2495 */ 2496 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2497 *outlenp = 0; 2498 return (EINVAL); 2499 } 2500 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2501 /* Negative or not 16 bit aligned offset */ 2502 *outlenp = 0; 2503 return (EINVAL); 2504 } 2505 if (checkonly) 2506 break; 2507 2508 if (*i1 == -1) { 2509 icmp->icmp_raw_checksum = 0; 2510 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2511 } else { 2512 icmp->icmp_raw_checksum = 1; 2513 icmp->icmp_checksum_off = *i1; 2514 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2515 } 2516 /* Rebuild the header template */ 2517 error = icmp_build_hdrs(icmp); 2518 if (error != 0) { 2519 *outlenp = 0; 2520 return (error); 2521 } 2522 break; 2523 case IPV6_JOIN_GROUP: 2524 case IPV6_LEAVE_GROUP: 2525 case MCAST_JOIN_GROUP: 2526 case MCAST_LEAVE_GROUP: 2527 case MCAST_BLOCK_SOURCE: 2528 case MCAST_UNBLOCK_SOURCE: 2529 case MCAST_JOIN_SOURCE_GROUP: 2530 case MCAST_LEAVE_SOURCE_GROUP: 2531 /* 2532 * "soft" error (negative) 2533 * option not handled at this level 2534 * Note: Do not modify *outlenp 2535 */ 2536 return (-EINVAL); 2537 case IPV6_BOUND_IF: 2538 if (!checkonly) 2539 icmp->icmp_bound_if = *i1; 2540 break; 2541 case IPV6_UNSPEC_SRC: 2542 if (!checkonly) 2543 icmp->icmp_unspec_source = onoff; 2544 break; 2545 case IPV6_RECVTCLASS: 2546 if (!checkonly) 2547 icmp->icmp_ipv6_recvtclass = onoff; 2548 break; 2549 /* 2550 * Set boolean switches for ancillary data delivery 2551 */ 2552 case IPV6_RECVPKTINFO: 2553 if (!checkonly) 2554 icmp->icmp_ip_recvpktinfo = onoff; 2555 break; 2556 case IPV6_RECVPATHMTU: 2557 if (!checkonly) 2558 icmp->icmp_ipv6_recvpathmtu = onoff; 2559 break; 2560 case IPV6_RECVHOPLIMIT: 2561 if (!checkonly) 2562 icmp->icmp_ipv6_recvhoplimit = onoff; 2563 break; 2564 case IPV6_RECVHOPOPTS: 2565 if (!checkonly) 2566 icmp->icmp_ipv6_recvhopopts = onoff; 2567 break; 2568 case IPV6_RECVDSTOPTS: 2569 if (!checkonly) 2570 icmp->icmp_ipv6_recvdstopts = onoff; 2571 break; 2572 case _OLD_IPV6_RECVDSTOPTS: 2573 
if (!checkonly) 2574 icmp->icmp_old_ipv6_recvdstopts = onoff; 2575 break; 2576 case IPV6_RECVRTHDRDSTOPTS: 2577 if (!checkonly) 2578 icmp->icmp_ipv6_recvrtdstopts = onoff; 2579 break; 2580 case IPV6_RECVRTHDR: 2581 if (!checkonly) 2582 icmp->icmp_ipv6_recvrthdr = onoff; 2583 break; 2584 /* 2585 * Set sticky options or ancillary data. 2586 * If sticky options, (re)build any extension headers 2587 * that might be needed as a result. 2588 */ 2589 case IPV6_PKTINFO: 2590 /* 2591 * The source address and ifindex are verified 2592 * in ip_opt_set(). For ancillary data the 2593 * source address is checked in ip_wput_v6. 2594 */ 2595 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2596 return (EINVAL); 2597 if (checkonly) 2598 break; 2599 2600 if (inlen == 0) { 2601 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2602 ipp->ipp_sticky_ignored |= 2603 (IPPF_IFINDEX|IPPF_ADDR); 2604 } else { 2605 struct in6_pktinfo *pkti; 2606 2607 pkti = (struct in6_pktinfo *)invalp; 2608 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2609 ipp->ipp_addr = pkti->ipi6_addr; 2610 if (ipp->ipp_ifindex != 0) 2611 ipp->ipp_fields |= IPPF_IFINDEX; 2612 else 2613 ipp->ipp_fields &= ~IPPF_IFINDEX; 2614 if (!IN6_IS_ADDR_UNSPECIFIED( 2615 &ipp->ipp_addr)) 2616 ipp->ipp_fields |= IPPF_ADDR; 2617 else 2618 ipp->ipp_fields &= ~IPPF_ADDR; 2619 } 2620 if (sticky) { 2621 error = icmp_build_hdrs(icmp); 2622 if (error != 0) 2623 return (error); 2624 } 2625 break; 2626 case IPV6_HOPLIMIT: 2627 /* This option can only be used as ancillary data. 
*/ 2628 if (sticky) 2629 return (EINVAL); 2630 if (inlen != 0 && inlen != sizeof (int)) 2631 return (EINVAL); 2632 if (checkonly) 2633 break; 2634 2635 if (inlen == 0) { 2636 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2637 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2638 } else { 2639 if (*i1 > 255 || *i1 < -1) 2640 return (EINVAL); 2641 if (*i1 == -1) 2642 ipp->ipp_hoplimit = 2643 is->is_ipv6_hoplimit; 2644 else 2645 ipp->ipp_hoplimit = *i1; 2646 ipp->ipp_fields |= IPPF_HOPLIMIT; 2647 } 2648 break; 2649 case IPV6_TCLASS: 2650 /* 2651 * IPV6_RECVTCLASS accepts -1 as use kernel default 2652 * and [0, 255] as the actualy traffic class. 2653 */ 2654 if (inlen != 0 && inlen != sizeof (int)) 2655 return (EINVAL); 2656 if (checkonly) 2657 break; 2658 2659 if (inlen == 0) { 2660 ipp->ipp_fields &= ~IPPF_TCLASS; 2661 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2662 } else { 2663 if (*i1 >= 256 || *i1 < -1) 2664 return (EINVAL); 2665 if (*i1 == -1) { 2666 ipp->ipp_tclass = 2667 IPV6_FLOW_TCLASS( 2668 IPV6_DEFAULT_VERS_AND_FLOW); 2669 } else { 2670 ipp->ipp_tclass = *i1; 2671 } 2672 ipp->ipp_fields |= IPPF_TCLASS; 2673 } 2674 if (sticky) { 2675 error = icmp_build_hdrs(icmp); 2676 if (error != 0) 2677 return (error); 2678 } 2679 break; 2680 case IPV6_NEXTHOP: 2681 /* 2682 * IP will verify that the nexthop is reachable 2683 * and fail for sticky options. 
2684 */ 2685 if (inlen != 0 && inlen != sizeof (sin6_t)) 2686 return (EINVAL); 2687 if (checkonly) 2688 break; 2689 2690 if (inlen == 0) { 2691 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2692 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2693 } else { 2694 sin6_t *sin6 = (sin6_t *)invalp; 2695 2696 if (sin6->sin6_family != AF_INET6) 2697 return (EAFNOSUPPORT); 2698 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 2699 return (EADDRNOTAVAIL); 2700 ipp->ipp_nexthop = sin6->sin6_addr; 2701 if (!IN6_IS_ADDR_UNSPECIFIED( 2702 &ipp->ipp_nexthop)) 2703 ipp->ipp_fields |= IPPF_NEXTHOP; 2704 else 2705 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2706 } 2707 if (sticky) { 2708 error = icmp_build_hdrs(icmp); 2709 if (error != 0) 2710 return (error); 2711 } 2712 break; 2713 case IPV6_HOPOPTS: { 2714 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2715 /* 2716 * Sanity checks - minimum size, size a multiple of 2717 * eight bytes, and matching size passed in. 2718 */ 2719 if (inlen != 0 && 2720 inlen != (8 * (hopts->ip6h_len + 1))) 2721 return (EINVAL); 2722 2723 if (checkonly) 2724 break; 2725 error = optcom_pkt_set(invalp, inlen, sticky, 2726 (uchar_t **)&ipp->ipp_hopopts, 2727 &ipp->ipp_hopoptslen, 2728 sticky ? icmp->icmp_label_len_v6 : 0); 2729 if (error != 0) 2730 return (error); 2731 if (ipp->ipp_hopoptslen == 0) { 2732 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2733 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2734 } else { 2735 ipp->ipp_fields |= IPPF_HOPOPTS; 2736 } 2737 if (sticky) { 2738 error = icmp_build_hdrs(icmp); 2739 if (error != 0) 2740 return (error); 2741 } 2742 break; 2743 } 2744 case IPV6_RTHDRDSTOPTS: { 2745 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2746 2747 /* 2748 * Sanity checks - minimum size, size a multiple of 2749 * eight bytes, and matching size passed in. 
2750 */ 2751 if (inlen != 0 && 2752 inlen != (8 * (dopts->ip6d_len + 1))) 2753 return (EINVAL); 2754 2755 if (checkonly) 2756 break; 2757 2758 if (inlen == 0) { 2759 if (sticky && 2760 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2761 kmem_free(ipp->ipp_rtdstopts, 2762 ipp->ipp_rtdstoptslen); 2763 ipp->ipp_rtdstopts = NULL; 2764 ipp->ipp_rtdstoptslen = 0; 2765 } 2766 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2767 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2768 } else { 2769 error = optcom_pkt_set(invalp, inlen, sticky, 2770 (uchar_t **)&ipp->ipp_rtdstopts, 2771 &ipp->ipp_rtdstoptslen, 0); 2772 if (error != 0) 2773 return (error); 2774 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2775 } 2776 if (sticky) { 2777 error = icmp_build_hdrs(icmp); 2778 if (error != 0) 2779 return (error); 2780 } 2781 break; 2782 } 2783 case IPV6_DSTOPTS: { 2784 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2785 2786 /* 2787 * Sanity checks - minimum size, size a multiple of 2788 * eight bytes, and matching size passed in. 2789 */ 2790 if (inlen != 0 && 2791 inlen != (8 * (dopts->ip6d_len + 1))) 2792 return (EINVAL); 2793 2794 if (checkonly) 2795 break; 2796 2797 if (inlen == 0) { 2798 if (sticky && 2799 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2800 kmem_free(ipp->ipp_dstopts, 2801 ipp->ipp_dstoptslen); 2802 ipp->ipp_dstopts = NULL; 2803 ipp->ipp_dstoptslen = 0; 2804 } 2805 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2806 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2807 } else { 2808 error = optcom_pkt_set(invalp, inlen, sticky, 2809 (uchar_t **)&ipp->ipp_dstopts, 2810 &ipp->ipp_dstoptslen, 0); 2811 if (error != 0) 2812 return (error); 2813 ipp->ipp_fields |= IPPF_DSTOPTS; 2814 } 2815 if (sticky) { 2816 error = icmp_build_hdrs(icmp); 2817 if (error != 0) 2818 return (error); 2819 } 2820 break; 2821 } 2822 case IPV6_RTHDR: { 2823 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2824 2825 /* 2826 * Sanity checks - minimum size, size a multiple of 2827 * eight bytes, and matching size passed in. 
2828 */ 2829 if (inlen != 0 && 2830 inlen != (8 * (rt->ip6r_len + 1))) 2831 return (EINVAL); 2832 2833 if (checkonly) 2834 break; 2835 2836 if (inlen == 0) { 2837 if (sticky && 2838 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2839 kmem_free(ipp->ipp_rthdr, 2840 ipp->ipp_rthdrlen); 2841 ipp->ipp_rthdr = NULL; 2842 ipp->ipp_rthdrlen = 0; 2843 } 2844 ipp->ipp_fields &= ~IPPF_RTHDR; 2845 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2846 } else { 2847 error = optcom_pkt_set(invalp, inlen, sticky, 2848 (uchar_t **)&ipp->ipp_rthdr, 2849 &ipp->ipp_rthdrlen, 0); 2850 if (error != 0) 2851 return (error); 2852 ipp->ipp_fields |= IPPF_RTHDR; 2853 } 2854 if (sticky) { 2855 error = icmp_build_hdrs(icmp); 2856 if (error != 0) 2857 return (error); 2858 } 2859 break; 2860 } 2861 2862 case IPV6_DONTFRAG: 2863 if (checkonly) 2864 break; 2865 2866 if (onoff) { 2867 ipp->ipp_fields |= IPPF_DONTFRAG; 2868 } else { 2869 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2870 } 2871 break; 2872 2873 case IPV6_USE_MIN_MTU: 2874 if (inlen != sizeof (int)) 2875 return (EINVAL); 2876 2877 if (*i1 < -1 || *i1 > 1) 2878 return (EINVAL); 2879 2880 if (checkonly) 2881 break; 2882 2883 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 2884 ipp->ipp_use_min_mtu = *i1; 2885 break; 2886 2887 /* 2888 * This option can't be set. Its only returned via 2889 * getsockopt() or ancillary data. 2890 */ 2891 case IPV6_PATHMTU: 2892 return (EINVAL); 2893 2894 case IPV6_BOUND_PIF: 2895 case IPV6_SEC_OPT: 2896 case IPV6_DONTFAILOVER_IF: 2897 case IPV6_SRC_PREFERENCES: 2898 case IPV6_V6ONLY: 2899 /* Handled at IP level */ 2900 return (-EINVAL); 2901 default: 2902 *outlenp = 0; 2903 return (EINVAL); 2904 } 2905 break; 2906 } /* end IPPROTO_IPV6 */ 2907 2908 case IPPROTO_ICMPV6: 2909 /* 2910 * Only allow IPv6 option processing on IPv6 sockets. 
2911 */ 2912 if (icmp->icmp_family != AF_INET6) { 2913 *outlenp = 0; 2914 return (ENOPROTOOPT); 2915 } 2916 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2917 *outlenp = 0; 2918 return (ENOPROTOOPT); 2919 } 2920 switch (name) { 2921 case ICMP6_FILTER: 2922 if (!checkonly) { 2923 if ((inlen != 0) && 2924 (inlen != sizeof (icmp6_filter_t))) 2925 return (EINVAL); 2926 2927 if (inlen == 0) { 2928 if (icmp->icmp_filter != NULL) { 2929 kmem_free(icmp->icmp_filter, 2930 sizeof (icmp6_filter_t)); 2931 icmp->icmp_filter = NULL; 2932 } 2933 } else { 2934 if (icmp->icmp_filter == NULL) { 2935 icmp->icmp_filter = kmem_alloc( 2936 sizeof (icmp6_filter_t), 2937 KM_NOSLEEP); 2938 if (icmp->icmp_filter == NULL) { 2939 *outlenp = 0; 2940 return (ENOBUFS); 2941 } 2942 } 2943 (void) bcopy(invalp, icmp->icmp_filter, 2944 inlen); 2945 } 2946 } 2947 break; 2948 2949 default: 2950 *outlenp = 0; 2951 return (EINVAL); 2952 } 2953 break; 2954 default: 2955 *outlenp = 0; 2956 return (EINVAL); 2957 } 2958 /* 2959 * Common case of OK return with outval same as inval. 2960 */ 2961 if (invalp != outvalp) { 2962 /* don't trust bcopy for identical src/dst */ 2963 (void) bcopy(invalp, outvalp, inlen); 2964 } 2965 *outlenp = inlen; 2966 return (0); 2967 } 2968 /* This routine sets socket options. */ 2969 /* ARGSUSED */ 2970 int 2971 icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, 2972 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2973 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2974 { 2975 icmp_t *icmp; 2976 int err; 2977 2978 icmp = Q_TO_ICMP(q); 2979 2980 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2981 err = icmp_opt_set_locked(q, optset_context, level, name, inlen, invalp, 2982 outlenp, outvalp, thisdg_attrs, cr, mblk); 2983 rw_exit(&icmp->icmp_rwlock); 2984 return (err); 2985 } 2986 2987 /* 2988 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 2989 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 
2990 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 2991 * headers. 2992 * Returns failure if can't allocate memory. 2993 */ 2994 static int 2995 icmp_build_hdrs(icmp_t *icmp) 2996 { 2997 icmp_stack_t *is = icmp->icmp_is; 2998 uchar_t *hdrs; 2999 uint_t hdrs_len; 3000 ip6_t *ip6h; 3001 ip6i_t *ip6i; 3002 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 3003 3004 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 3005 hdrs_len = ip_total_hdrs_len_v6(ipp); 3006 ASSERT(hdrs_len != 0); 3007 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 3008 /* Need to reallocate */ 3009 if (hdrs_len != 0) { 3010 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3011 if (hdrs == NULL) 3012 return (ENOMEM); 3013 } else { 3014 hdrs = NULL; 3015 } 3016 if (icmp->icmp_sticky_hdrs_len != 0) { 3017 kmem_free(icmp->icmp_sticky_hdrs, 3018 icmp->icmp_sticky_hdrs_len); 3019 } 3020 icmp->icmp_sticky_hdrs = hdrs; 3021 icmp->icmp_sticky_hdrs_len = hdrs_len; 3022 } 3023 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 3024 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 3025 3026 /* Set header fields not in ipp */ 3027 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3028 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 3029 ip6h = (ip6_t *)&ip6i[1]; 3030 3031 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 3032 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 3033 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 3034 } 3035 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 3036 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 3037 } 3038 } else { 3039 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 3040 } 3041 3042 if (!(ipp->ipp_fields & IPPF_ADDR)) 3043 ip6h->ip6_src = icmp->icmp_v6src; 3044 3045 /* Try to get everything in a single mblk */ 3046 if (hdrs_len > icmp->icmp_max_hdr_len) { 3047 icmp->icmp_max_hdr_len = hdrs_len; 3048 rw_exit(&icmp->icmp_rwlock); 3049 (void) mi_set_sth_wroff(icmp->icmp_connp->conn_rq, 3050 icmp->icmp_max_hdr_len + is->is_wroff_extra); 3051 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3052 } 3053 return (0); 3054 } 3055 3056 /* 3057 * 
This routine retrieves the value of an ND variable in a icmpparam_t 3058 * structure. It is called through nd_getset when a user reads the 3059 * variable. 3060 */ 3061 /* ARGSUSED */ 3062 static int 3063 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3064 { 3065 icmpparam_t *icmppa = (icmpparam_t *)cp; 3066 3067 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 3068 return (0); 3069 } 3070 3071 /* 3072 * Walk through the param array specified registering each element with the 3073 * named dispatch (ND) handler. 3074 */ 3075 static boolean_t 3076 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) 3077 { 3078 for (; cnt-- > 0; icmppa++) { 3079 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 3080 if (!nd_load(ndp, icmppa->icmp_param_name, 3081 icmp_param_get, icmp_param_set, 3082 (caddr_t)icmppa)) { 3083 nd_free(ndp); 3084 return (B_FALSE); 3085 } 3086 } 3087 } 3088 if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL, 3089 NULL)) { 3090 nd_free(ndp); 3091 return (B_FALSE); 3092 } 3093 return (B_TRUE); 3094 } 3095 3096 /* This routine sets an ND variable in a icmpparam_t structure. */ 3097 /* ARGSUSED */ 3098 static int 3099 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3100 { 3101 long new_value; 3102 icmpparam_t *icmppa = (icmpparam_t *)cp; 3103 3104 /* 3105 * Fail the request if the new value does not lie within the 3106 * required bounds. 
3107 */ 3108 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3109 new_value < icmppa->icmp_param_min || 3110 new_value > icmppa->icmp_param_max) { 3111 return (EINVAL); 3112 } 3113 /* Set the new value */ 3114 icmppa->icmp_param_value = new_value; 3115 return (0); 3116 } 3117 /*ARGSUSED2*/ 3118 static void 3119 icmp_input(void *arg1, mblk_t *mp, void *arg2) 3120 { 3121 conn_t *connp = (conn_t *)arg1; 3122 struct T_unitdata_ind *tudi; 3123 uchar_t *rptr; 3124 icmp_t *icmp; 3125 icmp_stack_t *is; 3126 sin_t *sin; 3127 sin6_t *sin6; 3128 ip6_t *ip6h; 3129 ip6i_t *ip6i; 3130 mblk_t *mp1; 3131 int hdr_len; 3132 ipha_t *ipha; 3133 int udi_size; /* Size of T_unitdata_ind */ 3134 uint_t ipvers; 3135 ip6_pkt_t ipp; 3136 uint8_t nexthdr; 3137 ip_pktinfo_t *pinfo = NULL; 3138 mblk_t *options_mp = NULL; 3139 uint_t icmp_opt = 0; 3140 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 3141 uint_t hopstrip; 3142 3143 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 3144 3145 icmp = connp->conn_icmp; 3146 is = icmp->icmp_is; 3147 rptr = mp->b_rptr; 3148 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3149 ASSERT(OK_32PTR(rptr)); 3150 3151 /* 3152 * IP should have prepended the options data in an M_CTL 3153 * Check M_CTL "type" to make sure are not here bcos of 3154 * a valid ICMP message 3155 */ 3156 if (DB_TYPE(mp) == M_CTL) { 3157 /* 3158 * FIXME: does IP still do this? 3159 * IP sends up the IPSEC_IN message for handling IPSEC 3160 * policy at the TCP level. We don't need it here. 3161 */ 3162 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 3163 mp1 = mp->b_cont; 3164 freeb(mp); 3165 mp = mp1; 3166 rptr = mp->b_rptr; 3167 } else if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3168 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3169 IN_PKTINFO) { 3170 /* 3171 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3172 * has been prepended to the packet by IP. 
We need to 3173 * extract the mblk and adjust the rptr 3174 */ 3175 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3176 options_mp = mp; 3177 mp = mp->b_cont; 3178 rptr = mp->b_rptr; 3179 } else { 3180 /* 3181 * ICMP messages. 3182 */ 3183 icmp_icmp_error(connp->conn_rq, mp); 3184 return; 3185 } 3186 } 3187 3188 /* 3189 * Discard message if it is misaligned or smaller than the IP header. 3190 */ 3191 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3192 freemsg(mp); 3193 if (options_mp != NULL) 3194 freeb(options_mp); 3195 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3196 return; 3197 } 3198 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3199 3200 /* Handle M_DATA messages containing IP packets messages */ 3201 if (ipvers == IPV4_VERSION) { 3202 /* 3203 * Special case where IP attaches 3204 * the IRE needs to be handled so that we don't send up 3205 * IRE to the user land. 3206 */ 3207 ipha = (ipha_t *)rptr; 3208 hdr_len = IPH_HDR_LENGTH(ipha); 3209 3210 if (ipha->ipha_protocol == IPPROTO_TCP) { 3211 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3212 3213 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3214 TH_SYN) && mp->b_cont != NULL) { 3215 mp1 = mp->b_cont; 3216 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3217 freeb(mp1); 3218 mp->b_cont = NULL; 3219 } 3220 } 3221 } 3222 if (is->is_bsd_compat) { 3223 ushort_t len; 3224 len = ntohs(ipha->ipha_length); 3225 3226 if (mp->b_datap->db_ref > 1) { 3227 /* 3228 * Allocate a new IP header so that we can 3229 * modify ipha_length. 
3230 */ 3231 mblk_t *mp1; 3232 3233 mp1 = allocb(hdr_len, BPRI_MED); 3234 if (!mp1) { 3235 freemsg(mp); 3236 if (options_mp != NULL) 3237 freeb(options_mp); 3238 BUMP_MIB(&is->is_rawip_mib, 3239 rawipInErrors); 3240 return; 3241 } 3242 bcopy(rptr, mp1->b_rptr, hdr_len); 3243 mp->b_rptr = rptr + hdr_len; 3244 rptr = mp1->b_rptr; 3245 ipha = (ipha_t *)rptr; 3246 mp1->b_cont = mp; 3247 mp1->b_wptr = rptr + hdr_len; 3248 mp = mp1; 3249 } 3250 len -= hdr_len; 3251 ipha->ipha_length = htons(len); 3252 } 3253 } 3254 3255 /* 3256 * This is the inbound data path. Packets are passed upstream as 3257 * T_UNITDATA_IND messages with full IP headers still attached. 3258 */ 3259 if (icmp->icmp_family == AF_INET) { 3260 ASSERT(ipvers == IPV4_VERSION); 3261 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3262 if (icmp->icmp_recvif && (pinfo != NULL) && 3263 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3264 udi_size += sizeof (struct T_opthdr) + 3265 sizeof (uint_t); 3266 } 3267 3268 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3269 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3270 udi_size += sizeof (struct T_opthdr) + 3271 sizeof (struct in_pktinfo); 3272 } 3273 3274 /* 3275 * If SO_TIMESTAMP is set allocate the appropriate sized 3276 * buffer. Since gethrestime() expects a pointer aligned 3277 * argument, we allocate space necessary for extra 3278 * alignment (even though it might not be used). 
3279 */ 3280 if (icmp->icmp_timestamp) { 3281 udi_size += sizeof (struct T_opthdr) + 3282 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3283 } 3284 mp1 = allocb(udi_size, BPRI_MED); 3285 if (mp1 == NULL) { 3286 freemsg(mp); 3287 if (options_mp != NULL) 3288 freeb(options_mp); 3289 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3290 return; 3291 } 3292 mp1->b_cont = mp; 3293 mp = mp1; 3294 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3295 mp->b_datap->db_type = M_PROTO; 3296 mp->b_wptr = (uchar_t *)tudi + udi_size; 3297 tudi->PRIM_type = T_UNITDATA_IND; 3298 tudi->SRC_length = sizeof (sin_t); 3299 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3300 sin = (sin_t *)&tudi[1]; 3301 *sin = sin_null; 3302 sin->sin_family = AF_INET; 3303 sin->sin_addr.s_addr = ipha->ipha_src; 3304 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3305 sizeof (sin_t); 3306 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3307 tudi->OPT_length = udi_size; 3308 3309 /* 3310 * Add options if IP_RECVIF is set 3311 */ 3312 if (udi_size != 0) { 3313 char *dstopt; 3314 3315 dstopt = (char *)&sin[1]; 3316 if (icmp->icmp_recvif && (pinfo != NULL) && 3317 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3318 3319 struct T_opthdr *toh; 3320 uint_t *dstptr; 3321 3322 toh = (struct T_opthdr *)dstopt; 3323 toh->level = IPPROTO_IP; 3324 toh->name = IP_RECVIF; 3325 toh->len = sizeof (struct T_opthdr) + 3326 sizeof (uint_t); 3327 toh->status = 0; 3328 dstopt += sizeof (struct T_opthdr); 3329 dstptr = (uint_t *)dstopt; 3330 *dstptr = pinfo->ip_pkt_ifindex; 3331 dstopt += sizeof (uint_t); 3332 udi_size -= toh->len; 3333 } 3334 if (icmp->icmp_timestamp) { 3335 struct T_opthdr *toh; 3336 3337 toh = (struct T_opthdr *)dstopt; 3338 toh->level = SOL_SOCKET; 3339 toh->name = SCM_TIMESTAMP; 3340 toh->len = sizeof (struct T_opthdr) + 3341 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3342 toh->status = 0; 3343 dstopt += sizeof (struct T_opthdr); 3344 /* Align for gethrestime() */ 3345 dstopt = (char 
*)P2ROUNDUP((intptr_t)dstopt, 3346 sizeof (intptr_t)); 3347 gethrestime((timestruc_t *)dstopt); 3348 dstopt = (char *)toh + toh->len; 3349 udi_size -= toh->len; 3350 } 3351 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3352 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3353 struct T_opthdr *toh; 3354 struct in_pktinfo *pktinfop; 3355 3356 toh = (struct T_opthdr *)dstopt; 3357 toh->level = IPPROTO_IP; 3358 toh->name = IP_PKTINFO; 3359 toh->len = sizeof (struct T_opthdr) + 3360 sizeof (in_pktinfo_t); 3361 toh->status = 0; 3362 dstopt += sizeof (struct T_opthdr); 3363 pktinfop = (struct in_pktinfo *)dstopt; 3364 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3365 pktinfop->ipi_spec_dst = 3366 pinfo->ip_pkt_match_addr; 3367 3368 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3369 3370 dstopt += sizeof (struct in_pktinfo); 3371 udi_size -= toh->len; 3372 } 3373 3374 /* Consumed all of allocated space */ 3375 ASSERT(udi_size == 0); 3376 } 3377 3378 if (options_mp != NULL) 3379 freeb(options_mp); 3380 3381 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3382 putnext(connp->conn_rq, mp); 3383 return; 3384 } 3385 3386 /* 3387 * We don't need options_mp in the IPv6 path. 3388 */ 3389 if (options_mp != NULL) { 3390 freeb(options_mp); 3391 options_mp = NULL; 3392 } 3393 3394 /* 3395 * Discard message if it is smaller than the IPv6 header 3396 * or if the header is malformed. 3397 */ 3398 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3399 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3400 icmp->icmp_family != AF_INET6) { 3401 freemsg(mp); 3402 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3403 return; 3404 } 3405 3406 /* Initialize */ 3407 ipp.ipp_fields = 0; 3408 hopstrip = 0; 3409 3410 ip6h = (ip6_t *)rptr; 3411 /* 3412 * Call on ip_find_hdr_v6 which gets the total hdr len 3413 * as well as individual lenghts of ext hdrs (and ptrs to 3414 * them). 
3415 */ 3416 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3417 /* Look for ifindex information */ 3418 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3419 ip6i = (ip6i_t *)ip6h; 3420 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3421 ASSERT(ip6i->ip6i_ifindex != 0); 3422 ipp.ipp_fields |= IPPF_IFINDEX; 3423 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3424 } 3425 rptr = (uchar_t *)&ip6i[1]; 3426 mp->b_rptr = rptr; 3427 if (rptr == mp->b_wptr) { 3428 mp1 = mp->b_cont; 3429 freeb(mp); 3430 mp = mp1; 3431 rptr = mp->b_rptr; 3432 } 3433 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3434 ip6h = (ip6_t *)rptr; 3435 } 3436 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3437 3438 /* 3439 * We need to lie a bit to the user because users inside 3440 * labeled compartments should not see their own labels. We 3441 * assume that in all other respects IP has checked the label, 3442 * and that the label is always first among the options. (If 3443 * it's not first, then this code won't see it, and the option 3444 * will be passed along to the user.) 3445 * 3446 * If we had multilevel ICMP sockets, then the following code 3447 * should be skipped for them to allow the user to see the 3448 * label. 3449 * 3450 * Alignment restrictions in the definition of IP options 3451 * (namely, the requirement that the 4-octet DOI goes on a 3452 * 4-octet boundary) mean that we know exactly where the option 3453 * should start, but we're lenient for other hosts. 3454 * 3455 * Note that there are no multilevel ICMP or raw IP sockets 3456 * yet, thus nobody ever sees the IP6OPT_LS option. 
3457 */ 3458 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3459 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3460 const uchar_t *ucp = 3461 (const uchar_t *)ipp.ipp_hopopts + 2; 3462 int remlen = ipp.ipp_hopoptslen - 2; 3463 3464 while (remlen > 0) { 3465 if (*ucp == IP6OPT_PAD1) { 3466 remlen--; 3467 ucp++; 3468 } else if (*ucp == IP6OPT_PADN) { 3469 remlen -= ucp[1] + 2; 3470 ucp += ucp[1] + 2; 3471 } else if (*ucp == ip6opt_ls) { 3472 hopstrip = (ucp - 3473 (const uchar_t *)ipp.ipp_hopopts) + 3474 ucp[1] + 2; 3475 hopstrip = (hopstrip + 7) & ~7; 3476 break; 3477 } else { 3478 /* label option must be first */ 3479 break; 3480 } 3481 } 3482 } 3483 } else { 3484 hdr_len = IPV6_HDR_LEN; 3485 ip6i = NULL; 3486 nexthdr = ip6h->ip6_nxt; 3487 } 3488 /* 3489 * One special case where IP attaches the IRE needs to 3490 * be handled so that we don't send up IRE to the user land. 3491 */ 3492 if (nexthdr == IPPROTO_TCP) { 3493 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3494 3495 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3496 mp->b_cont != NULL) { 3497 mp1 = mp->b_cont; 3498 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3499 freeb(mp1); 3500 mp->b_cont = NULL; 3501 } 3502 } 3503 } 3504 /* 3505 * Check a filter for ICMPv6 types if needed. 3506 * Verify raw checksums if needed. 
3507 */ 3508 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3509 if (icmp->icmp_filter != NULL) { 3510 int type; 3511 3512 /* Assumes that IP has done the pullupmsg */ 3513 type = mp->b_rptr[hdr_len]; 3514 3515 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3516 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3517 freemsg(mp); 3518 return; 3519 } 3520 } else { 3521 /* Checksum */ 3522 uint16_t *up; 3523 uint32_t sum; 3524 int remlen; 3525 3526 up = (uint16_t *)&ip6h->ip6_src; 3527 3528 remlen = msgdsize(mp) - hdr_len; 3529 sum = htons(icmp->icmp_proto + remlen) 3530 + up[0] + up[1] + up[2] + up[3] 3531 + up[4] + up[5] + up[6] + up[7] 3532 + up[8] + up[9] + up[10] + up[11] 3533 + up[12] + up[13] + up[14] + up[15]; 3534 sum = (sum & 0xffff) + (sum >> 16); 3535 sum = IP_CSUM(mp, hdr_len, sum); 3536 if (sum != 0) { 3537 /* IPv6 RAW checksum failed */ 3538 ip0dbg(("icmp_rput: RAW checksum " 3539 "failed %x\n", sum)); 3540 freemsg(mp); 3541 BUMP_MIB(&is->is_rawip_mib, 3542 rawipInCksumErrs); 3543 return; 3544 } 3545 } 3546 } 3547 /* Skip all the IPv6 headers per API */ 3548 mp->b_rptr += hdr_len; 3549 3550 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3551 3552 /* 3553 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3554 * maintain state information, instead of relying on icmp_t 3555 * structure, since there arent any locks protecting these members 3556 * and there is a window where there might be a race between a 3557 * thread setting options on the write side and a thread reading 3558 * these options on the read size. 
3559 */ 3560 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3561 IPPF_RTHDR|IPPF_IFINDEX)) { 3562 if (icmp->icmp_ipv6_recvhopopts && 3563 (ipp.ipp_fields & IPPF_HOPOPTS) && 3564 ipp.ipp_hopoptslen > hopstrip) { 3565 udi_size += sizeof (struct T_opthdr) + 3566 ipp.ipp_hopoptslen - hopstrip; 3567 icmp_opt |= IPPF_HOPOPTS; 3568 } 3569 if ((icmp->icmp_ipv6_recvdstopts || 3570 icmp->icmp_old_ipv6_recvdstopts) && 3571 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3572 udi_size += sizeof (struct T_opthdr) + 3573 ipp.ipp_dstoptslen; 3574 icmp_opt |= IPPF_DSTOPTS; 3575 } 3576 if (((icmp->icmp_ipv6_recvdstopts && 3577 icmp->icmp_ipv6_recvrthdr && 3578 (ipp.ipp_fields & IPPF_RTHDR)) || 3579 icmp->icmp_ipv6_recvrtdstopts) && 3580 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3581 udi_size += sizeof (struct T_opthdr) + 3582 ipp.ipp_rtdstoptslen; 3583 icmp_opt |= IPPF_RTDSTOPTS; 3584 } 3585 if (icmp->icmp_ipv6_recvrthdr && 3586 (ipp.ipp_fields & IPPF_RTHDR)) { 3587 udi_size += sizeof (struct T_opthdr) + 3588 ipp.ipp_rthdrlen; 3589 icmp_opt |= IPPF_RTHDR; 3590 } 3591 if (icmp->icmp_ip_recvpktinfo && 3592 (ipp.ipp_fields & IPPF_IFINDEX)) { 3593 udi_size += sizeof (struct T_opthdr) + 3594 sizeof (struct in6_pktinfo); 3595 icmp_opt |= IPPF_IFINDEX; 3596 } 3597 } 3598 if (icmp->icmp_ipv6_recvhoplimit) { 3599 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3600 icmp_ipv6_recvhoplimit = B_TRUE; 3601 } 3602 3603 if (icmp->icmp_ipv6_recvtclass) 3604 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3605 3606 /* 3607 * If SO_TIMESTAMP is set allocate the appropriate sized 3608 * buffer. Since gethrestime() expects a pointer aligned 3609 * argument, we allocate space necessary for extra 3610 * alignment (even though it might not be used). 
3611 */ 3612 if (icmp->icmp_timestamp) { 3613 udi_size += sizeof (struct T_opthdr) + 3614 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3615 } 3616 3617 mp1 = allocb(udi_size, BPRI_MED); 3618 if (mp1 == NULL) { 3619 freemsg(mp); 3620 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3621 return; 3622 } 3623 mp1->b_cont = mp; 3624 mp = mp1; 3625 mp->b_datap->db_type = M_PROTO; 3626 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3627 mp->b_wptr = (uchar_t *)tudi + udi_size; 3628 tudi->PRIM_type = T_UNITDATA_IND; 3629 tudi->SRC_length = sizeof (sin6_t); 3630 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3631 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3632 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3633 tudi->OPT_length = udi_size; 3634 sin6 = (sin6_t *)&tudi[1]; 3635 sin6->sin6_port = 0; 3636 sin6->sin6_family = AF_INET6; 3637 3638 sin6->sin6_addr = ip6h->ip6_src; 3639 /* No sin6_flowinfo per API */ 3640 sin6->sin6_flowinfo = 0; 3641 /* For link-scope source pass up scope id */ 3642 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3643 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3644 sin6->sin6_scope_id = ipp.ipp_ifindex; 3645 else 3646 sin6->sin6_scope_id = 0; 3647 3648 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3649 icmp->icmp_zoneid, is->is_netstack); 3650 3651 if (udi_size != 0) { 3652 uchar_t *dstopt; 3653 3654 dstopt = (uchar_t *)&sin6[1]; 3655 if (icmp_opt & IPPF_IFINDEX) { 3656 struct T_opthdr *toh; 3657 struct in6_pktinfo *pkti; 3658 3659 toh = (struct T_opthdr *)dstopt; 3660 toh->level = IPPROTO_IPV6; 3661 toh->name = IPV6_PKTINFO; 3662 toh->len = sizeof (struct T_opthdr) + 3663 sizeof (*pkti); 3664 toh->status = 0; 3665 dstopt += sizeof (struct T_opthdr); 3666 pkti = (struct in6_pktinfo *)dstopt; 3667 pkti->ipi6_addr = ip6h->ip6_dst; 3668 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3669 dstopt += sizeof (*pkti); 3670 udi_size -= toh->len; 3671 } 3672 if (icmp_ipv6_recvhoplimit) { 3673 struct T_opthdr *toh; 3674 3675 toh = 
(struct T_opthdr *)dstopt; 3676 toh->level = IPPROTO_IPV6; 3677 toh->name = IPV6_HOPLIMIT; 3678 toh->len = sizeof (struct T_opthdr) + 3679 sizeof (uint_t); 3680 toh->status = 0; 3681 dstopt += sizeof (struct T_opthdr); 3682 *(uint_t *)dstopt = ip6h->ip6_hops; 3683 dstopt += sizeof (uint_t); 3684 udi_size -= toh->len; 3685 } 3686 if (icmp->icmp_ipv6_recvtclass) { 3687 struct T_opthdr *toh; 3688 3689 toh = (struct T_opthdr *)dstopt; 3690 toh->level = IPPROTO_IPV6; 3691 toh->name = IPV6_TCLASS; 3692 toh->len = sizeof (struct T_opthdr) + 3693 sizeof (uint_t); 3694 toh->status = 0; 3695 dstopt += sizeof (struct T_opthdr); 3696 *(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3697 dstopt += sizeof (uint_t); 3698 udi_size -= toh->len; 3699 } 3700 if (icmp->icmp_timestamp) { 3701 struct T_opthdr *toh; 3702 3703 toh = (struct T_opthdr *)dstopt; 3704 toh->level = SOL_SOCKET; 3705 toh->name = SCM_TIMESTAMP; 3706 toh->len = sizeof (struct T_opthdr) + 3707 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3708 toh->status = 0; 3709 dstopt += sizeof (struct T_opthdr); 3710 /* Align for gethrestime() */ 3711 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 3712 sizeof (intptr_t)); 3713 gethrestime((timestruc_t *)dstopt); 3714 dstopt = (uchar_t *)toh + toh->len; 3715 udi_size -= toh->len; 3716 } 3717 if (icmp_opt & IPPF_HOPOPTS) { 3718 struct T_opthdr *toh; 3719 3720 toh = (struct T_opthdr *)dstopt; 3721 toh->level = IPPROTO_IPV6; 3722 toh->name = IPV6_HOPOPTS; 3723 toh->len = sizeof (struct T_opthdr) + 3724 ipp.ipp_hopoptslen - hopstrip; 3725 toh->status = 0; 3726 dstopt += sizeof (struct T_opthdr); 3727 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 3728 ipp.ipp_hopoptslen - hopstrip); 3729 if (hopstrip > 0) { 3730 /* copy next header value and fake length */ 3731 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 3732 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 3733 hopstrip / 8; 3734 } 3735 dstopt += ipp.ipp_hopoptslen - hopstrip; 3736 udi_size -= toh->len; 3737 } 3738 if 
(icmp_opt & IPPF_RTDSTOPTS) { 3739 struct T_opthdr *toh; 3740 3741 toh = (struct T_opthdr *)dstopt; 3742 toh->level = IPPROTO_IPV6; 3743 toh->name = IPV6_DSTOPTS; 3744 toh->len = sizeof (struct T_opthdr) + 3745 ipp.ipp_rtdstoptslen; 3746 toh->status = 0; 3747 dstopt += sizeof (struct T_opthdr); 3748 bcopy(ipp.ipp_rtdstopts, dstopt, 3749 ipp.ipp_rtdstoptslen); 3750 dstopt += ipp.ipp_rtdstoptslen; 3751 udi_size -= toh->len; 3752 } 3753 if (icmp_opt & IPPF_RTHDR) { 3754 struct T_opthdr *toh; 3755 3756 toh = (struct T_opthdr *)dstopt; 3757 toh->level = IPPROTO_IPV6; 3758 toh->name = IPV6_RTHDR; 3759 toh->len = sizeof (struct T_opthdr) + 3760 ipp.ipp_rthdrlen; 3761 toh->status = 0; 3762 dstopt += sizeof (struct T_opthdr); 3763 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 3764 dstopt += ipp.ipp_rthdrlen; 3765 udi_size -= toh->len; 3766 } 3767 if (icmp_opt & IPPF_DSTOPTS) { 3768 struct T_opthdr *toh; 3769 3770 toh = (struct T_opthdr *)dstopt; 3771 toh->level = IPPROTO_IPV6; 3772 toh->name = IPV6_DSTOPTS; 3773 toh->len = sizeof (struct T_opthdr) + 3774 ipp.ipp_dstoptslen; 3775 toh->status = 0; 3776 dstopt += sizeof (struct T_opthdr); 3777 bcopy(ipp.ipp_dstopts, dstopt, 3778 ipp.ipp_dstoptslen); 3779 dstopt += ipp.ipp_dstoptslen; 3780 udi_size -= toh->len; 3781 } 3782 /* Consumed all of allocated space */ 3783 ASSERT(udi_size == 0); 3784 } 3785 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3786 putnext(connp->conn_rq, mp); 3787 } 3788 3789 /* 3790 * Handle the results of a T_BIND_REQ whether deferred by IP or handled 3791 * immediately. 3792 */ 3793 static void 3794 icmp_bind_result(conn_t *connp, mblk_t *mp) 3795 { 3796 struct T_error_ack *tea; 3797 3798 switch (mp->b_datap->db_type) { 3799 case M_PROTO: 3800 case M_PCPROTO: 3801 /* M_PROTO messages contain some type of TPI message. 
*/ 3802 if ((mp->b_wptr - mp->b_rptr) < sizeof (t_scalar_t)) { 3803 freemsg(mp); 3804 return; 3805 } 3806 tea = (struct T_error_ack *)mp->b_rptr; 3807 3808 switch (tea->PRIM_type) { 3809 case T_ERROR_ACK: 3810 switch (tea->ERROR_prim) { 3811 case O_T_BIND_REQ: 3812 case T_BIND_REQ: 3813 icmp_bind_error(connp, mp); 3814 return; 3815 default: 3816 break; 3817 } 3818 ASSERT(0); 3819 freemsg(mp); 3820 return; 3821 3822 case T_BIND_ACK: 3823 icmp_bind_ack(connp, mp); 3824 return; 3825 3826 default: 3827 break; 3828 } 3829 freemsg(mp); 3830 return; 3831 default: 3832 /* FIXME: other cases? */ 3833 ASSERT(0); 3834 freemsg(mp); 3835 return; 3836 } 3837 } 3838 3839 /* 3840 * Process a T_BIND_ACK 3841 */ 3842 static void 3843 icmp_bind_ack(conn_t *connp, mblk_t *mp) 3844 { 3845 icmp_t *icmp = connp->conn_icmp; 3846 mblk_t *mp1; 3847 ire_t *ire; 3848 struct T_bind_ack *tba; 3849 uchar_t *addrp; 3850 ipa_conn_t *ac; 3851 ipa6_conn_t *ac6; 3852 3853 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3854 /* 3855 * We know if headers are included or not so we can 3856 * safely do this. 3857 */ 3858 if (icmp->icmp_state == TS_UNBND) { 3859 /* 3860 * TPI has not yet bound - bind sent by 3861 * icmp_bind_proto. 3862 */ 3863 freemsg(mp); 3864 rw_exit(&icmp->icmp_rwlock); 3865 return; 3866 } 3867 ASSERT(icmp->icmp_pending_op != -1); 3868 3869 /* 3870 * If a broadcast/multicast address was bound set 3871 * the source address to 0. 3872 * This ensures no datagrams with broadcast address 3873 * as source address are emitted (which would violate 3874 * RFC1122 - Hosts requirements) 3875 * 3876 * Note that when connecting the returned IRE is 3877 * for the destination address and we only perform 3878 * the broadcast check for the source address (it 3879 * is OK to connect to a broadcast/multicast address.) 
3880 */ 3881 mp1 = mp->b_cont; 3882 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 3883 ire = (ire_t *)mp1->b_rptr; 3884 3885 /* 3886 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 3887 * local address. 3888 */ 3889 if (ire->ire_type == IRE_BROADCAST && 3890 icmp->icmp_state != TS_DATA_XFER) { 3891 ASSERT(icmp->icmp_pending_op == T_BIND_REQ || 3892 icmp->icmp_pending_op == O_T_BIND_REQ); 3893 /* This was just a local bind to a MC/broadcast addr */ 3894 V6_SET_ZERO(icmp->icmp_v6src); 3895 if (icmp->icmp_family == AF_INET6) 3896 (void) icmp_build_hdrs(icmp); 3897 } else if (V6_OR_V4_INADDR_ANY(icmp->icmp_v6src)) { 3898 /* 3899 * Local address not yet set - pick it from the 3900 * T_bind_ack 3901 */ 3902 tba = (struct T_bind_ack *)mp->b_rptr; 3903 addrp = &mp->b_rptr[tba->ADDR_offset]; 3904 switch (icmp->icmp_family) { 3905 case AF_INET: 3906 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 3907 ac = (ipa_conn_t *)addrp; 3908 } else { 3909 ASSERT(tba->ADDR_length == 3910 sizeof (ipa_conn_x_t)); 3911 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 3912 } 3913 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 3914 &icmp->icmp_v6src); 3915 break; 3916 case AF_INET6: 3917 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 3918 ac6 = (ipa6_conn_t *)addrp; 3919 } else { 3920 ASSERT(tba->ADDR_length == 3921 sizeof (ipa6_conn_x_t)); 3922 ac6 = &((ipa6_conn_x_t *) 3923 addrp)->ac6x_conn; 3924 } 3925 icmp->icmp_v6src = ac6->ac6_laddr; 3926 (void) icmp_build_hdrs(icmp); 3927 } 3928 } 3929 mp1 = mp1->b_cont; 3930 } 3931 icmp->icmp_pending_op = -1; 3932 rw_exit(&icmp->icmp_rwlock); 3933 /* 3934 * Look for one or more appended ACK message added by 3935 * icmp_connect or icmp_disconnect. 3936 * If none found just send up the T_BIND_ACK. 3937 * icmp_connect has appended a T_OK_ACK and a 3938 * T_CONN_CON. 3939 * icmp_disconnect has appended a T_OK_ACK. 
3940 */ 3941 if (mp1 != NULL) { 3942 if (mp->b_cont == mp1) 3943 mp->b_cont = NULL; 3944 else { 3945 ASSERT(mp->b_cont->b_cont == mp1); 3946 mp->b_cont->b_cont = NULL; 3947 } 3948 freemsg(mp); 3949 mp = mp1; 3950 while (mp != NULL) { 3951 mp1 = mp->b_cont; 3952 mp->b_cont = NULL; 3953 putnext(connp->conn_rq, mp); 3954 mp = mp1; 3955 } 3956 return; 3957 } 3958 freemsg(mp->b_cont); 3959 mp->b_cont = NULL; 3960 putnext(connp->conn_rq, mp); 3961 } 3962 3963 static void 3964 icmp_bind_error(conn_t *connp, mblk_t *mp) 3965 { 3966 icmp_t *icmp = connp->conn_icmp; 3967 struct T_error_ack *tea; 3968 3969 tea = (struct T_error_ack *)mp->b_rptr; 3970 /* 3971 * If our O_T_BIND_REQ/T_BIND_REQ fails, 3972 * clear out the source address before 3973 * passing the message upstream. 3974 * If this was caused by a T_CONN_REQ 3975 * revert back to bound state. 3976 */ 3977 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3978 if (icmp->icmp_state == TS_UNBND) { 3979 /* 3980 * TPI has not yet bound - bind sent by icmp_bind_proto. 
3981 */ 3982 freemsg(mp); 3983 rw_exit(&icmp->icmp_rwlock); 3984 return; 3985 } 3986 ASSERT(icmp->icmp_pending_op != -1); 3987 tea->ERROR_prim = icmp->icmp_pending_op; 3988 icmp->icmp_pending_op = -1; 3989 3990 switch (tea->ERROR_prim) { 3991 case T_CONN_REQ: 3992 ASSERT(icmp->icmp_state == TS_DATA_XFER); 3993 /* Connect failed */ 3994 /* Revert back to the bound source */ 3995 icmp->icmp_v6src = icmp->icmp_bound_v6src; 3996 icmp->icmp_state = TS_IDLE; 3997 if (icmp->icmp_family == AF_INET6) 3998 (void) icmp_build_hdrs(icmp); 3999 break; 4000 4001 case T_DISCON_REQ: 4002 case T_BIND_REQ: 4003 case O_T_BIND_REQ: 4004 V6_SET_ZERO(icmp->icmp_v6src); 4005 V6_SET_ZERO(icmp->icmp_bound_v6src); 4006 icmp->icmp_state = TS_UNBND; 4007 if (icmp->icmp_family == AF_INET6) 4008 (void) icmp_build_hdrs(icmp); 4009 break; 4010 default: 4011 break; 4012 } 4013 rw_exit(&icmp->icmp_rwlock); 4014 putnext(connp->conn_rq, mp); 4015 } 4016 4017 /* 4018 * return SNMP stuff in buffer in mpdata 4019 */ 4020 mblk_t * 4021 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 4022 { 4023 mblk_t *mpdata; 4024 struct opthdr *optp; 4025 conn_t *connp = Q_TO_CONN(q); 4026 icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 4027 mblk_t *mp2ctl; 4028 4029 /* 4030 * make a copy of the original message 4031 */ 4032 mp2ctl = copymsg(mpctl); 4033 4034 if (mpctl == NULL || 4035 (mpdata = mpctl->b_cont) == NULL) { 4036 freemsg(mpctl); 4037 freemsg(mp2ctl); 4038 return (0); 4039 } 4040 4041 /* fixed length structure for IPv4 and IPv6 counters */ 4042 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4043 optp->level = EXPER_RAWIP; 4044 optp->name = 0; 4045 (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, 4046 sizeof (is->is_rawip_mib)); 4047 optp->len = msgdsize(mpdata); 4048 qreply(q, mpctl); 4049 4050 return (mp2ctl); 4051 } 4052 4053 /* 4054 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 
4055 * TODO: If this ever actually tries to set anything, it needs to be 4056 * to do the appropriate locking. 4057 */ 4058 /* ARGSUSED */ 4059 int 4060 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4061 uchar_t *ptr, int len) 4062 { 4063 switch (level) { 4064 case EXPER_RAWIP: 4065 return (0); 4066 default: 4067 return (1); 4068 } 4069 } 4070 4071 /* Report for ndd "icmp_status" */ 4072 /* ARGSUSED */ 4073 static int 4074 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4075 { 4076 conn_t *connp; 4077 ip_stack_t *ipst; 4078 char laddrbuf[INET6_ADDRSTRLEN]; 4079 char faddrbuf[INET6_ADDRSTRLEN]; 4080 int i; 4081 4082 (void) mi_mpprintf(mp, 4083 "RAWIP " MI_COL_HDRPAD_STR 4084 /* 01234567[89ABCDEF] */ 4085 " src addr dest addr state"); 4086 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 4087 4088 connp = Q_TO_CONN(q); 4089 ipst = connp->conn_netstack->netstack_ip; 4090 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4091 connf_t *connfp; 4092 char *state; 4093 4094 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4095 connp = NULL; 4096 4097 while ((connp = ipcl_get_next_conn(connfp, connp, 4098 IPCL_RAWIPCONN)) != NULL) { 4099 icmp_t *icmp; 4100 4101 mutex_enter(&(connp)->conn_lock); 4102 icmp = connp->conn_icmp; 4103 4104 if (icmp->icmp_state == TS_UNBND) 4105 state = "UNBOUND"; 4106 else if (icmp->icmp_state == TS_IDLE) 4107 state = "IDLE"; 4108 else if (icmp->icmp_state == TS_DATA_XFER) 4109 state = "CONNECTED"; 4110 else 4111 state = "UnkState"; 4112 4113 (void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s", 4114 (void *)icmp, 4115 inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf, 4116 sizeof (faddrbuf)), 4117 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 4118 sizeof (laddrbuf)), 4119 state); 4120 mutex_exit(&(connp)->conn_lock); 4121 } 4122 } 4123 return (0); 4124 } 4125 4126 /* 4127 * This routine creates a T_UDERROR_IND message and passes it upstream. 
4128 * The address and options are copied from the T_UNITDATA_REQ message 4129 * passed in mp. This message is freed. 4130 */ 4131 static void 4132 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 4133 { 4134 mblk_t *mp1; 4135 uchar_t *rptr = mp->b_rptr; 4136 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 4137 4138 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 4139 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 4140 tudr->OPT_length, err); 4141 if (mp1) 4142 qreply(q, mp1); 4143 freemsg(mp); 4144 } 4145 4146 /* 4147 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 4148 * After some error checking, the message is passed downstream to ip. 4149 */ 4150 static void 4151 icmp_unbind(queue_t *q, mblk_t *mp) 4152 { 4153 icmp_t *icmp = Q_TO_ICMP(q); 4154 4155 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4156 /* If a bind has not been done, we can't unbind. */ 4157 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 4158 rw_exit(&icmp->icmp_rwlock); 4159 icmp_err_ack(q, mp, TOUTSTATE, 0); 4160 return; 4161 } 4162 icmp->icmp_pending_op = T_UNBIND_REQ; 4163 rw_exit(&icmp->icmp_rwlock); 4164 4165 /* 4166 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 4167 * and therefore ip_unbind must never return NULL. 4168 */ 4169 mp = ip_unbind(q, mp); 4170 ASSERT(mp != NULL); 4171 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4172 4173 /* 4174 * Once we're unbound from IP, the pending operation may be cleared 4175 * here. 4176 */ 4177 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4178 V6_SET_ZERO(icmp->icmp_v6src); 4179 V6_SET_ZERO(icmp->icmp_bound_v6src); 4180 icmp->icmp_pending_op = -1; 4181 icmp->icmp_state = TS_UNBND; 4182 if (icmp->icmp_family == AF_INET6) 4183 (void) icmp_build_hdrs(icmp); 4184 rw_exit(&icmp->icmp_rwlock); 4185 4186 qreply(q, mp); 4187 } 4188 4189 /* 4190 * Process IPv4 packets that already include an IP header. 
4191 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 4192 * IPPROTO_IGMP). 4193 */ 4194 static void 4195 icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop) 4196 { 4197 icmp_stack_t *is = icmp->icmp_is; 4198 ipha_t *ipha; 4199 int ip_hdr_length; 4200 int tp_hdr_len; 4201 mblk_t *mp1; 4202 uint_t pkt_len; 4203 ip_opt_info_t optinfo; 4204 conn_t *connp = icmp->icmp_connp; 4205 4206 optinfo.ip_opt_flags = 0; 4207 optinfo.ip_opt_ill_index = 0; 4208 ipha = (ipha_t *)mp->b_rptr; 4209 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4210 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 4211 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4212 ASSERT(icmp != NULL); 4213 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4214 freemsg(mp); 4215 return; 4216 } 4217 ipha = (ipha_t *)mp->b_rptr; 4218 } 4219 ipha->ipha_version_and_hdr_length = 4220 (IP_VERSION<<4) | (ip_hdr_length>>2); 4221 4222 /* 4223 * For the socket of SOCK_RAW type, the checksum is provided in the 4224 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4225 * tell IP that the application has sent a complete IP header and not 4226 * to compute the transport checksum nor change the DF flag. 4227 */ 4228 ipha->ipha_ident = IP_HDR_INCLUDED; 4229 ipha->ipha_hdr_checksum = 0; 4230 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 4231 /* Insert options if any */ 4232 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4233 /* 4234 * Put the IP header plus any transport header that is 4235 * checksumed by ip_wput into the first mblk. (ip_wput assumes 4236 * that at least the checksum field is in the first mblk.) 4237 */ 4238 switch (ipha->ipha_protocol) { 4239 case IPPROTO_UDP: 4240 tp_hdr_len = 8; 4241 break; 4242 case IPPROTO_TCP: 4243 tp_hdr_len = 20; 4244 break; 4245 default: 4246 tp_hdr_len = 0; 4247 break; 4248 } 4249 /* 4250 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 4251 * tp_hdr_len bytes will be in a single mblk. 
4252 */ 4253 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 4254 tp_hdr_len)) { 4255 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 4256 tp_hdr_len)) { 4257 BUMP_MIB(&is->is_rawip_mib, 4258 rawipOutErrors); 4259 freemsg(mp); 4260 return; 4261 } 4262 ipha = (ipha_t *)mp->b_rptr; 4263 } 4264 4265 /* 4266 * if the length is larger then the max allowed IP packet, 4267 * then send an error and abort the processing. 4268 */ 4269 pkt_len = ntohs(ipha->ipha_length) 4270 + icmp->icmp_ip_snd_options_len; 4271 if (pkt_len > IP_MAXPACKET) { 4272 icmp_ud_err(q, mp, EMSGSIZE); 4273 return; 4274 } 4275 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra + 4276 tp_hdr_len, BPRI_LO))) { 4277 icmp_ud_err(q, mp, ENOMEM); 4278 return; 4279 } 4280 mp1->b_rptr += is->is_wroff_extra; 4281 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4282 4283 ipha->ipha_length = htons((uint16_t)pkt_len); 4284 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4285 4286 /* Copy transport header if any */ 4287 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4288 mp1->b_wptr += tp_hdr_len; 4289 4290 /* Add options */ 4291 ipha = (ipha_t *)mp1->b_rptr; 4292 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 4293 icmp->icmp_ip_snd_options_len); 4294 4295 /* Drop IP header and transport header from original */ 4296 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4297 4298 mp1->b_cont = mp; 4299 mp = mp1; 4300 /* 4301 * Massage source route putting first source 4302 * route in ipha_dst. 
4303 */ 4304 (void) ip_massage_options(ipha, is->is_netstack); 4305 } 4306 4307 if (pktinfop != NULL) { 4308 /* 4309 * Over write the source address provided in the header 4310 */ 4311 if (pktinfop->ip4_addr != INADDR_ANY) { 4312 ipha->ipha_src = pktinfop->ip4_addr; 4313 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4314 } 4315 4316 if (pktinfop->ip4_ill_index != 0) { 4317 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4318 } 4319 } 4320 4321 mblk_setcred(mp, connp->conn_cred); 4322 ip_output_options(connp, mp, q, IP_WPUT, 4323 &optinfo); 4324 } 4325 4326 static boolean_t 4327 icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4328 { 4329 int err; 4330 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4331 icmp_stack_t *is = icmp->icmp_is; 4332 conn_t *connp = icmp->icmp_connp; 4333 4334 err = tsol_compute_label(DB_CREDDEF(mp, connp->conn_cred), dst, 4335 opt_storage, icmp->icmp_mac_exempt, 4336 is->is_netstack->netstack_ip); 4337 if (err == 0) { 4338 err = tsol_update_options(&icmp->icmp_ip_snd_options, 4339 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4340 opt_storage); 4341 } 4342 if (err != 0) { 4343 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4344 DTRACE_PROBE4( 4345 tx__ip__log__drop__updatelabel__icmp, 4346 char *, "queue(1) failed to update options(2) on mp(3)", 4347 queue_t *, q, char *, opt_storage, mblk_t *, mp); 4348 icmp_ud_err(q, mp, err); 4349 return (B_FALSE); 4350 } 4351 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4352 return (B_TRUE); 4353 } 4354 4355 /* 4356 * This routine handles all messages passed downstream. It either 4357 * consumes the message or passes it downstream; it never queues a 4358 * a message. 
4359 */ 4360 static void 4361 icmp_wput(queue_t *q, mblk_t *mp) 4362 { 4363 uchar_t *rptr = mp->b_rptr; 4364 ipha_t *ipha; 4365 mblk_t *mp1; 4366 int ip_hdr_length; 4367 #define tudr ((struct T_unitdata_req *)rptr) 4368 size_t ip_len; 4369 conn_t *connp = Q_TO_CONN(q); 4370 icmp_t *icmp = connp->conn_icmp; 4371 icmp_stack_t *is = icmp->icmp_is; 4372 sin6_t *sin6; 4373 sin_t *sin; 4374 ipaddr_t v4dst; 4375 ip4_pkt_t pktinfo; 4376 ip4_pkt_t *pktinfop = &pktinfo; 4377 ip_opt_info_t optinfo; 4378 4379 switch (mp->b_datap->db_type) { 4380 case M_DATA: 4381 if (icmp->icmp_hdrincl) { 4382 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4383 ipha = (ipha_t *)mp->b_rptr; 4384 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4385 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4386 BUMP_MIB(&is->is_rawip_mib, 4387 rawipOutErrors); 4388 freemsg(mp); 4389 return; 4390 } 4391 ipha = (ipha_t *)mp->b_rptr; 4392 } 4393 /* 4394 * If this connection was used for v6 (inconceivable!) 4395 * or if we have a new destination, then it's time to 4396 * figure a new label. 4397 */ 4398 if (is_system_labeled() && 4399 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4400 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4401 ipha->ipha_dst) && 4402 !icmp_update_label(q, icmp, mp, ipha->ipha_dst)) { 4403 return; 4404 } 4405 icmp_wput_hdrincl(q, mp, icmp, NULL); 4406 return; 4407 } 4408 freemsg(mp); 4409 return; 4410 case M_PROTO: 4411 case M_PCPROTO: 4412 ip_len = mp->b_wptr - rptr; 4413 if (ip_len >= sizeof (struct T_unitdata_req)) { 4414 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4415 if (((union T_primitives *)rptr)->type 4416 == T_UNITDATA_REQ) 4417 break; 4418 } 4419 /* FALLTHRU */ 4420 default: 4421 icmp_wput_other(q, mp); 4422 return; 4423 } 4424 4425 /* Handle T_UNITDATA_REQ messages here. */ 4426 4427 4428 4429 if (icmp->icmp_state == TS_UNBND) { 4430 /* If a port has not been bound to the stream, fail. 
*/ 4431 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4432 icmp_ud_err(q, mp, EPROTO); 4433 return; 4434 } 4435 mp1 = mp->b_cont; 4436 if (mp1 == NULL) { 4437 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4438 icmp_ud_err(q, mp, EPROTO); 4439 return; 4440 } 4441 4442 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4443 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4444 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4445 return; 4446 } 4447 4448 switch (icmp->icmp_family) { 4449 case AF_INET6: 4450 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4451 if (!OK_32PTR((char *)sin6) || 4452 tudr->DEST_length != sizeof (sin6_t) || 4453 sin6->sin6_family != AF_INET6) { 4454 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4455 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4456 return; 4457 } 4458 4459 /* No support for mapped addresses on raw sockets */ 4460 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4461 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4462 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4463 return; 4464 } 4465 4466 /* 4467 * Destination is a native IPv6 address. 4468 * Send out an IPv6 format packet. 
4469 */ 4470 icmp_wput_ipv6(q, mp, sin6, tudr->OPT_length); 4471 return; 4472 4473 case AF_INET: 4474 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4475 if (!OK_32PTR((char *)sin) || 4476 tudr->DEST_length != sizeof (sin_t) || 4477 sin->sin_family != AF_INET) { 4478 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4479 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4480 return; 4481 } 4482 /* Extract and ipaddr */ 4483 v4dst = sin->sin_addr.s_addr; 4484 break; 4485 4486 default: 4487 ASSERT(0); 4488 } 4489 4490 pktinfop->ip4_ill_index = 0; 4491 pktinfop->ip4_addr = INADDR_ANY; 4492 optinfo.ip_opt_flags = 0; 4493 optinfo.ip_opt_ill_index = 0; 4494 4495 4496 /* 4497 * If options passed in, feed it for verification and handling 4498 */ 4499 if (tudr->OPT_length != 0) { 4500 int error; 4501 4502 error = 0; 4503 if (icmp_unitdata_opt_process(q, mp, &error, 4504 (void *)pktinfop) < 0) { 4505 /* failure */ 4506 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4507 icmp_ud_err(q, mp, error); 4508 return; 4509 } 4510 ASSERT(error == 0); 4511 /* 4512 * Note: Success in processing options. 
4513 * mp option buffer represented by 4514 * OPT_length/offset now potentially modified 4515 * and contain option setting results 4516 */ 4517 4518 } 4519 4520 if (v4dst == INADDR_ANY) 4521 v4dst = htonl(INADDR_LOOPBACK); 4522 4523 /* Check if our saved options are valid; update if not */ 4524 if (is_system_labeled() && 4525 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4526 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst) && 4527 !icmp_update_label(q, icmp, mp, v4dst)) { 4528 return; 4529 } 4530 4531 /* Protocol 255 contains full IP headers */ 4532 if (icmp->icmp_hdrincl) { 4533 freeb(mp); 4534 icmp_wput_hdrincl(q, mp1, icmp, pktinfop); 4535 return; 4536 } 4537 4538 4539 /* Add an IP header */ 4540 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4541 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 4542 if ((uchar_t *)ipha < mp1->b_datap->db_base || 4543 mp1->b_datap->db_ref != 1 || 4544 !OK_32PTR(ipha)) { 4545 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, 4546 BPRI_LO))) { 4547 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4548 icmp_ud_err(q, mp, ENOMEM); 4549 return; 4550 } 4551 mp1->b_cont = mp->b_cont; 4552 ipha = (ipha_t *)mp1->b_datap->db_lim; 4553 mp1->b_wptr = (uchar_t *)ipha; 4554 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4555 } 4556 #ifdef _BIG_ENDIAN 4557 /* Set version, header length, and tos */ 4558 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4559 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4560 icmp->icmp_type_of_service); 4561 /* Set ttl and protocol */ 4562 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4563 #else 4564 /* Set version, header length, and tos */ 4565 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4566 ((icmp->icmp_type_of_service << 8) | 4567 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4568 /* Set ttl and protocol */ 4569 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4570 #endif 4571 if (pktinfop->ip4_addr != INADDR_ANY) { 4572 
ipha->ipha_src = pktinfop->ip4_addr; 4573 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4574 } else { 4575 4576 /* 4577 * Copy our address into the packet. If this is zero, 4578 * ip will fill in the real source address. 4579 */ 4580 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4581 } 4582 4583 ipha->ipha_fragment_offset_and_flags = 0; 4584 4585 if (pktinfop->ip4_ill_index != 0) { 4586 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4587 } 4588 4589 4590 /* 4591 * For the socket of SOCK_RAW type, the checksum is provided in the 4592 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4593 * tell IP that the application has sent a complete IP header and not 4594 * to compute the transport checksum nor change the DF flag. 4595 */ 4596 ipha->ipha_ident = IP_HDR_INCLUDED; 4597 4598 /* Finish common formatting of the packet. */ 4599 mp1->b_rptr = (uchar_t *)ipha; 4600 4601 ip_len = mp1->b_wptr - (uchar_t *)ipha; 4602 if (mp1->b_cont != NULL) 4603 ip_len += msgdsize(mp1->b_cont); 4604 4605 /* 4606 * Set the length into the IP header. 4607 * If the length is greater than the maximum allowed by IP, 4608 * then free the message and return. Do not try and send it 4609 * as this can cause problems in layers below. 4610 */ 4611 if (ip_len > IP_MAXPACKET) { 4612 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4613 icmp_ud_err(q, mp, EMSGSIZE); 4614 return; 4615 } 4616 ipha->ipha_length = htons((uint16_t)ip_len); 4617 /* 4618 * Copy in the destination address from the T_UNITDATA 4619 * request 4620 */ 4621 ipha->ipha_dst = v4dst; 4622 4623 /* 4624 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4625 */ 4626 if (CLASSD(v4dst)) 4627 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4628 4629 /* Copy in options if any */ 4630 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4631 bcopy(icmp->icmp_ip_snd_options, 4632 &ipha[1], icmp->icmp_ip_snd_options_len); 4633 /* 4634 * Massage source route putting first source route in ipha_dst. 
4635 * Ignore the destination in the T_unitdata_req. 4636 */ 4637 (void) ip_massage_options(ipha, is->is_netstack); 4638 } 4639 4640 freeb(mp); 4641 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 4642 mblk_setcred(mp1, connp->conn_cred); 4643 ip_output_options(Q_TO_CONN(q), mp1, q, IP_WPUT, &optinfo); 4644 #undef ipha 4645 #undef tudr 4646 } 4647 4648 static boolean_t 4649 icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4650 { 4651 int err; 4652 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4653 icmp_stack_t *is = icmp->icmp_is; 4654 conn_t *connp = icmp->icmp_connp; 4655 4656 err = tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), dst, 4657 opt_storage, icmp->icmp_mac_exempt, 4658 is->is_netstack->netstack_ip); 4659 if (err == 0) { 4660 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4661 &icmp->icmp_label_len_v6, opt_storage); 4662 } 4663 if (err != 0) { 4664 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4665 DTRACE_PROBE4( 4666 tx__ip__log__drop__updatelabel__icmp6, 4667 char *, "queue(1) failed to update options(2) on mp(3)", 4668 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4669 icmp_ud_err(wq, mp, err); 4670 return (B_FALSE); 4671 } 4672 4673 icmp->icmp_v6lastdst = *dst; 4674 return (B_TRUE); 4675 } 4676 4677 /* 4678 * icmp_wput_ipv6(): 4679 * Assumes that icmp_wput did some sanity checking on the destination 4680 * address, but that the label may not yet be correct. 
4681 */ 4682 void 4683 icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) 4684 { 4685 ip6_t *ip6h; 4686 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 4687 mblk_t *mp1; 4688 int ip_hdr_len = IPV6_HDR_LEN; 4689 size_t ip_len; 4690 icmp_t *icmp = Q_TO_ICMP(q); 4691 icmp_stack_t *is = icmp->icmp_is; 4692 ip6_pkt_t ipp_s; /* For ancillary data options */ 4693 ip6_pkt_t *ipp = &ipp_s; 4694 ip6_pkt_t *tipp; 4695 uint32_t csum = 0; 4696 uint_t ignore = 0; 4697 uint_t option_exists = 0, is_sticky = 0; 4698 uint8_t *cp; 4699 uint8_t *nxthdr_ptr; 4700 in6_addr_t ip6_dst; 4701 4702 /* 4703 * If the local address is a mapped address return 4704 * an error. 4705 * It would be possible to send an IPv6 packet but the 4706 * response would never make it back to the application 4707 * since it is bound to a mapped address. 4708 */ 4709 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4710 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4711 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4712 return; 4713 } 4714 4715 ipp->ipp_fields = 0; 4716 ipp->ipp_sticky_ignored = 0; 4717 4718 /* 4719 * If TPI options passed in, feed it for verification and handling 4720 */ 4721 if (tudr_optlen != 0) { 4722 int error; 4723 4724 if (icmp_unitdata_opt_process(q, mp, &error, 4725 (void *)ipp) < 0) { 4726 /* failure */ 4727 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4728 icmp_ud_err(q, mp, error); 4729 return; 4730 } 4731 ignore = ipp->ipp_sticky_ignored; 4732 ASSERT(error == 0); 4733 } 4734 4735 if (sin6->sin6_scope_id != 0 && 4736 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4737 /* 4738 * IPPF_SCOPE_ID is special. It's neither a sticky 4739 * option nor ancillary data. It needs to be 4740 * explicitly set in options_exists. 
4741 */ 4742 option_exists |= IPPF_SCOPE_ID; 4743 } 4744 4745 /* 4746 * Compute the destination address 4747 */ 4748 ip6_dst = sin6->sin6_addr; 4749 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4750 ip6_dst = ipv6_loopback; 4751 4752 /* 4753 * If we're not going to the same destination as last time, then 4754 * recompute the label required. This is done in a separate routine to 4755 * avoid blowing up our stack here. 4756 */ 4757 if (is_system_labeled() && 4758 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst) && 4759 !icmp_update_label_v6(q, icmp, mp, &ip6_dst)) { 4760 return; 4761 } 4762 4763 /* 4764 * If there's a security label here, then we ignore any options the 4765 * user may try to set. We keep the peer's label as a hidden sticky 4766 * option. 4767 */ 4768 if (icmp->icmp_label_len_v6 > 0) { 4769 ignore &= ~IPPF_HOPOPTS; 4770 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4771 } 4772 4773 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4774 (ipp->ipp_fields == 0)) { 4775 /* No sticky options nor ancillary data. */ 4776 goto no_options; 4777 } 4778 4779 /* 4780 * Go through the options figuring out where each is going to 4781 * come from and build two masks. The first mask indicates if 4782 * the option exists at all. The second mask indicates if the 4783 * option is sticky or ancillary. 
4784 */ 4785 if (!(ignore & IPPF_HOPOPTS)) { 4786 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4787 option_exists |= IPPF_HOPOPTS; 4788 ip_hdr_len += ipp->ipp_hopoptslen; 4789 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4790 option_exists |= IPPF_HOPOPTS; 4791 is_sticky |= IPPF_HOPOPTS; 4792 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4793 } 4794 } 4795 4796 if (!(ignore & IPPF_RTHDR)) { 4797 if (ipp->ipp_fields & IPPF_RTHDR) { 4798 option_exists |= IPPF_RTHDR; 4799 ip_hdr_len += ipp->ipp_rthdrlen; 4800 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4801 option_exists |= IPPF_RTHDR; 4802 is_sticky |= IPPF_RTHDR; 4803 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4804 } 4805 } 4806 4807 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4808 /* 4809 * Need to have a router header to use these. 4810 */ 4811 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4812 option_exists |= IPPF_RTDSTOPTS; 4813 ip_hdr_len += ipp->ipp_rtdstoptslen; 4814 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4815 option_exists |= IPPF_RTDSTOPTS; 4816 is_sticky |= IPPF_RTDSTOPTS; 4817 ip_hdr_len += 4818 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4819 } 4820 } 4821 4822 if (!(ignore & IPPF_DSTOPTS)) { 4823 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4824 option_exists |= IPPF_DSTOPTS; 4825 ip_hdr_len += ipp->ipp_dstoptslen; 4826 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4827 option_exists |= IPPF_DSTOPTS; 4828 is_sticky |= IPPF_DSTOPTS; 4829 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4830 } 4831 } 4832 4833 if (!(ignore & IPPF_IFINDEX)) { 4834 if (ipp->ipp_fields & IPPF_IFINDEX) { 4835 option_exists |= IPPF_IFINDEX; 4836 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4837 option_exists |= IPPF_IFINDEX; 4838 is_sticky |= IPPF_IFINDEX; 4839 } 4840 } 4841 4842 if (!(ignore & IPPF_ADDR)) { 4843 if (ipp->ipp_fields & IPPF_ADDR) { 4844 option_exists |= IPPF_ADDR; 4845 } else if 
(icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4846 option_exists |= IPPF_ADDR; 4847 is_sticky |= IPPF_ADDR; 4848 } 4849 } 4850 4851 if (!(ignore & IPPF_DONTFRAG)) { 4852 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4853 option_exists |= IPPF_DONTFRAG; 4854 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4855 option_exists |= IPPF_DONTFRAG; 4856 is_sticky |= IPPF_DONTFRAG; 4857 } 4858 } 4859 4860 if (!(ignore & IPPF_USE_MIN_MTU)) { 4861 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4862 option_exists |= IPPF_USE_MIN_MTU; 4863 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4864 IPPF_USE_MIN_MTU) { 4865 option_exists |= IPPF_USE_MIN_MTU; 4866 is_sticky |= IPPF_USE_MIN_MTU; 4867 } 4868 } 4869 4870 if (!(ignore & IPPF_NEXTHOP)) { 4871 if (ipp->ipp_fields & IPPF_NEXTHOP) { 4872 option_exists |= IPPF_NEXTHOP; 4873 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) { 4874 option_exists |= IPPF_NEXTHOP; 4875 is_sticky |= IPPF_NEXTHOP; 4876 } 4877 } 4878 4879 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 4880 option_exists |= IPPF_HOPLIMIT; 4881 /* IPV6_HOPLIMIT can never be sticky */ 4882 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 4883 4884 if (!(ignore & IPPF_UNICAST_HOPS) && 4885 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 4886 option_exists |= IPPF_UNICAST_HOPS; 4887 is_sticky |= IPPF_UNICAST_HOPS; 4888 } 4889 4890 if (!(ignore & IPPF_MULTICAST_HOPS) && 4891 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 4892 option_exists |= IPPF_MULTICAST_HOPS; 4893 is_sticky |= IPPF_MULTICAST_HOPS; 4894 } 4895 4896 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 4897 /* This is a sticky socket option only */ 4898 option_exists |= IPPF_NO_CKSUM; 4899 is_sticky |= IPPF_NO_CKSUM; 4900 } 4901 4902 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 4903 /* This is a sticky socket option only */ 4904 option_exists |= IPPF_RAW_CKSUM; 4905 is_sticky |= IPPF_RAW_CKSUM; 4906 } 4907 4908 if (!(ignore 
& IPPF_TCLASS)) { 4909 if (ipp->ipp_fields & IPPF_TCLASS) { 4910 option_exists |= IPPF_TCLASS; 4911 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 4912 option_exists |= IPPF_TCLASS; 4913 is_sticky |= IPPF_TCLASS; 4914 } 4915 } 4916 4917 no_options: 4918 4919 /* 4920 * If any options carried in the ip6i_t were specified, we 4921 * need to account for the ip6i_t in the data we'll be sending 4922 * down. 4923 */ 4924 if (option_exists & IPPF_HAS_IP6I) 4925 ip_hdr_len += sizeof (ip6i_t); 4926 4927 /* check/fix buffer config, setup pointers into it */ 4928 mp1 = mp->b_cont; 4929 ip6h = (ip6_t *)&mp1->b_rptr[-ip_hdr_len]; 4930 if ((mp1->b_datap->db_ref != 1) || 4931 ((unsigned char *)ip6h < mp1->b_datap->db_base) || 4932 !OK_32PTR(ip6h)) { 4933 /* Try to get everything in a single mblk next time */ 4934 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 4935 icmp->icmp_max_hdr_len = ip_hdr_len; 4936 (void) mi_set_sth_wroff(RD(q), 4937 icmp->icmp_max_hdr_len + is->is_wroff_extra); 4938 } 4939 mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); 4940 if (!mp1) { 4941 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4942 icmp_ud_err(q, mp, ENOMEM); 4943 return; 4944 } 4945 mp1->b_cont = mp->b_cont; 4946 mp1->b_wptr = mp1->b_datap->db_lim; 4947 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 4948 } 4949 mp1->b_rptr = (unsigned char *)ip6h; 4950 ip6i = (ip6i_t *)ip6h; 4951 4952 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 4953 if (option_exists & IPPF_HAS_IP6I) { 4954 ip6h = (ip6_t *)&ip6i[1]; 4955 ip6i->ip6i_flags = 0; 4956 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4957 4958 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 4959 if (option_exists & IPPF_SCOPE_ID) { 4960 ip6i->ip6i_flags |= IP6I_IFINDEX; 4961 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 4962 } else if (option_exists & IPPF_IFINDEX) { 4963 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 4964 ASSERT(tipp->ipp_ifindex != 0); 4965 ip6i->ip6i_flags |= IP6I_IFINDEX; 4966 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 4967 } 4968 4969 if (option_exists & IPPF_RAW_CKSUM) { 4970 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 4971 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 4972 } 4973 4974 if (option_exists & IPPF_NO_CKSUM) { 4975 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 4976 } 4977 4978 if (option_exists & IPPF_ADDR) { 4979 /* 4980 * Enable per-packet source address verification if 4981 * IPV6_PKTINFO specified the source address. 4982 * ip6_src is set in the transport's _wput function. 4983 */ 4984 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 4985 } 4986 4987 if (option_exists & IPPF_DONTFRAG) { 4988 ip6i->ip6i_flags |= IP6I_DONTFRAG; 4989 } 4990 4991 if (option_exists & IPPF_USE_MIN_MTU) { 4992 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 4993 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 4994 } 4995 4996 if (option_exists & IPPF_NEXTHOP) { 4997 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 4998 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 4999 ip6i->ip6i_flags |= IP6I_NEXTHOP; 5000 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 5001 } 5002 5003 /* 5004 * tell IP this is an ip6i_t private header 5005 */ 5006 ip6i->ip6i_nxt = IPPROTO_RAW; 5007 } 5008 5009 /* Initialize IPv6 header */ 5010 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5011 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 5012 5013 /* Set the hoplimit of the outgoing packet. 
*/ 5014 if (option_exists & IPPF_HOPLIMIT) { 5015 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 5016 ip6h->ip6_hops = ipp->ipp_hoplimit; 5017 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5018 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 5019 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 5020 if (option_exists & IPPF_MULTICAST_HOPS) 5021 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5022 } else { 5023 ip6h->ip6_hops = icmp->icmp_ttl; 5024 if (option_exists & IPPF_UNICAST_HOPS) 5025 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5026 } 5027 5028 if (option_exists & IPPF_ADDR) { 5029 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 5030 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 5031 ip6h->ip6_src = tipp->ipp_addr; 5032 } else { 5033 /* 5034 * The source address was not set using IPV6_PKTINFO. 5035 * First look at the bound source. 5036 * If unspecified fallback to __sin6_src_id. 5037 */ 5038 ip6h->ip6_src = icmp->icmp_v6src; 5039 if (sin6->__sin6_src_id != 0 && 5040 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 5041 ip_srcid_find_id(sin6->__sin6_src_id, 5042 &ip6h->ip6_src, icmp->icmp_zoneid, 5043 is->is_netstack); 5044 } 5045 } 5046 5047 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 5048 cp = (uint8_t *)&ip6h[1]; 5049 5050 /* 5051 * Here's where we have to start stringing together 5052 * any extension headers in the right order: 5053 * Hop-by-hop, destination, routing, and final destination opts. 
5054 */ 5055 if (option_exists & IPPF_HOPOPTS) { 5056 /* Hop-by-hop options */ 5057 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 5058 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 5059 5060 *nxthdr_ptr = IPPROTO_HOPOPTS; 5061 nxthdr_ptr = &hbh->ip6h_nxt; 5062 5063 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 5064 cp += tipp->ipp_hopoptslen; 5065 } 5066 /* 5067 * En-route destination options 5068 * Only do them if there's a routing header as well 5069 */ 5070 if (option_exists & IPPF_RTDSTOPTS) { 5071 ip6_dest_t *dst = (ip6_dest_t *)cp; 5072 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 5073 5074 *nxthdr_ptr = IPPROTO_DSTOPTS; 5075 nxthdr_ptr = &dst->ip6d_nxt; 5076 5077 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 5078 cp += tipp->ipp_rtdstoptslen; 5079 } 5080 /* 5081 * Routing header next 5082 */ 5083 if (option_exists & IPPF_RTHDR) { 5084 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 5085 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 5086 5087 *nxthdr_ptr = IPPROTO_ROUTING; 5088 nxthdr_ptr = &rt->ip6r_nxt; 5089 5090 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 5091 cp += tipp->ipp_rthdrlen; 5092 } 5093 /* 5094 * Do ultimate destination options 5095 */ 5096 if (option_exists & IPPF_DSTOPTS) { 5097 ip6_dest_t *dest = (ip6_dest_t *)cp; 5098 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 5099 5100 *nxthdr_ptr = IPPROTO_DSTOPTS; 5101 nxthdr_ptr = &dest->ip6d_nxt; 5102 5103 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 5104 cp += tipp->ipp_dstoptslen; 5105 } 5106 5107 /* 5108 * Now set the last header pointer to the proto passed in 5109 */ 5110 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 5111 *nxthdr_ptr = icmp->icmp_proto; 5112 5113 /* 5114 * Copy in the destination address 5115 */ 5116 ip6h->ip6_dst = ip6_dst; 5117 5118 ip6h->ip6_vcf = 5119 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 5120 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 5121 5122 if (option_exists & IPPF_TCLASS) { 5123 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 5124 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 5125 tipp->ipp_tclass); 5126 } 5127 if (option_exists & IPPF_RTHDR) { 5128 ip6_rthdr_t *rth; 5129 5130 /* 5131 * Perform any processing needed for source routing. 5132 * We know that all extension headers will be in the same mblk 5133 * as the IPv6 header. 5134 */ 5135 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 5136 if (rth != NULL && rth->ip6r_segleft != 0) { 5137 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 5138 /* 5139 * Drop packet - only support Type 0 routing. 5140 * Notify the application as well. 5141 */ 5142 icmp_ud_err(q, mp, EPROTO); 5143 BUMP_MIB(&is->is_rawip_mib, 5144 rawipOutErrors); 5145 return; 5146 } 5147 /* 5148 * rth->ip6r_len is twice the number of 5149 * addresses in the header 5150 */ 5151 if (rth->ip6r_len & 0x1) { 5152 icmp_ud_err(q, mp, EPROTO); 5153 BUMP_MIB(&is->is_rawip_mib, 5154 rawipOutErrors); 5155 return; 5156 } 5157 /* 5158 * Shuffle the routing header and ip6_dst 5159 * addresses, and get the checksum difference 5160 * between the first hop (in ip6_dst) and 5161 * the destination (in the last routing hdr entry). 5162 */ 5163 csum = ip_massage_options_v6(ip6h, rth, 5164 is->is_netstack); 5165 /* 5166 * Verify that the first hop isn't a mapped address. 5167 * Routers along the path need to do this verification 5168 * for subsequent hops. 5169 */ 5170 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 5171 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5172 BUMP_MIB(&is->is_rawip_mib, 5173 rawipOutErrors); 5174 return; 5175 } 5176 } 5177 } 5178 5179 ip_len = mp1->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 5180 if (mp1->b_cont != NULL) 5181 ip_len += msgdsize(mp1->b_cont); 5182 5183 /* 5184 * Set the length into the IP header. 5185 * If the length is greater than the maximum allowed by IP, 5186 * then free the message and return. Do not try and send it 5187 * as this can cause problems in layers below. 
5188 */ 5189 if (ip_len > IP_MAXPACKET) { 5190 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5191 icmp_ud_err(q, mp, EMSGSIZE); 5192 return; 5193 } 5194 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 5195 uint_t cksum_off; /* From ip6i == mp1->b_rptr */ 5196 uint16_t *cksum_ptr; 5197 uint_t ext_hdrs_len; 5198 5199 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 5200 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 5201 icmp->icmp_checksum_off == 2); 5202 5203 /* 5204 * We make it easy for IP to include our pseudo header 5205 * by putting our length in uh_checksum, modified (if 5206 * we have a routing header) by the checksum difference 5207 * between the ultimate destination and first hop addresses. 5208 * Note: ICMPv6 must always checksum the packet. 5209 */ 5210 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 5211 if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) { 5212 if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) { 5213 BUMP_MIB(&is->is_rawip_mib, 5214 rawipOutErrors); 5215 freemsg(mp); 5216 return; 5217 } 5218 ip6i = (ip6i_t *)mp1->b_rptr; 5219 if (ip6i->ip6i_nxt == IPPROTO_RAW) 5220 ip6h = (ip6_t *)&ip6i[1]; 5221 else 5222 ip6h = (ip6_t *)ip6i; 5223 } 5224 /* Add payload length to checksum */ 5225 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 5226 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 5227 csum += htons(ip_len - ext_hdrs_len); 5228 5229 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 5230 csum = (csum & 0xFFFF) + (csum >> 16); 5231 *cksum_ptr = (uint16_t)csum; 5232 } 5233 5234 #ifdef _LITTLE_ENDIAN 5235 ip_len = htons(ip_len); 5236 #endif 5237 ip6h->ip6_plen = (uint16_t)ip_len; 5238 5239 freeb(mp); 5240 5241 /* We're done. 
Pass the packet to IP */ 5242 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 5243 ip_output_v6(icmp->icmp_connp, mp1, q, IP_WPUT); 5244 } 5245 5246 static void 5247 icmp_wput_other(queue_t *q, mblk_t *mp) 5248 { 5249 uchar_t *rptr = mp->b_rptr; 5250 struct iocblk *iocp; 5251 #define tudr ((struct T_unitdata_req *)rptr) 5252 conn_t *connp = Q_TO_CONN(q); 5253 icmp_t *icmp = connp->conn_icmp; 5254 icmp_stack_t *is = icmp->icmp_is; 5255 cred_t *cr; 5256 5257 cr = DB_CREDDEF(mp, connp->conn_cred); 5258 5259 switch (mp->b_datap->db_type) { 5260 case M_PROTO: 5261 case M_PCPROTO: 5262 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5263 /* 5264 * If the message does not contain a PRIM_type, 5265 * throw it away. 5266 */ 5267 freemsg(mp); 5268 return; 5269 } 5270 switch (((union T_primitives *)rptr)->type) { 5271 case T_ADDR_REQ: 5272 icmp_addr_req(q, mp); 5273 return; 5274 case O_T_BIND_REQ: 5275 case T_BIND_REQ: 5276 icmp_bind(q, mp); 5277 return; 5278 case T_CONN_REQ: 5279 icmp_connect(q, mp); 5280 return; 5281 case T_CAPABILITY_REQ: 5282 icmp_capability_req(q, mp); 5283 return; 5284 case T_INFO_REQ: 5285 icmp_info_req(q, mp); 5286 return; 5287 case T_UNITDATA_REQ: 5288 /* 5289 * If a T_UNITDATA_REQ gets here, the address must 5290 * be bad. Valid T_UNITDATA_REQs are found above 5291 * and break to below this switch. 5292 */ 5293 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5294 return; 5295 case T_UNBIND_REQ: 5296 icmp_unbind(q, mp); 5297 return; 5298 5299 case T_SVR4_OPTMGMT_REQ: 5300 if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get, 5301 cr)) { 5302 /* Only IP can return anything meaningful */ 5303 (void) svr4_optcom_req(q, mp, cr, 5304 &icmp_opt_obj, B_TRUE); 5305 } 5306 return; 5307 5308 case T_OPTMGMT_REQ: 5309 /* Only IP can return anything meaningful */ 5310 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE); 5311 return; 5312 5313 case T_DISCON_REQ: 5314 icmp_disconnect(q, mp); 5315 return; 5316 5317 /* The following TPI message is not supported by icmp. 
*/ 5318 case O_T_CONN_RES: 5319 case T_CONN_RES: 5320 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5321 return; 5322 5323 /* The following 3 TPI requests are illegal for icmp. */ 5324 case T_DATA_REQ: 5325 case T_EXDATA_REQ: 5326 case T_ORDREL_REQ: 5327 freemsg(mp); 5328 (void) putctl1(RD(q), M_ERROR, EPROTO); 5329 return; 5330 default: 5331 break; 5332 } 5333 break; 5334 case M_IOCTL: 5335 iocp = (struct iocblk *)mp->b_rptr; 5336 switch (iocp->ioc_cmd) { 5337 case TI_GETPEERNAME: 5338 if (icmp->icmp_state != TS_DATA_XFER) { 5339 /* 5340 * If a default destination address has not 5341 * been associated with the stream, then we 5342 * don't know the peer's name. 5343 */ 5344 iocp->ioc_error = ENOTCONN; 5345 err_ret:; 5346 iocp->ioc_count = 0; 5347 mp->b_datap->db_type = M_IOCACK; 5348 qreply(q, mp); 5349 return; 5350 } 5351 /* FALLTHRU */ 5352 case TI_GETMYNAME: 5353 /* 5354 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5355 * need to copyin the user's strbuf structure. 5356 * Processing will continue in the M_IOCDATA case 5357 * below. 5358 */ 5359 mi_copyin(q, mp, NULL, 5360 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5361 return; 5362 case ND_SET: 5363 /* nd_getset performs the necessary error checking */ 5364 case ND_GET: 5365 if (nd_getset(q, is->is_nd, mp)) { 5366 qreply(q, mp); 5367 return; 5368 } 5369 break; 5370 default: 5371 break; 5372 } 5373 break; 5374 case M_IOCDATA: 5375 icmp_wput_iocdata(q, mp); 5376 return; 5377 default: 5378 break; 5379 } 5380 ip_wput(q, mp); 5381 } 5382 5383 /* 5384 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5385 * messages. 5386 */ 5387 static void 5388 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5389 { 5390 mblk_t *mp1; 5391 STRUCT_HANDLE(strbuf, sb); 5392 icmp_t *icmp; 5393 in6_addr_t v6addr; 5394 ipaddr_t v4addr; 5395 uint32_t flowinfo = 0; 5396 int addrlen; 5397 5398 /* Make sure it is one of ours. 
*/ 5399 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5400 case TI_GETMYNAME: 5401 case TI_GETPEERNAME: 5402 break; 5403 default: 5404 icmp = Q_TO_ICMP(q); 5405 ip_output(icmp->icmp_connp, mp, q, IP_WPUT); 5406 return; 5407 } 5408 switch (mi_copy_state(q, mp, &mp1)) { 5409 case -1: 5410 return; 5411 case MI_COPY_CASE(MI_COPY_IN, 1): 5412 break; 5413 case MI_COPY_CASE(MI_COPY_OUT, 1): 5414 /* 5415 * The address has been copied out, so now 5416 * copyout the strbuf. 5417 */ 5418 mi_copyout(q, mp); 5419 return; 5420 case MI_COPY_CASE(MI_COPY_OUT, 2): 5421 /* 5422 * The address and strbuf have been copied out. 5423 * We're done, so just acknowledge the original 5424 * M_IOCTL. 5425 */ 5426 mi_copy_done(q, mp, 0); 5427 return; 5428 default: 5429 /* 5430 * Something strange has happened, so acknowledge 5431 * the original M_IOCTL with an EPROTO error. 5432 */ 5433 mi_copy_done(q, mp, EPROTO); 5434 return; 5435 } 5436 /* 5437 * Now we have the strbuf structure for TI_GETMYNAME 5438 * and TI_GETPEERNAME. Next we copyout the requested 5439 * address and then we'll copyout the strbuf. 5440 */ 5441 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5442 (void *)mp1->b_rptr); 5443 icmp = Q_TO_ICMP(q); 5444 if (icmp->icmp_family == AF_INET) 5445 addrlen = sizeof (sin_t); 5446 else 5447 addrlen = sizeof (sin6_t); 5448 5449 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5450 mi_copy_done(q, mp, EINVAL); 5451 return; 5452 } 5453 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5454 case TI_GETMYNAME: 5455 if (icmp->icmp_family == AF_INET) { 5456 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5457 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 5458 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5459 v4addr = V4_PART_OF_V6(icmp->icmp_v6src); 5460 } else { 5461 /* 5462 * INADDR_ANY 5463 * icmp_v6src is not set, we might be bound to 5464 * broadcast/multicast. 
Use icmp_bound_v6src as 5465 * local address instead (that could 5466 * also still be INADDR_ANY) 5467 */ 5468 v4addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 5469 } 5470 } else { 5471 /* icmp->icmp_family == AF_INET6 */ 5472 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5473 v6addr = icmp->icmp_v6src; 5474 } else { 5475 /* 5476 * UNSPECIFIED 5477 * icmp_v6src is not set, we might be bound to 5478 * broadcast/multicast. Use icmp_bound_v6src as 5479 * local address instead (that could 5480 * also still be UNSPECIFIED) 5481 */ 5482 v6addr = icmp->icmp_bound_v6src; 5483 } 5484 } 5485 break; 5486 case TI_GETPEERNAME: 5487 if (icmp->icmp_family == AF_INET) { 5488 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5489 v4addr = V4_PART_OF_V6(icmp->icmp_v6dst); 5490 } else { 5491 /* icmp->icmp_family == AF_INET6) */ 5492 v6addr = icmp->icmp_v6dst; 5493 flowinfo = icmp->icmp_flowinfo; 5494 } 5495 break; 5496 default: 5497 mi_copy_done(q, mp, EPROTO); 5498 return; 5499 } 5500 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5501 if (!mp1) 5502 return; 5503 5504 if (icmp->icmp_family == AF_INET) { 5505 sin_t *sin; 5506 5507 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 5508 sin = (sin_t *)mp1->b_rptr; 5509 mp1->b_wptr = (uchar_t *)&sin[1]; 5510 *sin = sin_null; 5511 sin->sin_family = AF_INET; 5512 sin->sin_addr.s_addr = v4addr; 5513 } else { 5514 /* icmp->icmp_family == AF_INET6 */ 5515 sin6_t *sin6; 5516 5517 ASSERT(icmp->icmp_family == AF_INET6); 5518 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 5519 sin6 = (sin6_t *)mp1->b_rptr; 5520 mp1->b_wptr = (uchar_t *)&sin6[1]; 5521 *sin6 = sin6_null; 5522 sin6->sin6_family = AF_INET6; 5523 sin6->sin6_flowinfo = flowinfo; 5524 sin6->sin6_addr = v6addr; 5525 } 5526 /* Copy out the address */ 5527 mi_copyout(q, mp); 5528 } 5529 5530 static int 5531 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5532 void *thisdg_attrs) 5533 { 5534 conn_t *connp = Q_TO_CONN(q); 5535 struct T_unitdata_req *udreqp; 5536 
int is_absreq_failure; 5537 cred_t *cr; 5538 5539 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5540 *errorp = 0; 5541 5542 cr = DB_CREDDEF(mp, connp->conn_cred); 5543 5544 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5545 udreqp->OPT_offset, cr, &icmp_opt_obj, 5546 thisdg_attrs, &is_absreq_failure); 5547 5548 if (*errorp != 0) { 5549 /* 5550 * Note: No special action needed in this 5551 * module for "is_absreq_failure" 5552 */ 5553 return (-1); /* failure */ 5554 } 5555 ASSERT(is_absreq_failure == 0); 5556 return (0); /* success */ 5557 } 5558 5559 void 5560 icmp_ddi_init(void) 5561 { 5562 icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5563 icmp_opt_obj.odb_opt_arr_cnt); 5564 5565 /* 5566 * We want to be informed each time a stack is created or 5567 * destroyed in the kernel, so we can maintain the 5568 * set of icmp_stack_t's. 5569 */ 5570 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); 5571 } 5572 5573 void 5574 icmp_ddi_destroy(void) 5575 { 5576 netstack_unregister(NS_ICMP); 5577 } 5578 5579 /* 5580 * Initialize the ICMP stack instance. 5581 */ 5582 static void * 5583 rawip_stack_init(netstackid_t stackid, netstack_t *ns) 5584 { 5585 icmp_stack_t *is; 5586 icmpparam_t *pa; 5587 5588 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); 5589 is->is_netstack = ns; 5590 5591 pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); 5592 is->is_param_arr = pa; 5593 bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); 5594 5595 (void) icmp_param_register(&is->is_nd, 5596 is->is_param_arr, A_CNT(icmp_param_arr)); 5597 is->is_ksp = rawip_kstat_init(stackid); 5598 return (is); 5599 } 5600 5601 /* 5602 * Free the ICMP stack instance. 
5603 */ 5604 static void 5605 rawip_stack_fini(netstackid_t stackid, void *arg) 5606 { 5607 icmp_stack_t *is = (icmp_stack_t *)arg; 5608 5609 nd_free(&is->is_nd); 5610 kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); 5611 is->is_param_arr = NULL; 5612 5613 rawip_kstat_fini(stackid, is->is_ksp); 5614 is->is_ksp = NULL; 5615 kmem_free(is, sizeof (*is)); 5616 } 5617 5618 static void * 5619 rawip_kstat_init(netstackid_t stackid) { 5620 kstat_t *ksp; 5621 5622 rawip_named_kstat_t template = { 5623 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5624 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5625 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5626 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5627 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5628 }; 5629 5630 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", 5631 KSTAT_TYPE_NAMED, 5632 NUM_OF_FIELDS(rawip_named_kstat_t), 5633 0, stackid); 5634 if (ksp == NULL || ksp->ks_data == NULL) 5635 return (NULL); 5636 5637 bcopy(&template, ksp->ks_data, sizeof (template)); 5638 ksp->ks_update = rawip_kstat_update; 5639 ksp->ks_private = (void *)(uintptr_t)stackid; 5640 5641 kstat_install(ksp); 5642 return (ksp); 5643 } 5644 5645 static void 5646 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) 5647 { 5648 if (ksp != NULL) { 5649 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 5650 kstat_delete_netstack(ksp, stackid); 5651 } 5652 } 5653 5654 static int 5655 rawip_kstat_update(kstat_t *ksp, int rw) 5656 { 5657 rawip_named_kstat_t *rawipkp; 5658 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; 5659 netstack_t *ns; 5660 icmp_stack_t *is; 5661 5662 if ((ksp == NULL) || (ksp->ks_data == NULL)) 5663 return (EIO); 5664 5665 if (rw == KSTAT_WRITE) 5666 return (EACCES); 5667 5668 rawipkp = (rawip_named_kstat_t *)ksp->ks_data; 5669 5670 ns = netstack_find_by_stackid(stackid); 5671 if (ns == NULL) 5672 return (-1); 5673 is = ns->netstack_icmp; 5674 if (is == NULL) { 5675 netstack_rele(ns); 5676 return (-1); 5677 } 5678 
rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; 5679 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; 5680 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; 5681 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; 5682 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; 5683 netstack_rele(ns); 5684 return (0); 5685 } 5686