1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/


#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#define	_SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/timod.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/policy.h>
#include <sys/priv.h>
#include <sys/zone.h>
#include <sys/time.h>

#include <sys/socket.h>
#include <sys/isa_defs.h>
#include <sys/suntpi.h>
#include <sys/xti_inet.h>
#include <sys/netstack.h>

#include <net/route.h>
#include <net/if.h>

#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/mi.h>
#include <inet/nd.h>
#include <inet/optcom.h>
#include <inet/snmpcom.h>
#include <inet/kstatcom.h>
#include <inet/rawip_impl.h>

#include <netinet/ip_mroute.h>
#include <inet/tcp.h>
#include <net/pfkeyv2.h>
#include <inet/ipsec_info.h>
#include <inet/ipclassifier.h>

#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>

#include <inet/ip_ire.h>
#include <inet/ip_if.h>

#include <inet/ip_impl.h>

/*
 * Synchronization notes:
 *
 * RAWIP is MT and uses the usual kernel synchronization primitives. There is
 * one lock, icmp_rwlock. We also use conn_lock when updating things
 * which affect the IP classifier lookup.
 * The lock order is icmp_rwlock -> conn_lock.
 *
 * The icmp_rwlock:
 * This protects most of the other fields in the icmp_t. The exact list of
 * fields which are protected by each of the above locks is documented in
 * the icmp_t structure definition.
 *
 * Plumbing notes:
 * ICMP is always a device driver.
* For compatibility with mibopen() code
 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough
 * dummy module.
 */

/*
 * Forward declarations for the routines in this file.  icmp_opt_set and
 * icmp_opt_get are the only ones here declared without "static".
 */
static void	icmp_addr_req(queue_t *q, mblk_t *mp);
static void	icmp_bind(queue_t *q, mblk_t *mp);
static void	icmp_bind_proto(queue_t *q);
static void	icmp_bind_result(conn_t *, mblk_t *);
static void	icmp_bind_ack(conn_t *, mblk_t *mp);
static void	icmp_bind_error(conn_t *, mblk_t *mp);
static int	icmp_build_hdrs(icmp_t *icmp);
static void	icmp_capability_req(queue_t *q, mblk_t *mp);
static int	icmp_close(queue_t *q);
static void	icmp_connect(queue_t *q, mblk_t *mp);
static void	icmp_disconnect(queue_t *q, mblk_t *mp);
static void	icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
		    t_scalar_t t_error, int sys_error);
static void	icmp_icmp_error(queue_t *q, mblk_t *mp);
static void	icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
static void	icmp_info_req(queue_t *q, mblk_t *mp);
static void	icmp_input(void *, mblk_t *, void *);
static mblk_t	*icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim,
		    t_scalar_t addr_length, in_port_t);
static int	icmp_open(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp, boolean_t isv6);
static int	icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static void	icmp_output(queue_t *q, mblk_t *mp);
static int	icmp_unitdata_opt_process(queue_t *q, mblk_t *mp,
		    int *errorp, void *thisdg_attrs);
static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
int		icmp_opt_set(queue_t *q, uint_t optset_context,
		    int level, int name, uint_t inlen,
		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
		    void *thisdg_attrs, cred_t *cr, mblk_t *mblk);
int		icmp_opt_get(queue_t *q, int level, int name,
		    uchar_t *ptr);
static int	icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt);
static int	icmp_param_set(queue_t *q, mblk_t *mp, char *value,
		    caddr_t cp, cred_t *cr);
static int	icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
		    uchar_t *ptr, int len);
static int	icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static void	icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
static void	icmp_unbind(queue_t *q, mblk_t *mp);
static void	icmp_wput(queue_t *q, mblk_t *mp);
static void	icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6,
		    t_scalar_t tudr_optlen);
static void	icmp_wput_other(queue_t *q, mblk_t *mp);
static void	icmp_wput_iocdata(queue_t *q, mblk_t *mp);
static void	icmp_wput_restricted(queue_t *q, mblk_t *mp);

/* Per-netstack instance setup and teardown. */
static void	*rawip_stack_init(netstackid_t stackid, netstack_t *ns);
static void	rawip_stack_fini(netstackid_t stackid, void *arg);

/* Per-netstack kstat setup, teardown and update. */
static void	*rawip_kstat_init(netstackid_t stackid);
static void	rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp);
static int	rawip_kstat_update(kstat_t *kp, int rw);


/*
 * STREAMS module information shared by every qinit below.  Per
 * module_info(9S) the initializers are, in order: module id, module name,
 * minimum packet size, maximum packet size, high water mark, low water mark.
 */
static struct module_info icmp_mod_info =  {
	5707, "icmp", 1, INFPSZ, 512, 128
};

/*
 * Entry points for ICMP as a device.
 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices.
*/
/*
 * Read-side queue for /dev/icmp.  Only the open and close entry points are
 * supplied; the put and service slots are NULL.
 */
static struct qinit icmprinitv4 = {
	NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info
};

/* Read-side queue for /dev/icmp6; differs only in the open routine. */
static struct qinit icmprinitv6 = {
	NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info
};

/*
 * Write-side queue shared by both devices: icmp_wput is the put procedure
 * and ip_wsrv the service procedure.
 */
static struct qinit icmpwinit = {
	(pfi_t)icmp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &icmp_mod_info
};

/* For AF_INET aka /dev/icmp */
struct streamtab icmpinfov4 = {
	&icmprinitv4, &icmpwinit
};

/* For AF_INET6 aka /dev/icmp6 */
struct streamtab icmpinfov6 = {
	&icmprinitv6, &icmpwinit
};

static sin_t	sin_null;	/* Zero address for quick clears */
static sin6_t	sin6_null;	/* Zero address for quick clears */

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack icmp_g_t_info_ack = {
	T_INFO_ACK,
	IP_MAXPACKET,	 /* TSDU_size.  icmp allows maximum size messages. */
	T_INVALID,	/* ETSDU_size.  icmp does not support expedited data. */
	T_INVALID,	/* CDATA_size.  icmp does not support connect data. */
	T_INVALID,	/* DDATA_size.  icmp does not support disconnect data. */
	0,		/* ADDR_size - filled in later. */
	0,		/* OPT_size - not initialized here */
	IP_MAXPACKET,	/* TIDU_size.  icmp allows maximum size messages. */
	T_CLTS,		/* SERV_type.  icmp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from icmp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/*
 * Table of ND variables supported by icmp.  These are loaded into is_nd
 * when the stack instance is created.
 * All of these are alterable, within the min/max values given, at run time.
 */
static icmpparam_t	icmp_param_arr[] = {
	/* min	max	value	name */
	{ 0,	128,	32,	"icmp_wroff_extra" },
	{ 1,	255,	255,	"icmp_ipv4_ttl" },
	{ 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"},
	{ 0,	1,	1,	"icmp_bsd_compat" },
	{ 4096,	65536,	8192,	"icmp_xmit_hiwat"},
	{ 0,	65536,	1024,	"icmp_xmit_lowat"},
	{ 4096,	65536,	8192,	"icmp_recv_hiwat"},
	{ 65536, 1024*1024*1024, 256*1024,	"icmp_max_buf"},
};
/*
 * Shorthand for the current value of each tunable.  The array index in each
 * macro must stay in step with the position of the like-named entry in
 * icmp_param_arr above.
 * NOTE(review): is_param_arr is not defined in this part of the file;
 * presumably it is the per-stack copy of icmp_param_arr -- confirm.
 */
#define	is_wroff_extra			is_param_arr[0].icmp_param_value
#define	is_ipv4_ttl			is_param_arr[1].icmp_param_value
#define	is_ipv6_hoplimit		is_param_arr[2].icmp_param_value
#define	is_bsd_compat			is_param_arr[3].icmp_param_value
#define	is_xmit_hiwat			is_param_arr[4].icmp_param_value
#define	is_xmit_lowat			is_param_arr[5].icmp_param_value
#define	is_recv_hiwat			is_param_arr[6].icmp_param_value
#define	is_max_buf			is_param_arr[7].icmp_param_value

/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to icmp_wput.
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP
 * protocol type placed in the message following the address. A T_BIND_ACK
 * message is returned by ip_bind_v4/v6.
245 */ 246 static void 247 icmp_bind(queue_t *q, mblk_t *mp) 248 { 249 sin_t *sin; 250 sin6_t *sin6; 251 mblk_t *mp1; 252 struct T_bind_req *tbr; 253 icmp_t *icmp; 254 conn_t *connp = Q_TO_CONN(q); 255 256 icmp = connp->conn_icmp; 257 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 258 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 259 "icmp_bind: bad req, len %u", 260 (uint_t)(mp->b_wptr - mp->b_rptr)); 261 icmp_err_ack(q, mp, TPROTO, 0); 262 return; 263 } 264 if (icmp->icmp_state != TS_UNBND) { 265 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 266 "icmp_bind: bad state, %d", icmp->icmp_state); 267 icmp_err_ack(q, mp, TOUTSTATE, 0); 268 return; 269 } 270 /* 271 * Reallocate the message to make sure we have enough room for an 272 * address and the protocol type. 273 */ 274 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 275 if (!mp1) { 276 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 277 return; 278 } 279 mp = mp1; 280 tbr = (struct T_bind_req *)mp->b_rptr; 281 switch (tbr->ADDR_length) { 282 case 0: /* Generic request */ 283 tbr->ADDR_offset = sizeof (struct T_bind_req); 284 if (icmp->icmp_family == AF_INET) { 285 tbr->ADDR_length = sizeof (sin_t); 286 sin = (sin_t *)&tbr[1]; 287 *sin = sin_null; 288 sin->sin_family = AF_INET; 289 mp->b_wptr = (uchar_t *)&sin[1]; 290 } else { 291 ASSERT(icmp->icmp_family == AF_INET6); 292 tbr->ADDR_length = sizeof (sin6_t); 293 sin6 = (sin6_t *)&tbr[1]; 294 *sin6 = sin6_null; 295 sin6->sin6_family = AF_INET6; 296 mp->b_wptr = (uchar_t *)&sin6[1]; 297 } 298 break; 299 case sizeof (sin_t): /* Complete IP address */ 300 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 301 sizeof (sin_t)); 302 if (sin == NULL || !OK_32PTR((char *)sin)) { 303 icmp_err_ack(q, mp, TSYSERR, EINVAL); 304 return; 305 } 306 if (icmp->icmp_family != AF_INET || 307 sin->sin_family != AF_INET) { 308 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 309 return; 310 } 311 break; 312 case sizeof (sin6_t): /* Complete IP address */ 313 sin6 = (sin6_t 
*)mi_offset_param(mp, tbr->ADDR_offset, 314 sizeof (sin6_t)); 315 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 316 icmp_err_ack(q, mp, TSYSERR, EINVAL); 317 return; 318 } 319 if (icmp->icmp_family != AF_INET6 || 320 sin6->sin6_family != AF_INET6) { 321 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 322 return; 323 } 324 /* No support for mapped addresses on raw sockets */ 325 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 326 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 327 return; 328 } 329 break; 330 default: 331 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 332 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 333 icmp_err_ack(q, mp, TBADADDR, 0); 334 return; 335 } 336 337 /* 338 * The state must be TS_UNBND. TPI mandates that users must send 339 * TPI primitives only 1 at a time and wait for the response before 340 * sending the next primitive. 341 */ 342 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 343 if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) { 344 rw_exit(&icmp->icmp_rwlock); 345 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 346 "icmp_bind: bad state, %d", icmp->icmp_state); 347 icmp_err_ack(q, mp, TOUTSTATE, 0); 348 return; 349 } 350 351 icmp->icmp_pending_op = tbr->PRIM_type; 352 353 /* 354 * Copy the source address into our icmp structure. This address 355 * may still be zero; if so, ip will fill in the correct address 356 * each time an outbound packet is passed to it. 357 * If we are binding to a broadcast or multicast address then 358 * icmp_bind_ack will clear the source address when it receives 359 * the T_BIND_ACK. 
360 */ 361 icmp->icmp_state = TS_IDLE; 362 363 if (icmp->icmp_family == AF_INET) { 364 ASSERT(sin != NULL); 365 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 366 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 367 &icmp->icmp_v6src); 368 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 369 icmp->icmp_ip_snd_options_len; 370 icmp->icmp_bound_v6src = icmp->icmp_v6src; 371 } else { 372 int error; 373 374 ASSERT(sin6 != NULL); 375 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 376 icmp->icmp_v6src = sin6->sin6_addr; 377 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 378 icmp->icmp_bound_v6src = icmp->icmp_v6src; 379 380 /* Rebuild the header template */ 381 error = icmp_build_hdrs(icmp); 382 if (error != 0) { 383 icmp->icmp_pending_op = -1; 384 rw_exit(&icmp->icmp_rwlock); 385 icmp_err_ack(q, mp, TSYSERR, error); 386 return; 387 } 388 } 389 /* 390 * Place protocol type in the O_T_BIND_REQ/T_BIND_REQ following 391 * the address. 392 */ 393 *mp->b_wptr++ = icmp->icmp_proto; 394 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 395 /* 396 * Append a request for an IRE if src not 0 (INADDR_ANY) 397 */ 398 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 399 if (!mp->b_cont) { 400 icmp->icmp_pending_op = -1; 401 rw_exit(&icmp->icmp_rwlock); 402 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 403 return; 404 } 405 mp->b_cont->b_wptr += sizeof (ire_t); 406 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 407 } 408 rw_exit(&icmp->icmp_rwlock); 409 410 /* Pass the O_T_BIND_REQ/T_BIND_REQ to ip. */ 411 if (icmp->icmp_family == AF_INET6) 412 mp = ip_bind_v6(q, mp, connp, NULL); 413 else 414 mp = ip_bind_v4(q, mp, connp); 415 416 /* The above return NULL if the bind needs to be deferred */ 417 if (mp != NULL) 418 icmp_bind_result(connp, mp); 419 else 420 CONN_INC_REF(connp); 421 } 422 423 /* 424 * Send message to IP to just bind to the protocol. 
425 */ 426 static void 427 icmp_bind_proto(queue_t *q) 428 { 429 mblk_t *mp; 430 struct T_bind_req *tbr; 431 icmp_t *icmp; 432 conn_t *connp = Q_TO_CONN(q); 433 434 icmp = connp->conn_icmp; 435 436 mp = allocb(sizeof (struct T_bind_req) + sizeof (sin6_t) + 1, 437 BPRI_MED); 438 if (!mp) { 439 return; 440 } 441 mp->b_datap->db_type = M_PROTO; 442 tbr = (struct T_bind_req *)mp->b_rptr; 443 tbr->PRIM_type = O_T_BIND_REQ; /* change to T_BIND_REQ ? */ 444 tbr->ADDR_offset = sizeof (struct T_bind_req); 445 446 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 447 if (icmp->icmp_ipversion == IPV4_VERSION) { 448 sin_t *sin; 449 450 tbr->ADDR_length = sizeof (sin_t); 451 sin = (sin_t *)&tbr[1]; 452 *sin = sin_null; 453 sin->sin_family = AF_INET; 454 mp->b_wptr = (uchar_t *)&sin[1]; 455 } else { 456 sin6_t *sin6; 457 458 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 459 tbr->ADDR_length = sizeof (sin6_t); 460 sin6 = (sin6_t *)&tbr[1]; 461 *sin6 = sin6_null; 462 sin6->sin6_family = AF_INET6; 463 mp->b_wptr = (uchar_t *)&sin6[1]; 464 } 465 466 /* Place protocol type in the O_T_BIND_REQ following the address. */ 467 *mp->b_wptr++ = icmp->icmp_proto; 468 rw_exit(&icmp->icmp_rwlock); 469 470 /* Pass the O_T_BIND_REQ to ip. */ 471 if (icmp->icmp_family == AF_INET6) 472 mp = ip_bind_v6(q, mp, connp, NULL); 473 else 474 mp = ip_bind_v4(q, mp, connp); 475 476 /* The above return NULL if the bind needs to be deferred */ 477 if (mp != NULL) 478 icmp_bind_result(connp, mp); 479 else 480 CONN_INC_REF(connp); 481 } 482 483 /* 484 * This is called from ip_wput_nondata to handle the results of a 485 * deferred RAWIP bind. It is called once the bind has been completed. 486 */ 487 void 488 rawip_resume_bind(conn_t *connp, mblk_t *mp) 489 { 490 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 491 492 icmp_bind_result(connp, mp); 493 494 CONN_OPER_PENDING_DONE(connp); 495 } 496 497 /* 498 * This routine handles each T_CONN_REQ message passed to icmp. 
It 499 * associates a default destination address with the stream. 500 * 501 * This routine sends down a T_BIND_REQ to IP with the following mblks: 502 * T_BIND_REQ - specifying local and remote address. 503 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 504 * T_OK_ACK - for the T_CONN_REQ 505 * T_CONN_CON - to keep the TPI user happy 506 * 507 * The connect completes in icmp_bind_result. 508 * When a T_BIND_ACK is received information is extracted from the IRE 509 * and the two appended messages are sent to the TPI user. 510 * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 511 * convert it to an error ack for the appropriate primitive. 512 */ 513 static void 514 icmp_connect(queue_t *q, mblk_t *mp) 515 { 516 sin_t *sin; 517 sin6_t *sin6; 518 mblk_t *mp1, *mp2; 519 struct T_conn_req *tcr; 520 icmp_t *icmp; 521 ipaddr_t v4dst; 522 in6_addr_t v6dst; 523 uint32_t flowinfo; 524 conn_t *connp = Q_TO_CONN(q); 525 526 icmp = connp->conn_icmp; 527 tcr = (struct T_conn_req *)mp->b_rptr; 528 /* Sanity checks */ 529 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 530 icmp_err_ack(q, mp, TPROTO, 0); 531 return; 532 } 533 534 if (tcr->OPT_length != 0) { 535 icmp_err_ack(q, mp, TBADOPT, 0); 536 return; 537 } 538 539 switch (tcr->DEST_length) { 540 default: 541 icmp_err_ack(q, mp, TBADADDR, 0); 542 return; 543 544 case sizeof (sin_t): 545 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 546 sizeof (sin_t)); 547 if (sin == NULL || !OK_32PTR((char *)sin)) { 548 icmp_err_ack(q, mp, TSYSERR, EINVAL); 549 return; 550 } 551 if (icmp->icmp_family != AF_INET || 552 sin->sin_family != AF_INET) { 553 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 554 return; 555 } 556 v4dst = sin->sin_addr.s_addr; 557 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 558 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 559 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 560 icmp->icmp_ip_snd_options_len; 561 break; 562 563 case sizeof (sin6_t): 564 sin6 = 
(sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 565 sizeof (sin6_t)); 566 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 567 icmp_err_ack(q, mp, TSYSERR, EINVAL); 568 return; 569 } 570 if (icmp->icmp_family != AF_INET6 || 571 sin6->sin6_family != AF_INET6) { 572 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 573 return; 574 } 575 /* No support for mapped addresses on raw sockets */ 576 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 577 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 578 return; 579 } 580 v6dst = sin6->sin6_addr; 581 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 582 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 583 flowinfo = sin6->sin6_flowinfo; 584 break; 585 } 586 if (icmp->icmp_ipversion == IPV4_VERSION) { 587 /* 588 * Interpret a zero destination to mean loopback. 589 * Update the T_CONN_REQ (sin/sin6) since it is used to 590 * generate the T_CONN_CON. 591 */ 592 if (v4dst == INADDR_ANY) { 593 v4dst = htonl(INADDR_LOOPBACK); 594 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 595 if (icmp->icmp_family == AF_INET) { 596 sin->sin_addr.s_addr = v4dst; 597 } else { 598 sin6->sin6_addr = v6dst; 599 } 600 } 601 icmp->icmp_v6dst = v6dst; 602 icmp->icmp_flowinfo = 0; 603 604 /* 605 * If the destination address is multicast and 606 * an outgoing multicast interface has been set, 607 * use the address of that interface as our 608 * source address if no source address has been set. 609 */ 610 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 611 CLASSD(v4dst) && 612 icmp->icmp_multicast_if_addr != INADDR_ANY) { 613 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 614 &icmp->icmp_v6src); 615 } 616 } else { 617 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 618 /* 619 * Interpret a zero destination to mean loopback. 620 * Update the T_CONN_REQ (sin/sin6) since it is used to 621 * generate the T_CONN_CON. 
622 */ 623 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 624 v6dst = ipv6_loopback; 625 sin6->sin6_addr = v6dst; 626 } 627 icmp->icmp_v6dst = v6dst; 628 icmp->icmp_flowinfo = flowinfo; 629 /* 630 * If the destination address is multicast and 631 * an outgoing multicast interface has been set, 632 * then the ip bind logic will pick the correct source 633 * address (i.e. matching the outgoing multicast interface). 634 */ 635 } 636 637 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 638 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 639 rw_exit(&icmp->icmp_rwlock); 640 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 641 "icmp_connect: bad state, %d", icmp->icmp_state); 642 icmp_err_ack(q, mp, TOUTSTATE, 0); 643 return; 644 } 645 icmp->icmp_pending_op = T_CONN_REQ; 646 647 if (icmp->icmp_state == TS_DATA_XFER) { 648 /* Already connected - clear out state */ 649 icmp->icmp_v6src = icmp->icmp_bound_v6src; 650 icmp->icmp_state = TS_IDLE; 651 } 652 653 /* 654 * Send down bind to IP to verify that there is a route 655 * and to determine the source address. 656 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 657 */ 658 if (icmp->icmp_family == AF_INET) { 659 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa_conn_t), 660 sin->sin_port); 661 } else { 662 ASSERT(icmp->icmp_family == AF_INET6); 663 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa6_conn_t), 664 sin6->sin6_port); 665 } 666 if (mp1 == NULL) { 667 icmp->icmp_pending_op = -1; 668 rw_exit(&icmp->icmp_rwlock); 669 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 670 return; 671 } 672 673 /* 674 * We also have to send a connection confirmation to 675 * keep TLI happy. Prepare it for icmp_bind_result. 
676 */ 677 if (icmp->icmp_family == AF_INET) { 678 mp2 = mi_tpi_conn_con(NULL, (char *)sin, sizeof (*sin), NULL, 679 0); 680 } else { 681 ASSERT(icmp->icmp_family == AF_INET6); 682 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, sizeof (*sin6), NULL, 683 0); 684 } 685 if (mp2 == NULL) { 686 freemsg(mp1); 687 icmp->icmp_pending_op = -1; 688 rw_exit(&icmp->icmp_rwlock); 689 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 690 return; 691 } 692 693 mp = mi_tpi_ok_ack_alloc(mp); 694 if (mp == NULL) { 695 /* Unable to reuse the T_CONN_REQ for the ack. */ 696 freemsg(mp2); 697 icmp->icmp_pending_op = -1; 698 rw_exit(&icmp->icmp_rwlock); 699 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 700 return; 701 } 702 703 icmp->icmp_state = TS_DATA_XFER; 704 rw_exit(&icmp->icmp_rwlock); 705 706 /* Hang onto the T_OK_ACK and T_CONN_CON for later. */ 707 linkb(mp1, mp); 708 linkb(mp1, mp2); 709 710 mblk_setcred(mp1, connp->conn_cred); 711 if (icmp->icmp_family == AF_INET) 712 mp1 = ip_bind_v4(q, mp1, connp); 713 else 714 mp1 = ip_bind_v6(q, mp1, connp, NULL); 715 716 /* The above return NULL if the bind needs to be deferred */ 717 if (mp1 != NULL) 718 icmp_bind_result(connp, mp1); 719 else 720 CONN_INC_REF(connp); 721 } 722 723 static void 724 icmp_close_free(conn_t *connp) 725 { 726 icmp_t *icmp = connp->conn_icmp; 727 728 /* If there are any options associated with the stream, free them. 
*/ 729 if (icmp->icmp_ip_snd_options != NULL) { 730 mi_free((char *)icmp->icmp_ip_snd_options); 731 icmp->icmp_ip_snd_options = NULL; 732 } 733 734 if (icmp->icmp_filter != NULL) { 735 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 736 icmp->icmp_filter = NULL; 737 } 738 /* Free memory associated with sticky options */ 739 if (icmp->icmp_sticky_hdrs_len != 0) { 740 kmem_free(icmp->icmp_sticky_hdrs, 741 icmp->icmp_sticky_hdrs_len); 742 icmp->icmp_sticky_hdrs = NULL; 743 icmp->icmp_sticky_hdrs_len = 0; 744 } 745 ip6_pkt_free(&icmp->icmp_sticky_ipp); 746 } 747 748 static int 749 icmp_close(queue_t *q) 750 { 751 conn_t *connp = (conn_t *)q->q_ptr; 752 753 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 754 755 ip_quiesce_conn(connp); 756 757 qprocsoff(connp->conn_rq); 758 759 icmp_close_free(connp); 760 761 /* 762 * Now we are truly single threaded on this stream, and can 763 * delete the things hanging off the connp, and finally the connp. 764 * We removed this connp from the fanout list, it cannot be 765 * accessed thru the fanouts, and we already waited for the 766 * conn_ref to drop to 0. We are already in close, so 767 * there cannot be any other thread from the top. qprocsoff 768 * has completed, and service has completed or won't run in 769 * future. 770 */ 771 ASSERT(connp->conn_ref == 1); 772 773 inet_minor_free(ip_minor_arena, connp->conn_dev); 774 775 connp->conn_ref--; 776 ipcl_conn_destroy(connp); 777 778 q->q_ptr = WR(q)->q_ptr = NULL; 779 return (0); 780 } 781 782 /* 783 * This routine handles each T_DISCON_REQ message passed to icmp 784 * as an indicating that ICMP is no longer connected. This results 785 * in sending a T_BIND_REQ to IP to restore the binding to just 786 * the local address. 787 * 788 * This routine sends down a T_BIND_REQ to IP with the following mblks: 789 * T_BIND_REQ - specifying just the local address. 790 * T_OK_ACK - for the T_DISCON_REQ 791 * 792 * The disconnect completes in icmp_bind_result. 
793 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 794 * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 795 * convert it to an error ack for the appropriate primitive. 796 */ 797 static void 798 icmp_disconnect(queue_t *q, mblk_t *mp) 799 { 800 icmp_t *icmp; 801 mblk_t *mp1; 802 conn_t *connp = Q_TO_CONN(q); 803 804 icmp = connp->conn_icmp; 805 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 806 if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) { 807 rw_exit(&icmp->icmp_rwlock); 808 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 809 "icmp_disconnect: bad state, %d", icmp->icmp_state); 810 icmp_err_ack(q, mp, TOUTSTATE, 0); 811 return; 812 } 813 icmp->icmp_pending_op = T_DISCON_REQ; 814 icmp->icmp_v6src = icmp->icmp_bound_v6src; 815 icmp->icmp_state = TS_IDLE; 816 817 /* 818 * Send down bind to IP to remove the full binding and revert 819 * to the local address binding. 820 */ 821 if (icmp->icmp_family == AF_INET) { 822 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin_t), 0); 823 } else { 824 ASSERT(icmp->icmp_family == AF_INET6); 825 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin6_t), 0); 826 } 827 if (mp1 == NULL) { 828 icmp->icmp_pending_op = -1; 829 rw_exit(&icmp->icmp_rwlock); 830 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 831 return; 832 } 833 mp = mi_tpi_ok_ack_alloc(mp); 834 if (mp == NULL) { 835 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 836 icmp->icmp_pending_op = -1; 837 rw_exit(&icmp->icmp_rwlock); 838 icmp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 839 return; 840 } 841 842 if (icmp->icmp_family == AF_INET6) { 843 int error; 844 845 /* Rebuild the header template */ 846 error = icmp_build_hdrs(icmp); 847 if (error != 0) { 848 icmp->icmp_pending_op = -1; 849 rw_exit(&icmp->icmp_rwlock); 850 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 851 freemsg(mp1); 852 return; 853 } 854 } 855 856 rw_exit(&icmp->icmp_rwlock); 857 /* Append the T_OK_ACK to the T_BIND_REQ for icmp_bind_result */ 858 linkb(mp1, mp); 859 860 if (icmp->icmp_family == AF_INET6) 861 mp1 = ip_bind_v6(q, mp1, connp, NULL); 862 else 863 mp1 = ip_bind_v4(q, mp1, connp); 864 865 /* The above return NULL if the bind needs to be deferred */ 866 if (mp1 != NULL) 867 icmp_bind_result(connp, mp1); 868 else 869 CONN_INC_REF(connp); 870 } 871 872 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 873 static void 874 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 875 { 876 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 877 qreply(q, mp); 878 } 879 880 /* Shorthand to generate and send TPI error acks to our client */ 881 static void 882 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 883 t_scalar_t t_error, int sys_error) 884 { 885 struct T_error_ack *teackp; 886 887 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 888 M_PCPROTO, T_ERROR_ACK)) != NULL) { 889 teackp = (struct T_error_ack *)mp->b_rptr; 890 teackp->ERROR_prim = primitive; 891 teackp->TLI_error = t_error; 892 teackp->UNIX_error = sys_error; 893 qreply(q, mp); 894 } 895 } 896 897 /* 898 * icmp_icmp_error is called by icmp_input to process ICMP 899 * messages passed up by IP. 900 * Generates the appropriate T_UDERROR_IND for permanent 901 * (non-transient) errors. 902 * Assumes that IP has pulled up everything up to and including 903 * the ICMP header. 
904 */ 905 static void 906 icmp_icmp_error(queue_t *q, mblk_t *mp) 907 { 908 icmph_t *icmph; 909 ipha_t *ipha; 910 int iph_hdr_length; 911 sin_t sin; 912 sin6_t sin6; 913 mblk_t *mp1; 914 int error = 0; 915 icmp_t *icmp = Q_TO_ICMP(q); 916 917 ipha = (ipha_t *)mp->b_rptr; 918 919 ASSERT(OK_32PTR(mp->b_rptr)); 920 921 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 922 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 923 icmp_icmp_error_ipv6(q, mp); 924 return; 925 } 926 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 927 928 /* Skip past the outer IP and ICMP headers */ 929 iph_hdr_length = IPH_HDR_LENGTH(ipha); 930 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 931 ipha = (ipha_t *)&icmph[1]; 932 iph_hdr_length = IPH_HDR_LENGTH(ipha); 933 934 switch (icmph->icmph_type) { 935 case ICMP_DEST_UNREACHABLE: 936 switch (icmph->icmph_code) { 937 case ICMP_FRAGMENTATION_NEEDED: 938 /* 939 * IP has already adjusted the path MTU. 940 */ 941 break; 942 case ICMP_PORT_UNREACHABLE: 943 case ICMP_PROTOCOL_UNREACHABLE: 944 error = ECONNREFUSED; 945 break; 946 default: 947 /* Transient errors */ 948 break; 949 } 950 break; 951 default: 952 /* Transient errors */ 953 break; 954 } 955 if (error == 0) { 956 freemsg(mp); 957 return; 958 } 959 960 /* 961 * Deliver T_UDERROR_IND when the application has asked for it. 962 * The socket layer enables this automatically when connected. 
963 */ 964 if (!icmp->icmp_dgram_errind) { 965 freemsg(mp); 966 return; 967 } 968 969 switch (icmp->icmp_family) { 970 case AF_INET: 971 sin = sin_null; 972 sin.sin_family = AF_INET; 973 sin.sin_addr.s_addr = ipha->ipha_dst; 974 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 975 error); 976 break; 977 case AF_INET6: 978 sin6 = sin6_null; 979 sin6.sin6_family = AF_INET6; 980 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 981 982 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 983 NULL, 0, error); 984 break; 985 } 986 if (mp1) 987 putnext(q, mp1); 988 freemsg(mp); 989 } 990 991 /* 992 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 993 * for IPv6 packets. 994 * Send permanent (non-transient) errors upstream. 995 * Assumes that IP has pulled up all the extension headers as well 996 * as the ICMPv6 header. 997 */ 998 static void 999 icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 1000 { 1001 icmp6_t *icmp6; 1002 ip6_t *ip6h, *outer_ip6h; 1003 uint16_t iph_hdr_length; 1004 uint8_t *nexthdrp; 1005 sin6_t sin6; 1006 mblk_t *mp1; 1007 int error = 0; 1008 icmp_t *icmp = Q_TO_ICMP(q); 1009 1010 outer_ip6h = (ip6_t *)mp->b_rptr; 1011 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1012 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1013 else 1014 iph_hdr_length = IPV6_HDR_LEN; 1015 1016 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1017 ip6h = (ip6_t *)&icmp6[1]; 1018 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1019 freemsg(mp); 1020 return; 1021 } 1022 1023 switch (icmp6->icmp6_type) { 1024 case ICMP6_DST_UNREACH: 1025 switch (icmp6->icmp6_code) { 1026 case ICMP6_DST_UNREACH_NOPORT: 1027 error = ECONNREFUSED; 1028 break; 1029 case ICMP6_DST_UNREACH_ADMIN: 1030 case ICMP6_DST_UNREACH_NOROUTE: 1031 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1032 case ICMP6_DST_UNREACH_ADDR: 1033 /* Transient errors */ 1034 break; 1035 default: 1036 break; 1037 } 1038 break; 1039 case ICMP6_PACKET_TOO_BIG: { 1040 struct 
T_unitdata_ind *tudi; 1041 struct T_opthdr *toh; 1042 size_t udi_size; 1043 mblk_t *newmp; 1044 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1045 sizeof (struct ip6_mtuinfo); 1046 sin6_t *sin6; 1047 struct ip6_mtuinfo *mtuinfo; 1048 1049 /* 1050 * If the application has requested to receive path mtu 1051 * information, send up an empty message containing an 1052 * IPV6_PATHMTU ancillary data item. 1053 */ 1054 if (!icmp->icmp_ipv6_recvpathmtu) 1055 break; 1056 1057 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1058 opt_length; 1059 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1060 BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors); 1061 break; 1062 } 1063 1064 /* 1065 * newmp->b_cont is left to NULL on purpose. This is an 1066 * empty message containing only ancillary data. 1067 */ 1068 newmp->b_datap->db_type = M_PROTO; 1069 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1070 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1071 tudi->PRIM_type = T_UNITDATA_IND; 1072 tudi->SRC_length = sizeof (sin6_t); 1073 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1074 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1075 tudi->OPT_length = opt_length; 1076 1077 sin6 = (sin6_t *)&tudi[1]; 1078 bzero(sin6, sizeof (sin6_t)); 1079 sin6->sin6_family = AF_INET6; 1080 sin6->sin6_addr = icmp->icmp_v6dst; 1081 1082 toh = (struct T_opthdr *)&sin6[1]; 1083 toh->level = IPPROTO_IPV6; 1084 toh->name = IPV6_PATHMTU; 1085 toh->len = opt_length; 1086 toh->status = 0; 1087 1088 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1089 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1090 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1091 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1092 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1093 /* 1094 * We've consumed everything we need from the original 1095 * message. Free it, then send our empty message. 
1096 */ 1097 freemsg(mp); 1098 putnext(q, newmp); 1099 return; 1100 } 1101 case ICMP6_TIME_EXCEEDED: 1102 /* Transient errors */ 1103 break; 1104 case ICMP6_PARAM_PROB: 1105 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1106 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1107 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1108 (uchar_t *)nexthdrp) { 1109 error = ECONNREFUSED; 1110 break; 1111 } 1112 break; 1113 } 1114 if (error == 0) { 1115 freemsg(mp); 1116 return; 1117 } 1118 1119 /* 1120 * Deliver T_UDERROR_IND when the application has asked for it. 1121 * The socket layer enables this automatically when connected. 1122 */ 1123 if (!icmp->icmp_dgram_errind) { 1124 freemsg(mp); 1125 return; 1126 } 1127 1128 sin6 = sin6_null; 1129 sin6.sin6_family = AF_INET6; 1130 sin6.sin6_addr = ip6h->ip6_dst; 1131 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1132 1133 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 1134 error); 1135 if (mp1) 1136 putnext(q, mp1); 1137 freemsg(mp); 1138 } 1139 1140 /* 1141 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1142 * The local address is filled in if endpoint is bound. The remote address 1143 * is filled in if remote address has been precified ("connected endpoint") 1144 * (The concept of connected CLTS sockets is alien to published TPI 1145 * but we support it anyway). 
1146 */ 1147 static void 1148 icmp_addr_req(queue_t *q, mblk_t *mp) 1149 { 1150 icmp_t *icmp = Q_TO_ICMP(q); 1151 mblk_t *ackmp; 1152 struct T_addr_ack *taa; 1153 1154 /* Make it large enough for worst case */ 1155 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1156 2 * sizeof (sin6_t), 1); 1157 if (ackmp == NULL) { 1158 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1159 return; 1160 } 1161 taa = (struct T_addr_ack *)ackmp->b_rptr; 1162 1163 bzero(taa, sizeof (struct T_addr_ack)); 1164 ackmp->b_wptr = (uchar_t *)&taa[1]; 1165 1166 taa->PRIM_type = T_ADDR_ACK; 1167 ackmp->b_datap->db_type = M_PCPROTO; 1168 rw_enter(&icmp->icmp_rwlock, RW_READER); 1169 /* 1170 * Note: Following code assumes 32 bit alignment of basic 1171 * data structures like sin_t and struct T_addr_ack. 1172 */ 1173 if (icmp->icmp_state != TS_UNBND) { 1174 /* 1175 * Fill in local address 1176 */ 1177 taa->LOCADDR_offset = sizeof (*taa); 1178 if (icmp->icmp_family == AF_INET) { 1179 sin_t *sin; 1180 1181 taa->LOCADDR_length = sizeof (sin_t); 1182 sin = (sin_t *)&taa[1]; 1183 /* Fill zeroes and then intialize non-zero fields */ 1184 *sin = sin_null; 1185 sin->sin_family = AF_INET; 1186 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1187 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1188 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1189 sin->sin_addr.s_addr); 1190 } else { 1191 /* 1192 * INADDR_ANY 1193 * icmp_v6src is not set, we might be bound to 1194 * broadcast/multicast. 
Use icmp_bound_v6src as 1195 * local address instead (that could 1196 * also still be INADDR_ANY) 1197 */ 1198 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1199 sin->sin_addr.s_addr); 1200 } 1201 ackmp->b_wptr = (uchar_t *)&sin[1]; 1202 } else { 1203 sin6_t *sin6; 1204 1205 ASSERT(icmp->icmp_family == AF_INET6); 1206 taa->LOCADDR_length = sizeof (sin6_t); 1207 sin6 = (sin6_t *)&taa[1]; 1208 /* Fill zeroes and then intialize non-zero fields */ 1209 *sin6 = sin6_null; 1210 sin6->sin6_family = AF_INET6; 1211 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1212 sin6->sin6_addr = icmp->icmp_v6src; 1213 } else { 1214 /* 1215 * UNSPECIFIED 1216 * icmp_v6src is not set, we might be bound to 1217 * broadcast/multicast. Use icmp_bound_v6src as 1218 * local address instead (that could 1219 * also still be UNSPECIFIED) 1220 */ 1221 sin6->sin6_addr = icmp->icmp_bound_v6src; 1222 } 1223 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1224 } 1225 } 1226 rw_exit(&icmp->icmp_rwlock); 1227 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1228 qreply(q, ackmp); 1229 } 1230 1231 static void 1232 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1233 { 1234 *tap = icmp_g_t_info_ack; 1235 1236 if (icmp->icmp_family == AF_INET6) 1237 tap->ADDR_size = sizeof (sin6_t); 1238 else 1239 tap->ADDR_size = sizeof (sin_t); 1240 tap->CURRENT_state = icmp->icmp_state; 1241 tap->OPT_size = icmp_max_optsize; 1242 } 1243 1244 /* 1245 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1246 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1247 * icmp_g_t_info_ack. The current state of the stream is copied from 1248 * icmp_state. 
1249 */ 1250 static void 1251 icmp_capability_req(queue_t *q, mblk_t *mp) 1252 { 1253 icmp_t *icmp = Q_TO_ICMP(q); 1254 t_uscalar_t cap_bits1; 1255 struct T_capability_ack *tcap; 1256 1257 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1258 1259 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1260 mp->b_datap->db_type, T_CAPABILITY_ACK); 1261 if (!mp) 1262 return; 1263 1264 tcap = (struct T_capability_ack *)mp->b_rptr; 1265 tcap->CAP_bits1 = 0; 1266 1267 if (cap_bits1 & TC1_INFO) { 1268 icmp_copy_info(&tcap->INFO_ack, icmp); 1269 tcap->CAP_bits1 |= TC1_INFO; 1270 } 1271 1272 qreply(q, mp); 1273 } 1274 1275 /* 1276 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1277 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1278 * The current state of the stream is copied from icmp_state. 1279 */ 1280 static void 1281 icmp_info_req(queue_t *q, mblk_t *mp) 1282 { 1283 icmp_t *icmp = Q_TO_ICMP(q); 1284 1285 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1286 T_INFO_ACK); 1287 if (!mp) 1288 return; 1289 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1290 qreply(q, mp); 1291 } 1292 1293 /* 1294 * IP recognizes seven kinds of bind requests: 1295 * 1296 * - A zero-length address binds only to the protocol number. 1297 * 1298 * - A 4-byte address is treated as a request to 1299 * validate that the address is a valid local IPv4 1300 * address, appropriate for an application to bind to. 1301 * IP does the verification, but does not make any note 1302 * of the address at this time. 1303 * 1304 * - A 16-byte address contains is treated as a request 1305 * to validate a local IPv6 address, as the 4-byte 1306 * address case above. 1307 * 1308 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1309 * use it for the inbound fanout of packets. 1310 * 1311 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1312 * use it for the inbound fanout of packets. 
1313 * 1314 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1315 * information consisting of local and remote addresses 1316 * and ports (unused for raw sockets). In this case, the addresses are both 1317 * validated as appropriate for this operation, and, if 1318 * so, the information is retained for use in the 1319 * inbound fanout. 1320 * 1321 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1322 * fanout information, like the 12-byte case above. 1323 * 1324 * IP will also fill in the IRE request mblk with information 1325 * regarding our peer. In all cases, we notify IP of our protocol 1326 * type by appending a single protocol byte to the bind request. 1327 */ 1328 static mblk_t * 1329 icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length, 1330 in_port_t fport) 1331 { 1332 char *cp; 1333 mblk_t *mp; 1334 struct T_bind_req *tbr; 1335 ipa_conn_t *ac; 1336 ipa6_conn_t *ac6; 1337 sin_t *sin; 1338 sin6_t *sin6; 1339 1340 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1341 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 1342 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1343 if (mp == NULL) 1344 return (NULL); 1345 mp->b_datap->db_type = M_PROTO; 1346 tbr = (struct T_bind_req *)mp->b_rptr; 1347 tbr->PRIM_type = bind_prim; 1348 tbr->ADDR_offset = sizeof (*tbr); 1349 tbr->CONIND_number = 0; 1350 tbr->ADDR_length = addr_length; 1351 cp = (char *)&tbr[1]; 1352 switch (addr_length) { 1353 case sizeof (ipa_conn_t): 1354 ASSERT(icmp->icmp_family == AF_INET); 1355 /* Append a request for an IRE */ 1356 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1357 if (mp->b_cont == NULL) { 1358 freemsg(mp); 1359 return (NULL); 1360 } 1361 mp->b_cont->b_wptr += sizeof (ire_t); 1362 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1363 1364 /* cp known to be 32 bit aligned */ 1365 ac = (ipa_conn_t *)cp; 1366 ac->ac_laddr = V4_PART_OF_V6(icmp->icmp_v6src); 1367 ac->ac_faddr = V4_PART_OF_V6(icmp->icmp_v6dst); 1368 
ac->ac_fport = fport; 1369 ac->ac_lport = 0; 1370 break; 1371 1372 case sizeof (ipa6_conn_t): 1373 ASSERT(icmp->icmp_family == AF_INET6); 1374 /* Append a request for an IRE */ 1375 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1376 if (mp->b_cont == NULL) { 1377 freemsg(mp); 1378 return (NULL); 1379 } 1380 mp->b_cont->b_wptr += sizeof (ire_t); 1381 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1382 1383 /* cp known to be 32 bit aligned */ 1384 ac6 = (ipa6_conn_t *)cp; 1385 ac6->ac6_laddr = icmp->icmp_v6src; 1386 ac6->ac6_faddr = icmp->icmp_v6dst; 1387 ac6->ac6_fport = fport; 1388 ac6->ac6_lport = 0; 1389 break; 1390 1391 case sizeof (sin_t): 1392 ASSERT(icmp->icmp_family == AF_INET); 1393 /* Append a request for an IRE */ 1394 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1395 if (!mp->b_cont) { 1396 freemsg(mp); 1397 return (NULL); 1398 } 1399 mp->b_cont->b_wptr += sizeof (ire_t); 1400 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1401 1402 sin = (sin_t *)cp; 1403 *sin = sin_null; 1404 sin->sin_family = AF_INET; 1405 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 1406 break; 1407 1408 case sizeof (sin6_t): 1409 ASSERT(icmp->icmp_family == AF_INET6); 1410 /* Append a request for an IRE */ 1411 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1412 if (!mp->b_cont) { 1413 freemsg(mp); 1414 return (NULL); 1415 } 1416 mp->b_cont->b_wptr += sizeof (ire_t); 1417 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1418 1419 sin6 = (sin6_t *)cp; 1420 *sin6 = sin6_null; 1421 sin6->sin6_family = AF_INET6; 1422 sin6->sin6_addr = icmp->icmp_bound_v6src; 1423 break; 1424 } 1425 /* Add protocol number to end */ 1426 cp[addr_length] = icmp->icmp_proto; 1427 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1428 return (mp); 1429 } 1430 1431 /* For /dev/icmp aka AF_INET open */ 1432 static int 1433 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1434 { 1435 return (icmp_open(q, devp, flag, sflag, credp, B_FALSE)); 1436 } 1437 1438 /* For 
/dev/icmp6 aka AF_INET6 open */ 1439 static int 1440 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1441 { 1442 return (icmp_open(q, devp, flag, sflag, credp, B_TRUE)); 1443 } 1444 1445 /* 1446 * This is the open routine for icmp. It allocates a icmp_t structure for 1447 * the stream and, on the first open of the module, creates an ND table. 1448 */ 1449 /*ARGSUSED2*/ 1450 static int 1451 icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1452 boolean_t isv6) 1453 { 1454 int err; 1455 icmp_t *icmp; 1456 conn_t *connp; 1457 dev_t conn_dev; 1458 zoneid_t zoneid; 1459 netstack_t *ns; 1460 icmp_stack_t *is; 1461 1462 /* If the stream is already open, return immediately. */ 1463 if (q->q_ptr != NULL) 1464 return (0); 1465 1466 if (sflag == MODOPEN) 1467 return (EINVAL); 1468 1469 ns = netstack_find_by_cred(credp); 1470 ASSERT(ns != NULL); 1471 is = ns->netstack_icmp; 1472 ASSERT(is != NULL); 1473 1474 /* 1475 * For exclusive stacks we set the zoneid to zero 1476 * to make ICMP operate as if in the global zone. 1477 */ 1478 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 1479 zoneid = GLOBAL_ZONEID; 1480 else 1481 zoneid = crgetzoneid(credp); 1482 1483 if ((conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) { 1484 netstack_rele(ns); 1485 return (EBUSY); 1486 } 1487 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1488 1489 connp = ipcl_conn_create(IPCL_RAWIPCONN, KM_SLEEP, ns); 1490 connp->conn_dev = conn_dev; 1491 icmp = connp->conn_icmp; 1492 1493 /* 1494 * ipcl_conn_create did a netstack_hold. Undo the hold that was 1495 * done by netstack_find_by_cred() 1496 */ 1497 netstack_rele(ns); 1498 1499 /* 1500 * Initialize the icmp_t structure for this stream. 
1501 */ 1502 q->q_ptr = connp; 1503 WR(q)->q_ptr = connp; 1504 connp->conn_rq = q; 1505 connp->conn_wq = WR(q); 1506 1507 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1508 ASSERT(connp->conn_ulp == IPPROTO_ICMP); 1509 ASSERT(connp->conn_icmp == icmp); 1510 ASSERT(icmp->icmp_connp == connp); 1511 1512 /* Set the initial state of the stream and the privilege status. */ 1513 icmp->icmp_state = TS_UNBND; 1514 if (isv6) { 1515 icmp->icmp_ipversion = IPV6_VERSION; 1516 icmp->icmp_family = AF_INET6; 1517 connp->conn_ulp = IPPROTO_ICMPV6; 1518 /* May be changed by a SO_PROTOTYPE socket option. */ 1519 icmp->icmp_proto = IPPROTO_ICMPV6; 1520 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1521 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1522 icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; 1523 connp->conn_af_isv6 = B_TRUE; 1524 connp->conn_flags |= IPCL_ISV6; 1525 } else { 1526 icmp->icmp_ipversion = IPV4_VERSION; 1527 icmp->icmp_family = AF_INET; 1528 /* May be changed by a SO_PROTOTYPE socket option. */ 1529 icmp->icmp_proto = IPPROTO_ICMP; 1530 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1531 icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; 1532 connp->conn_af_isv6 = B_FALSE; 1533 connp->conn_flags &= ~IPCL_ISV6; 1534 } 1535 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1536 icmp->icmp_pending_op = -1; 1537 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1538 connp->conn_zoneid = zoneid; 1539 1540 /* 1541 * If the caller has the process-wide flag set, then default to MAC 1542 * exempt mode. This allows read-down to unlabeled hosts. 
1543 */ 1544 if (getpflags(NET_MAC_AWARE, credp) != 0) 1545 icmp->icmp_mac_exempt = B_TRUE; 1546 1547 connp->conn_ulp_labeled = is_system_labeled(); 1548 1549 icmp->icmp_is = is; 1550 1551 q->q_hiwat = is->is_recv_hiwat; 1552 WR(q)->q_hiwat = is->is_xmit_hiwat; 1553 WR(q)->q_lowat = is->is_xmit_lowat; 1554 1555 connp->conn_recv = icmp_input; 1556 crhold(credp); 1557 connp->conn_cred = credp; 1558 1559 mutex_enter(&connp->conn_lock); 1560 connp->conn_state_flags &= ~CONN_INCIPIENT; 1561 mutex_exit(&connp->conn_lock); 1562 1563 qprocson(q); 1564 1565 if (icmp->icmp_family == AF_INET6) { 1566 /* Build initial header template for transmit */ 1567 if ((err = icmp_build_hdrs(icmp)) != 0) { 1568 rw_exit(&icmp->icmp_rwlock); 1569 qprocsoff(q); 1570 ipcl_conn_destroy(connp); 1571 return (err); 1572 } 1573 } 1574 rw_exit(&icmp->icmp_rwlock); 1575 1576 /* Set the Stream head write offset. */ 1577 (void) mi_set_sth_wroff(q, 1578 icmp->icmp_max_hdr_len + is->is_wroff_extra); 1579 (void) mi_set_sth_hiwat(q, q->q_hiwat); 1580 1581 return (0); 1582 } 1583 1584 /* 1585 * Which ICMP options OK to set through T_UNITDATA_REQ... 
1586 */ 1587 /* ARGSUSED */ 1588 static boolean_t 1589 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1590 { 1591 return (B_TRUE); 1592 } 1593 1594 /* 1595 * This routine gets default values of certain options whose default 1596 * values are maintained by protcol specific code 1597 */ 1598 /* ARGSUSED */ 1599 int 1600 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1601 { 1602 icmp_t *icmp = Q_TO_ICMP(q); 1603 icmp_stack_t *is = icmp->icmp_is; 1604 int *i1 = (int *)ptr; 1605 1606 switch (level) { 1607 case IPPROTO_IP: 1608 switch (name) { 1609 case IP_MULTICAST_TTL: 1610 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1611 return (sizeof (uchar_t)); 1612 case IP_MULTICAST_LOOP: 1613 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1614 return (sizeof (uchar_t)); 1615 } 1616 break; 1617 case IPPROTO_IPV6: 1618 switch (name) { 1619 case IPV6_MULTICAST_HOPS: 1620 *i1 = IP_DEFAULT_MULTICAST_TTL; 1621 return (sizeof (int)); 1622 case IPV6_MULTICAST_LOOP: 1623 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1624 return (sizeof (int)); 1625 case IPV6_UNICAST_HOPS: 1626 *i1 = is->is_ipv6_hoplimit; 1627 return (sizeof (int)); 1628 } 1629 break; 1630 case IPPROTO_ICMPV6: 1631 switch (name) { 1632 case ICMP6_FILTER: 1633 /* Make it look like "pass all" */ 1634 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1635 return (sizeof (icmp6_filter_t)); 1636 } 1637 break; 1638 } 1639 return (-1); 1640 } 1641 1642 /* 1643 * This routine retrieves the current status of socket options. 1644 * It returns the size of the option retrieved. 
1645 */ 1646 int 1647 icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr) 1648 { 1649 conn_t *connp = Q_TO_CONN(q); 1650 icmp_t *icmp = connp->conn_icmp; 1651 icmp_stack_t *is = icmp->icmp_is; 1652 int *i1 = (int *)ptr; 1653 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1654 1655 switch (level) { 1656 case SOL_SOCKET: 1657 switch (name) { 1658 case SO_DEBUG: 1659 *i1 = icmp->icmp_debug; 1660 break; 1661 case SO_TYPE: 1662 *i1 = SOCK_RAW; 1663 break; 1664 case SO_PROTOTYPE: 1665 *i1 = icmp->icmp_proto; 1666 break; 1667 case SO_REUSEADDR: 1668 *i1 = icmp->icmp_reuseaddr; 1669 break; 1670 1671 /* 1672 * The following three items are available here, 1673 * but are only meaningful to IP. 1674 */ 1675 case SO_DONTROUTE: 1676 *i1 = icmp->icmp_dontroute; 1677 break; 1678 case SO_USELOOPBACK: 1679 *i1 = icmp->icmp_useloopback; 1680 break; 1681 case SO_BROADCAST: 1682 *i1 = icmp->icmp_broadcast; 1683 break; 1684 1685 case SO_SNDBUF: 1686 ASSERT(q->q_hiwat <= INT_MAX); 1687 *i1 = (int)q->q_hiwat; 1688 break; 1689 case SO_RCVBUF: 1690 ASSERT(RD(q)->q_hiwat <= INT_MAX); 1691 *i1 = (int)RD(q)->q_hiwat; 1692 break; 1693 case SO_DGRAM_ERRIND: 1694 *i1 = icmp->icmp_dgram_errind; 1695 break; 1696 case SO_TIMESTAMP: 1697 *i1 = icmp->icmp_timestamp; 1698 break; 1699 case SO_MAC_EXEMPT: 1700 *i1 = icmp->icmp_mac_exempt; 1701 break; 1702 case SO_DOMAIN: 1703 *i1 = icmp->icmp_family; 1704 break; 1705 1706 /* 1707 * Following four not meaningful for icmp 1708 * Action is same as "default" to which we fallthrough 1709 * so we keep them in comments. 1710 * case SO_LINGER: 1711 * case SO_KEEPALIVE: 1712 * case SO_OOBINLINE: 1713 * case SO_ALLZONES: 1714 */ 1715 default: 1716 return (-1); 1717 } 1718 break; 1719 case IPPROTO_IP: 1720 /* 1721 * Only allow IPv4 option processing on IPv4 sockets. 
1722 */ 1723 if (icmp->icmp_family != AF_INET) 1724 return (-1); 1725 1726 switch (name) { 1727 case IP_OPTIONS: 1728 case T_IP_OPTIONS: 1729 /* Options are passed up with each packet */ 1730 return (0); 1731 case IP_HDRINCL: 1732 *i1 = (int)icmp->icmp_hdrincl; 1733 break; 1734 case IP_TOS: 1735 case T_IP_TOS: 1736 *i1 = (int)icmp->icmp_type_of_service; 1737 break; 1738 case IP_TTL: 1739 *i1 = (int)icmp->icmp_ttl; 1740 break; 1741 case IP_MULTICAST_IF: 1742 /* 0 address if not set */ 1743 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1744 return (sizeof (ipaddr_t)); 1745 case IP_MULTICAST_TTL: 1746 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1747 return (sizeof (uchar_t)); 1748 case IP_MULTICAST_LOOP: 1749 *ptr = connp->conn_multicast_loop; 1750 return (sizeof (uint8_t)); 1751 case IP_BOUND_IF: 1752 /* Zero if not set */ 1753 *i1 = icmp->icmp_bound_if; 1754 break; /* goto sizeof (int) option return */ 1755 case IP_UNSPEC_SRC: 1756 *ptr = icmp->icmp_unspec_source; 1757 break; /* goto sizeof (int) option return */ 1758 case IP_XMIT_IF: 1759 *i1 = icmp->icmp_xmit_if; 1760 break; /* goto sizeof (int) option return */ 1761 case IP_RECVIF: 1762 *ptr = icmp->icmp_recvif; 1763 break; /* goto sizeof (int) option return */ 1764 case IP_RECVPKTINFO: 1765 /* 1766 * This also handles IP_PKTINFO. 1767 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1768 * Differentiation is based on the size of the argument 1769 * passed in. 1770 * This option is handled in IP which will return an 1771 * error for IP_PKTINFO as it's not supported as a 1772 * sticky option. 1773 */ 1774 return (-EINVAL); 1775 /* 1776 * Cannot "get" the value of following options 1777 * at this level. Action is same as "default" to 1778 * which we fallthrough so we keep them in comments. 
1779 * 1780 * case IP_ADD_MEMBERSHIP: 1781 * case IP_DROP_MEMBERSHIP: 1782 * case IP_BLOCK_SOURCE: 1783 * case IP_UNBLOCK_SOURCE: 1784 * case IP_ADD_SOURCE_MEMBERSHIP: 1785 * case IP_DROP_SOURCE_MEMBERSHIP: 1786 * case MCAST_JOIN_GROUP: 1787 * case MCAST_LEAVE_GROUP: 1788 * case MCAST_BLOCK_SOURCE: 1789 * case MCAST_UNBLOCK_SOURCE: 1790 * case MCAST_JOIN_SOURCE_GROUP: 1791 * case MCAST_LEAVE_SOURCE_GROUP: 1792 * case MRT_INIT: 1793 * case MRT_DONE: 1794 * case MRT_ADD_VIF: 1795 * case MRT_DEL_VIF: 1796 * case MRT_ADD_MFC: 1797 * case MRT_DEL_MFC: 1798 * case MRT_VERSION: 1799 * case MRT_ASSERT: 1800 * case IP_SEC_OPT: 1801 * case IP_DONTFAILOVER_IF: 1802 * case IP_NEXTHOP: 1803 */ 1804 default: 1805 return (-1); 1806 } 1807 break; 1808 case IPPROTO_IPV6: 1809 /* 1810 * Only allow IPv6 option processing on native IPv6 sockets. 1811 */ 1812 if (icmp->icmp_family != AF_INET6) 1813 return (-1); 1814 switch (name) { 1815 case IPV6_UNICAST_HOPS: 1816 *i1 = (unsigned int)icmp->icmp_ttl; 1817 break; 1818 case IPV6_MULTICAST_IF: 1819 /* 0 index if not set */ 1820 *i1 = icmp->icmp_multicast_if_index; 1821 break; 1822 case IPV6_MULTICAST_HOPS: 1823 *i1 = icmp->icmp_multicast_ttl; 1824 break; 1825 case IPV6_MULTICAST_LOOP: 1826 *i1 = connp->conn_multicast_loop; 1827 break; 1828 case IPV6_BOUND_IF: 1829 /* Zero if not set */ 1830 *i1 = icmp->icmp_bound_if; 1831 break; 1832 case IPV6_UNSPEC_SRC: 1833 *i1 = icmp->icmp_unspec_source; 1834 break; 1835 case IPV6_CHECKSUM: 1836 /* 1837 * Return offset or -1 if no checksum offset. 
1838 * Does not apply to IPPROTO_ICMPV6 1839 */ 1840 if (icmp->icmp_proto == IPPROTO_ICMPV6) 1841 return (-1); 1842 1843 if (icmp->icmp_raw_checksum) { 1844 *i1 = icmp->icmp_checksum_off; 1845 } else { 1846 *i1 = -1; 1847 } 1848 break; 1849 case IPV6_JOIN_GROUP: 1850 case IPV6_LEAVE_GROUP: 1851 case MCAST_JOIN_GROUP: 1852 case MCAST_LEAVE_GROUP: 1853 case MCAST_BLOCK_SOURCE: 1854 case MCAST_UNBLOCK_SOURCE: 1855 case MCAST_JOIN_SOURCE_GROUP: 1856 case MCAST_LEAVE_SOURCE_GROUP: 1857 /* cannot "get" the value for these */ 1858 return (-1); 1859 case IPV6_RECVPKTINFO: 1860 *i1 = icmp->icmp_ip_recvpktinfo; 1861 break; 1862 case IPV6_RECVTCLASS: 1863 *i1 = icmp->icmp_ipv6_recvtclass; 1864 break; 1865 case IPV6_RECVPATHMTU: 1866 *i1 = icmp->icmp_ipv6_recvpathmtu; 1867 break; 1868 case IPV6_V6ONLY: 1869 *i1 = 1; 1870 break; 1871 case IPV6_RECVHOPLIMIT: 1872 *i1 = icmp->icmp_ipv6_recvhoplimit; 1873 break; 1874 case IPV6_RECVHOPOPTS: 1875 *i1 = icmp->icmp_ipv6_recvhopopts; 1876 break; 1877 case IPV6_RECVDSTOPTS: 1878 *i1 = icmp->icmp_ipv6_recvdstopts; 1879 break; 1880 case _OLD_IPV6_RECVDSTOPTS: 1881 *i1 = icmp->icmp_old_ipv6_recvdstopts; 1882 break; 1883 case IPV6_RECVRTHDRDSTOPTS: 1884 *i1 = icmp->icmp_ipv6_recvrtdstopts; 1885 break; 1886 case IPV6_RECVRTHDR: 1887 *i1 = icmp->icmp_ipv6_recvrthdr; 1888 break; 1889 case IPV6_PKTINFO: { 1890 /* XXX assumes that caller has room for max size! 
*/ 1891 struct in6_pktinfo *pkti; 1892 1893 pkti = (struct in6_pktinfo *)ptr; 1894 if (ipp->ipp_fields & IPPF_IFINDEX) 1895 pkti->ipi6_ifindex = ipp->ipp_ifindex; 1896 else 1897 pkti->ipi6_ifindex = 0; 1898 if (ipp->ipp_fields & IPPF_ADDR) 1899 pkti->ipi6_addr = ipp->ipp_addr; 1900 else 1901 pkti->ipi6_addr = ipv6_all_zeros; 1902 return (sizeof (struct in6_pktinfo)); 1903 } 1904 case IPV6_NEXTHOP: { 1905 sin6_t *sin6 = (sin6_t *)ptr; 1906 1907 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 1908 return (0); 1909 *sin6 = sin6_null; 1910 sin6->sin6_family = AF_INET6; 1911 sin6->sin6_addr = ipp->ipp_nexthop; 1912 return (sizeof (sin6_t)); 1913 } 1914 case IPV6_HOPOPTS: 1915 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 1916 return (0); 1917 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 1918 return (0); 1919 bcopy((char *)ipp->ipp_hopopts + 1920 icmp->icmp_label_len_v6, ptr, 1921 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1922 if (icmp->icmp_label_len_v6 > 0) { 1923 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 1924 ptr[1] = (ipp->ipp_hopoptslen - 1925 icmp->icmp_label_len_v6 + 7) / 8 - 1; 1926 } 1927 return (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1928 case IPV6_RTHDRDSTOPTS: 1929 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 1930 return (0); 1931 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 1932 return (ipp->ipp_rtdstoptslen); 1933 case IPV6_RTHDR: 1934 if (!(ipp->ipp_fields & IPPF_RTHDR)) 1935 return (0); 1936 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 1937 return (ipp->ipp_rthdrlen); 1938 case IPV6_DSTOPTS: 1939 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 1940 return (0); 1941 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 1942 return (ipp->ipp_dstoptslen); 1943 case IPV6_PATHMTU: 1944 if (!(ipp->ipp_fields & IPPF_PATHMTU)) 1945 return (0); 1946 1947 return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0, 1948 (struct ip6_mtuinfo *)ptr, 1949 is->is_netstack)); 1950 case IPV6_TCLASS: 1951 if (ipp->ipp_fields & IPPF_TCLASS) 1952 *i1 = ipp->ipp_tclass; 1953 else 1954 *i1 = 
IPV6_FLOW_TCLASS( 1955 IPV6_DEFAULT_VERS_AND_FLOW); 1956 break; 1957 default: 1958 return (-1); 1959 } 1960 break; 1961 case IPPROTO_ICMPV6: 1962 /* 1963 * Only allow IPv6 option processing on native IPv6 sockets. 1964 */ 1965 if (icmp->icmp_family != AF_INET6) 1966 return (-1); 1967 1968 if (icmp->icmp_proto != IPPROTO_ICMPV6) 1969 return (-1); 1970 1971 switch (name) { 1972 case ICMP6_FILTER: 1973 if (icmp->icmp_filter == NULL) { 1974 /* Make it look like "pass all" */ 1975 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1976 } else { 1977 (void) bcopy(icmp->icmp_filter, ptr, 1978 sizeof (icmp6_filter_t)); 1979 } 1980 return (sizeof (icmp6_filter_t)); 1981 default: 1982 return (-1); 1983 } 1984 default: 1985 return (-1); 1986 } 1987 return (sizeof (int)); 1988 } 1989 1990 /* 1991 * This routine retrieves the current status of socket options. 1992 * It returns the size of the option retrieved. 1993 */ 1994 int 1995 icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 1996 { 1997 icmp_t *icmp = Q_TO_ICMP(q); 1998 int err; 1999 2000 rw_enter(&icmp->icmp_rwlock, RW_READER); 2001 err = icmp_opt_get_locked(q, level, name, ptr); 2002 rw_exit(&icmp->icmp_rwlock); 2003 return (err); 2004 } 2005 2006 2007 /* This routine sets socket options. */ 2008 /* ARGSUSED */ 2009 int 2010 icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name, 2011 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2012 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2013 { 2014 conn_t *connp = Q_TO_CONN(q); 2015 icmp_t *icmp = connp->conn_icmp; 2016 icmp_stack_t *is = icmp->icmp_is; 2017 int *i1 = (int *)invalp; 2018 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2019 boolean_t checkonly; 2020 int error; 2021 2022 switch (optset_context) { 2023 case SETFN_OPTCOM_CHECKONLY: 2024 checkonly = B_TRUE; 2025 /* 2026 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 2027 * inlen != 0 implies value supplied and 2028 * we have to "pretend" to set it. 
2029 * inlen == 0 implies that there is no 2030 * value part in T_CHECK request and just validation 2031 * done elsewhere should be enough, we just return here. 2032 */ 2033 if (inlen == 0) { 2034 *outlenp = 0; 2035 return (0); 2036 } 2037 break; 2038 case SETFN_OPTCOM_NEGOTIATE: 2039 checkonly = B_FALSE; 2040 break; 2041 case SETFN_UD_NEGOTIATE: 2042 case SETFN_CONN_NEGOTIATE: 2043 checkonly = B_FALSE; 2044 /* 2045 * Negotiating local and "association-related" options 2046 * through T_UNITDATA_REQ. 2047 * 2048 * Following routine can filter out ones we do not 2049 * want to be "set" this way. 2050 */ 2051 if (!icmp_opt_allow_udr_set(level, name)) { 2052 *outlenp = 0; 2053 return (EINVAL); 2054 } 2055 break; 2056 default: 2057 /* 2058 * We should never get here 2059 */ 2060 *outlenp = 0; 2061 return (EINVAL); 2062 } 2063 2064 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 2065 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 2066 2067 /* 2068 * For fixed length options, no sanity check 2069 * of passed in length is done. It is assumed *_optcom_req() 2070 * routines do the right thing. 
2071 */ 2072 2073 switch (level) { 2074 case SOL_SOCKET: 2075 switch (name) { 2076 case SO_DEBUG: 2077 if (!checkonly) 2078 icmp->icmp_debug = onoff; 2079 break; 2080 case SO_PROTOTYPE: 2081 if ((*i1 & 0xFF) != IPPROTO_ICMP && 2082 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 2083 secpolicy_net_rawaccess(cr) != 0) { 2084 *outlenp = 0; 2085 return (EACCES); 2086 } 2087 /* Can't use IPPROTO_RAW with IPv6 */ 2088 if ((*i1 & 0xFF) == IPPROTO_RAW && 2089 icmp->icmp_family == AF_INET6) { 2090 *outlenp = 0; 2091 return (EPROTONOSUPPORT); 2092 } 2093 if (checkonly) { 2094 /* T_CHECK case */ 2095 *(int *)outvalp = (*i1 & 0xFF); 2096 break; 2097 } 2098 icmp->icmp_proto = *i1 & 0xFF; 2099 if ((icmp->icmp_proto == IPPROTO_RAW || 2100 icmp->icmp_proto == IPPROTO_IGMP) && 2101 icmp->icmp_family == AF_INET) 2102 icmp->icmp_hdrincl = 1; 2103 else 2104 icmp->icmp_hdrincl = 0; 2105 2106 if (icmp->icmp_family == AF_INET6 && 2107 icmp->icmp_proto == IPPROTO_ICMPV6) { 2108 /* Set offset for icmp6_cksum */ 2109 icmp->icmp_raw_checksum = 0; 2110 icmp->icmp_checksum_off = 2; 2111 } 2112 if (icmp->icmp_proto == IPPROTO_UDP || 2113 icmp->icmp_proto == IPPROTO_TCP || 2114 icmp->icmp_proto == IPPROTO_SCTP) { 2115 icmp->icmp_no_tp_cksum = 1; 2116 icmp->icmp_sticky_ipp.ipp_fields |= 2117 IPPF_NO_CKSUM; 2118 } else { 2119 icmp->icmp_no_tp_cksum = 0; 2120 icmp->icmp_sticky_ipp.ipp_fields &= 2121 ~IPPF_NO_CKSUM; 2122 } 2123 2124 if (icmp->icmp_filter != NULL && 2125 icmp->icmp_proto != IPPROTO_ICMPV6) { 2126 kmem_free(icmp->icmp_filter, 2127 sizeof (icmp6_filter_t)); 2128 icmp->icmp_filter = NULL; 2129 } 2130 2131 /* Rebuild the header template */ 2132 error = icmp_build_hdrs(icmp); 2133 if (error != 0) { 2134 *outlenp = 0; 2135 return (error); 2136 } 2137 2138 /* 2139 * For SCTP, we don't use icmp_bind_proto() for 2140 * raw socket binding. Note that we do not need 2141 * to set *outlenp. 2142 * FIXME: how does SCTP work? 
2143 */ 2144 if (icmp->icmp_proto == IPPROTO_SCTP) 2145 return (0); 2146 2147 *outlenp = sizeof (int); 2148 *(int *)outvalp = *i1 & 0xFF; 2149 2150 /* Drop lock across the bind operation */ 2151 rw_exit(&icmp->icmp_rwlock); 2152 icmp_bind_proto(q); 2153 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2154 return (0); 2155 case SO_REUSEADDR: 2156 if (!checkonly) 2157 icmp->icmp_reuseaddr = onoff; 2158 break; 2159 2160 /* 2161 * The following three items are available here, 2162 * but are only meaningful to IP. 2163 */ 2164 case SO_DONTROUTE: 2165 if (!checkonly) 2166 icmp->icmp_dontroute = onoff; 2167 break; 2168 case SO_USELOOPBACK: 2169 if (!checkonly) 2170 icmp->icmp_useloopback = onoff; 2171 break; 2172 case SO_BROADCAST: 2173 if (!checkonly) 2174 icmp->icmp_broadcast = onoff; 2175 break; 2176 2177 case SO_SNDBUF: 2178 if (*i1 > is->is_max_buf) { 2179 *outlenp = 0; 2180 return (ENOBUFS); 2181 } 2182 if (!checkonly) { 2183 q->q_hiwat = *i1; 2184 } 2185 break; 2186 case SO_RCVBUF: 2187 if (*i1 > is->is_max_buf) { 2188 *outlenp = 0; 2189 return (ENOBUFS); 2190 } 2191 if (!checkonly) { 2192 RD(q)->q_hiwat = *i1; 2193 rw_exit(&icmp->icmp_rwlock); 2194 (void) mi_set_sth_hiwat(RD(q), *i1); 2195 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2196 } 2197 break; 2198 case SO_DGRAM_ERRIND: 2199 if (!checkonly) 2200 icmp->icmp_dgram_errind = onoff; 2201 break; 2202 case SO_ALLZONES: 2203 /* 2204 * "soft" error (negative) 2205 * option not handled at this level 2206 * Note: Do not modify *outlenp 2207 */ 2208 return (-EINVAL); 2209 case SO_TIMESTAMP: 2210 if (!checkonly) { 2211 icmp->icmp_timestamp = onoff; 2212 } 2213 break; 2214 case SO_MAC_EXEMPT: 2215 if (secpolicy_net_mac_aware(cr) != 0 || 2216 icmp->icmp_state != TS_UNBND) 2217 return (EACCES); 2218 if (!checkonly) 2219 icmp->icmp_mac_exempt = onoff; 2220 break; 2221 /* 2222 * Following three not meaningful for icmp 2223 * Action is same as "default" so we keep them 2224 * in comments. 
2225 * case SO_LINGER: 2226 * case SO_KEEPALIVE: 2227 * case SO_OOBINLINE: 2228 */ 2229 default: 2230 *outlenp = 0; 2231 return (EINVAL); 2232 } 2233 break; 2234 case IPPROTO_IP: 2235 /* 2236 * Only allow IPv4 option processing on IPv4 sockets. 2237 */ 2238 if (icmp->icmp_family != AF_INET) { 2239 *outlenp = 0; 2240 return (ENOPROTOOPT); 2241 } 2242 switch (name) { 2243 case IP_OPTIONS: 2244 case T_IP_OPTIONS: 2245 /* Save options for use by IP. */ 2246 if ((inlen & 0x3) || 2247 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2248 *outlenp = 0; 2249 return (EINVAL); 2250 } 2251 if (checkonly) 2252 break; 2253 2254 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2255 &icmp->icmp_ip_snd_options_len, 2256 icmp->icmp_label_len, invalp, inlen)) { 2257 *outlenp = 0; 2258 return (ENOMEM); 2259 } 2260 2261 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2262 icmp->icmp_ip_snd_options_len; 2263 rw_exit(&icmp->icmp_rwlock); 2264 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2265 is->is_wroff_extra); 2266 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2267 break; 2268 case IP_HDRINCL: 2269 if (!checkonly) 2270 icmp->icmp_hdrincl = onoff; 2271 break; 2272 case IP_TOS: 2273 case T_IP_TOS: 2274 if (!checkonly) { 2275 icmp->icmp_type_of_service = (uint8_t)*i1; 2276 } 2277 break; 2278 case IP_TTL: 2279 if (!checkonly) { 2280 icmp->icmp_ttl = (uint8_t)*i1; 2281 } 2282 break; 2283 case IP_MULTICAST_IF: 2284 /* 2285 * TODO should check OPTMGMT reply and undo this if 2286 * there is an error. 2287 */ 2288 if (!checkonly) 2289 icmp->icmp_multicast_if_addr = *i1; 2290 break; 2291 case IP_MULTICAST_TTL: 2292 if (!checkonly) 2293 icmp->icmp_multicast_ttl = *invalp; 2294 break; 2295 case IP_MULTICAST_LOOP: 2296 if (!checkonly) { 2297 connp->conn_multicast_loop = 2298 (*invalp == 0) ? 
0 : 1; 2299 } 2300 break; 2301 case IP_BOUND_IF: 2302 if (!checkonly) 2303 icmp->icmp_bound_if = *i1; 2304 break; 2305 case IP_UNSPEC_SRC: 2306 if (!checkonly) 2307 icmp->icmp_unspec_source = onoff; 2308 break; 2309 case IP_XMIT_IF: 2310 if (!checkonly) 2311 icmp->icmp_xmit_if = *i1; 2312 break; 2313 case IP_RECVIF: 2314 if (!checkonly) 2315 icmp->icmp_recvif = onoff; 2316 /* 2317 * pass to ip 2318 */ 2319 return (-EINVAL); 2320 case IP_PKTINFO: { 2321 /* 2322 * This also handles IP_RECVPKTINFO. 2323 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2324 * Differentiation is based on the size of the argument 2325 * passed in. 2326 */ 2327 struct in_pktinfo *pktinfop; 2328 ip4_pkt_t *attr_pktinfop; 2329 2330 if (checkonly) 2331 break; 2332 2333 if (inlen == sizeof (int)) { 2334 /* 2335 * This is IP_RECVPKTINFO option. 2336 * Keep a local copy of wether this option is 2337 * set or not and pass it down to IP for 2338 * processing. 2339 */ 2340 icmp->icmp_ip_recvpktinfo = onoff; 2341 return (-EINVAL); 2342 } 2343 2344 2345 if (inlen != sizeof (struct in_pktinfo)) 2346 return (EINVAL); 2347 2348 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2349 == NULL) { 2350 /* 2351 * sticky option is not supported 2352 */ 2353 return (EINVAL); 2354 } 2355 2356 pktinfop = (struct in_pktinfo *)invalp; 2357 2358 /* 2359 * Atleast one of the values should be specified 2360 */ 2361 if (pktinfop->ipi_ifindex == 0 && 2362 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2363 return (EINVAL); 2364 } 2365 2366 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2367 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2368 } 2369 break; 2370 case IP_ADD_MEMBERSHIP: 2371 case IP_DROP_MEMBERSHIP: 2372 case IP_BLOCK_SOURCE: 2373 case IP_UNBLOCK_SOURCE: 2374 case IP_ADD_SOURCE_MEMBERSHIP: 2375 case IP_DROP_SOURCE_MEMBERSHIP: 2376 case MCAST_JOIN_GROUP: 2377 case MCAST_LEAVE_GROUP: 2378 case MCAST_BLOCK_SOURCE: 2379 case MCAST_UNBLOCK_SOURCE: 2380 case MCAST_JOIN_SOURCE_GROUP: 2381 case 
MCAST_LEAVE_SOURCE_GROUP: 2382 case MRT_INIT: 2383 case MRT_DONE: 2384 case MRT_ADD_VIF: 2385 case MRT_DEL_VIF: 2386 case MRT_ADD_MFC: 2387 case MRT_DEL_MFC: 2388 case MRT_VERSION: 2389 case MRT_ASSERT: 2390 case IP_SEC_OPT: 2391 case IP_DONTFAILOVER_IF: 2392 case IP_NEXTHOP: 2393 /* 2394 * "soft" error (negative) 2395 * option not handled at this level 2396 * Note: Do not modify *outlenp 2397 */ 2398 return (-EINVAL); 2399 default: 2400 *outlenp = 0; 2401 return (EINVAL); 2402 } 2403 break; 2404 case IPPROTO_IPV6: { 2405 ip6_pkt_t *ipp; 2406 boolean_t sticky; 2407 2408 if (icmp->icmp_family != AF_INET6) { 2409 *outlenp = 0; 2410 return (ENOPROTOOPT); 2411 } 2412 /* 2413 * Deal with both sticky options and ancillary data 2414 */ 2415 if (thisdg_attrs == NULL) { 2416 /* sticky options, or none */ 2417 ipp = &icmp->icmp_sticky_ipp; 2418 sticky = B_TRUE; 2419 } else { 2420 /* ancillary data */ 2421 ipp = (ip6_pkt_t *)thisdg_attrs; 2422 sticky = B_FALSE; 2423 } 2424 2425 switch (name) { 2426 case IPV6_MULTICAST_IF: 2427 if (!checkonly) 2428 icmp->icmp_multicast_if_index = *i1; 2429 break; 2430 case IPV6_UNICAST_HOPS: 2431 /* -1 means use default */ 2432 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2433 *outlenp = 0; 2434 return (EINVAL); 2435 } 2436 if (!checkonly) { 2437 if (*i1 == -1) { 2438 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2439 is->is_ipv6_hoplimit; 2440 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2441 /* Pass modified value to IP. 
*/ 2442 *i1 = ipp->ipp_hoplimit; 2443 } else { 2444 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2445 (uint8_t)*i1; 2446 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2447 } 2448 /* Rebuild the header template */ 2449 error = icmp_build_hdrs(icmp); 2450 if (error != 0) { 2451 *outlenp = 0; 2452 return (error); 2453 } 2454 } 2455 break; 2456 case IPV6_MULTICAST_HOPS: 2457 /* -1 means use default */ 2458 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2459 *outlenp = 0; 2460 return (EINVAL); 2461 } 2462 if (!checkonly) { 2463 if (*i1 == -1) { 2464 icmp->icmp_multicast_ttl = 2465 ipp->ipp_multicast_hops = 2466 IP_DEFAULT_MULTICAST_TTL; 2467 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2468 /* Pass modified value to IP. */ 2469 *i1 = icmp->icmp_multicast_ttl; 2470 } else { 2471 icmp->icmp_multicast_ttl = 2472 ipp->ipp_multicast_hops = 2473 (uint8_t)*i1; 2474 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2475 } 2476 } 2477 break; 2478 case IPV6_MULTICAST_LOOP: 2479 if (*i1 != 0 && *i1 != 1) { 2480 *outlenp = 0; 2481 return (EINVAL); 2482 } 2483 if (!checkonly) 2484 connp->conn_multicast_loop = *i1; 2485 break; 2486 case IPV6_CHECKSUM: 2487 /* 2488 * Integer offset into the user data of where the 2489 * checksum is located. 2490 * Offset of -1 disables option. 2491 * Does not apply to IPPROTO_ICMPV6. 
2492 */ 2493 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2494 *outlenp = 0; 2495 return (EINVAL); 2496 } 2497 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2498 /* Negative or not 16 bit aligned offset */ 2499 *outlenp = 0; 2500 return (EINVAL); 2501 } 2502 if (checkonly) 2503 break; 2504 2505 if (*i1 == -1) { 2506 icmp->icmp_raw_checksum = 0; 2507 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2508 } else { 2509 icmp->icmp_raw_checksum = 1; 2510 icmp->icmp_checksum_off = *i1; 2511 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2512 } 2513 /* Rebuild the header template */ 2514 error = icmp_build_hdrs(icmp); 2515 if (error != 0) { 2516 *outlenp = 0; 2517 return (error); 2518 } 2519 break; 2520 case IPV6_JOIN_GROUP: 2521 case IPV6_LEAVE_GROUP: 2522 case MCAST_JOIN_GROUP: 2523 case MCAST_LEAVE_GROUP: 2524 case MCAST_BLOCK_SOURCE: 2525 case MCAST_UNBLOCK_SOURCE: 2526 case MCAST_JOIN_SOURCE_GROUP: 2527 case MCAST_LEAVE_SOURCE_GROUP: 2528 /* 2529 * "soft" error (negative) 2530 * option not handled at this level 2531 * Note: Do not modify *outlenp 2532 */ 2533 return (-EINVAL); 2534 case IPV6_BOUND_IF: 2535 if (!checkonly) 2536 icmp->icmp_bound_if = *i1; 2537 break; 2538 case IPV6_UNSPEC_SRC: 2539 if (!checkonly) 2540 icmp->icmp_unspec_source = onoff; 2541 break; 2542 case IPV6_RECVTCLASS: 2543 if (!checkonly) 2544 icmp->icmp_ipv6_recvtclass = onoff; 2545 break; 2546 /* 2547 * Set boolean switches for ancillary data delivery 2548 */ 2549 case IPV6_RECVPKTINFO: 2550 if (!checkonly) 2551 icmp->icmp_ip_recvpktinfo = onoff; 2552 break; 2553 case IPV6_RECVPATHMTU: 2554 if (!checkonly) 2555 icmp->icmp_ipv6_recvpathmtu = onoff; 2556 break; 2557 case IPV6_RECVHOPLIMIT: 2558 if (!checkonly) 2559 icmp->icmp_ipv6_recvhoplimit = onoff; 2560 break; 2561 case IPV6_RECVHOPOPTS: 2562 if (!checkonly) 2563 icmp->icmp_ipv6_recvhopopts = onoff; 2564 break; 2565 case IPV6_RECVDSTOPTS: 2566 if (!checkonly) 2567 icmp->icmp_ipv6_recvdstopts = onoff; 2568 break; 2569 case _OLD_IPV6_RECVDSTOPTS: 2570 
if (!checkonly) 2571 icmp->icmp_old_ipv6_recvdstopts = onoff; 2572 break; 2573 case IPV6_RECVRTHDRDSTOPTS: 2574 if (!checkonly) 2575 icmp->icmp_ipv6_recvrtdstopts = onoff; 2576 break; 2577 case IPV6_RECVRTHDR: 2578 if (!checkonly) 2579 icmp->icmp_ipv6_recvrthdr = onoff; 2580 break; 2581 /* 2582 * Set sticky options or ancillary data. 2583 * If sticky options, (re)build any extension headers 2584 * that might be needed as a result. 2585 */ 2586 case IPV6_PKTINFO: 2587 /* 2588 * The source address and ifindex are verified 2589 * in ip_opt_set(). For ancillary data the 2590 * source address is checked in ip_wput_v6. 2591 */ 2592 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2593 return (EINVAL); 2594 if (checkonly) 2595 break; 2596 2597 if (inlen == 0) { 2598 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2599 ipp->ipp_sticky_ignored |= 2600 (IPPF_IFINDEX|IPPF_ADDR); 2601 } else { 2602 struct in6_pktinfo *pkti; 2603 2604 pkti = (struct in6_pktinfo *)invalp; 2605 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2606 ipp->ipp_addr = pkti->ipi6_addr; 2607 if (ipp->ipp_ifindex != 0) 2608 ipp->ipp_fields |= IPPF_IFINDEX; 2609 else 2610 ipp->ipp_fields &= ~IPPF_IFINDEX; 2611 if (!IN6_IS_ADDR_UNSPECIFIED( 2612 &ipp->ipp_addr)) 2613 ipp->ipp_fields |= IPPF_ADDR; 2614 else 2615 ipp->ipp_fields &= ~IPPF_ADDR; 2616 } 2617 if (sticky) { 2618 error = icmp_build_hdrs(icmp); 2619 if (error != 0) 2620 return (error); 2621 } 2622 break; 2623 case IPV6_HOPLIMIT: 2624 /* This option can only be used as ancillary data. 
*/ 2625 if (sticky) 2626 return (EINVAL); 2627 if (inlen != 0 && inlen != sizeof (int)) 2628 return (EINVAL); 2629 if (checkonly) 2630 break; 2631 2632 if (inlen == 0) { 2633 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2634 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2635 } else { 2636 if (*i1 > 255 || *i1 < -1) 2637 return (EINVAL); 2638 if (*i1 == -1) 2639 ipp->ipp_hoplimit = 2640 is->is_ipv6_hoplimit; 2641 else 2642 ipp->ipp_hoplimit = *i1; 2643 ipp->ipp_fields |= IPPF_HOPLIMIT; 2644 } 2645 break; 2646 case IPV6_TCLASS: 2647 /* 2648 * IPV6_RECVTCLASS accepts -1 as use kernel default 2649 * and [0, 255] as the actualy traffic class. 2650 */ 2651 if (inlen != 0 && inlen != sizeof (int)) 2652 return (EINVAL); 2653 if (checkonly) 2654 break; 2655 2656 if (inlen == 0) { 2657 ipp->ipp_fields &= ~IPPF_TCLASS; 2658 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2659 } else { 2660 if (*i1 >= 256 || *i1 < -1) 2661 return (EINVAL); 2662 if (*i1 == -1) { 2663 ipp->ipp_tclass = 2664 IPV6_FLOW_TCLASS( 2665 IPV6_DEFAULT_VERS_AND_FLOW); 2666 } else { 2667 ipp->ipp_tclass = *i1; 2668 } 2669 ipp->ipp_fields |= IPPF_TCLASS; 2670 } 2671 if (sticky) { 2672 error = icmp_build_hdrs(icmp); 2673 if (error != 0) 2674 return (error); 2675 } 2676 break; 2677 case IPV6_NEXTHOP: 2678 /* 2679 * IP will verify that the nexthop is reachable 2680 * and fail for sticky options. 
2681 */ 2682 if (inlen != 0 && inlen != sizeof (sin6_t)) 2683 return (EINVAL); 2684 if (checkonly) 2685 break; 2686 2687 if (inlen == 0) { 2688 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2689 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2690 } else { 2691 sin6_t *sin6 = (sin6_t *)invalp; 2692 2693 if (sin6->sin6_family != AF_INET6) 2694 return (EAFNOSUPPORT); 2695 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 2696 return (EADDRNOTAVAIL); 2697 ipp->ipp_nexthop = sin6->sin6_addr; 2698 if (!IN6_IS_ADDR_UNSPECIFIED( 2699 &ipp->ipp_nexthop)) 2700 ipp->ipp_fields |= IPPF_NEXTHOP; 2701 else 2702 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2703 } 2704 if (sticky) { 2705 error = icmp_build_hdrs(icmp); 2706 if (error != 0) 2707 return (error); 2708 } 2709 break; 2710 case IPV6_HOPOPTS: { 2711 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2712 /* 2713 * Sanity checks - minimum size, size a multiple of 2714 * eight bytes, and matching size passed in. 2715 */ 2716 if (inlen != 0 && 2717 inlen != (8 * (hopts->ip6h_len + 1))) 2718 return (EINVAL); 2719 2720 if (checkonly) 2721 break; 2722 error = optcom_pkt_set(invalp, inlen, sticky, 2723 (uchar_t **)&ipp->ipp_hopopts, 2724 &ipp->ipp_hopoptslen, 2725 sticky ? icmp->icmp_label_len_v6 : 0); 2726 if (error != 0) 2727 return (error); 2728 if (ipp->ipp_hopoptslen == 0) { 2729 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2730 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2731 } else { 2732 ipp->ipp_fields |= IPPF_HOPOPTS; 2733 } 2734 if (sticky) { 2735 error = icmp_build_hdrs(icmp); 2736 if (error != 0) 2737 return (error); 2738 } 2739 break; 2740 } 2741 case IPV6_RTHDRDSTOPTS: { 2742 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2743 2744 /* 2745 * Sanity checks - minimum size, size a multiple of 2746 * eight bytes, and matching size passed in. 
2747 */ 2748 if (inlen != 0 && 2749 inlen != (8 * (dopts->ip6d_len + 1))) 2750 return (EINVAL); 2751 2752 if (checkonly) 2753 break; 2754 2755 if (inlen == 0) { 2756 if (sticky && 2757 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2758 kmem_free(ipp->ipp_rtdstopts, 2759 ipp->ipp_rtdstoptslen); 2760 ipp->ipp_rtdstopts = NULL; 2761 ipp->ipp_rtdstoptslen = 0; 2762 } 2763 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2764 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2765 } else { 2766 error = optcom_pkt_set(invalp, inlen, sticky, 2767 (uchar_t **)&ipp->ipp_rtdstopts, 2768 &ipp->ipp_rtdstoptslen, 0); 2769 if (error != 0) 2770 return (error); 2771 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2772 } 2773 if (sticky) { 2774 error = icmp_build_hdrs(icmp); 2775 if (error != 0) 2776 return (error); 2777 } 2778 break; 2779 } 2780 case IPV6_DSTOPTS: { 2781 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2782 2783 /* 2784 * Sanity checks - minimum size, size a multiple of 2785 * eight bytes, and matching size passed in. 2786 */ 2787 if (inlen != 0 && 2788 inlen != (8 * (dopts->ip6d_len + 1))) 2789 return (EINVAL); 2790 2791 if (checkonly) 2792 break; 2793 2794 if (inlen == 0) { 2795 if (sticky && 2796 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2797 kmem_free(ipp->ipp_dstopts, 2798 ipp->ipp_dstoptslen); 2799 ipp->ipp_dstopts = NULL; 2800 ipp->ipp_dstoptslen = 0; 2801 } 2802 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2803 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2804 } else { 2805 error = optcom_pkt_set(invalp, inlen, sticky, 2806 (uchar_t **)&ipp->ipp_dstopts, 2807 &ipp->ipp_dstoptslen, 0); 2808 if (error != 0) 2809 return (error); 2810 ipp->ipp_fields |= IPPF_DSTOPTS; 2811 } 2812 if (sticky) { 2813 error = icmp_build_hdrs(icmp); 2814 if (error != 0) 2815 return (error); 2816 } 2817 break; 2818 } 2819 case IPV6_RTHDR: { 2820 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2821 2822 /* 2823 * Sanity checks - minimum size, size a multiple of 2824 * eight bytes, and matching size passed in. 
2825 */ 2826 if (inlen != 0 && 2827 inlen != (8 * (rt->ip6r_len + 1))) 2828 return (EINVAL); 2829 2830 if (checkonly) 2831 break; 2832 2833 if (inlen == 0) { 2834 if (sticky && 2835 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2836 kmem_free(ipp->ipp_rthdr, 2837 ipp->ipp_rthdrlen); 2838 ipp->ipp_rthdr = NULL; 2839 ipp->ipp_rthdrlen = 0; 2840 } 2841 ipp->ipp_fields &= ~IPPF_RTHDR; 2842 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2843 } else { 2844 error = optcom_pkt_set(invalp, inlen, sticky, 2845 (uchar_t **)&ipp->ipp_rthdr, 2846 &ipp->ipp_rthdrlen, 0); 2847 if (error != 0) 2848 return (error); 2849 ipp->ipp_fields |= IPPF_RTHDR; 2850 } 2851 if (sticky) { 2852 error = icmp_build_hdrs(icmp); 2853 if (error != 0) 2854 return (error); 2855 } 2856 break; 2857 } 2858 2859 case IPV6_DONTFRAG: 2860 if (checkonly) 2861 break; 2862 2863 if (onoff) { 2864 ipp->ipp_fields |= IPPF_DONTFRAG; 2865 } else { 2866 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2867 } 2868 break; 2869 2870 case IPV6_USE_MIN_MTU: 2871 if (inlen != sizeof (int)) 2872 return (EINVAL); 2873 2874 if (*i1 < -1 || *i1 > 1) 2875 return (EINVAL); 2876 2877 if (checkonly) 2878 break; 2879 2880 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 2881 ipp->ipp_use_min_mtu = *i1; 2882 break; 2883 2884 /* 2885 * This option can't be set. Its only returned via 2886 * getsockopt() or ancillary data. 2887 */ 2888 case IPV6_PATHMTU: 2889 return (EINVAL); 2890 2891 case IPV6_BOUND_PIF: 2892 case IPV6_SEC_OPT: 2893 case IPV6_DONTFAILOVER_IF: 2894 case IPV6_SRC_PREFERENCES: 2895 case IPV6_V6ONLY: 2896 /* Handled at IP level */ 2897 return (-EINVAL); 2898 default: 2899 *outlenp = 0; 2900 return (EINVAL); 2901 } 2902 break; 2903 } /* end IPPROTO_IPV6 */ 2904 2905 case IPPROTO_ICMPV6: 2906 /* 2907 * Only allow IPv6 option processing on IPv6 sockets. 
2908 */ 2909 if (icmp->icmp_family != AF_INET6) { 2910 *outlenp = 0; 2911 return (ENOPROTOOPT); 2912 } 2913 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2914 *outlenp = 0; 2915 return (ENOPROTOOPT); 2916 } 2917 switch (name) { 2918 case ICMP6_FILTER: 2919 if (!checkonly) { 2920 if ((inlen != 0) && 2921 (inlen != sizeof (icmp6_filter_t))) 2922 return (EINVAL); 2923 2924 if (inlen == 0) { 2925 if (icmp->icmp_filter != NULL) { 2926 kmem_free(icmp->icmp_filter, 2927 sizeof (icmp6_filter_t)); 2928 icmp->icmp_filter = NULL; 2929 } 2930 } else { 2931 if (icmp->icmp_filter == NULL) { 2932 icmp->icmp_filter = kmem_alloc( 2933 sizeof (icmp6_filter_t), 2934 KM_NOSLEEP); 2935 if (icmp->icmp_filter == NULL) { 2936 *outlenp = 0; 2937 return (ENOBUFS); 2938 } 2939 } 2940 (void) bcopy(invalp, icmp->icmp_filter, 2941 inlen); 2942 } 2943 } 2944 break; 2945 2946 default: 2947 *outlenp = 0; 2948 return (EINVAL); 2949 } 2950 break; 2951 default: 2952 *outlenp = 0; 2953 return (EINVAL); 2954 } 2955 /* 2956 * Common case of OK return with outval same as inval. 2957 */ 2958 if (invalp != outvalp) { 2959 /* don't trust bcopy for identical src/dst */ 2960 (void) bcopy(invalp, outvalp, inlen); 2961 } 2962 *outlenp = inlen; 2963 return (0); 2964 } 2965 /* This routine sets socket options. */ 2966 /* ARGSUSED */ 2967 int 2968 icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, 2969 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2970 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2971 { 2972 icmp_t *icmp; 2973 int err; 2974 2975 icmp = Q_TO_ICMP(q); 2976 2977 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2978 err = icmp_opt_set_locked(q, optset_context, level, name, inlen, invalp, 2979 outlenp, outvalp, thisdg_attrs, cr, mblk); 2980 rw_exit(&icmp->icmp_rwlock); 2981 return (err); 2982 } 2983 2984 /* 2985 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 2986 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 
2987 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 2988 * headers. 2989 * Returns failure if can't allocate memory. 2990 */ 2991 static int 2992 icmp_build_hdrs(icmp_t *icmp) 2993 { 2994 icmp_stack_t *is = icmp->icmp_is; 2995 uchar_t *hdrs; 2996 uint_t hdrs_len; 2997 ip6_t *ip6h; 2998 ip6i_t *ip6i; 2999 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 3000 3001 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 3002 hdrs_len = ip_total_hdrs_len_v6(ipp); 3003 ASSERT(hdrs_len != 0); 3004 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 3005 /* Need to reallocate */ 3006 if (hdrs_len != 0) { 3007 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3008 if (hdrs == NULL) 3009 return (ENOMEM); 3010 } else { 3011 hdrs = NULL; 3012 } 3013 if (icmp->icmp_sticky_hdrs_len != 0) { 3014 kmem_free(icmp->icmp_sticky_hdrs, 3015 icmp->icmp_sticky_hdrs_len); 3016 } 3017 icmp->icmp_sticky_hdrs = hdrs; 3018 icmp->icmp_sticky_hdrs_len = hdrs_len; 3019 } 3020 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 3021 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 3022 3023 /* Set header fields not in ipp */ 3024 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3025 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 3026 ip6h = (ip6_t *)&ip6i[1]; 3027 3028 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 3029 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 3030 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 3031 } 3032 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 3033 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 3034 } 3035 } else { 3036 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 3037 } 3038 3039 if (!(ipp->ipp_fields & IPPF_ADDR)) 3040 ip6h->ip6_src = icmp->icmp_v6src; 3041 3042 /* Try to get everything in a single mblk */ 3043 if (hdrs_len > icmp->icmp_max_hdr_len) { 3044 icmp->icmp_max_hdr_len = hdrs_len; 3045 rw_exit(&icmp->icmp_rwlock); 3046 (void) mi_set_sth_wroff(icmp->icmp_connp->conn_rq, 3047 icmp->icmp_max_hdr_len + is->is_wroff_extra); 3048 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3049 } 3050 return (0); 3051 } 3052 3053 /* 3054 * 
This routine retrieves the value of an ND variable in a icmpparam_t 3055 * structure. It is called through nd_getset when a user reads the 3056 * variable. 3057 */ 3058 /* ARGSUSED */ 3059 static int 3060 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3061 { 3062 icmpparam_t *icmppa = (icmpparam_t *)cp; 3063 3064 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 3065 return (0); 3066 } 3067 3068 /* 3069 * Walk through the param array specified registering each element with the 3070 * named dispatch (ND) handler. 3071 */ 3072 static boolean_t 3073 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) 3074 { 3075 for (; cnt-- > 0; icmppa++) { 3076 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 3077 if (!nd_load(ndp, icmppa->icmp_param_name, 3078 icmp_param_get, icmp_param_set, 3079 (caddr_t)icmppa)) { 3080 nd_free(ndp); 3081 return (B_FALSE); 3082 } 3083 } 3084 } 3085 if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL, 3086 NULL)) { 3087 nd_free(ndp); 3088 return (B_FALSE); 3089 } 3090 return (B_TRUE); 3091 } 3092 3093 /* This routine sets an ND variable in a icmpparam_t structure. */ 3094 /* ARGSUSED */ 3095 static int 3096 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3097 { 3098 long new_value; 3099 icmpparam_t *icmppa = (icmpparam_t *)cp; 3100 3101 /* 3102 * Fail the request if the new value does not lie within the 3103 * required bounds. 
3104 */ 3105 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3106 new_value < icmppa->icmp_param_min || 3107 new_value > icmppa->icmp_param_max) { 3108 return (EINVAL); 3109 } 3110 /* Set the new value */ 3111 icmppa->icmp_param_value = new_value; 3112 return (0); 3113 } 3114 /*ARGSUSED2*/ 3115 static void 3116 icmp_input(void *arg1, mblk_t *mp, void *arg2) 3117 { 3118 conn_t *connp = (conn_t *)arg1; 3119 struct T_unitdata_ind *tudi; 3120 uchar_t *rptr; 3121 icmp_t *icmp; 3122 icmp_stack_t *is; 3123 sin_t *sin; 3124 sin6_t *sin6; 3125 ip6_t *ip6h; 3126 ip6i_t *ip6i; 3127 mblk_t *mp1; 3128 int hdr_len; 3129 ipha_t *ipha; 3130 int udi_size; /* Size of T_unitdata_ind */ 3131 uint_t ipvers; 3132 ip6_pkt_t ipp; 3133 uint8_t nexthdr; 3134 ip_pktinfo_t *pinfo = NULL; 3135 mblk_t *options_mp = NULL; 3136 uint_t icmp_opt = 0; 3137 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 3138 uint_t hopstrip; 3139 3140 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 3141 3142 icmp = connp->conn_icmp; 3143 is = icmp->icmp_is; 3144 rptr = mp->b_rptr; 3145 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3146 ASSERT(OK_32PTR(rptr)); 3147 3148 /* 3149 * IP should have prepended the options data in an M_CTL 3150 * Check M_CTL "type" to make sure are not here bcos of 3151 * a valid ICMP message 3152 */ 3153 if (DB_TYPE(mp) == M_CTL) { 3154 /* 3155 * FIXME: does IP still do this? 3156 * IP sends up the IPSEC_IN message for handling IPSEC 3157 * policy at the TCP level. We don't need it here. 3158 */ 3159 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 3160 mp1 = mp->b_cont; 3161 freeb(mp); 3162 mp = mp1; 3163 rptr = mp->b_rptr; 3164 } else if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3165 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3166 IN_PKTINFO) { 3167 /* 3168 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3169 * has been prepended to the packet by IP. 
We need to 3170 * extract the mblk and adjust the rptr 3171 */ 3172 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3173 options_mp = mp; 3174 mp = mp->b_cont; 3175 rptr = mp->b_rptr; 3176 } else { 3177 /* 3178 * ICMP messages. 3179 */ 3180 icmp_icmp_error(connp->conn_rq, mp); 3181 return; 3182 } 3183 } 3184 3185 /* 3186 * Discard message if it is misaligned or smaller than the IP header. 3187 */ 3188 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3189 freemsg(mp); 3190 if (options_mp != NULL) 3191 freeb(options_mp); 3192 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3193 return; 3194 } 3195 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3196 3197 /* Handle M_DATA messages containing IP packets messages */ 3198 if (ipvers == IPV4_VERSION) { 3199 /* 3200 * Special case where IP attaches 3201 * the IRE needs to be handled so that we don't send up 3202 * IRE to the user land. 3203 */ 3204 ipha = (ipha_t *)rptr; 3205 hdr_len = IPH_HDR_LENGTH(ipha); 3206 3207 if (ipha->ipha_protocol == IPPROTO_TCP) { 3208 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3209 3210 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3211 TH_SYN) && mp->b_cont != NULL) { 3212 mp1 = mp->b_cont; 3213 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3214 freeb(mp1); 3215 mp->b_cont = NULL; 3216 } 3217 } 3218 } 3219 if (is->is_bsd_compat) { 3220 ushort_t len; 3221 len = ntohs(ipha->ipha_length); 3222 3223 if (mp->b_datap->db_ref > 1) { 3224 /* 3225 * Allocate a new IP header so that we can 3226 * modify ipha_length. 
3227 */ 3228 mblk_t *mp1; 3229 3230 mp1 = allocb(hdr_len, BPRI_MED); 3231 if (!mp1) { 3232 freemsg(mp); 3233 if (options_mp != NULL) 3234 freeb(options_mp); 3235 BUMP_MIB(&is->is_rawip_mib, 3236 rawipInErrors); 3237 return; 3238 } 3239 bcopy(rptr, mp1->b_rptr, hdr_len); 3240 mp->b_rptr = rptr + hdr_len; 3241 rptr = mp1->b_rptr; 3242 ipha = (ipha_t *)rptr; 3243 mp1->b_cont = mp; 3244 mp1->b_wptr = rptr + hdr_len; 3245 mp = mp1; 3246 } 3247 len -= hdr_len; 3248 ipha->ipha_length = htons(len); 3249 } 3250 } 3251 3252 /* 3253 * This is the inbound data path. Packets are passed upstream as 3254 * T_UNITDATA_IND messages with full IP headers still attached. 3255 */ 3256 if (icmp->icmp_family == AF_INET) { 3257 ASSERT(ipvers == IPV4_VERSION); 3258 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3259 if (icmp->icmp_recvif && (pinfo != NULL) && 3260 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3261 udi_size += sizeof (struct T_opthdr) + 3262 sizeof (uint_t); 3263 } 3264 3265 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3266 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3267 udi_size += sizeof (struct T_opthdr) + 3268 sizeof (struct in_pktinfo); 3269 } 3270 3271 /* 3272 * If SO_TIMESTAMP is set allocate the appropriate sized 3273 * buffer. Since gethrestime() expects a pointer aligned 3274 * argument, we allocate space necessary for extra 3275 * alignment (even though it might not be used). 
3276 */ 3277 if (icmp->icmp_timestamp) { 3278 udi_size += sizeof (struct T_opthdr) + 3279 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3280 } 3281 mp1 = allocb(udi_size, BPRI_MED); 3282 if (mp1 == NULL) { 3283 freemsg(mp); 3284 if (options_mp != NULL) 3285 freeb(options_mp); 3286 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3287 return; 3288 } 3289 mp1->b_cont = mp; 3290 mp = mp1; 3291 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3292 mp->b_datap->db_type = M_PROTO; 3293 mp->b_wptr = (uchar_t *)tudi + udi_size; 3294 tudi->PRIM_type = T_UNITDATA_IND; 3295 tudi->SRC_length = sizeof (sin_t); 3296 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3297 sin = (sin_t *)&tudi[1]; 3298 *sin = sin_null; 3299 sin->sin_family = AF_INET; 3300 sin->sin_addr.s_addr = ipha->ipha_src; 3301 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3302 sizeof (sin_t); 3303 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3304 tudi->OPT_length = udi_size; 3305 3306 /* 3307 * Add options if IP_RECVIF is set 3308 */ 3309 if (udi_size != 0) { 3310 char *dstopt; 3311 3312 dstopt = (char *)&sin[1]; 3313 if (icmp->icmp_recvif && (pinfo != NULL) && 3314 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3315 3316 struct T_opthdr *toh; 3317 uint_t *dstptr; 3318 3319 toh = (struct T_opthdr *)dstopt; 3320 toh->level = IPPROTO_IP; 3321 toh->name = IP_RECVIF; 3322 toh->len = sizeof (struct T_opthdr) + 3323 sizeof (uint_t); 3324 toh->status = 0; 3325 dstopt += sizeof (struct T_opthdr); 3326 dstptr = (uint_t *)dstopt; 3327 *dstptr = pinfo->ip_pkt_ifindex; 3328 dstopt += sizeof (uint_t); 3329 udi_size -= toh->len; 3330 } 3331 if (icmp->icmp_timestamp) { 3332 struct T_opthdr *toh; 3333 3334 toh = (struct T_opthdr *)dstopt; 3335 toh->level = SOL_SOCKET; 3336 toh->name = SCM_TIMESTAMP; 3337 toh->len = sizeof (struct T_opthdr) + 3338 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3339 toh->status = 0; 3340 dstopt += sizeof (struct T_opthdr); 3341 /* Align for gethrestime() */ 3342 dstopt = (char 
*)P2ROUNDUP((intptr_t)dstopt, 3343 sizeof (intptr_t)); 3344 gethrestime((timestruc_t *)dstopt); 3345 dstopt = (char *)toh + toh->len; 3346 udi_size -= toh->len; 3347 } 3348 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3349 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3350 struct T_opthdr *toh; 3351 struct in_pktinfo *pktinfop; 3352 3353 toh = (struct T_opthdr *)dstopt; 3354 toh->level = IPPROTO_IP; 3355 toh->name = IP_PKTINFO; 3356 toh->len = sizeof (struct T_opthdr) + 3357 sizeof (in_pktinfo_t); 3358 toh->status = 0; 3359 dstopt += sizeof (struct T_opthdr); 3360 pktinfop = (struct in_pktinfo *)dstopt; 3361 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3362 pktinfop->ipi_spec_dst = 3363 pinfo->ip_pkt_match_addr; 3364 3365 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3366 3367 dstopt += sizeof (struct in_pktinfo); 3368 udi_size -= toh->len; 3369 } 3370 3371 /* Consumed all of allocated space */ 3372 ASSERT(udi_size == 0); 3373 } 3374 3375 if (options_mp != NULL) 3376 freeb(options_mp); 3377 3378 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3379 putnext(connp->conn_rq, mp); 3380 return; 3381 } 3382 3383 /* 3384 * We don't need options_mp in the IPv6 path. 3385 */ 3386 if (options_mp != NULL) { 3387 freeb(options_mp); 3388 options_mp = NULL; 3389 } 3390 3391 /* 3392 * Discard message if it is smaller than the IPv6 header 3393 * or if the header is malformed. 3394 */ 3395 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3396 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3397 icmp->icmp_family != AF_INET6) { 3398 freemsg(mp); 3399 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3400 return; 3401 } 3402 3403 /* Initialize */ 3404 ipp.ipp_fields = 0; 3405 hopstrip = 0; 3406 3407 ip6h = (ip6_t *)rptr; 3408 /* 3409 * Call on ip_find_hdr_v6 which gets the total hdr len 3410 * as well as individual lenghts of ext hdrs (and ptrs to 3411 * them). 
3412 */ 3413 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3414 /* Look for ifindex information */ 3415 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3416 ip6i = (ip6i_t *)ip6h; 3417 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3418 ASSERT(ip6i->ip6i_ifindex != 0); 3419 ipp.ipp_fields |= IPPF_IFINDEX; 3420 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3421 } 3422 rptr = (uchar_t *)&ip6i[1]; 3423 mp->b_rptr = rptr; 3424 if (rptr == mp->b_wptr) { 3425 mp1 = mp->b_cont; 3426 freeb(mp); 3427 mp = mp1; 3428 rptr = mp->b_rptr; 3429 } 3430 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3431 ip6h = (ip6_t *)rptr; 3432 } 3433 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3434 3435 /* 3436 * We need to lie a bit to the user because users inside 3437 * labeled compartments should not see their own labels. We 3438 * assume that in all other respects IP has checked the label, 3439 * and that the label is always first among the options. (If 3440 * it's not first, then this code won't see it, and the option 3441 * will be passed along to the user.) 3442 * 3443 * If we had multilevel ICMP sockets, then the following code 3444 * should be skipped for them to allow the user to see the 3445 * label. 3446 * 3447 * Alignment restrictions in the definition of IP options 3448 * (namely, the requirement that the 4-octet DOI goes on a 3449 * 4-octet boundary) mean that we know exactly where the option 3450 * should start, but we're lenient for other hosts. 3451 * 3452 * Note that there are no multilevel ICMP or raw IP sockets 3453 * yet, thus nobody ever sees the IP6OPT_LS option. 
3454 */ 3455 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3456 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3457 const uchar_t *ucp = 3458 (const uchar_t *)ipp.ipp_hopopts + 2; 3459 int remlen = ipp.ipp_hopoptslen - 2; 3460 3461 while (remlen > 0) { 3462 if (*ucp == IP6OPT_PAD1) { 3463 remlen--; 3464 ucp++; 3465 } else if (*ucp == IP6OPT_PADN) { 3466 remlen -= ucp[1] + 2; 3467 ucp += ucp[1] + 2; 3468 } else if (*ucp == ip6opt_ls) { 3469 hopstrip = (ucp - 3470 (const uchar_t *)ipp.ipp_hopopts) + 3471 ucp[1] + 2; 3472 hopstrip = (hopstrip + 7) & ~7; 3473 break; 3474 } else { 3475 /* label option must be first */ 3476 break; 3477 } 3478 } 3479 } 3480 } else { 3481 hdr_len = IPV6_HDR_LEN; 3482 ip6i = NULL; 3483 nexthdr = ip6h->ip6_nxt; 3484 } 3485 /* 3486 * One special case where IP attaches the IRE needs to 3487 * be handled so that we don't send up IRE to the user land. 3488 */ 3489 if (nexthdr == IPPROTO_TCP) { 3490 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3491 3492 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3493 mp->b_cont != NULL) { 3494 mp1 = mp->b_cont; 3495 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3496 freeb(mp1); 3497 mp->b_cont = NULL; 3498 } 3499 } 3500 } 3501 /* 3502 * Check a filter for ICMPv6 types if needed. 3503 * Verify raw checksums if needed. 
3504 */ 3505 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3506 if (icmp->icmp_filter != NULL) { 3507 int type; 3508 3509 /* Assumes that IP has done the pullupmsg */ 3510 type = mp->b_rptr[hdr_len]; 3511 3512 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3513 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3514 freemsg(mp); 3515 return; 3516 } 3517 } else { 3518 /* Checksum */ 3519 uint16_t *up; 3520 uint32_t sum; 3521 int remlen; 3522 3523 up = (uint16_t *)&ip6h->ip6_src; 3524 3525 remlen = msgdsize(mp) - hdr_len; 3526 sum = htons(icmp->icmp_proto + remlen) 3527 + up[0] + up[1] + up[2] + up[3] 3528 + up[4] + up[5] + up[6] + up[7] 3529 + up[8] + up[9] + up[10] + up[11] 3530 + up[12] + up[13] + up[14] + up[15]; 3531 sum = (sum & 0xffff) + (sum >> 16); 3532 sum = IP_CSUM(mp, hdr_len, sum); 3533 if (sum != 0) { 3534 /* IPv6 RAW checksum failed */ 3535 ip0dbg(("icmp_rput: RAW checksum " 3536 "failed %x\n", sum)); 3537 freemsg(mp); 3538 BUMP_MIB(&is->is_rawip_mib, 3539 rawipInCksumErrs); 3540 return; 3541 } 3542 } 3543 } 3544 /* Skip all the IPv6 headers per API */ 3545 mp->b_rptr += hdr_len; 3546 3547 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3548 3549 /* 3550 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3551 * maintain state information, instead of relying on icmp_t 3552 * structure, since there arent any locks protecting these members 3553 * and there is a window where there might be a race between a 3554 * thread setting options on the write side and a thread reading 3555 * these options on the read size. 
3556 */ 3557 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3558 IPPF_RTHDR|IPPF_IFINDEX)) { 3559 if (icmp->icmp_ipv6_recvhopopts && 3560 (ipp.ipp_fields & IPPF_HOPOPTS) && 3561 ipp.ipp_hopoptslen > hopstrip) { 3562 udi_size += sizeof (struct T_opthdr) + 3563 ipp.ipp_hopoptslen - hopstrip; 3564 icmp_opt |= IPPF_HOPOPTS; 3565 } 3566 if ((icmp->icmp_ipv6_recvdstopts || 3567 icmp->icmp_old_ipv6_recvdstopts) && 3568 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3569 udi_size += sizeof (struct T_opthdr) + 3570 ipp.ipp_dstoptslen; 3571 icmp_opt |= IPPF_DSTOPTS; 3572 } 3573 if (((icmp->icmp_ipv6_recvdstopts && 3574 icmp->icmp_ipv6_recvrthdr && 3575 (ipp.ipp_fields & IPPF_RTHDR)) || 3576 icmp->icmp_ipv6_recvrtdstopts) && 3577 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3578 udi_size += sizeof (struct T_opthdr) + 3579 ipp.ipp_rtdstoptslen; 3580 icmp_opt |= IPPF_RTDSTOPTS; 3581 } 3582 if (icmp->icmp_ipv6_recvrthdr && 3583 (ipp.ipp_fields & IPPF_RTHDR)) { 3584 udi_size += sizeof (struct T_opthdr) + 3585 ipp.ipp_rthdrlen; 3586 icmp_opt |= IPPF_RTHDR; 3587 } 3588 if (icmp->icmp_ip_recvpktinfo && 3589 (ipp.ipp_fields & IPPF_IFINDEX)) { 3590 udi_size += sizeof (struct T_opthdr) + 3591 sizeof (struct in6_pktinfo); 3592 icmp_opt |= IPPF_IFINDEX; 3593 } 3594 } 3595 if (icmp->icmp_ipv6_recvhoplimit) { 3596 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3597 icmp_ipv6_recvhoplimit = B_TRUE; 3598 } 3599 3600 if (icmp->icmp_ipv6_recvtclass) 3601 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3602 3603 mp1 = allocb(udi_size, BPRI_MED); 3604 if (mp1 == NULL) { 3605 freemsg(mp); 3606 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3607 return; 3608 } 3609 mp1->b_cont = mp; 3610 mp = mp1; 3611 mp->b_datap->db_type = M_PROTO; 3612 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3613 mp->b_wptr = (uchar_t *)tudi + udi_size; 3614 tudi->PRIM_type = T_UNITDATA_IND; 3615 tudi->SRC_length = sizeof (sin6_t); 3616 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3617 tudi->OPT_offset = sizeof 
(struct T_unitdata_ind) + sizeof (sin6_t); 3618 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3619 tudi->OPT_length = udi_size; 3620 sin6 = (sin6_t *)&tudi[1]; 3621 sin6->sin6_port = 0; 3622 sin6->sin6_family = AF_INET6; 3623 3624 sin6->sin6_addr = ip6h->ip6_src; 3625 /* No sin6_flowinfo per API */ 3626 sin6->sin6_flowinfo = 0; 3627 /* For link-scope source pass up scope id */ 3628 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3629 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3630 sin6->sin6_scope_id = ipp.ipp_ifindex; 3631 else 3632 sin6->sin6_scope_id = 0; 3633 3634 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3635 icmp->icmp_zoneid, is->is_netstack); 3636 3637 if (udi_size != 0) { 3638 uchar_t *dstopt; 3639 3640 dstopt = (uchar_t *)&sin6[1]; 3641 if (icmp_opt & IPPF_IFINDEX) { 3642 struct T_opthdr *toh; 3643 struct in6_pktinfo *pkti; 3644 3645 toh = (struct T_opthdr *)dstopt; 3646 toh->level = IPPROTO_IPV6; 3647 toh->name = IPV6_PKTINFO; 3648 toh->len = sizeof (struct T_opthdr) + 3649 sizeof (*pkti); 3650 toh->status = 0; 3651 dstopt += sizeof (struct T_opthdr); 3652 pkti = (struct in6_pktinfo *)dstopt; 3653 pkti->ipi6_addr = ip6h->ip6_dst; 3654 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3655 dstopt += sizeof (*pkti); 3656 udi_size -= toh->len; 3657 } 3658 if (icmp_ipv6_recvhoplimit) { 3659 struct T_opthdr *toh; 3660 3661 toh = (struct T_opthdr *)dstopt; 3662 toh->level = IPPROTO_IPV6; 3663 toh->name = IPV6_HOPLIMIT; 3664 toh->len = sizeof (struct T_opthdr) + 3665 sizeof (uint_t); 3666 toh->status = 0; 3667 dstopt += sizeof (struct T_opthdr); 3668 *(uint_t *)dstopt = ip6h->ip6_hops; 3669 dstopt += sizeof (uint_t); 3670 udi_size -= toh->len; 3671 } 3672 if (icmp->icmp_ipv6_recvtclass) { 3673 struct T_opthdr *toh; 3674 3675 toh = (struct T_opthdr *)dstopt; 3676 toh->level = IPPROTO_IPV6; 3677 toh->name = IPV6_TCLASS; 3678 toh->len = sizeof (struct T_opthdr) + 3679 sizeof (uint_t); 3680 toh->status = 0; 3681 dstopt += sizeof (struct T_opthdr); 3682 
*(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3683 dstopt += sizeof (uint_t); 3684 udi_size -= toh->len; 3685 } 3686 if (icmp_opt & IPPF_HOPOPTS) { 3687 struct T_opthdr *toh; 3688 3689 toh = (struct T_opthdr *)dstopt; 3690 toh->level = IPPROTO_IPV6; 3691 toh->name = IPV6_HOPOPTS; 3692 toh->len = sizeof (struct T_opthdr) + 3693 ipp.ipp_hopoptslen - hopstrip; 3694 toh->status = 0; 3695 dstopt += sizeof (struct T_opthdr); 3696 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 3697 ipp.ipp_hopoptslen - hopstrip); 3698 if (hopstrip > 0) { 3699 /* copy next header value and fake length */ 3700 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 3701 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 3702 hopstrip / 8; 3703 } 3704 dstopt += ipp.ipp_hopoptslen - hopstrip; 3705 udi_size -= toh->len; 3706 } 3707 if (icmp_opt & IPPF_RTDSTOPTS) { 3708 struct T_opthdr *toh; 3709 3710 toh = (struct T_opthdr *)dstopt; 3711 toh->level = IPPROTO_IPV6; 3712 toh->name = IPV6_DSTOPTS; 3713 toh->len = sizeof (struct T_opthdr) + 3714 ipp.ipp_rtdstoptslen; 3715 toh->status = 0; 3716 dstopt += sizeof (struct T_opthdr); 3717 bcopy(ipp.ipp_rtdstopts, dstopt, 3718 ipp.ipp_rtdstoptslen); 3719 dstopt += ipp.ipp_rtdstoptslen; 3720 udi_size -= toh->len; 3721 } 3722 if (icmp_opt & IPPF_RTHDR) { 3723 struct T_opthdr *toh; 3724 3725 toh = (struct T_opthdr *)dstopt; 3726 toh->level = IPPROTO_IPV6; 3727 toh->name = IPV6_RTHDR; 3728 toh->len = sizeof (struct T_opthdr) + 3729 ipp.ipp_rthdrlen; 3730 toh->status = 0; 3731 dstopt += sizeof (struct T_opthdr); 3732 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 3733 dstopt += ipp.ipp_rthdrlen; 3734 udi_size -= toh->len; 3735 } 3736 if (icmp_opt & IPPF_DSTOPTS) { 3737 struct T_opthdr *toh; 3738 3739 toh = (struct T_opthdr *)dstopt; 3740 toh->level = IPPROTO_IPV6; 3741 toh->name = IPV6_DSTOPTS; 3742 toh->len = sizeof (struct T_opthdr) + 3743 ipp.ipp_dstoptslen; 3744 toh->status = 0; 3745 dstopt += sizeof (struct T_opthdr); 3746 bcopy(ipp.ipp_dstopts, dstopt, 
3747 ipp.ipp_dstoptslen); 3748 dstopt += ipp.ipp_dstoptslen; 3749 udi_size -= toh->len; 3750 } 3751 /* Consumed all of allocated space */ 3752 ASSERT(udi_size == 0); 3753 } 3754 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3755 putnext(connp->conn_rq, mp); 3756 } 3757 3758 /* 3759 * Handle the results of a T_BIND_REQ whether deferred by IP or handled 3760 * immediately. 3761 */ 3762 static void 3763 icmp_bind_result(conn_t *connp, mblk_t *mp) 3764 { 3765 struct T_error_ack *tea; 3766 3767 switch (mp->b_datap->db_type) { 3768 case M_PROTO: 3769 case M_PCPROTO: 3770 /* M_PROTO messages contain some type of TPI message. */ 3771 if ((mp->b_wptr - mp->b_rptr) < sizeof (t_scalar_t)) { 3772 freemsg(mp); 3773 return; 3774 } 3775 tea = (struct T_error_ack *)mp->b_rptr; 3776 3777 switch (tea->PRIM_type) { 3778 case T_ERROR_ACK: 3779 switch (tea->ERROR_prim) { 3780 case O_T_BIND_REQ: 3781 case T_BIND_REQ: 3782 icmp_bind_error(connp, mp); 3783 return; 3784 default: 3785 break; 3786 } 3787 ASSERT(0); 3788 freemsg(mp); 3789 return; 3790 3791 case T_BIND_ACK: 3792 icmp_bind_ack(connp, mp); 3793 return; 3794 3795 default: 3796 break; 3797 } 3798 freemsg(mp); 3799 return; 3800 default: 3801 /* FIXME: other cases? */ 3802 ASSERT(0); 3803 freemsg(mp); 3804 return; 3805 } 3806 } 3807 3808 /* 3809 * Process a T_BIND_ACK 3810 */ 3811 static void 3812 icmp_bind_ack(conn_t *connp, mblk_t *mp) 3813 { 3814 icmp_t *icmp = connp->conn_icmp; 3815 mblk_t *mp1; 3816 ire_t *ire; 3817 struct T_bind_ack *tba; 3818 uchar_t *addrp; 3819 ipa_conn_t *ac; 3820 ipa6_conn_t *ac6; 3821 3822 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3823 /* 3824 * We know if headers are included or not so we can 3825 * safely do this. 3826 */ 3827 if (icmp->icmp_state == TS_UNBND) { 3828 /* 3829 * TPI has not yet bound - bind sent by 3830 * icmp_bind_proto. 
3831 */ 3832 freemsg(mp); 3833 rw_exit(&icmp->icmp_rwlock); 3834 return; 3835 } 3836 ASSERT(icmp->icmp_pending_op != -1); 3837 3838 /* 3839 * If a broadcast/multicast address was bound set 3840 * the source address to 0. 3841 * This ensures no datagrams with broadcast address 3842 * as source address are emitted (which would violate 3843 * RFC1122 - Hosts requirements) 3844 * 3845 * Note that when connecting the returned IRE is 3846 * for the destination address and we only perform 3847 * the broadcast check for the source address (it 3848 * is OK to connect to a broadcast/multicast address.) 3849 */ 3850 mp1 = mp->b_cont; 3851 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 3852 ire = (ire_t *)mp1->b_rptr; 3853 3854 /* 3855 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 3856 * local address. 3857 */ 3858 if (ire->ire_type == IRE_BROADCAST && 3859 icmp->icmp_state != TS_DATA_XFER) { 3860 ASSERT(icmp->icmp_pending_op == T_BIND_REQ || 3861 icmp->icmp_pending_op == O_T_BIND_REQ); 3862 /* This was just a local bind to a MC/broadcast addr */ 3863 V6_SET_ZERO(icmp->icmp_v6src); 3864 if (icmp->icmp_family == AF_INET6) 3865 (void) icmp_build_hdrs(icmp); 3866 } else if (V6_OR_V4_INADDR_ANY(icmp->icmp_v6src)) { 3867 /* 3868 * Local address not yet set - pick it from the 3869 * T_bind_ack 3870 */ 3871 tba = (struct T_bind_ack *)mp->b_rptr; 3872 addrp = &mp->b_rptr[tba->ADDR_offset]; 3873 switch (icmp->icmp_family) { 3874 case AF_INET: 3875 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 3876 ac = (ipa_conn_t *)addrp; 3877 } else { 3878 ASSERT(tba->ADDR_length == 3879 sizeof (ipa_conn_x_t)); 3880 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 3881 } 3882 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 3883 &icmp->icmp_v6src); 3884 break; 3885 case AF_INET6: 3886 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 3887 ac6 = (ipa6_conn_t *)addrp; 3888 } else { 3889 ASSERT(tba->ADDR_length == 3890 sizeof (ipa6_conn_x_t)); 3891 ac6 = &((ipa6_conn_x_t *) 3892 addrp)->ac6x_conn; 
3893 } 3894 icmp->icmp_v6src = ac6->ac6_laddr; 3895 (void) icmp_build_hdrs(icmp); 3896 } 3897 } 3898 mp1 = mp1->b_cont; 3899 } 3900 icmp->icmp_pending_op = -1; 3901 rw_exit(&icmp->icmp_rwlock); 3902 /* 3903 * Look for one or more appended ACK message added by 3904 * icmp_connect or icmp_disconnect. 3905 * If none found just send up the T_BIND_ACK. 3906 * icmp_connect has appended a T_OK_ACK and a 3907 * T_CONN_CON. 3908 * icmp_disconnect has appended a T_OK_ACK. 3909 */ 3910 if (mp1 != NULL) { 3911 if (mp->b_cont == mp1) 3912 mp->b_cont = NULL; 3913 else { 3914 ASSERT(mp->b_cont->b_cont == mp1); 3915 mp->b_cont->b_cont = NULL; 3916 } 3917 freemsg(mp); 3918 mp = mp1; 3919 while (mp != NULL) { 3920 mp1 = mp->b_cont; 3921 mp->b_cont = NULL; 3922 putnext(connp->conn_rq, mp); 3923 mp = mp1; 3924 } 3925 return; 3926 } 3927 freemsg(mp->b_cont); 3928 mp->b_cont = NULL; 3929 putnext(connp->conn_rq, mp); 3930 } 3931 3932 static void 3933 icmp_bind_error(conn_t *connp, mblk_t *mp) 3934 { 3935 icmp_t *icmp = connp->conn_icmp; 3936 struct T_error_ack *tea; 3937 3938 tea = (struct T_error_ack *)mp->b_rptr; 3939 /* 3940 * If our O_T_BIND_REQ/T_BIND_REQ fails, 3941 * clear out the source address before 3942 * passing the message upstream. 3943 * If this was caused by a T_CONN_REQ 3944 * revert back to bound state. 3945 */ 3946 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3947 if (icmp->icmp_state == TS_UNBND) { 3948 /* 3949 * TPI has not yet bound - bind sent by icmp_bind_proto. 
3950 */ 3951 freemsg(mp); 3952 rw_exit(&icmp->icmp_rwlock); 3953 return; 3954 } 3955 ASSERT(icmp->icmp_pending_op != -1); 3956 tea->ERROR_prim = icmp->icmp_pending_op; 3957 icmp->icmp_pending_op = -1; 3958 3959 switch (tea->ERROR_prim) { 3960 case T_CONN_REQ: 3961 ASSERT(icmp->icmp_state == TS_DATA_XFER); 3962 /* Connect failed */ 3963 /* Revert back to the bound source */ 3964 icmp->icmp_v6src = icmp->icmp_bound_v6src; 3965 icmp->icmp_state = TS_IDLE; 3966 if (icmp->icmp_family == AF_INET6) 3967 (void) icmp_build_hdrs(icmp); 3968 break; 3969 3970 case T_DISCON_REQ: 3971 case T_BIND_REQ: 3972 case O_T_BIND_REQ: 3973 V6_SET_ZERO(icmp->icmp_v6src); 3974 V6_SET_ZERO(icmp->icmp_bound_v6src); 3975 icmp->icmp_state = TS_UNBND; 3976 if (icmp->icmp_family == AF_INET6) 3977 (void) icmp_build_hdrs(icmp); 3978 break; 3979 default: 3980 break; 3981 } 3982 rw_exit(&icmp->icmp_rwlock); 3983 putnext(connp->conn_rq, mp); 3984 } 3985 3986 /* 3987 * return SNMP stuff in buffer in mpdata 3988 */ 3989 mblk_t * 3990 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 3991 { 3992 mblk_t *mpdata; 3993 struct opthdr *optp; 3994 conn_t *connp = Q_TO_CONN(q); 3995 icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 3996 mblk_t *mp2ctl; 3997 3998 /* 3999 * make a copy of the original message 4000 */ 4001 mp2ctl = copymsg(mpctl); 4002 4003 if (mpctl == NULL || 4004 (mpdata = mpctl->b_cont) == NULL) { 4005 freemsg(mpctl); 4006 freemsg(mp2ctl); 4007 return (0); 4008 } 4009 4010 /* fixed length structure for IPv4 and IPv6 counters */ 4011 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4012 optp->level = EXPER_RAWIP; 4013 optp->name = 0; 4014 (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, 4015 sizeof (is->is_rawip_mib)); 4016 optp->len = msgdsize(mpdata); 4017 qreply(q, mpctl); 4018 4019 return (mp2ctl); 4020 } 4021 4022 /* 4023 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 
4024 * TODO: If this ever actually tries to set anything, it needs to be 4025 * to do the appropriate locking. 4026 */ 4027 /* ARGSUSED */ 4028 int 4029 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4030 uchar_t *ptr, int len) 4031 { 4032 switch (level) { 4033 case EXPER_RAWIP: 4034 return (0); 4035 default: 4036 return (1); 4037 } 4038 } 4039 4040 /* Report for ndd "icmp_status" */ 4041 /* ARGSUSED */ 4042 static int 4043 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4044 { 4045 conn_t *connp; 4046 ip_stack_t *ipst; 4047 char laddrbuf[INET6_ADDRSTRLEN]; 4048 char faddrbuf[INET6_ADDRSTRLEN]; 4049 int i; 4050 4051 (void) mi_mpprintf(mp, 4052 "RAWIP " MI_COL_HDRPAD_STR 4053 /* 01234567[89ABCDEF] */ 4054 " src addr dest addr state"); 4055 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 4056 4057 connp = Q_TO_CONN(q); 4058 ipst = connp->conn_netstack->netstack_ip; 4059 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4060 connf_t *connfp; 4061 char *state; 4062 4063 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4064 connp = NULL; 4065 4066 while ((connp = ipcl_get_next_conn(connfp, connp, 4067 IPCL_RAWIPCONN)) != NULL) { 4068 icmp_t *icmp; 4069 4070 mutex_enter(&(connp)->conn_lock); 4071 icmp = connp->conn_icmp; 4072 4073 if (icmp->icmp_state == TS_UNBND) 4074 state = "UNBOUND"; 4075 else if (icmp->icmp_state == TS_IDLE) 4076 state = "IDLE"; 4077 else if (icmp->icmp_state == TS_DATA_XFER) 4078 state = "CONNECTED"; 4079 else 4080 state = "UnkState"; 4081 4082 (void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s", 4083 (void *)icmp, 4084 inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf, 4085 sizeof (faddrbuf)), 4086 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 4087 sizeof (laddrbuf)), 4088 state); 4089 mutex_exit(&(connp)->conn_lock); 4090 } 4091 } 4092 return (0); 4093 } 4094 4095 /* 4096 * This routine creates a T_UDERROR_IND message and passes it upstream. 
4097 * The address and options are copied from the T_UNITDATA_REQ message 4098 * passed in mp. This message is freed. 4099 */ 4100 static void 4101 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 4102 { 4103 mblk_t *mp1; 4104 uchar_t *rptr = mp->b_rptr; 4105 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 4106 4107 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 4108 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 4109 tudr->OPT_length, err); 4110 if (mp1) 4111 qreply(q, mp1); 4112 freemsg(mp); 4113 } 4114 4115 /* 4116 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 4117 * After some error checking, the message is passed downstream to ip. 4118 */ 4119 static void 4120 icmp_unbind(queue_t *q, mblk_t *mp) 4121 { 4122 icmp_t *icmp = Q_TO_ICMP(q); 4123 4124 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4125 /* If a bind has not been done, we can't unbind. */ 4126 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 4127 rw_exit(&icmp->icmp_rwlock); 4128 icmp_err_ack(q, mp, TOUTSTATE, 0); 4129 return; 4130 } 4131 icmp->icmp_pending_op = T_UNBIND_REQ; 4132 rw_exit(&icmp->icmp_rwlock); 4133 4134 /* 4135 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 4136 * and therefore ip_unbind must never return NULL. 4137 */ 4138 mp = ip_unbind(q, mp); 4139 ASSERT(mp != NULL); 4140 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4141 4142 /* 4143 * Once we're unbound from IP, the pending operation may be cleared 4144 * here. 4145 */ 4146 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4147 V6_SET_ZERO(icmp->icmp_v6src); 4148 V6_SET_ZERO(icmp->icmp_bound_v6src); 4149 icmp->icmp_pending_op = -1; 4150 icmp->icmp_state = TS_UNBND; 4151 if (icmp->icmp_family == AF_INET6) 4152 (void) icmp_build_hdrs(icmp); 4153 rw_exit(&icmp->icmp_rwlock); 4154 4155 qreply(q, mp); 4156 } 4157 4158 /* 4159 * Process IPv4 packets that already include an IP header. 
4160 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 4161 * IPPROTO_IGMP). 4162 */ 4163 static void 4164 icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop) 4165 { 4166 icmp_stack_t *is = icmp->icmp_is; 4167 ipha_t *ipha; 4168 int ip_hdr_length; 4169 int tp_hdr_len; 4170 mblk_t *mp1; 4171 uint_t pkt_len; 4172 ip_opt_info_t optinfo; 4173 conn_t *connp = icmp->icmp_connp; 4174 4175 optinfo.ip_opt_flags = 0; 4176 optinfo.ip_opt_ill_index = 0; 4177 ipha = (ipha_t *)mp->b_rptr; 4178 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4179 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 4180 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4181 ASSERT(icmp != NULL); 4182 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4183 freemsg(mp); 4184 return; 4185 } 4186 ipha = (ipha_t *)mp->b_rptr; 4187 } 4188 ipha->ipha_version_and_hdr_length = 4189 (IP_VERSION<<4) | (ip_hdr_length>>2); 4190 4191 /* 4192 * For the socket of SOCK_RAW type, the checksum is provided in the 4193 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4194 * tell IP that the application has sent a complete IP header and not 4195 * to compute the transport checksum nor change the DF flag. 4196 */ 4197 ipha->ipha_ident = IP_HDR_INCLUDED; 4198 ipha->ipha_hdr_checksum = 0; 4199 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 4200 /* Insert options if any */ 4201 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4202 /* 4203 * Put the IP header plus any transport header that is 4204 * checksumed by ip_wput into the first mblk. (ip_wput assumes 4205 * that at least the checksum field is in the first mblk.) 4206 */ 4207 switch (ipha->ipha_protocol) { 4208 case IPPROTO_UDP: 4209 tp_hdr_len = 8; 4210 break; 4211 case IPPROTO_TCP: 4212 tp_hdr_len = 20; 4213 break; 4214 default: 4215 tp_hdr_len = 0; 4216 break; 4217 } 4218 /* 4219 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 4220 * tp_hdr_len bytes will be in a single mblk. 
4221 */ 4222 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 4223 tp_hdr_len)) { 4224 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 4225 tp_hdr_len)) { 4226 BUMP_MIB(&is->is_rawip_mib, 4227 rawipOutErrors); 4228 freemsg(mp); 4229 return; 4230 } 4231 ipha = (ipha_t *)mp->b_rptr; 4232 } 4233 4234 /* 4235 * if the length is larger then the max allowed IP packet, 4236 * then send an error and abort the processing. 4237 */ 4238 pkt_len = ntohs(ipha->ipha_length) 4239 + icmp->icmp_ip_snd_options_len; 4240 if (pkt_len > IP_MAXPACKET) { 4241 icmp_ud_err(q, mp, EMSGSIZE); 4242 return; 4243 } 4244 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra + 4245 tp_hdr_len, BPRI_LO))) { 4246 icmp_ud_err(q, mp, ENOMEM); 4247 return; 4248 } 4249 mp1->b_rptr += is->is_wroff_extra; 4250 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4251 4252 ipha->ipha_length = htons((uint16_t)pkt_len); 4253 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4254 4255 /* Copy transport header if any */ 4256 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4257 mp1->b_wptr += tp_hdr_len; 4258 4259 /* Add options */ 4260 ipha = (ipha_t *)mp1->b_rptr; 4261 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 4262 icmp->icmp_ip_snd_options_len); 4263 4264 /* Drop IP header and transport header from original */ 4265 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4266 4267 mp1->b_cont = mp; 4268 mp = mp1; 4269 /* 4270 * Massage source route putting first source 4271 * route in ipha_dst. 
4272 */ 4273 (void) ip_massage_options(ipha, is->is_netstack); 4274 } 4275 4276 if (pktinfop != NULL) { 4277 /* 4278 * Over write the source address provided in the header 4279 */ 4280 if (pktinfop->ip4_addr != INADDR_ANY) { 4281 ipha->ipha_src = pktinfop->ip4_addr; 4282 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4283 } 4284 4285 if (pktinfop->ip4_ill_index != 0) { 4286 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4287 } 4288 } 4289 4290 mblk_setcred(mp, connp->conn_cred); 4291 ip_output_options(connp, mp, q, IP_WPUT, 4292 &optinfo); 4293 } 4294 4295 static boolean_t 4296 icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4297 { 4298 int err; 4299 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4300 icmp_stack_t *is = icmp->icmp_is; 4301 conn_t *connp = icmp->icmp_connp; 4302 4303 err = tsol_compute_label(DB_CREDDEF(mp, connp->conn_cred), dst, 4304 opt_storage, icmp->icmp_mac_exempt, 4305 is->is_netstack->netstack_ip); 4306 if (err == 0) { 4307 err = tsol_update_options(&icmp->icmp_ip_snd_options, 4308 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4309 opt_storage); 4310 } 4311 if (err != 0) { 4312 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4313 DTRACE_PROBE4( 4314 tx__ip__log__drop__updatelabel__icmp, 4315 char *, "queue(1) failed to update options(2) on mp(3)", 4316 queue_t *, q, char *, opt_storage, mblk_t *, mp); 4317 icmp_ud_err(q, mp, err); 4318 return (B_FALSE); 4319 } 4320 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4321 return (B_TRUE); 4322 } 4323 4324 /* 4325 * This routine handles all messages passed downstream. It either 4326 * consumes the message or passes it downstream; it never queues a 4327 * a message. 
4328 */ 4329 static void 4330 icmp_wput(queue_t *q, mblk_t *mp) 4331 { 4332 uchar_t *rptr = mp->b_rptr; 4333 ipha_t *ipha; 4334 mblk_t *mp1; 4335 int ip_hdr_length; 4336 #define tudr ((struct T_unitdata_req *)rptr) 4337 size_t ip_len; 4338 conn_t *connp = Q_TO_CONN(q); 4339 icmp_t *icmp = connp->conn_icmp; 4340 icmp_stack_t *is = icmp->icmp_is; 4341 sin6_t *sin6; 4342 sin_t *sin; 4343 ipaddr_t v4dst; 4344 ip4_pkt_t pktinfo; 4345 ip4_pkt_t *pktinfop = &pktinfo; 4346 ip_opt_info_t optinfo; 4347 4348 switch (mp->b_datap->db_type) { 4349 case M_DATA: 4350 if (icmp->icmp_hdrincl) { 4351 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4352 ipha = (ipha_t *)mp->b_rptr; 4353 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4354 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4355 BUMP_MIB(&is->is_rawip_mib, 4356 rawipOutErrors); 4357 freemsg(mp); 4358 return; 4359 } 4360 ipha = (ipha_t *)mp->b_rptr; 4361 } 4362 /* 4363 * If this connection was used for v6 (inconceivable!) 4364 * or if we have a new destination, then it's time to 4365 * figure a new label. 4366 */ 4367 if (is_system_labeled() && 4368 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4369 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4370 ipha->ipha_dst) && 4371 !icmp_update_label(q, icmp, mp, ipha->ipha_dst)) { 4372 return; 4373 } 4374 icmp_wput_hdrincl(q, mp, icmp, NULL); 4375 return; 4376 } 4377 freemsg(mp); 4378 return; 4379 case M_PROTO: 4380 case M_PCPROTO: 4381 ip_len = mp->b_wptr - rptr; 4382 if (ip_len >= sizeof (struct T_unitdata_req)) { 4383 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4384 if (((union T_primitives *)rptr)->type 4385 == T_UNITDATA_REQ) 4386 break; 4387 } 4388 /* FALLTHRU */ 4389 default: 4390 icmp_wput_other(q, mp); 4391 return; 4392 } 4393 4394 /* Handle T_UNITDATA_REQ messages here. */ 4395 4396 4397 4398 if (icmp->icmp_state == TS_UNBND) { 4399 /* If a port has not been bound to the stream, fail. 
*/ 4400 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4401 icmp_ud_err(q, mp, EPROTO); 4402 return; 4403 } 4404 mp1 = mp->b_cont; 4405 if (mp1 == NULL) { 4406 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4407 icmp_ud_err(q, mp, EPROTO); 4408 return; 4409 } 4410 4411 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4412 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4413 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4414 return; 4415 } 4416 4417 switch (icmp->icmp_family) { 4418 case AF_INET6: 4419 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4420 if (!OK_32PTR((char *)sin6) || 4421 tudr->DEST_length != sizeof (sin6_t) || 4422 sin6->sin6_family != AF_INET6) { 4423 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4424 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4425 return; 4426 } 4427 4428 /* No support for mapped addresses on raw sockets */ 4429 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4430 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4431 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4432 return; 4433 } 4434 4435 /* 4436 * Destination is a native IPv6 address. 4437 * Send out an IPv6 format packet. 
4438 */ 4439 icmp_wput_ipv6(q, mp, sin6, tudr->OPT_length); 4440 return; 4441 4442 case AF_INET: 4443 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4444 if (!OK_32PTR((char *)sin) || 4445 tudr->DEST_length != sizeof (sin_t) || 4446 sin->sin_family != AF_INET) { 4447 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4448 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4449 return; 4450 } 4451 /* Extract and ipaddr */ 4452 v4dst = sin->sin_addr.s_addr; 4453 break; 4454 4455 default: 4456 ASSERT(0); 4457 } 4458 4459 pktinfop->ip4_ill_index = 0; 4460 pktinfop->ip4_addr = INADDR_ANY; 4461 optinfo.ip_opt_flags = 0; 4462 optinfo.ip_opt_ill_index = 0; 4463 4464 4465 /* 4466 * If options passed in, feed it for verification and handling 4467 */ 4468 if (tudr->OPT_length != 0) { 4469 int error; 4470 4471 error = 0; 4472 if (icmp_unitdata_opt_process(q, mp, &error, 4473 (void *)pktinfop) < 0) { 4474 /* failure */ 4475 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4476 icmp_ud_err(q, mp, error); 4477 return; 4478 } 4479 ASSERT(error == 0); 4480 /* 4481 * Note: Success in processing options. 
4482 * mp option buffer represented by 4483 * OPT_length/offset now potentially modified 4484 * and contain option setting results 4485 */ 4486 4487 } 4488 4489 if (v4dst == INADDR_ANY) 4490 v4dst = htonl(INADDR_LOOPBACK); 4491 4492 /* Check if our saved options are valid; update if not */ 4493 if (is_system_labeled() && 4494 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4495 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst) && 4496 !icmp_update_label(q, icmp, mp, v4dst)) { 4497 return; 4498 } 4499 4500 /* Protocol 255 contains full IP headers */ 4501 if (icmp->icmp_hdrincl) { 4502 freeb(mp); 4503 icmp_wput_hdrincl(q, mp1, icmp, pktinfop); 4504 return; 4505 } 4506 4507 4508 /* Add an IP header */ 4509 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4510 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 4511 if ((uchar_t *)ipha < mp1->b_datap->db_base || 4512 mp1->b_datap->db_ref != 1 || 4513 !OK_32PTR(ipha)) { 4514 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, 4515 BPRI_LO))) { 4516 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4517 icmp_ud_err(q, mp, ENOMEM); 4518 return; 4519 } 4520 mp1->b_cont = mp->b_cont; 4521 ipha = (ipha_t *)mp1->b_datap->db_lim; 4522 mp1->b_wptr = (uchar_t *)ipha; 4523 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4524 } 4525 #ifdef _BIG_ENDIAN 4526 /* Set version, header length, and tos */ 4527 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4528 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4529 icmp->icmp_type_of_service); 4530 /* Set ttl and protocol */ 4531 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4532 #else 4533 /* Set version, header length, and tos */ 4534 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4535 ((icmp->icmp_type_of_service << 8) | 4536 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4537 /* Set ttl and protocol */ 4538 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4539 #endif 4540 if (pktinfop->ip4_addr != INADDR_ANY) { 4541 
ipha->ipha_src = pktinfop->ip4_addr; 4542 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4543 } else { 4544 4545 /* 4546 * Copy our address into the packet. If this is zero, 4547 * ip will fill in the real source address. 4548 */ 4549 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4550 } 4551 4552 ipha->ipha_fragment_offset_and_flags = 0; 4553 4554 if (pktinfop->ip4_ill_index != 0) { 4555 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4556 } 4557 4558 4559 /* 4560 * For the socket of SOCK_RAW type, the checksum is provided in the 4561 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4562 * tell IP that the application has sent a complete IP header and not 4563 * to compute the transport checksum nor change the DF flag. 4564 */ 4565 ipha->ipha_ident = IP_HDR_INCLUDED; 4566 4567 /* Finish common formatting of the packet. */ 4568 mp1->b_rptr = (uchar_t *)ipha; 4569 4570 ip_len = mp1->b_wptr - (uchar_t *)ipha; 4571 if (mp1->b_cont != NULL) 4572 ip_len += msgdsize(mp1->b_cont); 4573 4574 /* 4575 * Set the length into the IP header. 4576 * If the length is greater than the maximum allowed by IP, 4577 * then free the message and return. Do not try and send it 4578 * as this can cause problems in layers below. 4579 */ 4580 if (ip_len > IP_MAXPACKET) { 4581 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4582 icmp_ud_err(q, mp, EMSGSIZE); 4583 return; 4584 } 4585 ipha->ipha_length = htons((uint16_t)ip_len); 4586 /* 4587 * Copy in the destination address from the T_UNITDATA 4588 * request 4589 */ 4590 ipha->ipha_dst = v4dst; 4591 4592 /* 4593 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4594 */ 4595 if (CLASSD(v4dst)) 4596 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4597 4598 /* Copy in options if any */ 4599 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4600 bcopy(icmp->icmp_ip_snd_options, 4601 &ipha[1], icmp->icmp_ip_snd_options_len); 4602 /* 4603 * Massage source route putting first source route in ipha_dst. 
4604 * Ignore the destination in the T_unitdata_req. 4605 */ 4606 (void) ip_massage_options(ipha, is->is_netstack); 4607 } 4608 4609 freeb(mp); 4610 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 4611 mblk_setcred(mp1, connp->conn_cred); 4612 ip_output_options(Q_TO_CONN(q), mp1, q, IP_WPUT, &optinfo); 4613 #undef ipha 4614 #undef tudr 4615 } 4616 4617 static boolean_t 4618 icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4619 { 4620 int err; 4621 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4622 icmp_stack_t *is = icmp->icmp_is; 4623 conn_t *connp = icmp->icmp_connp; 4624 4625 err = tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), dst, 4626 opt_storage, icmp->icmp_mac_exempt, 4627 is->is_netstack->netstack_ip); 4628 if (err == 0) { 4629 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4630 &icmp->icmp_label_len_v6, opt_storage); 4631 } 4632 if (err != 0) { 4633 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4634 DTRACE_PROBE4( 4635 tx__ip__log__drop__updatelabel__icmp6, 4636 char *, "queue(1) failed to update options(2) on mp(3)", 4637 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4638 icmp_ud_err(wq, mp, err); 4639 return (B_FALSE); 4640 } 4641 4642 icmp->icmp_v6lastdst = *dst; 4643 return (B_TRUE); 4644 } 4645 4646 /* 4647 * icmp_wput_ipv6(): 4648 * Assumes that icmp_wput did some sanity checking on the destination 4649 * address, but that the label may not yet be correct. 
4650 */ 4651 void 4652 icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) 4653 { 4654 ip6_t *ip6h; 4655 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 4656 mblk_t *mp1; 4657 int ip_hdr_len = IPV6_HDR_LEN; 4658 size_t ip_len; 4659 icmp_t *icmp = Q_TO_ICMP(q); 4660 icmp_stack_t *is = icmp->icmp_is; 4661 ip6_pkt_t ipp_s; /* For ancillary data options */ 4662 ip6_pkt_t *ipp = &ipp_s; 4663 ip6_pkt_t *tipp; 4664 uint32_t csum = 0; 4665 uint_t ignore = 0; 4666 uint_t option_exists = 0, is_sticky = 0; 4667 uint8_t *cp; 4668 uint8_t *nxthdr_ptr; 4669 in6_addr_t ip6_dst; 4670 4671 /* 4672 * If the local address is a mapped address return 4673 * an error. 4674 * It would be possible to send an IPv6 packet but the 4675 * response would never make it back to the application 4676 * since it is bound to a mapped address. 4677 */ 4678 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4679 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4680 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4681 return; 4682 } 4683 4684 ipp->ipp_fields = 0; 4685 ipp->ipp_sticky_ignored = 0; 4686 4687 /* 4688 * If TPI options passed in, feed it for verification and handling 4689 */ 4690 if (tudr_optlen != 0) { 4691 int error; 4692 4693 if (icmp_unitdata_opt_process(q, mp, &error, 4694 (void *)ipp) < 0) { 4695 /* failure */ 4696 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4697 icmp_ud_err(q, mp, error); 4698 return; 4699 } 4700 ignore = ipp->ipp_sticky_ignored; 4701 ASSERT(error == 0); 4702 } 4703 4704 if (sin6->sin6_scope_id != 0 && 4705 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4706 /* 4707 * IPPF_SCOPE_ID is special. It's neither a sticky 4708 * option nor ancillary data. It needs to be 4709 * explicitly set in options_exists. 
4710 */ 4711 option_exists |= IPPF_SCOPE_ID; 4712 } 4713 4714 /* 4715 * Compute the destination address 4716 */ 4717 ip6_dst = sin6->sin6_addr; 4718 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4719 ip6_dst = ipv6_loopback; 4720 4721 /* 4722 * If we're not going to the same destination as last time, then 4723 * recompute the label required. This is done in a separate routine to 4724 * avoid blowing up our stack here. 4725 */ 4726 if (is_system_labeled() && 4727 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst) && 4728 !icmp_update_label_v6(q, icmp, mp, &ip6_dst)) { 4729 return; 4730 } 4731 4732 /* 4733 * If there's a security label here, then we ignore any options the 4734 * user may try to set. We keep the peer's label as a hidden sticky 4735 * option. 4736 */ 4737 if (icmp->icmp_label_len_v6 > 0) { 4738 ignore &= ~IPPF_HOPOPTS; 4739 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4740 } 4741 4742 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4743 (ipp->ipp_fields == 0)) { 4744 /* No sticky options nor ancillary data. */ 4745 goto no_options; 4746 } 4747 4748 /* 4749 * Go through the options figuring out where each is going to 4750 * come from and build two masks. The first mask indicates if 4751 * the option exists at all. The second mask indicates if the 4752 * option is sticky or ancillary. 
4753 */ 4754 if (!(ignore & IPPF_HOPOPTS)) { 4755 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4756 option_exists |= IPPF_HOPOPTS; 4757 ip_hdr_len += ipp->ipp_hopoptslen; 4758 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4759 option_exists |= IPPF_HOPOPTS; 4760 is_sticky |= IPPF_HOPOPTS; 4761 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4762 } 4763 } 4764 4765 if (!(ignore & IPPF_RTHDR)) { 4766 if (ipp->ipp_fields & IPPF_RTHDR) { 4767 option_exists |= IPPF_RTHDR; 4768 ip_hdr_len += ipp->ipp_rthdrlen; 4769 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4770 option_exists |= IPPF_RTHDR; 4771 is_sticky |= IPPF_RTHDR; 4772 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4773 } 4774 } 4775 4776 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4777 /* 4778 * Need to have a router header to use these. 4779 */ 4780 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4781 option_exists |= IPPF_RTDSTOPTS; 4782 ip_hdr_len += ipp->ipp_rtdstoptslen; 4783 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4784 option_exists |= IPPF_RTDSTOPTS; 4785 is_sticky |= IPPF_RTDSTOPTS; 4786 ip_hdr_len += 4787 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4788 } 4789 } 4790 4791 if (!(ignore & IPPF_DSTOPTS)) { 4792 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4793 option_exists |= IPPF_DSTOPTS; 4794 ip_hdr_len += ipp->ipp_dstoptslen; 4795 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4796 option_exists |= IPPF_DSTOPTS; 4797 is_sticky |= IPPF_DSTOPTS; 4798 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4799 } 4800 } 4801 4802 if (!(ignore & IPPF_IFINDEX)) { 4803 if (ipp->ipp_fields & IPPF_IFINDEX) { 4804 option_exists |= IPPF_IFINDEX; 4805 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4806 option_exists |= IPPF_IFINDEX; 4807 is_sticky |= IPPF_IFINDEX; 4808 } 4809 } 4810 4811 if (!(ignore & IPPF_ADDR)) { 4812 if (ipp->ipp_fields & IPPF_ADDR) { 4813 option_exists |= IPPF_ADDR; 4814 } else if 
(icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4815 option_exists |= IPPF_ADDR; 4816 is_sticky |= IPPF_ADDR; 4817 } 4818 } 4819 4820 if (!(ignore & IPPF_DONTFRAG)) { 4821 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4822 option_exists |= IPPF_DONTFRAG; 4823 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4824 option_exists |= IPPF_DONTFRAG; 4825 is_sticky |= IPPF_DONTFRAG; 4826 } 4827 } 4828 4829 if (!(ignore & IPPF_USE_MIN_MTU)) { 4830 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4831 option_exists |= IPPF_USE_MIN_MTU; 4832 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4833 IPPF_USE_MIN_MTU) { 4834 option_exists |= IPPF_USE_MIN_MTU; 4835 is_sticky |= IPPF_USE_MIN_MTU; 4836 } 4837 } 4838 4839 if (!(ignore & IPPF_NEXTHOP)) { 4840 if (ipp->ipp_fields & IPPF_NEXTHOP) { 4841 option_exists |= IPPF_NEXTHOP; 4842 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) { 4843 option_exists |= IPPF_NEXTHOP; 4844 is_sticky |= IPPF_NEXTHOP; 4845 } 4846 } 4847 4848 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 4849 option_exists |= IPPF_HOPLIMIT; 4850 /* IPV6_HOPLIMIT can never be sticky */ 4851 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 4852 4853 if (!(ignore & IPPF_UNICAST_HOPS) && 4854 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 4855 option_exists |= IPPF_UNICAST_HOPS; 4856 is_sticky |= IPPF_UNICAST_HOPS; 4857 } 4858 4859 if (!(ignore & IPPF_MULTICAST_HOPS) && 4860 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 4861 option_exists |= IPPF_MULTICAST_HOPS; 4862 is_sticky |= IPPF_MULTICAST_HOPS; 4863 } 4864 4865 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 4866 /* This is a sticky socket option only */ 4867 option_exists |= IPPF_NO_CKSUM; 4868 is_sticky |= IPPF_NO_CKSUM; 4869 } 4870 4871 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 4872 /* This is a sticky socket option only */ 4873 option_exists |= IPPF_RAW_CKSUM; 4874 is_sticky |= IPPF_RAW_CKSUM; 4875 } 4876 4877 if (!(ignore 
& IPPF_TCLASS)) { 4878 if (ipp->ipp_fields & IPPF_TCLASS) { 4879 option_exists |= IPPF_TCLASS; 4880 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 4881 option_exists |= IPPF_TCLASS; 4882 is_sticky |= IPPF_TCLASS; 4883 } 4884 } 4885 4886 no_options: 4887 4888 /* 4889 * If any options carried in the ip6i_t were specified, we 4890 * need to account for the ip6i_t in the data we'll be sending 4891 * down. 4892 */ 4893 if (option_exists & IPPF_HAS_IP6I) 4894 ip_hdr_len += sizeof (ip6i_t); 4895 4896 /* check/fix buffer config, setup pointers into it */ 4897 mp1 = mp->b_cont; 4898 ip6h = (ip6_t *)&mp1->b_rptr[-ip_hdr_len]; 4899 if ((mp1->b_datap->db_ref != 1) || 4900 ((unsigned char *)ip6h < mp1->b_datap->db_base) || 4901 !OK_32PTR(ip6h)) { 4902 /* Try to get everything in a single mblk next time */ 4903 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 4904 icmp->icmp_max_hdr_len = ip_hdr_len; 4905 (void) mi_set_sth_wroff(RD(q), 4906 icmp->icmp_max_hdr_len + is->is_wroff_extra); 4907 } 4908 mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); 4909 if (!mp1) { 4910 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4911 icmp_ud_err(q, mp, ENOMEM); 4912 return; 4913 } 4914 mp1->b_cont = mp->b_cont; 4915 mp1->b_wptr = mp1->b_datap->db_lim; 4916 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 4917 } 4918 mp1->b_rptr = (unsigned char *)ip6h; 4919 ip6i = (ip6i_t *)ip6h; 4920 4921 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 4922 if (option_exists & IPPF_HAS_IP6I) { 4923 ip6h = (ip6_t *)&ip6i[1]; 4924 ip6i->ip6i_flags = 0; 4925 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4926 4927 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 4928 if (option_exists & IPPF_SCOPE_ID) { 4929 ip6i->ip6i_flags |= IP6I_IFINDEX; 4930 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 4931 } else if (option_exists & IPPF_IFINDEX) { 4932 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 4933 ASSERT(tipp->ipp_ifindex != 0); 4934 ip6i->ip6i_flags |= IP6I_IFINDEX; 4935 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 4936 } 4937 4938 if (option_exists & IPPF_RAW_CKSUM) { 4939 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 4940 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 4941 } 4942 4943 if (option_exists & IPPF_NO_CKSUM) { 4944 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 4945 } 4946 4947 if (option_exists & IPPF_ADDR) { 4948 /* 4949 * Enable per-packet source address verification if 4950 * IPV6_PKTINFO specified the source address. 4951 * ip6_src is set in the transport's _wput function. 4952 */ 4953 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 4954 } 4955 4956 if (option_exists & IPPF_DONTFRAG) { 4957 ip6i->ip6i_flags |= IP6I_DONTFRAG; 4958 } 4959 4960 if (option_exists & IPPF_USE_MIN_MTU) { 4961 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 4962 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 4963 } 4964 4965 if (option_exists & IPPF_NEXTHOP) { 4966 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 4967 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 4968 ip6i->ip6i_flags |= IP6I_NEXTHOP; 4969 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 4970 } 4971 4972 /* 4973 * tell IP this is an ip6i_t private header 4974 */ 4975 ip6i->ip6i_nxt = IPPROTO_RAW; 4976 } 4977 4978 /* Initialize IPv6 header */ 4979 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4980 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 4981 4982 /* Set the hoplimit of the outgoing packet. 
*/ 4983 if (option_exists & IPPF_HOPLIMIT) { 4984 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 4985 ip6h->ip6_hops = ipp->ipp_hoplimit; 4986 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4987 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 4988 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 4989 if (option_exists & IPPF_MULTICAST_HOPS) 4990 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4991 } else { 4992 ip6h->ip6_hops = icmp->icmp_ttl; 4993 if (option_exists & IPPF_UNICAST_HOPS) 4994 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4995 } 4996 4997 if (option_exists & IPPF_ADDR) { 4998 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 4999 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 5000 ip6h->ip6_src = tipp->ipp_addr; 5001 } else { 5002 /* 5003 * The source address was not set using IPV6_PKTINFO. 5004 * First look at the bound source. 5005 * If unspecified fallback to __sin6_src_id. 5006 */ 5007 ip6h->ip6_src = icmp->icmp_v6src; 5008 if (sin6->__sin6_src_id != 0 && 5009 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 5010 ip_srcid_find_id(sin6->__sin6_src_id, 5011 &ip6h->ip6_src, icmp->icmp_zoneid, 5012 is->is_netstack); 5013 } 5014 } 5015 5016 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 5017 cp = (uint8_t *)&ip6h[1]; 5018 5019 /* 5020 * Here's where we have to start stringing together 5021 * any extension headers in the right order: 5022 * Hop-by-hop, destination, routing, and final destination opts. 
5023 */ 5024 if (option_exists & IPPF_HOPOPTS) { 5025 /* Hop-by-hop options */ 5026 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 5027 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 5028 5029 *nxthdr_ptr = IPPROTO_HOPOPTS; 5030 nxthdr_ptr = &hbh->ip6h_nxt; 5031 5032 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 5033 cp += tipp->ipp_hopoptslen; 5034 } 5035 /* 5036 * En-route destination options 5037 * Only do them if there's a routing header as well 5038 */ 5039 if (option_exists & IPPF_RTDSTOPTS) { 5040 ip6_dest_t *dst = (ip6_dest_t *)cp; 5041 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 5042 5043 *nxthdr_ptr = IPPROTO_DSTOPTS; 5044 nxthdr_ptr = &dst->ip6d_nxt; 5045 5046 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 5047 cp += tipp->ipp_rtdstoptslen; 5048 } 5049 /* 5050 * Routing header next 5051 */ 5052 if (option_exists & IPPF_RTHDR) { 5053 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 5054 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 5055 5056 *nxthdr_ptr = IPPROTO_ROUTING; 5057 nxthdr_ptr = &rt->ip6r_nxt; 5058 5059 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 5060 cp += tipp->ipp_rthdrlen; 5061 } 5062 /* 5063 * Do ultimate destination options 5064 */ 5065 if (option_exists & IPPF_DSTOPTS) { 5066 ip6_dest_t *dest = (ip6_dest_t *)cp; 5067 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 5068 5069 *nxthdr_ptr = IPPROTO_DSTOPTS; 5070 nxthdr_ptr = &dest->ip6d_nxt; 5071 5072 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 5073 cp += tipp->ipp_dstoptslen; 5074 } 5075 5076 /* 5077 * Now set the last header pointer to the proto passed in 5078 */ 5079 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 5080 *nxthdr_ptr = icmp->icmp_proto; 5081 5082 /* 5083 * Copy in the destination address 5084 */ 5085 ip6h->ip6_dst = ip6_dst; 5086 5087 ip6h->ip6_vcf = 5088 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 5089 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 5090 5091 if (option_exists & IPPF_TCLASS) { 5092 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 5093 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 5094 tipp->ipp_tclass); 5095 } 5096 if (option_exists & IPPF_RTHDR) { 5097 ip6_rthdr_t *rth; 5098 5099 /* 5100 * Perform any processing needed for source routing. 5101 * We know that all extension headers will be in the same mblk 5102 * as the IPv6 header. 5103 */ 5104 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 5105 if (rth != NULL && rth->ip6r_segleft != 0) { 5106 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 5107 /* 5108 * Drop packet - only support Type 0 routing. 5109 * Notify the application as well. 5110 */ 5111 icmp_ud_err(q, mp, EPROTO); 5112 BUMP_MIB(&is->is_rawip_mib, 5113 rawipOutErrors); 5114 return; 5115 } 5116 /* 5117 * rth->ip6r_len is twice the number of 5118 * addresses in the header 5119 */ 5120 if (rth->ip6r_len & 0x1) { 5121 icmp_ud_err(q, mp, EPROTO); 5122 BUMP_MIB(&is->is_rawip_mib, 5123 rawipOutErrors); 5124 return; 5125 } 5126 /* 5127 * Shuffle the routing header and ip6_dst 5128 * addresses, and get the checksum difference 5129 * between the first hop (in ip6_dst) and 5130 * the destination (in the last routing hdr entry). 5131 */ 5132 csum = ip_massage_options_v6(ip6h, rth, 5133 is->is_netstack); 5134 /* 5135 * Verify that the first hop isn't a mapped address. 5136 * Routers along the path need to do this verification 5137 * for subsequent hops. 5138 */ 5139 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 5140 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5141 BUMP_MIB(&is->is_rawip_mib, 5142 rawipOutErrors); 5143 return; 5144 } 5145 } 5146 } 5147 5148 ip_len = mp1->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 5149 if (mp1->b_cont != NULL) 5150 ip_len += msgdsize(mp1->b_cont); 5151 5152 /* 5153 * Set the length into the IP header. 5154 * If the length is greater than the maximum allowed by IP, 5155 * then free the message and return. Do not try and send it 5156 * as this can cause problems in layers below. 
5157 */ 5158 if (ip_len > IP_MAXPACKET) { 5159 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5160 icmp_ud_err(q, mp, EMSGSIZE); 5161 return; 5162 } 5163 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 5164 uint_t cksum_off; /* From ip6i == mp1->b_rptr */ 5165 uint16_t *cksum_ptr; 5166 uint_t ext_hdrs_len; 5167 5168 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 5169 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 5170 icmp->icmp_checksum_off == 2); 5171 5172 /* 5173 * We make it easy for IP to include our pseudo header 5174 * by putting our length in uh_checksum, modified (if 5175 * we have a routing header) by the checksum difference 5176 * between the ultimate destination and first hop addresses. 5177 * Note: ICMPv6 must always checksum the packet. 5178 */ 5179 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 5180 if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) { 5181 if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) { 5182 BUMP_MIB(&is->is_rawip_mib, 5183 rawipOutErrors); 5184 freemsg(mp); 5185 return; 5186 } 5187 ip6i = (ip6i_t *)mp1->b_rptr; 5188 if (ip6i->ip6i_nxt == IPPROTO_RAW) 5189 ip6h = (ip6_t *)&ip6i[1]; 5190 else 5191 ip6h = (ip6_t *)ip6i; 5192 } 5193 /* Add payload length to checksum */ 5194 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 5195 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 5196 csum += htons(ip_len - ext_hdrs_len); 5197 5198 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 5199 csum = (csum & 0xFFFF) + (csum >> 16); 5200 *cksum_ptr = (uint16_t)csum; 5201 } 5202 5203 #ifdef _LITTLE_ENDIAN 5204 ip_len = htons(ip_len); 5205 #endif 5206 ip6h->ip6_plen = (uint16_t)ip_len; 5207 5208 freeb(mp); 5209 5210 /* We're done. 
Pass the packet to IP */ 5211 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 5212 ip_output_v6(icmp->icmp_connp, mp1, q, IP_WPUT); 5213 } 5214 5215 static void 5216 icmp_wput_other(queue_t *q, mblk_t *mp) 5217 { 5218 uchar_t *rptr = mp->b_rptr; 5219 struct iocblk *iocp; 5220 #define tudr ((struct T_unitdata_req *)rptr) 5221 conn_t *connp = Q_TO_CONN(q); 5222 icmp_t *icmp = connp->conn_icmp; 5223 icmp_stack_t *is = icmp->icmp_is; 5224 cred_t *cr; 5225 5226 cr = DB_CREDDEF(mp, connp->conn_cred); 5227 5228 switch (mp->b_datap->db_type) { 5229 case M_PROTO: 5230 case M_PCPROTO: 5231 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5232 /* 5233 * If the message does not contain a PRIM_type, 5234 * throw it away. 5235 */ 5236 freemsg(mp); 5237 return; 5238 } 5239 switch (((union T_primitives *)rptr)->type) { 5240 case T_ADDR_REQ: 5241 icmp_addr_req(q, mp); 5242 return; 5243 case O_T_BIND_REQ: 5244 case T_BIND_REQ: 5245 icmp_bind(q, mp); 5246 return; 5247 case T_CONN_REQ: 5248 icmp_connect(q, mp); 5249 return; 5250 case T_CAPABILITY_REQ: 5251 icmp_capability_req(q, mp); 5252 return; 5253 case T_INFO_REQ: 5254 icmp_info_req(q, mp); 5255 return; 5256 case T_UNITDATA_REQ: 5257 /* 5258 * If a T_UNITDATA_REQ gets here, the address must 5259 * be bad. Valid T_UNITDATA_REQs are found above 5260 * and break to below this switch. 5261 */ 5262 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5263 return; 5264 case T_UNBIND_REQ: 5265 icmp_unbind(q, mp); 5266 return; 5267 5268 case T_SVR4_OPTMGMT_REQ: 5269 if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get, 5270 cr)) { 5271 /* Only IP can return anything meaningful */ 5272 (void) svr4_optcom_req(q, mp, cr, 5273 &icmp_opt_obj, B_TRUE); 5274 } 5275 return; 5276 5277 case T_OPTMGMT_REQ: 5278 /* Only IP can return anything meaningful */ 5279 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE); 5280 return; 5281 5282 case T_DISCON_REQ: 5283 icmp_disconnect(q, mp); 5284 return; 5285 5286 /* The following TPI message is not supported by icmp. 
*/ 5287 case O_T_CONN_RES: 5288 case T_CONN_RES: 5289 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5290 return; 5291 5292 /* The following 3 TPI requests are illegal for icmp. */ 5293 case T_DATA_REQ: 5294 case T_EXDATA_REQ: 5295 case T_ORDREL_REQ: 5296 freemsg(mp); 5297 (void) putctl1(RD(q), M_ERROR, EPROTO); 5298 return; 5299 default: 5300 break; 5301 } 5302 break; 5303 case M_IOCTL: 5304 iocp = (struct iocblk *)mp->b_rptr; 5305 switch (iocp->ioc_cmd) { 5306 case TI_GETPEERNAME: 5307 if (icmp->icmp_state != TS_DATA_XFER) { 5308 /* 5309 * If a default destination address has not 5310 * been associated with the stream, then we 5311 * don't know the peer's name. 5312 */ 5313 iocp->ioc_error = ENOTCONN; 5314 err_ret:; 5315 iocp->ioc_count = 0; 5316 mp->b_datap->db_type = M_IOCACK; 5317 qreply(q, mp); 5318 return; 5319 } 5320 /* FALLTHRU */ 5321 case TI_GETMYNAME: 5322 /* 5323 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5324 * need to copyin the user's strbuf structure. 5325 * Processing will continue in the M_IOCDATA case 5326 * below. 5327 */ 5328 mi_copyin(q, mp, NULL, 5329 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5330 return; 5331 case ND_SET: 5332 /* nd_getset performs the necessary error checking */ 5333 case ND_GET: 5334 if (nd_getset(q, is->is_nd, mp)) { 5335 qreply(q, mp); 5336 return; 5337 } 5338 break; 5339 default: 5340 break; 5341 } 5342 break; 5343 case M_IOCDATA: 5344 icmp_wput_iocdata(q, mp); 5345 return; 5346 default: 5347 break; 5348 } 5349 ip_wput(q, mp); 5350 } 5351 5352 /* 5353 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5354 * messages. 5355 */ 5356 static void 5357 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5358 { 5359 mblk_t *mp1; 5360 STRUCT_HANDLE(strbuf, sb); 5361 icmp_t *icmp; 5362 in6_addr_t v6addr; 5363 ipaddr_t v4addr; 5364 uint32_t flowinfo = 0; 5365 int addrlen; 5366 5367 /* Make sure it is one of ours. 
*/ 5368 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5369 case TI_GETMYNAME: 5370 case TI_GETPEERNAME: 5371 break; 5372 default: 5373 icmp = Q_TO_ICMP(q); 5374 ip_output(icmp->icmp_connp, mp, q, IP_WPUT); 5375 return; 5376 } 5377 switch (mi_copy_state(q, mp, &mp1)) { 5378 case -1: 5379 return; 5380 case MI_COPY_CASE(MI_COPY_IN, 1): 5381 break; 5382 case MI_COPY_CASE(MI_COPY_OUT, 1): 5383 /* 5384 * The address has been copied out, so now 5385 * copyout the strbuf. 5386 */ 5387 mi_copyout(q, mp); 5388 return; 5389 case MI_COPY_CASE(MI_COPY_OUT, 2): 5390 /* 5391 * The address and strbuf have been copied out. 5392 * We're done, so just acknowledge the original 5393 * M_IOCTL. 5394 */ 5395 mi_copy_done(q, mp, 0); 5396 return; 5397 default: 5398 /* 5399 * Something strange has happened, so acknowledge 5400 * the original M_IOCTL with an EPROTO error. 5401 */ 5402 mi_copy_done(q, mp, EPROTO); 5403 return; 5404 } 5405 /* 5406 * Now we have the strbuf structure for TI_GETMYNAME 5407 * and TI_GETPEERNAME. Next we copyout the requested 5408 * address and then we'll copyout the strbuf. 5409 */ 5410 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5411 (void *)mp1->b_rptr); 5412 icmp = Q_TO_ICMP(q); 5413 if (icmp->icmp_family == AF_INET) 5414 addrlen = sizeof (sin_t); 5415 else 5416 addrlen = sizeof (sin6_t); 5417 5418 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5419 mi_copy_done(q, mp, EINVAL); 5420 return; 5421 } 5422 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5423 case TI_GETMYNAME: 5424 if (icmp->icmp_family == AF_INET) { 5425 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5426 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 5427 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5428 v4addr = V4_PART_OF_V6(icmp->icmp_v6src); 5429 } else { 5430 /* 5431 * INADDR_ANY 5432 * icmp_v6src is not set, we might be bound to 5433 * broadcast/multicast. 
Use icmp_bound_v6src as 5434 * local address instead (that could 5435 * also still be INADDR_ANY) 5436 */ 5437 v4addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 5438 } 5439 } else { 5440 /* icmp->icmp_family == AF_INET6 */ 5441 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5442 v6addr = icmp->icmp_v6src; 5443 } else { 5444 /* 5445 * UNSPECIFIED 5446 * icmp_v6src is not set, we might be bound to 5447 * broadcast/multicast. Use icmp_bound_v6src as 5448 * local address instead (that could 5449 * also still be UNSPECIFIED) 5450 */ 5451 v6addr = icmp->icmp_bound_v6src; 5452 } 5453 } 5454 break; 5455 case TI_GETPEERNAME: 5456 if (icmp->icmp_family == AF_INET) { 5457 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5458 v4addr = V4_PART_OF_V6(icmp->icmp_v6dst); 5459 } else { 5460 /* icmp->icmp_family == AF_INET6) */ 5461 v6addr = icmp->icmp_v6dst; 5462 flowinfo = icmp->icmp_flowinfo; 5463 } 5464 break; 5465 default: 5466 mi_copy_done(q, mp, EPROTO); 5467 return; 5468 } 5469 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5470 if (!mp1) 5471 return; 5472 5473 if (icmp->icmp_family == AF_INET) { 5474 sin_t *sin; 5475 5476 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 5477 sin = (sin_t *)mp1->b_rptr; 5478 mp1->b_wptr = (uchar_t *)&sin[1]; 5479 *sin = sin_null; 5480 sin->sin_family = AF_INET; 5481 sin->sin_addr.s_addr = v4addr; 5482 } else { 5483 /* icmp->icmp_family == AF_INET6 */ 5484 sin6_t *sin6; 5485 5486 ASSERT(icmp->icmp_family == AF_INET6); 5487 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 5488 sin6 = (sin6_t *)mp1->b_rptr; 5489 mp1->b_wptr = (uchar_t *)&sin6[1]; 5490 *sin6 = sin6_null; 5491 sin6->sin6_family = AF_INET6; 5492 sin6->sin6_flowinfo = flowinfo; 5493 sin6->sin6_addr = v6addr; 5494 } 5495 /* Copy out the address */ 5496 mi_copyout(q, mp); 5497 } 5498 5499 static int 5500 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5501 void *thisdg_attrs) 5502 { 5503 conn_t *connp = Q_TO_CONN(q); 5504 struct T_unitdata_req *udreqp; 5505 
int is_absreq_failure; 5506 cred_t *cr; 5507 5508 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5509 *errorp = 0; 5510 5511 cr = DB_CREDDEF(mp, connp->conn_cred); 5512 5513 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5514 udreqp->OPT_offset, cr, &icmp_opt_obj, 5515 thisdg_attrs, &is_absreq_failure); 5516 5517 if (*errorp != 0) { 5518 /* 5519 * Note: No special action needed in this 5520 * module for "is_absreq_failure" 5521 */ 5522 return (-1); /* failure */ 5523 } 5524 ASSERT(is_absreq_failure == 0); 5525 return (0); /* success */ 5526 } 5527 5528 void 5529 icmp_ddi_init(void) 5530 { 5531 icmp_max_optsize = 5532 optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5533 icmp_opt_obj.odb_opt_arr_cnt); 5534 5535 /* 5536 * We want to be informed each time a stack is created or 5537 * destroyed in the kernel, so we can maintain the 5538 * set of icmp_stack_t's. 5539 */ 5540 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); 5541 } 5542 5543 void 5544 icmp_ddi_destroy(void) 5545 { 5546 netstack_unregister(NS_ICMP); 5547 } 5548 5549 /* 5550 * Initialize the ICMP stack instance. 5551 */ 5552 static void * 5553 rawip_stack_init(netstackid_t stackid, netstack_t *ns) 5554 { 5555 icmp_stack_t *is; 5556 icmpparam_t *pa; 5557 5558 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); 5559 is->is_netstack = ns; 5560 5561 pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); 5562 is->is_param_arr = pa; 5563 bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); 5564 5565 (void) icmp_param_register(&is->is_nd, 5566 is->is_param_arr, A_CNT(icmp_param_arr)); 5567 is->is_ksp = rawip_kstat_init(stackid); 5568 return (is); 5569 } 5570 5571 /* 5572 * Free the ICMP stack instance. 
5573 */ 5574 static void 5575 rawip_stack_fini(netstackid_t stackid, void *arg) 5576 { 5577 icmp_stack_t *is = (icmp_stack_t *)arg; 5578 5579 nd_free(&is->is_nd); 5580 kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); 5581 is->is_param_arr = NULL; 5582 5583 rawip_kstat_fini(stackid, is->is_ksp); 5584 is->is_ksp = NULL; 5585 kmem_free(is, sizeof (*is)); 5586 } 5587 5588 static void * 5589 rawip_kstat_init(netstackid_t stackid) { 5590 kstat_t *ksp; 5591 5592 rawip_named_kstat_t template = { 5593 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5594 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5595 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5596 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5597 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5598 }; 5599 5600 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", 5601 KSTAT_TYPE_NAMED, 5602 NUM_OF_FIELDS(rawip_named_kstat_t), 5603 0, stackid); 5604 if (ksp == NULL || ksp->ks_data == NULL) 5605 return (NULL); 5606 5607 bcopy(&template, ksp->ks_data, sizeof (template)); 5608 ksp->ks_update = rawip_kstat_update; 5609 ksp->ks_private = (void *)(uintptr_t)stackid; 5610 5611 kstat_install(ksp); 5612 return (ksp); 5613 } 5614 5615 static void 5616 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) 5617 { 5618 if (ksp != NULL) { 5619 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 5620 kstat_delete_netstack(ksp, stackid); 5621 } 5622 } 5623 5624 static int 5625 rawip_kstat_update(kstat_t *ksp, int rw) 5626 { 5627 rawip_named_kstat_t *rawipkp; 5628 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; 5629 netstack_t *ns; 5630 icmp_stack_t *is; 5631 5632 if ((ksp == NULL) || (ksp->ks_data == NULL)) 5633 return (EIO); 5634 5635 if (rw == KSTAT_WRITE) 5636 return (EACCES); 5637 5638 rawipkp = (rawip_named_kstat_t *)ksp->ks_data; 5639 5640 ns = netstack_find_by_stackid(stackid); 5641 if (ns == NULL) 5642 return (-1); 5643 is = ns->netstack_icmp; 5644 if (is == NULL) { 5645 netstack_rele(ns); 5646 return (-1); 5647 } 5648 
rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; 5649 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; 5650 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; 5651 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; 5652 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; 5653 netstack_rele(ns); 5654 return (0); 5655 } 5656