1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/types.h> 31 #include <sys/stream.h> 32 #include <sys/stropts.h> 33 #include <sys/strlog.h> 34 #include <sys/strsun.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/priv.h> 46 #include <sys/zone.h> 47 #include <sys/time.h> 48 49 #include <sys/socket.h> 50 #include <sys/isa_defs.h> 51 #include <sys/suntpi.h> 52 #include <sys/xti_inet.h> 53 #include <sys/netstack.h> 54 55 #include <net/route.h> 56 #include <net/if.h> 57 58 #include <netinet/in.h> 59 #include <netinet/ip6.h> 60 #include <netinet/icmp6.h> 61 #include <inet/common.h> 62 #include <inet/ip.h> 63 #include <inet/ip6.h> 64 #include <inet/mi.h> 65 #include <inet/nd.h> 66 #include <inet/optcom.h> 67 #include <inet/snmpcom.h> 68 #include <inet/kstatcom.h> 69 #include <inet/rawip_impl.h> 70 71 #include <netinet/ip_mroute.h> 72 #include <inet/tcp.h> 73 #include <net/pfkeyv2.h> 74 #include <inet/ipsec_info.h> 75 #include <inet/ipclassifier.h> 76 77 #include <sys/tsol/label.h> 78 #include <sys/tsol/tnet.h> 79 80 #include <inet/ip_ire.h> 81 #include <inet/ip_if.h> 82 83 #include <inet/ip_impl.h> 84 85 /* 86 * Synchronization notes: 87 * 88 * RAWIP is MT and uses the usual kernel synchronization primitives. There is 89 * one lock, which is icmp_rwlock. We also use conn_lock when updating things 90 * which affect the IP classifier lookup. 91 * The lock order is icmp_rwlock -> conn_lock. 92 * 93 * The icmp_rwlock: 94 * This protects most of the other fields in the icmp_t. The exact list of 95 * fields which are protected by each of the above locks is documented in 96 * the icmp_t structure definition. 97 * 98 * Plumbing notes: 99 * ICMP is always a device driver. 
For compatibility with mibopen() code 100 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough 101 * dummy module. 102 */ 103 104 static void icmp_addr_req(queue_t *q, mblk_t *mp); 105 static void icmp_bind(queue_t *q, mblk_t *mp); 106 static void icmp_bind_proto(queue_t *q); 107 static void icmp_bind_result(conn_t *, mblk_t *); 108 static void icmp_bind_ack(conn_t *, mblk_t *mp); 109 static void icmp_bind_error(conn_t *, mblk_t *mp); 110 static int icmp_build_hdrs(icmp_t *icmp); 111 static void icmp_capability_req(queue_t *q, mblk_t *mp); 112 static int icmp_close(queue_t *q); 113 static void icmp_connect(queue_t *q, mblk_t *mp); 114 static void icmp_disconnect(queue_t *q, mblk_t *mp); 115 static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 116 int sys_error); 117 static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 118 t_scalar_t t_error, int sys_error); 119 static void icmp_icmp_error(queue_t *q, mblk_t *mp); 120 static void icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 121 static void icmp_info_req(queue_t *q, mblk_t *mp); 122 static void icmp_input(void *, mblk_t *, void *); 123 static mblk_t *icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, 124 t_scalar_t addr_length, in_port_t); 125 static int icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, 126 cred_t *credp, boolean_t isv6); 127 static int icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 128 cred_t *credp); 129 static int icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 130 cred_t *credp); 131 static void icmp_output(queue_t *q, mblk_t *mp); 132 static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, 133 int *errorp, void *thisdg_attrs); 134 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 135 int icmp_opt_set(queue_t *q, uint_t optset_context, 136 int level, int name, uint_t inlen, 137 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 138 void *thisdg_attrs, cred_t *cr, mblk_t 
*mblk); 139 int icmp_opt_get(queue_t *q, int level, int name, 140 uchar_t *ptr); 141 static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 142 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt); 143 static int icmp_param_set(queue_t *q, mblk_t *mp, char *value, 144 caddr_t cp, cred_t *cr); 145 static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 146 uchar_t *ptr, int len); 147 static int icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 148 cred_t *cr); 149 static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 150 static void icmp_unbind(queue_t *q, mblk_t *mp); 151 static void icmp_wput(queue_t *q, mblk_t *mp); 152 static void icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, 153 t_scalar_t tudr_optlen); 154 static void icmp_wput_other(queue_t *q, mblk_t *mp); 155 static void icmp_wput_iocdata(queue_t *q, mblk_t *mp); 156 static void icmp_wput_restricted(queue_t *q, mblk_t *mp); 157 158 static void *rawip_stack_init(netstackid_t stackid, netstack_t *ns); 159 static void rawip_stack_fini(netstackid_t stackid, void *arg); 160 161 static void *rawip_kstat_init(netstackid_t stackid); 162 static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp); 163 static int rawip_kstat_update(kstat_t *kp, int rw); 164 165 166 static struct module_info icmp_mod_info = { 167 5707, "icmp", 1, INFPSZ, 512, 128 168 }; 169 170 /* 171 * Entry points for ICMP as a device. 172 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices. 
173 */ 174 static struct qinit icmprinitv4 = { 175 NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info 176 }; 177 178 static struct qinit icmprinitv6 = { 179 NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info 180 }; 181 182 static struct qinit icmpwinit = { 183 (pfi_t)icmp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &icmp_mod_info 184 }; 185 186 /* For AF_INET aka /dev/icmp */ 187 struct streamtab icmpinfov4 = { 188 &icmprinitv4, &icmpwinit 189 }; 190 191 /* For AF_INET6 aka /dev/icmp6 */ 192 struct streamtab icmpinfov6 = { 193 &icmprinitv6, &icmpwinit 194 }; 195 196 static sin_t sin_null; /* Zero address for quick clears */ 197 static sin6_t sin6_null; /* Zero address for quick clears */ 198 199 /* Default structure copied into T_INFO_ACK messages */ 200 static struct T_info_ack icmp_g_t_info_ack = { 201 T_INFO_ACK, 202 IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */ 203 T_INVALID, /* ETSDU_size. icmp does not support expedited data. */ 204 T_INVALID, /* CDATA_size. icmp does not support connect data. */ 205 T_INVALID, /* DDATA_size. icmp does not support disconnect data. */ 206 0, /* ADDR_size - filled in later. */ 207 0, /* OPT_size - not initialized here */ 208 IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */ 209 T_CLTS, /* SERV_type. icmp supports connection-less. */ 210 TS_UNBND, /* CURRENT_state. This is set from icmp_state. */ 211 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 212 }; 213 214 /* 215 * Table of ND variables supported by icmp. These are loaded into is_nd 216 * when the stack instance is created. 217 * All of these are alterable, within the min/max values given, at run time. 
218 */ 219 static icmpparam_t icmp_param_arr[] = { 220 /* min max value name */ 221 { 0, 128, 32, "icmp_wroff_extra" }, 222 { 1, 255, 255, "icmp_ipv4_ttl" }, 223 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"}, 224 { 0, 1, 1, "icmp_bsd_compat" }, 225 { 4096, 65536, 8192, "icmp_xmit_hiwat"}, 226 { 0, 65536, 1024, "icmp_xmit_lowat"}, 227 { 4096, 65536, 8192, "icmp_recv_hiwat"}, 228 { 65536, 1024*1024*1024, 256*1024, "icmp_max_buf"}, 229 }; 230 #define is_wroff_extra is_param_arr[0].icmp_param_value 231 #define is_ipv4_ttl is_param_arr[1].icmp_param_value 232 #define is_ipv6_hoplimit is_param_arr[2].icmp_param_value 233 #define is_bsd_compat is_param_arr[3].icmp_param_value 234 #define is_xmit_hiwat is_param_arr[4].icmp_param_value 235 #define is_xmit_lowat is_param_arr[5].icmp_param_value 236 #define is_recv_hiwat is_param_arr[6].icmp_param_value 237 #define is_max_buf is_param_arr[7].icmp_param_value 238 239 /* 240 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 241 * passed to icmp_wput. 242 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP 243 * protocol type placed in the message following the address. A T_BIND_ACK 244 * message is returned by ip_bind_v4/v6. 
245 */ 246 static void 247 icmp_bind(queue_t *q, mblk_t *mp) 248 { 249 sin_t *sin; 250 sin6_t *sin6; 251 mblk_t *mp1; 252 struct T_bind_req *tbr; 253 icmp_t *icmp; 254 conn_t *connp = Q_TO_CONN(q); 255 256 icmp = connp->conn_icmp; 257 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 258 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 259 "icmp_bind: bad req, len %u", 260 (uint_t)(mp->b_wptr - mp->b_rptr)); 261 icmp_err_ack(q, mp, TPROTO, 0); 262 return; 263 } 264 if (icmp->icmp_state != TS_UNBND) { 265 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 266 "icmp_bind: bad state, %d", icmp->icmp_state); 267 icmp_err_ack(q, mp, TOUTSTATE, 0); 268 return; 269 } 270 /* 271 * Reallocate the message to make sure we have enough room for an 272 * address and the protocol type. 273 */ 274 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 275 if (!mp1) { 276 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 277 return; 278 } 279 mp = mp1; 280 tbr = (struct T_bind_req *)mp->b_rptr; 281 switch (tbr->ADDR_length) { 282 case 0: /* Generic request */ 283 tbr->ADDR_offset = sizeof (struct T_bind_req); 284 if (icmp->icmp_family == AF_INET) { 285 tbr->ADDR_length = sizeof (sin_t); 286 sin = (sin_t *)&tbr[1]; 287 *sin = sin_null; 288 sin->sin_family = AF_INET; 289 mp->b_wptr = (uchar_t *)&sin[1]; 290 } else { 291 ASSERT(icmp->icmp_family == AF_INET6); 292 tbr->ADDR_length = sizeof (sin6_t); 293 sin6 = (sin6_t *)&tbr[1]; 294 *sin6 = sin6_null; 295 sin6->sin6_family = AF_INET6; 296 mp->b_wptr = (uchar_t *)&sin6[1]; 297 } 298 break; 299 case sizeof (sin_t): /* Complete IP address */ 300 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 301 sizeof (sin_t)); 302 if (sin == NULL || !OK_32PTR((char *)sin)) { 303 icmp_err_ack(q, mp, TSYSERR, EINVAL); 304 return; 305 } 306 if (icmp->icmp_family != AF_INET || 307 sin->sin_family != AF_INET) { 308 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 309 return; 310 } 311 break; 312 case sizeof (sin6_t): /* Complete IP address */ 313 sin6 = (sin6_t 
*)mi_offset_param(mp, tbr->ADDR_offset, 314 sizeof (sin6_t)); 315 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 316 icmp_err_ack(q, mp, TSYSERR, EINVAL); 317 return; 318 } 319 if (icmp->icmp_family != AF_INET6 || 320 sin6->sin6_family != AF_INET6) { 321 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 322 return; 323 } 324 /* No support for mapped addresses on raw sockets */ 325 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 326 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 327 return; 328 } 329 break; 330 default: 331 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 332 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 333 icmp_err_ack(q, mp, TBADADDR, 0); 334 return; 335 } 336 337 /* 338 * The state must be TS_UNBND. TPI mandates that users must send 339 * TPI primitives only 1 at a time and wait for the response before 340 * sending the next primitive. 341 */ 342 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 343 if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) { 344 rw_exit(&icmp->icmp_rwlock); 345 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 346 "icmp_bind: bad state, %d", icmp->icmp_state); 347 icmp_err_ack(q, mp, TOUTSTATE, 0); 348 return; 349 } 350 351 icmp->icmp_pending_op = tbr->PRIM_type; 352 353 /* 354 * Copy the source address into our icmp structure. This address 355 * may still be zero; if so, ip will fill in the correct address 356 * each time an outbound packet is passed to it. 357 * If we are binding to a broadcast or multicast address then 358 * icmp_bind_ack will clear the source address when it receives 359 * the T_BIND_ACK. 
360 */ 361 icmp->icmp_state = TS_IDLE; 362 363 if (icmp->icmp_family == AF_INET) { 364 ASSERT(sin != NULL); 365 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 366 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 367 &icmp->icmp_v6src); 368 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 369 icmp->icmp_ip_snd_options_len; 370 icmp->icmp_bound_v6src = icmp->icmp_v6src; 371 } else { 372 int error; 373 374 ASSERT(sin6 != NULL); 375 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 376 icmp->icmp_v6src = sin6->sin6_addr; 377 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 378 icmp->icmp_bound_v6src = icmp->icmp_v6src; 379 380 /* Rebuild the header template */ 381 error = icmp_build_hdrs(icmp); 382 if (error != 0) { 383 icmp->icmp_pending_op = -1; 384 rw_exit(&icmp->icmp_rwlock); 385 icmp_err_ack(q, mp, TSYSERR, error); 386 return; 387 } 388 } 389 /* 390 * Place protocol type in the O_T_BIND_REQ/T_BIND_REQ following 391 * the address. 392 */ 393 *mp->b_wptr++ = icmp->icmp_proto; 394 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 395 /* 396 * Append a request for an IRE if src not 0 (INADDR_ANY) 397 */ 398 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 399 if (!mp->b_cont) { 400 icmp->icmp_pending_op = -1; 401 rw_exit(&icmp->icmp_rwlock); 402 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 403 return; 404 } 405 mp->b_cont->b_wptr += sizeof (ire_t); 406 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 407 } 408 rw_exit(&icmp->icmp_rwlock); 409 410 /* Pass the O_T_BIND_REQ/T_BIND_REQ to ip. */ 411 if (icmp->icmp_family == AF_INET6) 412 mp = ip_bind_v6(q, mp, connp, NULL); 413 else 414 mp = ip_bind_v4(q, mp, connp); 415 416 /* The above return NULL if the bind needs to be deferred */ 417 if (mp != NULL) 418 icmp_bind_result(connp, mp); 419 else 420 CONN_INC_REF(connp); 421 } 422 423 /* 424 * Send message to IP to just bind to the protocol. 
425 */ 426 static void 427 icmp_bind_proto(queue_t *q) 428 { 429 mblk_t *mp; 430 struct T_bind_req *tbr; 431 icmp_t *icmp; 432 conn_t *connp = Q_TO_CONN(q); 433 434 icmp = connp->conn_icmp; 435 436 mp = allocb(sizeof (struct T_bind_req) + sizeof (sin6_t) + 1, 437 BPRI_MED); 438 if (!mp) { 439 return; 440 } 441 mp->b_datap->db_type = M_PROTO; 442 tbr = (struct T_bind_req *)mp->b_rptr; 443 tbr->PRIM_type = O_T_BIND_REQ; /* change to T_BIND_REQ ? */ 444 tbr->ADDR_offset = sizeof (struct T_bind_req); 445 446 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 447 if (icmp->icmp_ipversion == IPV4_VERSION) { 448 sin_t *sin; 449 450 tbr->ADDR_length = sizeof (sin_t); 451 sin = (sin_t *)&tbr[1]; 452 *sin = sin_null; 453 sin->sin_family = AF_INET; 454 mp->b_wptr = (uchar_t *)&sin[1]; 455 } else { 456 sin6_t *sin6; 457 458 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 459 tbr->ADDR_length = sizeof (sin6_t); 460 sin6 = (sin6_t *)&tbr[1]; 461 *sin6 = sin6_null; 462 sin6->sin6_family = AF_INET6; 463 mp->b_wptr = (uchar_t *)&sin6[1]; 464 } 465 466 /* Place protocol type in the O_T_BIND_REQ following the address. */ 467 *mp->b_wptr++ = icmp->icmp_proto; 468 rw_exit(&icmp->icmp_rwlock); 469 470 /* Pass the O_T_BIND_REQ to ip. */ 471 if (icmp->icmp_family == AF_INET6) 472 mp = ip_bind_v6(q, mp, connp, NULL); 473 else 474 mp = ip_bind_v4(q, mp, connp); 475 476 /* The above return NULL if the bind needs to be deferred */ 477 if (mp != NULL) 478 icmp_bind_result(connp, mp); 479 else 480 CONN_INC_REF(connp); 481 } 482 483 /* 484 * This is called from ip_wput_nondata to handle the results of a 485 * deferred RAWIP bind. It is called once the bind has been completed. 486 */ 487 void 488 rawip_resume_bind(conn_t *connp, mblk_t *mp) 489 { 490 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 491 492 icmp_bind_result(connp, mp); 493 494 CONN_OPER_PENDING_DONE(connp); 495 } 496 497 /* 498 * This routine handles each T_CONN_REQ message passed to icmp. 
It 499 * associates a default destination address with the stream. 500 * 501 * This routine sends down a T_BIND_REQ to IP with the following mblks: 502 * T_BIND_REQ - specifying local and remote address. 503 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 504 * T_OK_ACK - for the T_CONN_REQ 505 * T_CONN_CON - to keep the TPI user happy 506 * 507 * The connect completes in icmp_bind_result. 508 * When a T_BIND_ACK is received information is extracted from the IRE 509 * and the two appended messages are sent to the TPI user. 510 * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 511 * convert it to an error ack for the appropriate primitive. 512 */ 513 static void 514 icmp_connect(queue_t *q, mblk_t *mp) 515 { 516 sin_t *sin; 517 sin6_t *sin6; 518 mblk_t *mp1, *mp2; 519 struct T_conn_req *tcr; 520 icmp_t *icmp; 521 ipaddr_t v4dst; 522 in6_addr_t v6dst; 523 uint32_t flowinfo; 524 conn_t *connp = Q_TO_CONN(q); 525 526 icmp = connp->conn_icmp; 527 tcr = (struct T_conn_req *)mp->b_rptr; 528 /* Sanity checks */ 529 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 530 icmp_err_ack(q, mp, TPROTO, 0); 531 return; 532 } 533 534 if (tcr->OPT_length != 0) { 535 icmp_err_ack(q, mp, TBADOPT, 0); 536 return; 537 } 538 539 switch (tcr->DEST_length) { 540 default: 541 icmp_err_ack(q, mp, TBADADDR, 0); 542 return; 543 544 case sizeof (sin_t): 545 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 546 sizeof (sin_t)); 547 if (sin == NULL || !OK_32PTR((char *)sin)) { 548 icmp_err_ack(q, mp, TSYSERR, EINVAL); 549 return; 550 } 551 if (icmp->icmp_family != AF_INET || 552 sin->sin_family != AF_INET) { 553 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 554 return; 555 } 556 v4dst = sin->sin_addr.s_addr; 557 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 558 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 559 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 560 icmp->icmp_ip_snd_options_len; 561 break; 562 563 case sizeof (sin6_t): 564 sin6 = 
(sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 565 sizeof (sin6_t)); 566 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 567 icmp_err_ack(q, mp, TSYSERR, EINVAL); 568 return; 569 } 570 if (icmp->icmp_family != AF_INET6 || 571 sin6->sin6_family != AF_INET6) { 572 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 573 return; 574 } 575 /* No support for mapped addresses on raw sockets */ 576 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 577 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 578 return; 579 } 580 v6dst = sin6->sin6_addr; 581 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 582 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 583 flowinfo = sin6->sin6_flowinfo; 584 break; 585 } 586 if (icmp->icmp_ipversion == IPV4_VERSION) { 587 /* 588 * Interpret a zero destination to mean loopback. 589 * Update the T_CONN_REQ (sin/sin6) since it is used to 590 * generate the T_CONN_CON. 591 */ 592 if (v4dst == INADDR_ANY) { 593 v4dst = htonl(INADDR_LOOPBACK); 594 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 595 if (icmp->icmp_family == AF_INET) { 596 sin->sin_addr.s_addr = v4dst; 597 } else { 598 sin6->sin6_addr = v6dst; 599 } 600 } 601 icmp->icmp_v6dst = v6dst; 602 icmp->icmp_flowinfo = 0; 603 604 /* 605 * If the destination address is multicast and 606 * an outgoing multicast interface has been set, 607 * use the address of that interface as our 608 * source address if no source address has been set. 609 */ 610 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 611 CLASSD(v4dst) && 612 icmp->icmp_multicast_if_addr != INADDR_ANY) { 613 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 614 &icmp->icmp_v6src); 615 } 616 } else { 617 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 618 /* 619 * Interpret a zero destination to mean loopback. 620 * Update the T_CONN_REQ (sin/sin6) since it is used to 621 * generate the T_CONN_CON. 
622 */ 623 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 624 v6dst = ipv6_loopback; 625 sin6->sin6_addr = v6dst; 626 } 627 icmp->icmp_v6dst = v6dst; 628 icmp->icmp_flowinfo = flowinfo; 629 /* 630 * If the destination address is multicast and 631 * an outgoing multicast interface has been set, 632 * then the ip bind logic will pick the correct source 633 * address (i.e. matching the outgoing multicast interface). 634 */ 635 } 636 637 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 638 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 639 rw_exit(&icmp->icmp_rwlock); 640 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 641 "icmp_connect: bad state, %d", icmp->icmp_state); 642 icmp_err_ack(q, mp, TOUTSTATE, 0); 643 return; 644 } 645 icmp->icmp_pending_op = T_CONN_REQ; 646 647 if (icmp->icmp_state == TS_DATA_XFER) { 648 /* Already connected - clear out state */ 649 icmp->icmp_v6src = icmp->icmp_bound_v6src; 650 icmp->icmp_state = TS_IDLE; 651 } 652 653 /* 654 * Send down bind to IP to verify that there is a route 655 * and to determine the source address. 656 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 657 */ 658 if (icmp->icmp_family == AF_INET) { 659 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa_conn_t), 660 sin->sin_port); 661 } else { 662 ASSERT(icmp->icmp_family == AF_INET6); 663 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa6_conn_t), 664 sin6->sin6_port); 665 } 666 if (mp1 == NULL) { 667 icmp->icmp_pending_op = -1; 668 rw_exit(&icmp->icmp_rwlock); 669 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 670 return; 671 } 672 673 /* 674 * We also have to send a connection confirmation to 675 * keep TLI happy. Prepare it for icmp_bind_result. 
676 */ 677 if (icmp->icmp_family == AF_INET) { 678 mp2 = mi_tpi_conn_con(NULL, (char *)sin, sizeof (*sin), NULL, 679 0); 680 } else { 681 ASSERT(icmp->icmp_family == AF_INET6); 682 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, sizeof (*sin6), NULL, 683 0); 684 } 685 if (mp2 == NULL) { 686 freemsg(mp1); 687 icmp->icmp_pending_op = -1; 688 rw_exit(&icmp->icmp_rwlock); 689 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 690 return; 691 } 692 693 mp = mi_tpi_ok_ack_alloc(mp); 694 if (mp == NULL) { 695 /* Unable to reuse the T_CONN_REQ for the ack. */ 696 freemsg(mp2); 697 icmp->icmp_pending_op = -1; 698 rw_exit(&icmp->icmp_rwlock); 699 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 700 return; 701 } 702 703 icmp->icmp_state = TS_DATA_XFER; 704 rw_exit(&icmp->icmp_rwlock); 705 706 /* Hang onto the T_OK_ACK and T_CONN_CON for later. */ 707 linkb(mp1, mp); 708 linkb(mp1, mp2); 709 710 mblk_setcred(mp1, connp->conn_cred); 711 if (icmp->icmp_family == AF_INET) 712 mp1 = ip_bind_v4(q, mp1, connp); 713 else 714 mp1 = ip_bind_v6(q, mp1, connp, NULL); 715 716 /* The above return NULL if the bind needs to be deferred */ 717 if (mp1 != NULL) 718 icmp_bind_result(connp, mp1); 719 else 720 CONN_INC_REF(connp); 721 } 722 723 static void 724 icmp_close_free(conn_t *connp) 725 { 726 icmp_t *icmp = connp->conn_icmp; 727 728 /* If there are any options associated with the stream, free them. 
*/ 729 if (icmp->icmp_ip_snd_options != NULL) { 730 mi_free((char *)icmp->icmp_ip_snd_options); 731 icmp->icmp_ip_snd_options = NULL; 732 icmp->icmp_ip_snd_options_len = 0; 733 } 734 735 if (icmp->icmp_filter != NULL) { 736 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 737 icmp->icmp_filter = NULL; 738 } 739 /* Free memory associated with sticky options */ 740 if (icmp->icmp_sticky_hdrs_len != 0) { 741 kmem_free(icmp->icmp_sticky_hdrs, 742 icmp->icmp_sticky_hdrs_len); 743 icmp->icmp_sticky_hdrs = NULL; 744 icmp->icmp_sticky_hdrs_len = 0; 745 } 746 ip6_pkt_free(&icmp->icmp_sticky_ipp); 747 748 /* 749 * Clear any fields which the kmem_cache constructor clears. 750 * Only icmp_connp needs to be preserved. 751 * TBD: We should make this more efficient to avoid clearing 752 * everything. 753 */ 754 ASSERT(icmp->icmp_connp == connp); 755 bzero(icmp, sizeof (icmp_t)); 756 icmp->icmp_connp = connp; 757 } 758 759 static int 760 icmp_close(queue_t *q) 761 { 762 conn_t *connp = (conn_t *)q->q_ptr; 763 764 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 765 766 ip_quiesce_conn(connp); 767 768 qprocsoff(connp->conn_rq); 769 770 icmp_close_free(connp); 771 772 /* 773 * Now we are truly single threaded on this stream, and can 774 * delete the things hanging off the connp, and finally the connp. 775 * We removed this connp from the fanout list, it cannot be 776 * accessed thru the fanouts, and we already waited for the 777 * conn_ref to drop to 0. We are already in close, so 778 * there cannot be any other thread from the top. qprocsoff 779 * has completed, and service has completed or won't run in 780 * future. 781 */ 782 ASSERT(connp->conn_ref == 1); 783 784 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 785 786 connp->conn_ref--; 787 ipcl_conn_destroy(connp); 788 789 q->q_ptr = WR(q)->q_ptr = NULL; 790 return (0); 791 } 792 793 /* 794 * This routine handles each T_DISCON_REQ message passed to icmp 795 * as an indicating that ICMP is no longer connected. 
This results 796 * in sending a T_BIND_REQ to IP to restore the binding to just 797 * the local address. 798 * 799 * This routine sends down a T_BIND_REQ to IP with the following mblks: 800 * T_BIND_REQ - specifying just the local address. 801 * T_OK_ACK - for the T_DISCON_REQ 802 * 803 * The disconnect completes in icmp_bind_result. 804 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 805 * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 806 * convert it to an error ack for the appropriate primitive. 807 */ 808 static void 809 icmp_disconnect(queue_t *q, mblk_t *mp) 810 { 811 icmp_t *icmp; 812 mblk_t *mp1; 813 conn_t *connp = Q_TO_CONN(q); 814 815 icmp = connp->conn_icmp; 816 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 817 if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) { 818 rw_exit(&icmp->icmp_rwlock); 819 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 820 "icmp_disconnect: bad state, %d", icmp->icmp_state); 821 icmp_err_ack(q, mp, TOUTSTATE, 0); 822 return; 823 } 824 icmp->icmp_pending_op = T_DISCON_REQ; 825 icmp->icmp_v6src = icmp->icmp_bound_v6src; 826 icmp->icmp_state = TS_IDLE; 827 828 /* 829 * Send down bind to IP to remove the full binding and revert 830 * to the local address binding. 831 */ 832 if (icmp->icmp_family == AF_INET) { 833 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin_t), 0); 834 } else { 835 ASSERT(icmp->icmp_family == AF_INET6); 836 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin6_t), 0); 837 } 838 if (mp1 == NULL) { 839 icmp->icmp_pending_op = -1; 840 rw_exit(&icmp->icmp_rwlock); 841 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 842 return; 843 } 844 mp = mi_tpi_ok_ack_alloc(mp); 845 if (mp == NULL) { 846 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 847 icmp->icmp_pending_op = -1; 848 rw_exit(&icmp->icmp_rwlock); 849 icmp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 850 return; 851 } 852 853 if (icmp->icmp_family == AF_INET6) { 854 int error; 855 856 /* Rebuild the header template */ 857 error = icmp_build_hdrs(icmp); 858 if (error != 0) { 859 icmp->icmp_pending_op = -1; 860 rw_exit(&icmp->icmp_rwlock); 861 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 862 freemsg(mp1); 863 return; 864 } 865 } 866 867 rw_exit(&icmp->icmp_rwlock); 868 /* Append the T_OK_ACK to the T_BIND_REQ for icmp_bind_result */ 869 linkb(mp1, mp); 870 871 if (icmp->icmp_family == AF_INET6) 872 mp1 = ip_bind_v6(q, mp1, connp, NULL); 873 else 874 mp1 = ip_bind_v4(q, mp1, connp); 875 876 /* The above return NULL if the bind needs to be deferred */ 877 if (mp1 != NULL) 878 icmp_bind_result(connp, mp1); 879 else 880 CONN_INC_REF(connp); 881 } 882 883 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 884 static void 885 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 886 { 887 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 888 qreply(q, mp); 889 } 890 891 /* Shorthand to generate and send TPI error acks to our client */ 892 static void 893 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 894 t_scalar_t t_error, int sys_error) 895 { 896 struct T_error_ack *teackp; 897 898 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 899 M_PCPROTO, T_ERROR_ACK)) != NULL) { 900 teackp = (struct T_error_ack *)mp->b_rptr; 901 teackp->ERROR_prim = primitive; 902 teackp->TLI_error = t_error; 903 teackp->UNIX_error = sys_error; 904 qreply(q, mp); 905 } 906 } 907 908 /* 909 * icmp_icmp_error is called by icmp_input to process ICMP 910 * messages passed up by IP. 911 * Generates the appropriate T_UDERROR_IND for permanent 912 * (non-transient) errors. 913 * Assumes that IP has pulled up everything up to and including 914 * the ICMP header. 
915 */ 916 static void 917 icmp_icmp_error(queue_t *q, mblk_t *mp) 918 { 919 icmph_t *icmph; 920 ipha_t *ipha; 921 int iph_hdr_length; 922 sin_t sin; 923 sin6_t sin6; 924 mblk_t *mp1; 925 int error = 0; 926 icmp_t *icmp = Q_TO_ICMP(q); 927 928 ipha = (ipha_t *)mp->b_rptr; 929 930 ASSERT(OK_32PTR(mp->b_rptr)); 931 932 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 933 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 934 icmp_icmp_error_ipv6(q, mp); 935 return; 936 } 937 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 938 939 /* Skip past the outer IP and ICMP headers */ 940 iph_hdr_length = IPH_HDR_LENGTH(ipha); 941 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 942 ipha = (ipha_t *)&icmph[1]; 943 iph_hdr_length = IPH_HDR_LENGTH(ipha); 944 945 switch (icmph->icmph_type) { 946 case ICMP_DEST_UNREACHABLE: 947 switch (icmph->icmph_code) { 948 case ICMP_FRAGMENTATION_NEEDED: 949 /* 950 * IP has already adjusted the path MTU. 951 */ 952 break; 953 case ICMP_PORT_UNREACHABLE: 954 case ICMP_PROTOCOL_UNREACHABLE: 955 error = ECONNREFUSED; 956 break; 957 default: 958 /* Transient errors */ 959 break; 960 } 961 break; 962 default: 963 /* Transient errors */ 964 break; 965 } 966 if (error == 0) { 967 freemsg(mp); 968 return; 969 } 970 971 /* 972 * Deliver T_UDERROR_IND when the application has asked for it. 973 * The socket layer enables this automatically when connected. 
974 */ 975 if (!icmp->icmp_dgram_errind) { 976 freemsg(mp); 977 return; 978 } 979 980 switch (icmp->icmp_family) { 981 case AF_INET: 982 sin = sin_null; 983 sin.sin_family = AF_INET; 984 sin.sin_addr.s_addr = ipha->ipha_dst; 985 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 986 error); 987 break; 988 case AF_INET6: 989 sin6 = sin6_null; 990 sin6.sin6_family = AF_INET6; 991 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 992 993 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 994 NULL, 0, error); 995 break; 996 } 997 if (mp1) 998 putnext(q, mp1); 999 freemsg(mp); 1000 } 1001 1002 /* 1003 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 1004 * for IPv6 packets. 1005 * Send permanent (non-transient) errors upstream. 1006 * Assumes that IP has pulled up all the extension headers as well 1007 * as the ICMPv6 header. 1008 */ 1009 static void 1010 icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 1011 { 1012 icmp6_t *icmp6; 1013 ip6_t *ip6h, *outer_ip6h; 1014 uint16_t iph_hdr_length; 1015 uint8_t *nexthdrp; 1016 sin6_t sin6; 1017 mblk_t *mp1; 1018 int error = 0; 1019 icmp_t *icmp = Q_TO_ICMP(q); 1020 1021 outer_ip6h = (ip6_t *)mp->b_rptr; 1022 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1023 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1024 else 1025 iph_hdr_length = IPV6_HDR_LEN; 1026 1027 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1028 ip6h = (ip6_t *)&icmp6[1]; 1029 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1030 freemsg(mp); 1031 return; 1032 } 1033 1034 switch (icmp6->icmp6_type) { 1035 case ICMP6_DST_UNREACH: 1036 switch (icmp6->icmp6_code) { 1037 case ICMP6_DST_UNREACH_NOPORT: 1038 error = ECONNREFUSED; 1039 break; 1040 case ICMP6_DST_UNREACH_ADMIN: 1041 case ICMP6_DST_UNREACH_NOROUTE: 1042 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1043 case ICMP6_DST_UNREACH_ADDR: 1044 /* Transient errors */ 1045 break; 1046 default: 1047 break; 1048 } 1049 break; 1050 case ICMP6_PACKET_TOO_BIG: { 
1051 struct T_unitdata_ind *tudi; 1052 struct T_opthdr *toh; 1053 size_t udi_size; 1054 mblk_t *newmp; 1055 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1056 sizeof (struct ip6_mtuinfo); 1057 sin6_t *sin6; 1058 struct ip6_mtuinfo *mtuinfo; 1059 1060 /* 1061 * If the application has requested to receive path mtu 1062 * information, send up an empty message containing an 1063 * IPV6_PATHMTU ancillary data item. 1064 */ 1065 if (!icmp->icmp_ipv6_recvpathmtu) 1066 break; 1067 1068 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1069 opt_length; 1070 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1071 BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors); 1072 break; 1073 } 1074 1075 /* 1076 * newmp->b_cont is left to NULL on purpose. This is an 1077 * empty message containing only ancillary data. 1078 */ 1079 newmp->b_datap->db_type = M_PROTO; 1080 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1081 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1082 tudi->PRIM_type = T_UNITDATA_IND; 1083 tudi->SRC_length = sizeof (sin6_t); 1084 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1085 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1086 tudi->OPT_length = opt_length; 1087 1088 sin6 = (sin6_t *)&tudi[1]; 1089 bzero(sin6, sizeof (sin6_t)); 1090 sin6->sin6_family = AF_INET6; 1091 sin6->sin6_addr = icmp->icmp_v6dst; 1092 1093 toh = (struct T_opthdr *)&sin6[1]; 1094 toh->level = IPPROTO_IPV6; 1095 toh->name = IPV6_PATHMTU; 1096 toh->len = opt_length; 1097 toh->status = 0; 1098 1099 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1100 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1101 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1102 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1103 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1104 /* 1105 * We've consumed everything we need from the original 1106 * message. Free it, then send our empty message. 
1107 */ 1108 freemsg(mp); 1109 putnext(q, newmp); 1110 return; 1111 } 1112 case ICMP6_TIME_EXCEEDED: 1113 /* Transient errors */ 1114 break; 1115 case ICMP6_PARAM_PROB: 1116 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1117 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1118 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1119 (uchar_t *)nexthdrp) { 1120 error = ECONNREFUSED; 1121 break; 1122 } 1123 break; 1124 } 1125 if (error == 0) { 1126 freemsg(mp); 1127 return; 1128 } 1129 1130 /* 1131 * Deliver T_UDERROR_IND when the application has asked for it. 1132 * The socket layer enables this automatically when connected. 1133 */ 1134 if (!icmp->icmp_dgram_errind) { 1135 freemsg(mp); 1136 return; 1137 } 1138 1139 sin6 = sin6_null; 1140 sin6.sin6_family = AF_INET6; 1141 sin6.sin6_addr = ip6h->ip6_dst; 1142 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1143 1144 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 1145 error); 1146 if (mp1) 1147 putnext(q, mp1); 1148 freemsg(mp); 1149 } 1150 1151 /* 1152 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1153 * The local address is filled in if endpoint is bound. The remote address 1154 * is filled in if remote address has been precified ("connected endpoint") 1155 * (The concept of connected CLTS sockets is alien to published TPI 1156 * but we support it anyway). 
1157 */ 1158 static void 1159 icmp_addr_req(queue_t *q, mblk_t *mp) 1160 { 1161 icmp_t *icmp = Q_TO_ICMP(q); 1162 mblk_t *ackmp; 1163 struct T_addr_ack *taa; 1164 1165 /* Make it large enough for worst case */ 1166 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1167 2 * sizeof (sin6_t), 1); 1168 if (ackmp == NULL) { 1169 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1170 return; 1171 } 1172 taa = (struct T_addr_ack *)ackmp->b_rptr; 1173 1174 bzero(taa, sizeof (struct T_addr_ack)); 1175 ackmp->b_wptr = (uchar_t *)&taa[1]; 1176 1177 taa->PRIM_type = T_ADDR_ACK; 1178 ackmp->b_datap->db_type = M_PCPROTO; 1179 rw_enter(&icmp->icmp_rwlock, RW_READER); 1180 /* 1181 * Note: Following code assumes 32 bit alignment of basic 1182 * data structures like sin_t and struct T_addr_ack. 1183 */ 1184 if (icmp->icmp_state != TS_UNBND) { 1185 /* 1186 * Fill in local address 1187 */ 1188 taa->LOCADDR_offset = sizeof (*taa); 1189 if (icmp->icmp_family == AF_INET) { 1190 sin_t *sin; 1191 1192 taa->LOCADDR_length = sizeof (sin_t); 1193 sin = (sin_t *)&taa[1]; 1194 /* Fill zeroes and then intialize non-zero fields */ 1195 *sin = sin_null; 1196 sin->sin_family = AF_INET; 1197 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1198 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1199 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1200 sin->sin_addr.s_addr); 1201 } else { 1202 /* 1203 * INADDR_ANY 1204 * icmp_v6src is not set, we might be bound to 1205 * broadcast/multicast. 
Use icmp_bound_v6src as 1206 * local address instead (that could 1207 * also still be INADDR_ANY) 1208 */ 1209 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1210 sin->sin_addr.s_addr); 1211 } 1212 ackmp->b_wptr = (uchar_t *)&sin[1]; 1213 } else { 1214 sin6_t *sin6; 1215 1216 ASSERT(icmp->icmp_family == AF_INET6); 1217 taa->LOCADDR_length = sizeof (sin6_t); 1218 sin6 = (sin6_t *)&taa[1]; 1219 /* Fill zeroes and then intialize non-zero fields */ 1220 *sin6 = sin6_null; 1221 sin6->sin6_family = AF_INET6; 1222 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1223 sin6->sin6_addr = icmp->icmp_v6src; 1224 } else { 1225 /* 1226 * UNSPECIFIED 1227 * icmp_v6src is not set, we might be bound to 1228 * broadcast/multicast. Use icmp_bound_v6src as 1229 * local address instead (that could 1230 * also still be UNSPECIFIED) 1231 */ 1232 sin6->sin6_addr = icmp->icmp_bound_v6src; 1233 } 1234 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1235 } 1236 } 1237 rw_exit(&icmp->icmp_rwlock); 1238 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1239 qreply(q, ackmp); 1240 } 1241 1242 static void 1243 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1244 { 1245 *tap = icmp_g_t_info_ack; 1246 1247 if (icmp->icmp_family == AF_INET6) 1248 tap->ADDR_size = sizeof (sin6_t); 1249 else 1250 tap->ADDR_size = sizeof (sin_t); 1251 tap->CURRENT_state = icmp->icmp_state; 1252 tap->OPT_size = icmp_max_optsize; 1253 } 1254 1255 /* 1256 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1257 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1258 * icmp_g_t_info_ack. The current state of the stream is copied from 1259 * icmp_state. 
1260 */ 1261 static void 1262 icmp_capability_req(queue_t *q, mblk_t *mp) 1263 { 1264 icmp_t *icmp = Q_TO_ICMP(q); 1265 t_uscalar_t cap_bits1; 1266 struct T_capability_ack *tcap; 1267 1268 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1269 1270 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1271 mp->b_datap->db_type, T_CAPABILITY_ACK); 1272 if (!mp) 1273 return; 1274 1275 tcap = (struct T_capability_ack *)mp->b_rptr; 1276 tcap->CAP_bits1 = 0; 1277 1278 if (cap_bits1 & TC1_INFO) { 1279 icmp_copy_info(&tcap->INFO_ack, icmp); 1280 tcap->CAP_bits1 |= TC1_INFO; 1281 } 1282 1283 qreply(q, mp); 1284 } 1285 1286 /* 1287 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1288 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1289 * The current state of the stream is copied from icmp_state. 1290 */ 1291 static void 1292 icmp_info_req(queue_t *q, mblk_t *mp) 1293 { 1294 icmp_t *icmp = Q_TO_ICMP(q); 1295 1296 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1297 T_INFO_ACK); 1298 if (!mp) 1299 return; 1300 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1301 qreply(q, mp); 1302 } 1303 1304 /* 1305 * IP recognizes seven kinds of bind requests: 1306 * 1307 * - A zero-length address binds only to the protocol number. 1308 * 1309 * - A 4-byte address is treated as a request to 1310 * validate that the address is a valid local IPv4 1311 * address, appropriate for an application to bind to. 1312 * IP does the verification, but does not make any note 1313 * of the address at this time. 1314 * 1315 * - A 16-byte address contains is treated as a request 1316 * to validate a local IPv6 address, as the 4-byte 1317 * address case above. 1318 * 1319 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1320 * use it for the inbound fanout of packets. 1321 * 1322 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1323 * use it for the inbound fanout of packets. 
1324 * 1325 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1326 * information consisting of local and remote addresses 1327 * and ports (unused for raw sockets). In this case, the addresses are both 1328 * validated as appropriate for this operation, and, if 1329 * so, the information is retained for use in the 1330 * inbound fanout. 1331 * 1332 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1333 * fanout information, like the 12-byte case above. 1334 * 1335 * IP will also fill in the IRE request mblk with information 1336 * regarding our peer. In all cases, we notify IP of our protocol 1337 * type by appending a single protocol byte to the bind request. 1338 */ 1339 static mblk_t * 1340 icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length, 1341 in_port_t fport) 1342 { 1343 char *cp; 1344 mblk_t *mp; 1345 struct T_bind_req *tbr; 1346 ipa_conn_t *ac; 1347 ipa6_conn_t *ac6; 1348 sin_t *sin; 1349 sin6_t *sin6; 1350 1351 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1352 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 1353 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1354 if (mp == NULL) 1355 return (NULL); 1356 mp->b_datap->db_type = M_PROTO; 1357 tbr = (struct T_bind_req *)mp->b_rptr; 1358 tbr->PRIM_type = bind_prim; 1359 tbr->ADDR_offset = sizeof (*tbr); 1360 tbr->CONIND_number = 0; 1361 tbr->ADDR_length = addr_length; 1362 cp = (char *)&tbr[1]; 1363 switch (addr_length) { 1364 case sizeof (ipa_conn_t): 1365 ASSERT(icmp->icmp_family == AF_INET); 1366 /* Append a request for an IRE */ 1367 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1368 if (mp->b_cont == NULL) { 1369 freemsg(mp); 1370 return (NULL); 1371 } 1372 mp->b_cont->b_wptr += sizeof (ire_t); 1373 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1374 1375 /* cp known to be 32 bit aligned */ 1376 ac = (ipa_conn_t *)cp; 1377 ac->ac_laddr = V4_PART_OF_V6(icmp->icmp_v6src); 1378 ac->ac_faddr = V4_PART_OF_V6(icmp->icmp_v6dst); 1379 
ac->ac_fport = fport; 1380 ac->ac_lport = 0; 1381 break; 1382 1383 case sizeof (ipa6_conn_t): 1384 ASSERT(icmp->icmp_family == AF_INET6); 1385 /* Append a request for an IRE */ 1386 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1387 if (mp->b_cont == NULL) { 1388 freemsg(mp); 1389 return (NULL); 1390 } 1391 mp->b_cont->b_wptr += sizeof (ire_t); 1392 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1393 1394 /* cp known to be 32 bit aligned */ 1395 ac6 = (ipa6_conn_t *)cp; 1396 ac6->ac6_laddr = icmp->icmp_v6src; 1397 ac6->ac6_faddr = icmp->icmp_v6dst; 1398 ac6->ac6_fport = fport; 1399 ac6->ac6_lport = 0; 1400 break; 1401 1402 case sizeof (sin_t): 1403 ASSERT(icmp->icmp_family == AF_INET); 1404 /* Append a request for an IRE */ 1405 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1406 if (!mp->b_cont) { 1407 freemsg(mp); 1408 return (NULL); 1409 } 1410 mp->b_cont->b_wptr += sizeof (ire_t); 1411 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1412 1413 sin = (sin_t *)cp; 1414 *sin = sin_null; 1415 sin->sin_family = AF_INET; 1416 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 1417 break; 1418 1419 case sizeof (sin6_t): 1420 ASSERT(icmp->icmp_family == AF_INET6); 1421 /* Append a request for an IRE */ 1422 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1423 if (!mp->b_cont) { 1424 freemsg(mp); 1425 return (NULL); 1426 } 1427 mp->b_cont->b_wptr += sizeof (ire_t); 1428 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1429 1430 sin6 = (sin6_t *)cp; 1431 *sin6 = sin6_null; 1432 sin6->sin6_family = AF_INET6; 1433 sin6->sin6_addr = icmp->icmp_bound_v6src; 1434 break; 1435 } 1436 /* Add protocol number to end */ 1437 cp[addr_length] = icmp->icmp_proto; 1438 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1439 return (mp); 1440 } 1441 1442 /* For /dev/icmp aka AF_INET open */ 1443 static int 1444 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1445 { 1446 return (icmp_open(q, devp, flag, sflag, credp, B_FALSE)); 1447 } 1448 1449 /* For 
/dev/icmp6 aka AF_INET6 open */ 1450 static int 1451 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1452 { 1453 return (icmp_open(q, devp, flag, sflag, credp, B_TRUE)); 1454 } 1455 1456 /* 1457 * This is the open routine for icmp. It allocates a icmp_t structure for 1458 * the stream and, on the first open of the module, creates an ND table. 1459 */ 1460 /*ARGSUSED2*/ 1461 static int 1462 icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1463 boolean_t isv6) 1464 { 1465 int err; 1466 icmp_t *icmp; 1467 conn_t *connp; 1468 dev_t conn_dev; 1469 zoneid_t zoneid; 1470 netstack_t *ns; 1471 icmp_stack_t *is; 1472 1473 /* If the stream is already open, return immediately. */ 1474 if (q->q_ptr != NULL) 1475 return (0); 1476 1477 if (sflag == MODOPEN) 1478 return (EINVAL); 1479 1480 ns = netstack_find_by_cred(credp); 1481 ASSERT(ns != NULL); 1482 is = ns->netstack_icmp; 1483 ASSERT(is != NULL); 1484 1485 /* 1486 * For exclusive stacks we set the zoneid to zero 1487 * to make ICMP operate as if in the global zone. 1488 */ 1489 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 1490 zoneid = GLOBAL_ZONEID; 1491 else 1492 zoneid = crgetzoneid(credp); 1493 1494 /* 1495 * Since ICMP is not used so heavily, allocating from the small 1496 * arena should be sufficient. 1497 */ 1498 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 1499 netstack_rele(ns); 1500 return (EBUSY); 1501 } 1502 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1503 1504 connp = ipcl_conn_create(IPCL_RAWIPCONN, KM_SLEEP, ns); 1505 connp->conn_dev = conn_dev; 1506 connp->conn_minor_arena = ip_minor_arena_sa; 1507 icmp = connp->conn_icmp; 1508 1509 /* 1510 * ipcl_conn_create did a netstack_hold. Undo the hold that was 1511 * done by netstack_find_by_cred() 1512 */ 1513 netstack_rele(ns); 1514 1515 /* 1516 * Initialize the icmp_t structure for this stream. 
1517 */ 1518 q->q_ptr = connp; 1519 WR(q)->q_ptr = connp; 1520 connp->conn_rq = q; 1521 connp->conn_wq = WR(q); 1522 1523 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1524 ASSERT(connp->conn_ulp == IPPROTO_ICMP); 1525 ASSERT(connp->conn_icmp == icmp); 1526 ASSERT(icmp->icmp_connp == connp); 1527 1528 /* Set the initial state of the stream and the privilege status. */ 1529 icmp->icmp_state = TS_UNBND; 1530 if (isv6) { 1531 icmp->icmp_ipversion = IPV6_VERSION; 1532 icmp->icmp_family = AF_INET6; 1533 connp->conn_ulp = IPPROTO_ICMPV6; 1534 /* May be changed by a SO_PROTOTYPE socket option. */ 1535 icmp->icmp_proto = IPPROTO_ICMPV6; 1536 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1537 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1538 icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; 1539 connp->conn_af_isv6 = B_TRUE; 1540 connp->conn_flags |= IPCL_ISV6; 1541 } else { 1542 icmp->icmp_ipversion = IPV4_VERSION; 1543 icmp->icmp_family = AF_INET; 1544 /* May be changed by a SO_PROTOTYPE socket option. */ 1545 icmp->icmp_proto = IPPROTO_ICMP; 1546 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1547 icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; 1548 connp->conn_af_isv6 = B_FALSE; 1549 connp->conn_flags &= ~IPCL_ISV6; 1550 } 1551 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1552 icmp->icmp_pending_op = -1; 1553 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1554 connp->conn_zoneid = zoneid; 1555 1556 /* 1557 * If the caller has the process-wide flag set, then default to MAC 1558 * exempt mode. This allows read-down to unlabeled hosts. 
1559 */ 1560 if (getpflags(NET_MAC_AWARE, credp) != 0) 1561 icmp->icmp_mac_exempt = B_TRUE; 1562 1563 connp->conn_ulp_labeled = is_system_labeled(); 1564 1565 icmp->icmp_is = is; 1566 1567 q->q_hiwat = is->is_recv_hiwat; 1568 WR(q)->q_hiwat = is->is_xmit_hiwat; 1569 WR(q)->q_lowat = is->is_xmit_lowat; 1570 1571 connp->conn_recv = icmp_input; 1572 crhold(credp); 1573 connp->conn_cred = credp; 1574 1575 mutex_enter(&connp->conn_lock); 1576 connp->conn_state_flags &= ~CONN_INCIPIENT; 1577 mutex_exit(&connp->conn_lock); 1578 1579 qprocson(q); 1580 1581 if (icmp->icmp_family == AF_INET6) { 1582 /* Build initial header template for transmit */ 1583 if ((err = icmp_build_hdrs(icmp)) != 0) { 1584 rw_exit(&icmp->icmp_rwlock); 1585 qprocsoff(q); 1586 ipcl_conn_destroy(connp); 1587 return (err); 1588 } 1589 } 1590 rw_exit(&icmp->icmp_rwlock); 1591 1592 /* Set the Stream head write offset. */ 1593 (void) mi_set_sth_wroff(q, 1594 icmp->icmp_max_hdr_len + is->is_wroff_extra); 1595 (void) mi_set_sth_hiwat(q, q->q_hiwat); 1596 1597 return (0); 1598 } 1599 1600 /* 1601 * Which ICMP options OK to set through T_UNITDATA_REQ... 
1602 */ 1603 /* ARGSUSED */ 1604 static boolean_t 1605 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1606 { 1607 return (B_TRUE); 1608 } 1609 1610 /* 1611 * This routine gets default values of certain options whose default 1612 * values are maintained by protcol specific code 1613 */ 1614 /* ARGSUSED */ 1615 int 1616 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1617 { 1618 icmp_t *icmp = Q_TO_ICMP(q); 1619 icmp_stack_t *is = icmp->icmp_is; 1620 int *i1 = (int *)ptr; 1621 1622 switch (level) { 1623 case IPPROTO_IP: 1624 switch (name) { 1625 case IP_MULTICAST_TTL: 1626 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1627 return (sizeof (uchar_t)); 1628 case IP_MULTICAST_LOOP: 1629 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1630 return (sizeof (uchar_t)); 1631 } 1632 break; 1633 case IPPROTO_IPV6: 1634 switch (name) { 1635 case IPV6_MULTICAST_HOPS: 1636 *i1 = IP_DEFAULT_MULTICAST_TTL; 1637 return (sizeof (int)); 1638 case IPV6_MULTICAST_LOOP: 1639 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1640 return (sizeof (int)); 1641 case IPV6_UNICAST_HOPS: 1642 *i1 = is->is_ipv6_hoplimit; 1643 return (sizeof (int)); 1644 } 1645 break; 1646 case IPPROTO_ICMPV6: 1647 switch (name) { 1648 case ICMP6_FILTER: 1649 /* Make it look like "pass all" */ 1650 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1651 return (sizeof (icmp6_filter_t)); 1652 } 1653 break; 1654 } 1655 return (-1); 1656 } 1657 1658 /* 1659 * This routine retrieves the current status of socket options. 1660 * It returns the size of the option retrieved. 
1661 */ 1662 int 1663 icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr) 1664 { 1665 conn_t *connp = Q_TO_CONN(q); 1666 icmp_t *icmp = connp->conn_icmp; 1667 icmp_stack_t *is = icmp->icmp_is; 1668 int *i1 = (int *)ptr; 1669 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1670 1671 switch (level) { 1672 case SOL_SOCKET: 1673 switch (name) { 1674 case SO_DEBUG: 1675 *i1 = icmp->icmp_debug; 1676 break; 1677 case SO_TYPE: 1678 *i1 = SOCK_RAW; 1679 break; 1680 case SO_PROTOTYPE: 1681 *i1 = icmp->icmp_proto; 1682 break; 1683 case SO_REUSEADDR: 1684 *i1 = icmp->icmp_reuseaddr; 1685 break; 1686 1687 /* 1688 * The following three items are available here, 1689 * but are only meaningful to IP. 1690 */ 1691 case SO_DONTROUTE: 1692 *i1 = icmp->icmp_dontroute; 1693 break; 1694 case SO_USELOOPBACK: 1695 *i1 = icmp->icmp_useloopback; 1696 break; 1697 case SO_BROADCAST: 1698 *i1 = icmp->icmp_broadcast; 1699 break; 1700 1701 case SO_SNDBUF: 1702 ASSERT(q->q_hiwat <= INT_MAX); 1703 *i1 = (int)q->q_hiwat; 1704 break; 1705 case SO_RCVBUF: 1706 ASSERT(RD(q)->q_hiwat <= INT_MAX); 1707 *i1 = (int)RD(q)->q_hiwat; 1708 break; 1709 case SO_DGRAM_ERRIND: 1710 *i1 = icmp->icmp_dgram_errind; 1711 break; 1712 case SO_TIMESTAMP: 1713 *i1 = icmp->icmp_timestamp; 1714 break; 1715 case SO_MAC_EXEMPT: 1716 *i1 = icmp->icmp_mac_exempt; 1717 break; 1718 case SO_DOMAIN: 1719 *i1 = icmp->icmp_family; 1720 break; 1721 1722 /* 1723 * Following four not meaningful for icmp 1724 * Action is same as "default" to which we fallthrough 1725 * so we keep them in comments. 1726 * case SO_LINGER: 1727 * case SO_KEEPALIVE: 1728 * case SO_OOBINLINE: 1729 * case SO_ALLZONES: 1730 */ 1731 default: 1732 return (-1); 1733 } 1734 break; 1735 case IPPROTO_IP: 1736 /* 1737 * Only allow IPv4 option processing on IPv4 sockets. 
1738 */ 1739 if (icmp->icmp_family != AF_INET) 1740 return (-1); 1741 1742 switch (name) { 1743 case IP_OPTIONS: 1744 case T_IP_OPTIONS: 1745 /* Options are passed up with each packet */ 1746 return (0); 1747 case IP_HDRINCL: 1748 *i1 = (int)icmp->icmp_hdrincl; 1749 break; 1750 case IP_TOS: 1751 case T_IP_TOS: 1752 *i1 = (int)icmp->icmp_type_of_service; 1753 break; 1754 case IP_TTL: 1755 *i1 = (int)icmp->icmp_ttl; 1756 break; 1757 case IP_MULTICAST_IF: 1758 /* 0 address if not set */ 1759 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1760 return (sizeof (ipaddr_t)); 1761 case IP_MULTICAST_TTL: 1762 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1763 return (sizeof (uchar_t)); 1764 case IP_MULTICAST_LOOP: 1765 *ptr = connp->conn_multicast_loop; 1766 return (sizeof (uint8_t)); 1767 case IP_BOUND_IF: 1768 /* Zero if not set */ 1769 *i1 = icmp->icmp_bound_if; 1770 break; /* goto sizeof (int) option return */ 1771 case IP_UNSPEC_SRC: 1772 *ptr = icmp->icmp_unspec_source; 1773 break; /* goto sizeof (int) option return */ 1774 case IP_BROADCAST_TTL: 1775 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 1776 return (sizeof (uchar_t)); 1777 case IP_RECVIF: 1778 *ptr = icmp->icmp_recvif; 1779 break; /* goto sizeof (int) option return */ 1780 case IP_RECVPKTINFO: 1781 /* 1782 * This also handles IP_PKTINFO. 1783 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1784 * Differentiation is based on the size of the argument 1785 * passed in. 1786 * This option is handled in IP which will return an 1787 * error for IP_PKTINFO as it's not supported as a 1788 * sticky option. 1789 */ 1790 return (-EINVAL); 1791 /* 1792 * Cannot "get" the value of following options 1793 * at this level. Action is same as "default" to 1794 * which we fallthrough so we keep them in comments. 
1795 * 1796 * case IP_ADD_MEMBERSHIP: 1797 * case IP_DROP_MEMBERSHIP: 1798 * case IP_BLOCK_SOURCE: 1799 * case IP_UNBLOCK_SOURCE: 1800 * case IP_ADD_SOURCE_MEMBERSHIP: 1801 * case IP_DROP_SOURCE_MEMBERSHIP: 1802 * case MCAST_JOIN_GROUP: 1803 * case MCAST_LEAVE_GROUP: 1804 * case MCAST_BLOCK_SOURCE: 1805 * case MCAST_UNBLOCK_SOURCE: 1806 * case MCAST_JOIN_SOURCE_GROUP: 1807 * case MCAST_LEAVE_SOURCE_GROUP: 1808 * case MRT_INIT: 1809 * case MRT_DONE: 1810 * case MRT_ADD_VIF: 1811 * case MRT_DEL_VIF: 1812 * case MRT_ADD_MFC: 1813 * case MRT_DEL_MFC: 1814 * case MRT_VERSION: 1815 * case MRT_ASSERT: 1816 * case IP_SEC_OPT: 1817 * case IP_DONTFAILOVER_IF: 1818 * case IP_NEXTHOP: 1819 */ 1820 default: 1821 return (-1); 1822 } 1823 break; 1824 case IPPROTO_IPV6: 1825 /* 1826 * Only allow IPv6 option processing on native IPv6 sockets. 1827 */ 1828 if (icmp->icmp_family != AF_INET6) 1829 return (-1); 1830 switch (name) { 1831 case IPV6_UNICAST_HOPS: 1832 *i1 = (unsigned int)icmp->icmp_ttl; 1833 break; 1834 case IPV6_MULTICAST_IF: 1835 /* 0 index if not set */ 1836 *i1 = icmp->icmp_multicast_if_index; 1837 break; 1838 case IPV6_MULTICAST_HOPS: 1839 *i1 = icmp->icmp_multicast_ttl; 1840 break; 1841 case IPV6_MULTICAST_LOOP: 1842 *i1 = connp->conn_multicast_loop; 1843 break; 1844 case IPV6_BOUND_IF: 1845 /* Zero if not set */ 1846 *i1 = icmp->icmp_bound_if; 1847 break; 1848 case IPV6_UNSPEC_SRC: 1849 *i1 = icmp->icmp_unspec_source; 1850 break; 1851 case IPV6_CHECKSUM: 1852 /* 1853 * Return offset or -1 if no checksum offset. 
1854 * Does not apply to IPPROTO_ICMPV6 1855 */ 1856 if (icmp->icmp_proto == IPPROTO_ICMPV6) 1857 return (-1); 1858 1859 if (icmp->icmp_raw_checksum) { 1860 *i1 = icmp->icmp_checksum_off; 1861 } else { 1862 *i1 = -1; 1863 } 1864 break; 1865 case IPV6_JOIN_GROUP: 1866 case IPV6_LEAVE_GROUP: 1867 case MCAST_JOIN_GROUP: 1868 case MCAST_LEAVE_GROUP: 1869 case MCAST_BLOCK_SOURCE: 1870 case MCAST_UNBLOCK_SOURCE: 1871 case MCAST_JOIN_SOURCE_GROUP: 1872 case MCAST_LEAVE_SOURCE_GROUP: 1873 /* cannot "get" the value for these */ 1874 return (-1); 1875 case IPV6_RECVPKTINFO: 1876 *i1 = icmp->icmp_ip_recvpktinfo; 1877 break; 1878 case IPV6_RECVTCLASS: 1879 *i1 = icmp->icmp_ipv6_recvtclass; 1880 break; 1881 case IPV6_RECVPATHMTU: 1882 *i1 = icmp->icmp_ipv6_recvpathmtu; 1883 break; 1884 case IPV6_V6ONLY: 1885 *i1 = 1; 1886 break; 1887 case IPV6_RECVHOPLIMIT: 1888 *i1 = icmp->icmp_ipv6_recvhoplimit; 1889 break; 1890 case IPV6_RECVHOPOPTS: 1891 *i1 = icmp->icmp_ipv6_recvhopopts; 1892 break; 1893 case IPV6_RECVDSTOPTS: 1894 *i1 = icmp->icmp_ipv6_recvdstopts; 1895 break; 1896 case _OLD_IPV6_RECVDSTOPTS: 1897 *i1 = icmp->icmp_old_ipv6_recvdstopts; 1898 break; 1899 case IPV6_RECVRTHDRDSTOPTS: 1900 *i1 = icmp->icmp_ipv6_recvrtdstopts; 1901 break; 1902 case IPV6_RECVRTHDR: 1903 *i1 = icmp->icmp_ipv6_recvrthdr; 1904 break; 1905 case IPV6_PKTINFO: { 1906 /* XXX assumes that caller has room for max size! 
*/ 1907 struct in6_pktinfo *pkti; 1908 1909 pkti = (struct in6_pktinfo *)ptr; 1910 if (ipp->ipp_fields & IPPF_IFINDEX) 1911 pkti->ipi6_ifindex = ipp->ipp_ifindex; 1912 else 1913 pkti->ipi6_ifindex = 0; 1914 if (ipp->ipp_fields & IPPF_ADDR) 1915 pkti->ipi6_addr = ipp->ipp_addr; 1916 else 1917 pkti->ipi6_addr = ipv6_all_zeros; 1918 return (sizeof (struct in6_pktinfo)); 1919 } 1920 case IPV6_NEXTHOP: { 1921 sin6_t *sin6 = (sin6_t *)ptr; 1922 1923 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 1924 return (0); 1925 *sin6 = sin6_null; 1926 sin6->sin6_family = AF_INET6; 1927 sin6->sin6_addr = ipp->ipp_nexthop; 1928 return (sizeof (sin6_t)); 1929 } 1930 case IPV6_HOPOPTS: 1931 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 1932 return (0); 1933 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 1934 return (0); 1935 bcopy((char *)ipp->ipp_hopopts + 1936 icmp->icmp_label_len_v6, ptr, 1937 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1938 if (icmp->icmp_label_len_v6 > 0) { 1939 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 1940 ptr[1] = (ipp->ipp_hopoptslen - 1941 icmp->icmp_label_len_v6 + 7) / 8 - 1; 1942 } 1943 return (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1944 case IPV6_RTHDRDSTOPTS: 1945 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 1946 return (0); 1947 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 1948 return (ipp->ipp_rtdstoptslen); 1949 case IPV6_RTHDR: 1950 if (!(ipp->ipp_fields & IPPF_RTHDR)) 1951 return (0); 1952 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 1953 return (ipp->ipp_rthdrlen); 1954 case IPV6_DSTOPTS: 1955 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 1956 return (0); 1957 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 1958 return (ipp->ipp_dstoptslen); 1959 case IPV6_PATHMTU: 1960 if (!(ipp->ipp_fields & IPPF_PATHMTU)) 1961 return (0); 1962 1963 return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0, 1964 (struct ip6_mtuinfo *)ptr, is->is_netstack)); 1965 case IPV6_TCLASS: 1966 if (ipp->ipp_fields & IPPF_TCLASS) 1967 *i1 = ipp->ipp_tclass; 1968 else 1969 *i1 = 
IPV6_FLOW_TCLASS( 1970 IPV6_DEFAULT_VERS_AND_FLOW); 1971 break; 1972 default: 1973 return (-1); 1974 } 1975 break; 1976 case IPPROTO_ICMPV6: 1977 /* 1978 * Only allow IPv6 option processing on native IPv6 sockets. 1979 */ 1980 if (icmp->icmp_family != AF_INET6) 1981 return (-1); 1982 1983 if (icmp->icmp_proto != IPPROTO_ICMPV6) 1984 return (-1); 1985 1986 switch (name) { 1987 case ICMP6_FILTER: 1988 if (icmp->icmp_filter == NULL) { 1989 /* Make it look like "pass all" */ 1990 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1991 } else { 1992 (void) bcopy(icmp->icmp_filter, ptr, 1993 sizeof (icmp6_filter_t)); 1994 } 1995 return (sizeof (icmp6_filter_t)); 1996 default: 1997 return (-1); 1998 } 1999 default: 2000 return (-1); 2001 } 2002 return (sizeof (int)); 2003 } 2004 2005 /* 2006 * This routine retrieves the current status of socket options. 2007 * It returns the size of the option retrieved. 2008 */ 2009 int 2010 icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 2011 { 2012 icmp_t *icmp = Q_TO_ICMP(q); 2013 int err; 2014 2015 rw_enter(&icmp->icmp_rwlock, RW_READER); 2016 err = icmp_opt_get_locked(q, level, name, ptr); 2017 rw_exit(&icmp->icmp_rwlock); 2018 return (err); 2019 } 2020 2021 2022 /* This routine sets socket options. */ 2023 /* ARGSUSED */ 2024 int 2025 icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name, 2026 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2027 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2028 { 2029 conn_t *connp = Q_TO_CONN(q); 2030 icmp_t *icmp = connp->conn_icmp; 2031 icmp_stack_t *is = icmp->icmp_is; 2032 int *i1 = (int *)invalp; 2033 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2034 boolean_t checkonly; 2035 int error; 2036 2037 switch (optset_context) { 2038 case SETFN_OPTCOM_CHECKONLY: 2039 checkonly = B_TRUE; 2040 /* 2041 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 2042 * inlen != 0 implies value supplied and 2043 * we have to "pretend" to set it. 
2044 * inlen == 0 implies that there is no 2045 * value part in T_CHECK request and just validation 2046 * done elsewhere should be enough, we just return here. 2047 */ 2048 if (inlen == 0) { 2049 *outlenp = 0; 2050 return (0); 2051 } 2052 break; 2053 case SETFN_OPTCOM_NEGOTIATE: 2054 checkonly = B_FALSE; 2055 break; 2056 case SETFN_UD_NEGOTIATE: 2057 case SETFN_CONN_NEGOTIATE: 2058 checkonly = B_FALSE; 2059 /* 2060 * Negotiating local and "association-related" options 2061 * through T_UNITDATA_REQ. 2062 * 2063 * Following routine can filter out ones we do not 2064 * want to be "set" this way. 2065 */ 2066 if (!icmp_opt_allow_udr_set(level, name)) { 2067 *outlenp = 0; 2068 return (EINVAL); 2069 } 2070 break; 2071 default: 2072 /* 2073 * We should never get here 2074 */ 2075 *outlenp = 0; 2076 return (EINVAL); 2077 } 2078 2079 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 2080 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 2081 2082 /* 2083 * For fixed length options, no sanity check 2084 * of passed in length is done. It is assumed *_optcom_req() 2085 * routines do the right thing. 
2086 */ 2087 2088 switch (level) { 2089 case SOL_SOCKET: 2090 switch (name) { 2091 case SO_DEBUG: 2092 if (!checkonly) 2093 icmp->icmp_debug = onoff; 2094 break; 2095 case SO_PROTOTYPE: 2096 if ((*i1 & 0xFF) != IPPROTO_ICMP && 2097 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 2098 secpolicy_net_rawaccess(cr) != 0) { 2099 *outlenp = 0; 2100 return (EACCES); 2101 } 2102 /* Can't use IPPROTO_RAW with IPv6 */ 2103 if ((*i1 & 0xFF) == IPPROTO_RAW && 2104 icmp->icmp_family == AF_INET6) { 2105 *outlenp = 0; 2106 return (EPROTONOSUPPORT); 2107 } 2108 if (checkonly) { 2109 /* T_CHECK case */ 2110 *(int *)outvalp = (*i1 & 0xFF); 2111 break; 2112 } 2113 icmp->icmp_proto = *i1 & 0xFF; 2114 if ((icmp->icmp_proto == IPPROTO_RAW || 2115 icmp->icmp_proto == IPPROTO_IGMP) && 2116 icmp->icmp_family == AF_INET) 2117 icmp->icmp_hdrincl = 1; 2118 else 2119 icmp->icmp_hdrincl = 0; 2120 2121 if (icmp->icmp_family == AF_INET6 && 2122 icmp->icmp_proto == IPPROTO_ICMPV6) { 2123 /* Set offset for icmp6_cksum */ 2124 icmp->icmp_raw_checksum = 0; 2125 icmp->icmp_checksum_off = 2; 2126 } 2127 if (icmp->icmp_proto == IPPROTO_UDP || 2128 icmp->icmp_proto == IPPROTO_TCP || 2129 icmp->icmp_proto == IPPROTO_SCTP) { 2130 icmp->icmp_no_tp_cksum = 1; 2131 icmp->icmp_sticky_ipp.ipp_fields |= 2132 IPPF_NO_CKSUM; 2133 } else { 2134 icmp->icmp_no_tp_cksum = 0; 2135 icmp->icmp_sticky_ipp.ipp_fields &= 2136 ~IPPF_NO_CKSUM; 2137 } 2138 2139 if (icmp->icmp_filter != NULL && 2140 icmp->icmp_proto != IPPROTO_ICMPV6) { 2141 kmem_free(icmp->icmp_filter, 2142 sizeof (icmp6_filter_t)); 2143 icmp->icmp_filter = NULL; 2144 } 2145 2146 /* Rebuild the header template */ 2147 error = icmp_build_hdrs(icmp); 2148 if (error != 0) { 2149 *outlenp = 0; 2150 return (error); 2151 } 2152 2153 /* 2154 * For SCTP, we don't use icmp_bind_proto() for 2155 * raw socket binding. Note that we do not need 2156 * to set *outlenp. 2157 * FIXME: how does SCTP work? 
2158 */ 2159 if (icmp->icmp_proto == IPPROTO_SCTP) 2160 return (0); 2161 2162 *outlenp = sizeof (int); 2163 *(int *)outvalp = *i1 & 0xFF; 2164 2165 /* Drop lock across the bind operation */ 2166 rw_exit(&icmp->icmp_rwlock); 2167 icmp_bind_proto(q); 2168 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2169 return (0); 2170 case SO_REUSEADDR: 2171 if (!checkonly) 2172 icmp->icmp_reuseaddr = onoff; 2173 break; 2174 2175 /* 2176 * The following three items are available here, 2177 * but are only meaningful to IP. 2178 */ 2179 case SO_DONTROUTE: 2180 if (!checkonly) 2181 icmp->icmp_dontroute = onoff; 2182 break; 2183 case SO_USELOOPBACK: 2184 if (!checkonly) 2185 icmp->icmp_useloopback = onoff; 2186 break; 2187 case SO_BROADCAST: 2188 if (!checkonly) 2189 icmp->icmp_broadcast = onoff; 2190 break; 2191 2192 case SO_SNDBUF: 2193 if (*i1 > is->is_max_buf) { 2194 *outlenp = 0; 2195 return (ENOBUFS); 2196 } 2197 if (!checkonly) { 2198 q->q_hiwat = *i1; 2199 } 2200 break; 2201 case SO_RCVBUF: 2202 if (*i1 > is->is_max_buf) { 2203 *outlenp = 0; 2204 return (ENOBUFS); 2205 } 2206 if (!checkonly) { 2207 RD(q)->q_hiwat = *i1; 2208 rw_exit(&icmp->icmp_rwlock); 2209 (void) mi_set_sth_hiwat(RD(q), *i1); 2210 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2211 } 2212 break; 2213 case SO_DGRAM_ERRIND: 2214 if (!checkonly) 2215 icmp->icmp_dgram_errind = onoff; 2216 break; 2217 case SO_ALLZONES: 2218 /* 2219 * "soft" error (negative) 2220 * option not handled at this level 2221 * Note: Do not modify *outlenp 2222 */ 2223 return (-EINVAL); 2224 case SO_TIMESTAMP: 2225 if (!checkonly) { 2226 icmp->icmp_timestamp = onoff; 2227 } 2228 break; 2229 case SO_MAC_EXEMPT: 2230 if (secpolicy_net_mac_aware(cr) != 0 || 2231 icmp->icmp_state != TS_UNBND) 2232 return (EACCES); 2233 if (!checkonly) 2234 icmp->icmp_mac_exempt = onoff; 2235 break; 2236 /* 2237 * Following three not meaningful for icmp 2238 * Action is same as "default" so we keep them 2239 * in comments. 
2240 * case SO_LINGER: 2241 * case SO_KEEPALIVE: 2242 * case SO_OOBINLINE: 2243 */ 2244 default: 2245 *outlenp = 0; 2246 return (EINVAL); 2247 } 2248 break; 2249 case IPPROTO_IP: 2250 /* 2251 * Only allow IPv4 option processing on IPv4 sockets. 2252 */ 2253 if (icmp->icmp_family != AF_INET) { 2254 *outlenp = 0; 2255 return (ENOPROTOOPT); 2256 } 2257 switch (name) { 2258 case IP_OPTIONS: 2259 case T_IP_OPTIONS: 2260 /* Save options for use by IP. */ 2261 if ((inlen & 0x3) || 2262 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2263 *outlenp = 0; 2264 return (EINVAL); 2265 } 2266 if (checkonly) 2267 break; 2268 2269 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2270 &icmp->icmp_ip_snd_options_len, 2271 icmp->icmp_label_len, invalp, inlen)) { 2272 *outlenp = 0; 2273 return (ENOMEM); 2274 } 2275 2276 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2277 icmp->icmp_ip_snd_options_len; 2278 rw_exit(&icmp->icmp_rwlock); 2279 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2280 is->is_wroff_extra); 2281 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2282 break; 2283 case IP_HDRINCL: 2284 if (!checkonly) 2285 icmp->icmp_hdrincl = onoff; 2286 break; 2287 case IP_TOS: 2288 case T_IP_TOS: 2289 if (!checkonly) { 2290 icmp->icmp_type_of_service = (uint8_t)*i1; 2291 } 2292 break; 2293 case IP_TTL: 2294 if (!checkonly) { 2295 icmp->icmp_ttl = (uint8_t)*i1; 2296 } 2297 break; 2298 case IP_MULTICAST_IF: 2299 /* 2300 * TODO should check OPTMGMT reply and undo this if 2301 * there is an error. 2302 */ 2303 if (!checkonly) 2304 icmp->icmp_multicast_if_addr = *i1; 2305 break; 2306 case IP_MULTICAST_TTL: 2307 if (!checkonly) 2308 icmp->icmp_multicast_ttl = *invalp; 2309 break; 2310 case IP_MULTICAST_LOOP: 2311 if (!checkonly) { 2312 connp->conn_multicast_loop = 2313 (*invalp == 0) ? 
0 : 1; 2314 } 2315 break; 2316 case IP_BOUND_IF: 2317 if (!checkonly) 2318 icmp->icmp_bound_if = *i1; 2319 break; 2320 case IP_UNSPEC_SRC: 2321 if (!checkonly) 2322 icmp->icmp_unspec_source = onoff; 2323 break; 2324 case IP_BROADCAST_TTL: 2325 if (!checkonly) 2326 connp->conn_broadcast_ttl = *invalp; 2327 break; 2328 case IP_RECVIF: 2329 if (!checkonly) 2330 icmp->icmp_recvif = onoff; 2331 /* 2332 * pass to ip 2333 */ 2334 return (-EINVAL); 2335 case IP_PKTINFO: { 2336 /* 2337 * This also handles IP_RECVPKTINFO. 2338 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2339 * Differentiation is based on the size of the argument 2340 * passed in. 2341 */ 2342 struct in_pktinfo *pktinfop; 2343 ip4_pkt_t *attr_pktinfop; 2344 2345 if (checkonly) 2346 break; 2347 2348 if (inlen == sizeof (int)) { 2349 /* 2350 * This is IP_RECVPKTINFO option. 2351 * Keep a local copy of wether this option is 2352 * set or not and pass it down to IP for 2353 * processing. 2354 */ 2355 icmp->icmp_ip_recvpktinfo = onoff; 2356 return (-EINVAL); 2357 } 2358 2359 2360 if (inlen != sizeof (struct in_pktinfo)) 2361 return (EINVAL); 2362 2363 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2364 == NULL) { 2365 /* 2366 * sticky option is not supported 2367 */ 2368 return (EINVAL); 2369 } 2370 2371 pktinfop = (struct in_pktinfo *)invalp; 2372 2373 /* 2374 * Atleast one of the values should be specified 2375 */ 2376 if (pktinfop->ipi_ifindex == 0 && 2377 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2378 return (EINVAL); 2379 } 2380 2381 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2382 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2383 } 2384 break; 2385 case IP_ADD_MEMBERSHIP: 2386 case IP_DROP_MEMBERSHIP: 2387 case IP_BLOCK_SOURCE: 2388 case IP_UNBLOCK_SOURCE: 2389 case IP_ADD_SOURCE_MEMBERSHIP: 2390 case IP_DROP_SOURCE_MEMBERSHIP: 2391 case MCAST_JOIN_GROUP: 2392 case MCAST_LEAVE_GROUP: 2393 case MCAST_BLOCK_SOURCE: 2394 case MCAST_UNBLOCK_SOURCE: 2395 case 
MCAST_JOIN_SOURCE_GROUP: 2396 case MCAST_LEAVE_SOURCE_GROUP: 2397 case MRT_INIT: 2398 case MRT_DONE: 2399 case MRT_ADD_VIF: 2400 case MRT_DEL_VIF: 2401 case MRT_ADD_MFC: 2402 case MRT_DEL_MFC: 2403 case MRT_VERSION: 2404 case MRT_ASSERT: 2405 case IP_SEC_OPT: 2406 case IP_DONTFAILOVER_IF: 2407 case IP_NEXTHOP: 2408 /* 2409 * "soft" error (negative) 2410 * option not handled at this level 2411 * Note: Do not modify *outlenp 2412 */ 2413 return (-EINVAL); 2414 default: 2415 *outlenp = 0; 2416 return (EINVAL); 2417 } 2418 break; 2419 case IPPROTO_IPV6: { 2420 ip6_pkt_t *ipp; 2421 boolean_t sticky; 2422 2423 if (icmp->icmp_family != AF_INET6) { 2424 *outlenp = 0; 2425 return (ENOPROTOOPT); 2426 } 2427 /* 2428 * Deal with both sticky options and ancillary data 2429 */ 2430 if (thisdg_attrs == NULL) { 2431 /* sticky options, or none */ 2432 ipp = &icmp->icmp_sticky_ipp; 2433 sticky = B_TRUE; 2434 } else { 2435 /* ancillary data */ 2436 ipp = (ip6_pkt_t *)thisdg_attrs; 2437 sticky = B_FALSE; 2438 } 2439 2440 switch (name) { 2441 case IPV6_MULTICAST_IF: 2442 if (!checkonly) 2443 icmp->icmp_multicast_if_index = *i1; 2444 break; 2445 case IPV6_UNICAST_HOPS: 2446 /* -1 means use default */ 2447 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2448 *outlenp = 0; 2449 return (EINVAL); 2450 } 2451 if (!checkonly) { 2452 if (*i1 == -1) { 2453 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2454 is->is_ipv6_hoplimit; 2455 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2456 /* Pass modified value to IP. 
*/ 2457 *i1 = ipp->ipp_hoplimit; 2458 } else { 2459 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2460 (uint8_t)*i1; 2461 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2462 } 2463 /* Rebuild the header template */ 2464 error = icmp_build_hdrs(icmp); 2465 if (error != 0) { 2466 *outlenp = 0; 2467 return (error); 2468 } 2469 } 2470 break; 2471 case IPV6_MULTICAST_HOPS: 2472 /* -1 means use default */ 2473 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2474 *outlenp = 0; 2475 return (EINVAL); 2476 } 2477 if (!checkonly) { 2478 if (*i1 == -1) { 2479 icmp->icmp_multicast_ttl = 2480 ipp->ipp_multicast_hops = 2481 IP_DEFAULT_MULTICAST_TTL; 2482 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2483 /* Pass modified value to IP. */ 2484 *i1 = icmp->icmp_multicast_ttl; 2485 } else { 2486 icmp->icmp_multicast_ttl = 2487 ipp->ipp_multicast_hops = 2488 (uint8_t)*i1; 2489 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2490 } 2491 } 2492 break; 2493 case IPV6_MULTICAST_LOOP: 2494 if (*i1 != 0 && *i1 != 1) { 2495 *outlenp = 0; 2496 return (EINVAL); 2497 } 2498 if (!checkonly) 2499 connp->conn_multicast_loop = *i1; 2500 break; 2501 case IPV6_CHECKSUM: 2502 /* 2503 * Integer offset into the user data of where the 2504 * checksum is located. 2505 * Offset of -1 disables option. 2506 * Does not apply to IPPROTO_ICMPV6. 
2507 */ 2508 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2509 *outlenp = 0; 2510 return (EINVAL); 2511 } 2512 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2513 /* Negative or not 16 bit aligned offset */ 2514 *outlenp = 0; 2515 return (EINVAL); 2516 } 2517 if (checkonly) 2518 break; 2519 2520 if (*i1 == -1) { 2521 icmp->icmp_raw_checksum = 0; 2522 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2523 } else { 2524 icmp->icmp_raw_checksum = 1; 2525 icmp->icmp_checksum_off = *i1; 2526 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2527 } 2528 /* Rebuild the header template */ 2529 error = icmp_build_hdrs(icmp); 2530 if (error != 0) { 2531 *outlenp = 0; 2532 return (error); 2533 } 2534 break; 2535 case IPV6_JOIN_GROUP: 2536 case IPV6_LEAVE_GROUP: 2537 case MCAST_JOIN_GROUP: 2538 case MCAST_LEAVE_GROUP: 2539 case MCAST_BLOCK_SOURCE: 2540 case MCAST_UNBLOCK_SOURCE: 2541 case MCAST_JOIN_SOURCE_GROUP: 2542 case MCAST_LEAVE_SOURCE_GROUP: 2543 /* 2544 * "soft" error (negative) 2545 * option not handled at this level 2546 * Note: Do not modify *outlenp 2547 */ 2548 return (-EINVAL); 2549 case IPV6_BOUND_IF: 2550 if (!checkonly) 2551 icmp->icmp_bound_if = *i1; 2552 break; 2553 case IPV6_UNSPEC_SRC: 2554 if (!checkonly) 2555 icmp->icmp_unspec_source = onoff; 2556 break; 2557 case IPV6_RECVTCLASS: 2558 if (!checkonly) 2559 icmp->icmp_ipv6_recvtclass = onoff; 2560 break; 2561 /* 2562 * Set boolean switches for ancillary data delivery 2563 */ 2564 case IPV6_RECVPKTINFO: 2565 if (!checkonly) 2566 icmp->icmp_ip_recvpktinfo = onoff; 2567 break; 2568 case IPV6_RECVPATHMTU: 2569 if (!checkonly) 2570 icmp->icmp_ipv6_recvpathmtu = onoff; 2571 break; 2572 case IPV6_RECVHOPLIMIT: 2573 if (!checkonly) 2574 icmp->icmp_ipv6_recvhoplimit = onoff; 2575 break; 2576 case IPV6_RECVHOPOPTS: 2577 if (!checkonly) 2578 icmp->icmp_ipv6_recvhopopts = onoff; 2579 break; 2580 case IPV6_RECVDSTOPTS: 2581 if (!checkonly) 2582 icmp->icmp_ipv6_recvdstopts = onoff; 2583 break; 2584 case _OLD_IPV6_RECVDSTOPTS: 2585 
if (!checkonly) 2586 icmp->icmp_old_ipv6_recvdstopts = onoff; 2587 break; 2588 case IPV6_RECVRTHDRDSTOPTS: 2589 if (!checkonly) 2590 icmp->icmp_ipv6_recvrtdstopts = onoff; 2591 break; 2592 case IPV6_RECVRTHDR: 2593 if (!checkonly) 2594 icmp->icmp_ipv6_recvrthdr = onoff; 2595 break; 2596 /* 2597 * Set sticky options or ancillary data. 2598 * If sticky options, (re)build any extension headers 2599 * that might be needed as a result. 2600 */ 2601 case IPV6_PKTINFO: 2602 /* 2603 * The source address and ifindex are verified 2604 * in ip_opt_set(). For ancillary data the 2605 * source address is checked in ip_wput_v6. 2606 */ 2607 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2608 return (EINVAL); 2609 if (checkonly) 2610 break; 2611 2612 if (inlen == 0) { 2613 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2614 ipp->ipp_sticky_ignored |= 2615 (IPPF_IFINDEX|IPPF_ADDR); 2616 } else { 2617 struct in6_pktinfo *pkti; 2618 2619 pkti = (struct in6_pktinfo *)invalp; 2620 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2621 ipp->ipp_addr = pkti->ipi6_addr; 2622 if (ipp->ipp_ifindex != 0) 2623 ipp->ipp_fields |= IPPF_IFINDEX; 2624 else 2625 ipp->ipp_fields &= ~IPPF_IFINDEX; 2626 if (!IN6_IS_ADDR_UNSPECIFIED( 2627 &ipp->ipp_addr)) 2628 ipp->ipp_fields |= IPPF_ADDR; 2629 else 2630 ipp->ipp_fields &= ~IPPF_ADDR; 2631 } 2632 if (sticky) { 2633 error = icmp_build_hdrs(icmp); 2634 if (error != 0) 2635 return (error); 2636 } 2637 break; 2638 case IPV6_HOPLIMIT: 2639 /* This option can only be used as ancillary data. 
*/ 2640 if (sticky) 2641 return (EINVAL); 2642 if (inlen != 0 && inlen != sizeof (int)) 2643 return (EINVAL); 2644 if (checkonly) 2645 break; 2646 2647 if (inlen == 0) { 2648 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2649 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2650 } else { 2651 if (*i1 > 255 || *i1 < -1) 2652 return (EINVAL); 2653 if (*i1 == -1) 2654 ipp->ipp_hoplimit = 2655 is->is_ipv6_hoplimit; 2656 else 2657 ipp->ipp_hoplimit = *i1; 2658 ipp->ipp_fields |= IPPF_HOPLIMIT; 2659 } 2660 break; 2661 case IPV6_TCLASS: 2662 /* 2663 * IPV6_RECVTCLASS accepts -1 as use kernel default 2664 * and [0, 255] as the actualy traffic class. 2665 */ 2666 if (inlen != 0 && inlen != sizeof (int)) 2667 return (EINVAL); 2668 if (checkonly) 2669 break; 2670 2671 if (inlen == 0) { 2672 ipp->ipp_fields &= ~IPPF_TCLASS; 2673 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2674 } else { 2675 if (*i1 >= 256 || *i1 < -1) 2676 return (EINVAL); 2677 if (*i1 == -1) { 2678 ipp->ipp_tclass = 2679 IPV6_FLOW_TCLASS( 2680 IPV6_DEFAULT_VERS_AND_FLOW); 2681 } else { 2682 ipp->ipp_tclass = *i1; 2683 } 2684 ipp->ipp_fields |= IPPF_TCLASS; 2685 } 2686 if (sticky) { 2687 error = icmp_build_hdrs(icmp); 2688 if (error != 0) 2689 return (error); 2690 } 2691 break; 2692 case IPV6_NEXTHOP: 2693 /* 2694 * IP will verify that the nexthop is reachable 2695 * and fail for sticky options. 
2696 */ 2697 if (inlen != 0 && inlen != sizeof (sin6_t)) 2698 return (EINVAL); 2699 if (checkonly) 2700 break; 2701 2702 if (inlen == 0) { 2703 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2704 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2705 } else { 2706 sin6_t *sin6 = (sin6_t *)invalp; 2707 2708 if (sin6->sin6_family != AF_INET6) 2709 return (EAFNOSUPPORT); 2710 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 2711 return (EADDRNOTAVAIL); 2712 ipp->ipp_nexthop = sin6->sin6_addr; 2713 if (!IN6_IS_ADDR_UNSPECIFIED( 2714 &ipp->ipp_nexthop)) 2715 ipp->ipp_fields |= IPPF_NEXTHOP; 2716 else 2717 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2718 } 2719 if (sticky) { 2720 error = icmp_build_hdrs(icmp); 2721 if (error != 0) 2722 return (error); 2723 } 2724 break; 2725 case IPV6_HOPOPTS: { 2726 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2727 /* 2728 * Sanity checks - minimum size, size a multiple of 2729 * eight bytes, and matching size passed in. 2730 */ 2731 if (inlen != 0 && 2732 inlen != (8 * (hopts->ip6h_len + 1))) 2733 return (EINVAL); 2734 2735 if (checkonly) 2736 break; 2737 error = optcom_pkt_set(invalp, inlen, sticky, 2738 (uchar_t **)&ipp->ipp_hopopts, 2739 &ipp->ipp_hopoptslen, 2740 sticky ? icmp->icmp_label_len_v6 : 0); 2741 if (error != 0) 2742 return (error); 2743 if (ipp->ipp_hopoptslen == 0) { 2744 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2745 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2746 } else { 2747 ipp->ipp_fields |= IPPF_HOPOPTS; 2748 } 2749 if (sticky) { 2750 error = icmp_build_hdrs(icmp); 2751 if (error != 0) 2752 return (error); 2753 } 2754 break; 2755 } 2756 case IPV6_RTHDRDSTOPTS: { 2757 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2758 2759 /* 2760 * Sanity checks - minimum size, size a multiple of 2761 * eight bytes, and matching size passed in. 
2762 */ 2763 if (inlen != 0 && 2764 inlen != (8 * (dopts->ip6d_len + 1))) 2765 return (EINVAL); 2766 2767 if (checkonly) 2768 break; 2769 2770 if (inlen == 0) { 2771 if (sticky && 2772 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2773 kmem_free(ipp->ipp_rtdstopts, 2774 ipp->ipp_rtdstoptslen); 2775 ipp->ipp_rtdstopts = NULL; 2776 ipp->ipp_rtdstoptslen = 0; 2777 } 2778 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2779 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2780 } else { 2781 error = optcom_pkt_set(invalp, inlen, sticky, 2782 (uchar_t **)&ipp->ipp_rtdstopts, 2783 &ipp->ipp_rtdstoptslen, 0); 2784 if (error != 0) 2785 return (error); 2786 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2787 } 2788 if (sticky) { 2789 error = icmp_build_hdrs(icmp); 2790 if (error != 0) 2791 return (error); 2792 } 2793 break; 2794 } 2795 case IPV6_DSTOPTS: { 2796 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2797 2798 /* 2799 * Sanity checks - minimum size, size a multiple of 2800 * eight bytes, and matching size passed in. 2801 */ 2802 if (inlen != 0 && 2803 inlen != (8 * (dopts->ip6d_len + 1))) 2804 return (EINVAL); 2805 2806 if (checkonly) 2807 break; 2808 2809 if (inlen == 0) { 2810 if (sticky && 2811 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2812 kmem_free(ipp->ipp_dstopts, 2813 ipp->ipp_dstoptslen); 2814 ipp->ipp_dstopts = NULL; 2815 ipp->ipp_dstoptslen = 0; 2816 } 2817 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2818 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2819 } else { 2820 error = optcom_pkt_set(invalp, inlen, sticky, 2821 (uchar_t **)&ipp->ipp_dstopts, 2822 &ipp->ipp_dstoptslen, 0); 2823 if (error != 0) 2824 return (error); 2825 ipp->ipp_fields |= IPPF_DSTOPTS; 2826 } 2827 if (sticky) { 2828 error = icmp_build_hdrs(icmp); 2829 if (error != 0) 2830 return (error); 2831 } 2832 break; 2833 } 2834 case IPV6_RTHDR: { 2835 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2836 2837 /* 2838 * Sanity checks - minimum size, size a multiple of 2839 * eight bytes, and matching size passed in. 
2840 */ 2841 if (inlen != 0 && 2842 inlen != (8 * (rt->ip6r_len + 1))) 2843 return (EINVAL); 2844 2845 if (checkonly) 2846 break; 2847 2848 if (inlen == 0) { 2849 if (sticky && 2850 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2851 kmem_free(ipp->ipp_rthdr, 2852 ipp->ipp_rthdrlen); 2853 ipp->ipp_rthdr = NULL; 2854 ipp->ipp_rthdrlen = 0; 2855 } 2856 ipp->ipp_fields &= ~IPPF_RTHDR; 2857 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2858 } else { 2859 error = optcom_pkt_set(invalp, inlen, sticky, 2860 (uchar_t **)&ipp->ipp_rthdr, 2861 &ipp->ipp_rthdrlen, 0); 2862 if (error != 0) 2863 return (error); 2864 ipp->ipp_fields |= IPPF_RTHDR; 2865 } 2866 if (sticky) { 2867 error = icmp_build_hdrs(icmp); 2868 if (error != 0) 2869 return (error); 2870 } 2871 break; 2872 } 2873 2874 case IPV6_DONTFRAG: 2875 if (checkonly) 2876 break; 2877 2878 if (onoff) { 2879 ipp->ipp_fields |= IPPF_DONTFRAG; 2880 } else { 2881 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2882 } 2883 break; 2884 2885 case IPV6_USE_MIN_MTU: 2886 if (inlen != sizeof (int)) 2887 return (EINVAL); 2888 2889 if (*i1 < -1 || *i1 > 1) 2890 return (EINVAL); 2891 2892 if (checkonly) 2893 break; 2894 2895 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 2896 ipp->ipp_use_min_mtu = *i1; 2897 break; 2898 2899 /* 2900 * This option can't be set. Its only returned via 2901 * getsockopt() or ancillary data. 2902 */ 2903 case IPV6_PATHMTU: 2904 return (EINVAL); 2905 2906 case IPV6_BOUND_PIF: 2907 case IPV6_SEC_OPT: 2908 case IPV6_DONTFAILOVER_IF: 2909 case IPV6_SRC_PREFERENCES: 2910 case IPV6_V6ONLY: 2911 /* Handled at IP level */ 2912 return (-EINVAL); 2913 default: 2914 *outlenp = 0; 2915 return (EINVAL); 2916 } 2917 break; 2918 } /* end IPPROTO_IPV6 */ 2919 2920 case IPPROTO_ICMPV6: 2921 /* 2922 * Only allow IPv6 option processing on IPv6 sockets. 
2923 */ 2924 if (icmp->icmp_family != AF_INET6) { 2925 *outlenp = 0; 2926 return (ENOPROTOOPT); 2927 } 2928 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2929 *outlenp = 0; 2930 return (ENOPROTOOPT); 2931 } 2932 switch (name) { 2933 case ICMP6_FILTER: 2934 if (!checkonly) { 2935 if ((inlen != 0) && 2936 (inlen != sizeof (icmp6_filter_t))) 2937 return (EINVAL); 2938 2939 if (inlen == 0) { 2940 if (icmp->icmp_filter != NULL) { 2941 kmem_free(icmp->icmp_filter, 2942 sizeof (icmp6_filter_t)); 2943 icmp->icmp_filter = NULL; 2944 } 2945 } else { 2946 if (icmp->icmp_filter == NULL) { 2947 icmp->icmp_filter = kmem_alloc( 2948 sizeof (icmp6_filter_t), 2949 KM_NOSLEEP); 2950 if (icmp->icmp_filter == NULL) { 2951 *outlenp = 0; 2952 return (ENOBUFS); 2953 } 2954 } 2955 (void) bcopy(invalp, icmp->icmp_filter, 2956 inlen); 2957 } 2958 } 2959 break; 2960 2961 default: 2962 *outlenp = 0; 2963 return (EINVAL); 2964 } 2965 break; 2966 default: 2967 *outlenp = 0; 2968 return (EINVAL); 2969 } 2970 /* 2971 * Common case of OK return with outval same as inval. 2972 */ 2973 if (invalp != outvalp) { 2974 /* don't trust bcopy for identical src/dst */ 2975 (void) bcopy(invalp, outvalp, inlen); 2976 } 2977 *outlenp = inlen; 2978 return (0); 2979 } 2980 /* This routine sets socket options. */ 2981 /* ARGSUSED */ 2982 int 2983 icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, 2984 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2985 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2986 { 2987 icmp_t *icmp; 2988 int err; 2989 2990 icmp = Q_TO_ICMP(q); 2991 2992 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2993 err = icmp_opt_set_locked(q, optset_context, level, name, inlen, invalp, 2994 outlenp, outvalp, thisdg_attrs, cr, mblk); 2995 rw_exit(&icmp->icmp_rwlock); 2996 return (err); 2997 } 2998 2999 /* 3000 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 3001 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 
3002 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 3003 * headers. 3004 * Returns failure if can't allocate memory. 3005 */ 3006 static int 3007 icmp_build_hdrs(icmp_t *icmp) 3008 { 3009 icmp_stack_t *is = icmp->icmp_is; 3010 uchar_t *hdrs; 3011 uint_t hdrs_len; 3012 ip6_t *ip6h; 3013 ip6i_t *ip6i; 3014 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 3015 3016 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 3017 hdrs_len = ip_total_hdrs_len_v6(ipp); 3018 ASSERT(hdrs_len != 0); 3019 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 3020 /* Need to reallocate */ 3021 if (hdrs_len != 0) { 3022 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3023 if (hdrs == NULL) 3024 return (ENOMEM); 3025 } else { 3026 hdrs = NULL; 3027 } 3028 if (icmp->icmp_sticky_hdrs_len != 0) { 3029 kmem_free(icmp->icmp_sticky_hdrs, 3030 icmp->icmp_sticky_hdrs_len); 3031 } 3032 icmp->icmp_sticky_hdrs = hdrs; 3033 icmp->icmp_sticky_hdrs_len = hdrs_len; 3034 } 3035 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 3036 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 3037 3038 /* Set header fields not in ipp */ 3039 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3040 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 3041 ip6h = (ip6_t *)&ip6i[1]; 3042 3043 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 3044 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 3045 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 3046 } 3047 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 3048 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 3049 } 3050 } else { 3051 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 3052 } 3053 3054 if (!(ipp->ipp_fields & IPPF_ADDR)) 3055 ip6h->ip6_src = icmp->icmp_v6src; 3056 3057 /* Try to get everything in a single mblk */ 3058 if (hdrs_len > icmp->icmp_max_hdr_len) { 3059 icmp->icmp_max_hdr_len = hdrs_len; 3060 rw_exit(&icmp->icmp_rwlock); 3061 (void) mi_set_sth_wroff(icmp->icmp_connp->conn_rq, 3062 icmp->icmp_max_hdr_len + is->is_wroff_extra); 3063 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3064 } 3065 return (0); 3066 } 3067 3068 /* 3069 * 
This routine retrieves the value of an ND variable in a icmpparam_t 3070 * structure. It is called through nd_getset when a user reads the 3071 * variable. 3072 */ 3073 /* ARGSUSED */ 3074 static int 3075 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3076 { 3077 icmpparam_t *icmppa = (icmpparam_t *)cp; 3078 3079 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 3080 return (0); 3081 } 3082 3083 /* 3084 * Walk through the param array specified registering each element with the 3085 * named dispatch (ND) handler. 3086 */ 3087 static boolean_t 3088 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) 3089 { 3090 for (; cnt-- > 0; icmppa++) { 3091 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 3092 if (!nd_load(ndp, icmppa->icmp_param_name, 3093 icmp_param_get, icmp_param_set, 3094 (caddr_t)icmppa)) { 3095 nd_free(ndp); 3096 return (B_FALSE); 3097 } 3098 } 3099 } 3100 if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL, 3101 NULL)) { 3102 nd_free(ndp); 3103 return (B_FALSE); 3104 } 3105 return (B_TRUE); 3106 } 3107 3108 /* This routine sets an ND variable in a icmpparam_t structure. */ 3109 /* ARGSUSED */ 3110 static int 3111 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3112 { 3113 long new_value; 3114 icmpparam_t *icmppa = (icmpparam_t *)cp; 3115 3116 /* 3117 * Fail the request if the new value does not lie within the 3118 * required bounds. 
3119 */ 3120 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3121 new_value < icmppa->icmp_param_min || 3122 new_value > icmppa->icmp_param_max) { 3123 return (EINVAL); 3124 } 3125 /* Set the new value */ 3126 icmppa->icmp_param_value = new_value; 3127 return (0); 3128 } 3129 /*ARGSUSED2*/ 3130 static void 3131 icmp_input(void *arg1, mblk_t *mp, void *arg2) 3132 { 3133 conn_t *connp = (conn_t *)arg1; 3134 struct T_unitdata_ind *tudi; 3135 uchar_t *rptr; 3136 icmp_t *icmp; 3137 icmp_stack_t *is; 3138 sin_t *sin; 3139 sin6_t *sin6; 3140 ip6_t *ip6h; 3141 ip6i_t *ip6i; 3142 mblk_t *mp1; 3143 int hdr_len; 3144 ipha_t *ipha; 3145 int udi_size; /* Size of T_unitdata_ind */ 3146 uint_t ipvers; 3147 ip6_pkt_t ipp; 3148 uint8_t nexthdr; 3149 ip_pktinfo_t *pinfo = NULL; 3150 mblk_t *options_mp = NULL; 3151 uint_t icmp_opt = 0; 3152 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 3153 uint_t hopstrip; 3154 3155 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 3156 3157 icmp = connp->conn_icmp; 3158 is = icmp->icmp_is; 3159 rptr = mp->b_rptr; 3160 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3161 ASSERT(OK_32PTR(rptr)); 3162 3163 /* 3164 * IP should have prepended the options data in an M_CTL 3165 * Check M_CTL "type" to make sure are not here bcos of 3166 * a valid ICMP message 3167 */ 3168 if (DB_TYPE(mp) == M_CTL) { 3169 /* 3170 * FIXME: does IP still do this? 3171 * IP sends up the IPSEC_IN message for handling IPSEC 3172 * policy at the TCP level. We don't need it here. 3173 */ 3174 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 3175 mp1 = mp->b_cont; 3176 freeb(mp); 3177 mp = mp1; 3178 rptr = mp->b_rptr; 3179 } else if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3180 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3181 IN_PKTINFO) { 3182 /* 3183 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3184 * has been prepended to the packet by IP. 
We need to 3185 * extract the mblk and adjust the rptr 3186 */ 3187 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3188 options_mp = mp; 3189 mp = mp->b_cont; 3190 rptr = mp->b_rptr; 3191 } else { 3192 /* 3193 * ICMP messages. 3194 */ 3195 icmp_icmp_error(connp->conn_rq, mp); 3196 return; 3197 } 3198 } 3199 3200 /* 3201 * Discard message if it is misaligned or smaller than the IP header. 3202 */ 3203 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3204 freemsg(mp); 3205 if (options_mp != NULL) 3206 freeb(options_mp); 3207 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3208 return; 3209 } 3210 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3211 3212 /* Handle M_DATA messages containing IP packets messages */ 3213 if (ipvers == IPV4_VERSION) { 3214 /* 3215 * Special case where IP attaches 3216 * the IRE needs to be handled so that we don't send up 3217 * IRE to the user land. 3218 */ 3219 ipha = (ipha_t *)rptr; 3220 hdr_len = IPH_HDR_LENGTH(ipha); 3221 3222 if (ipha->ipha_protocol == IPPROTO_TCP) { 3223 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3224 3225 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3226 TH_SYN) && mp->b_cont != NULL) { 3227 mp1 = mp->b_cont; 3228 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3229 freeb(mp1); 3230 mp->b_cont = NULL; 3231 } 3232 } 3233 } 3234 if (is->is_bsd_compat) { 3235 ushort_t len; 3236 len = ntohs(ipha->ipha_length); 3237 3238 if (mp->b_datap->db_ref > 1) { 3239 /* 3240 * Allocate a new IP header so that we can 3241 * modify ipha_length. 
3242 */ 3243 mblk_t *mp1; 3244 3245 mp1 = allocb(hdr_len, BPRI_MED); 3246 if (!mp1) { 3247 freemsg(mp); 3248 if (options_mp != NULL) 3249 freeb(options_mp); 3250 BUMP_MIB(&is->is_rawip_mib, 3251 rawipInErrors); 3252 return; 3253 } 3254 bcopy(rptr, mp1->b_rptr, hdr_len); 3255 mp->b_rptr = rptr + hdr_len; 3256 rptr = mp1->b_rptr; 3257 ipha = (ipha_t *)rptr; 3258 mp1->b_cont = mp; 3259 mp1->b_wptr = rptr + hdr_len; 3260 mp = mp1; 3261 } 3262 len -= hdr_len; 3263 ipha->ipha_length = htons(len); 3264 } 3265 } 3266 3267 /* 3268 * This is the inbound data path. Packets are passed upstream as 3269 * T_UNITDATA_IND messages with full IP headers still attached. 3270 */ 3271 if (icmp->icmp_family == AF_INET) { 3272 ASSERT(ipvers == IPV4_VERSION); 3273 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3274 if (icmp->icmp_recvif && (pinfo != NULL) && 3275 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3276 udi_size += sizeof (struct T_opthdr) + 3277 sizeof (uint_t); 3278 } 3279 3280 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3281 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3282 udi_size += sizeof (struct T_opthdr) + 3283 sizeof (struct in_pktinfo); 3284 } 3285 3286 /* 3287 * If SO_TIMESTAMP is set allocate the appropriate sized 3288 * buffer. Since gethrestime() expects a pointer aligned 3289 * argument, we allocate space necessary for extra 3290 * alignment (even though it might not be used). 
3291 */ 3292 if (icmp->icmp_timestamp) { 3293 udi_size += sizeof (struct T_opthdr) + 3294 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3295 } 3296 mp1 = allocb(udi_size, BPRI_MED); 3297 if (mp1 == NULL) { 3298 freemsg(mp); 3299 if (options_mp != NULL) 3300 freeb(options_mp); 3301 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3302 return; 3303 } 3304 mp1->b_cont = mp; 3305 mp = mp1; 3306 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3307 mp->b_datap->db_type = M_PROTO; 3308 mp->b_wptr = (uchar_t *)tudi + udi_size; 3309 tudi->PRIM_type = T_UNITDATA_IND; 3310 tudi->SRC_length = sizeof (sin_t); 3311 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3312 sin = (sin_t *)&tudi[1]; 3313 *sin = sin_null; 3314 sin->sin_family = AF_INET; 3315 sin->sin_addr.s_addr = ipha->ipha_src; 3316 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3317 sizeof (sin_t); 3318 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3319 tudi->OPT_length = udi_size; 3320 3321 /* 3322 * Add options if IP_RECVIF is set 3323 */ 3324 if (udi_size != 0) { 3325 char *dstopt; 3326 3327 dstopt = (char *)&sin[1]; 3328 if (icmp->icmp_recvif && (pinfo != NULL) && 3329 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3330 3331 struct T_opthdr *toh; 3332 uint_t *dstptr; 3333 3334 toh = (struct T_opthdr *)dstopt; 3335 toh->level = IPPROTO_IP; 3336 toh->name = IP_RECVIF; 3337 toh->len = sizeof (struct T_opthdr) + 3338 sizeof (uint_t); 3339 toh->status = 0; 3340 dstopt += sizeof (struct T_opthdr); 3341 dstptr = (uint_t *)dstopt; 3342 *dstptr = pinfo->ip_pkt_ifindex; 3343 dstopt += sizeof (uint_t); 3344 udi_size -= toh->len; 3345 } 3346 if (icmp->icmp_timestamp) { 3347 struct T_opthdr *toh; 3348 3349 toh = (struct T_opthdr *)dstopt; 3350 toh->level = SOL_SOCKET; 3351 toh->name = SCM_TIMESTAMP; 3352 toh->len = sizeof (struct T_opthdr) + 3353 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3354 toh->status = 0; 3355 dstopt += sizeof (struct T_opthdr); 3356 /* Align for gethrestime() */ 3357 dstopt = (char 
*)P2ROUNDUP((intptr_t)dstopt, 3358 sizeof (intptr_t)); 3359 gethrestime((timestruc_t *)dstopt); 3360 dstopt = (char *)toh + toh->len; 3361 udi_size -= toh->len; 3362 } 3363 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3364 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3365 struct T_opthdr *toh; 3366 struct in_pktinfo *pktinfop; 3367 3368 toh = (struct T_opthdr *)dstopt; 3369 toh->level = IPPROTO_IP; 3370 toh->name = IP_PKTINFO; 3371 toh->len = sizeof (struct T_opthdr) + 3372 sizeof (in_pktinfo_t); 3373 toh->status = 0; 3374 dstopt += sizeof (struct T_opthdr); 3375 pktinfop = (struct in_pktinfo *)dstopt; 3376 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3377 pktinfop->ipi_spec_dst = 3378 pinfo->ip_pkt_match_addr; 3379 3380 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3381 3382 dstopt += sizeof (struct in_pktinfo); 3383 udi_size -= toh->len; 3384 } 3385 3386 /* Consumed all of allocated space */ 3387 ASSERT(udi_size == 0); 3388 } 3389 3390 if (options_mp != NULL) 3391 freeb(options_mp); 3392 3393 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3394 putnext(connp->conn_rq, mp); 3395 return; 3396 } 3397 3398 /* 3399 * We don't need options_mp in the IPv6 path. 3400 */ 3401 if (options_mp != NULL) { 3402 freeb(options_mp); 3403 options_mp = NULL; 3404 } 3405 3406 /* 3407 * Discard message if it is smaller than the IPv6 header 3408 * or if the header is malformed. 3409 */ 3410 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3411 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3412 icmp->icmp_family != AF_INET6) { 3413 freemsg(mp); 3414 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3415 return; 3416 } 3417 3418 /* Initialize */ 3419 ipp.ipp_fields = 0; 3420 hopstrip = 0; 3421 3422 ip6h = (ip6_t *)rptr; 3423 /* 3424 * Call on ip_find_hdr_v6 which gets the total hdr len 3425 * as well as individual lenghts of ext hdrs (and ptrs to 3426 * them). 
3427 */ 3428 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3429 /* Look for ifindex information */ 3430 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3431 ip6i = (ip6i_t *)ip6h; 3432 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3433 ASSERT(ip6i->ip6i_ifindex != 0); 3434 ipp.ipp_fields |= IPPF_IFINDEX; 3435 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3436 } 3437 rptr = (uchar_t *)&ip6i[1]; 3438 mp->b_rptr = rptr; 3439 if (rptr == mp->b_wptr) { 3440 mp1 = mp->b_cont; 3441 freeb(mp); 3442 mp = mp1; 3443 rptr = mp->b_rptr; 3444 } 3445 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3446 ip6h = (ip6_t *)rptr; 3447 } 3448 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3449 3450 /* 3451 * We need to lie a bit to the user because users inside 3452 * labeled compartments should not see their own labels. We 3453 * assume that in all other respects IP has checked the label, 3454 * and that the label is always first among the options. (If 3455 * it's not first, then this code won't see it, and the option 3456 * will be passed along to the user.) 3457 * 3458 * If we had multilevel ICMP sockets, then the following code 3459 * should be skipped for them to allow the user to see the 3460 * label. 3461 * 3462 * Alignment restrictions in the definition of IP options 3463 * (namely, the requirement that the 4-octet DOI goes on a 3464 * 4-octet boundary) mean that we know exactly where the option 3465 * should start, but we're lenient for other hosts. 3466 * 3467 * Note that there are no multilevel ICMP or raw IP sockets 3468 * yet, thus nobody ever sees the IP6OPT_LS option. 
3469 */ 3470 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3471 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3472 const uchar_t *ucp = 3473 (const uchar_t *)ipp.ipp_hopopts + 2; 3474 int remlen = ipp.ipp_hopoptslen - 2; 3475 3476 while (remlen > 0) { 3477 if (*ucp == IP6OPT_PAD1) { 3478 remlen--; 3479 ucp++; 3480 } else if (*ucp == IP6OPT_PADN) { 3481 remlen -= ucp[1] + 2; 3482 ucp += ucp[1] + 2; 3483 } else if (*ucp == ip6opt_ls) { 3484 hopstrip = (ucp - 3485 (const uchar_t *)ipp.ipp_hopopts) + 3486 ucp[1] + 2; 3487 hopstrip = (hopstrip + 7) & ~7; 3488 break; 3489 } else { 3490 /* label option must be first */ 3491 break; 3492 } 3493 } 3494 } 3495 } else { 3496 hdr_len = IPV6_HDR_LEN; 3497 ip6i = NULL; 3498 nexthdr = ip6h->ip6_nxt; 3499 } 3500 /* 3501 * One special case where IP attaches the IRE needs to 3502 * be handled so that we don't send up IRE to the user land. 3503 */ 3504 if (nexthdr == IPPROTO_TCP) { 3505 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3506 3507 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3508 mp->b_cont != NULL) { 3509 mp1 = mp->b_cont; 3510 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3511 freeb(mp1); 3512 mp->b_cont = NULL; 3513 } 3514 } 3515 } 3516 /* 3517 * Check a filter for ICMPv6 types if needed. 3518 * Verify raw checksums if needed. 
3519 */ 3520 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3521 if (icmp->icmp_filter != NULL) { 3522 int type; 3523 3524 /* Assumes that IP has done the pullupmsg */ 3525 type = mp->b_rptr[hdr_len]; 3526 3527 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3528 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3529 freemsg(mp); 3530 return; 3531 } 3532 } else { 3533 /* Checksum */ 3534 uint16_t *up; 3535 uint32_t sum; 3536 int remlen; 3537 3538 up = (uint16_t *)&ip6h->ip6_src; 3539 3540 remlen = msgdsize(mp) - hdr_len; 3541 sum = htons(icmp->icmp_proto + remlen) 3542 + up[0] + up[1] + up[2] + up[3] 3543 + up[4] + up[5] + up[6] + up[7] 3544 + up[8] + up[9] + up[10] + up[11] 3545 + up[12] + up[13] + up[14] + up[15]; 3546 sum = (sum & 0xffff) + (sum >> 16); 3547 sum = IP_CSUM(mp, hdr_len, sum); 3548 if (sum != 0) { 3549 /* IPv6 RAW checksum failed */ 3550 ip0dbg(("icmp_rput: RAW checksum " 3551 "failed %x\n", sum)); 3552 freemsg(mp); 3553 BUMP_MIB(&is->is_rawip_mib, 3554 rawipInCksumErrs); 3555 return; 3556 } 3557 } 3558 } 3559 /* Skip all the IPv6 headers per API */ 3560 mp->b_rptr += hdr_len; 3561 3562 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3563 3564 /* 3565 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3566 * maintain state information, instead of relying on icmp_t 3567 * structure, since there arent any locks protecting these members 3568 * and there is a window where there might be a race between a 3569 * thread setting options on the write side and a thread reading 3570 * these options on the read size. 
3571 */ 3572 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3573 IPPF_RTHDR|IPPF_IFINDEX)) { 3574 if (icmp->icmp_ipv6_recvhopopts && 3575 (ipp.ipp_fields & IPPF_HOPOPTS) && 3576 ipp.ipp_hopoptslen > hopstrip) { 3577 udi_size += sizeof (struct T_opthdr) + 3578 ipp.ipp_hopoptslen - hopstrip; 3579 icmp_opt |= IPPF_HOPOPTS; 3580 } 3581 if ((icmp->icmp_ipv6_recvdstopts || 3582 icmp->icmp_old_ipv6_recvdstopts) && 3583 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3584 udi_size += sizeof (struct T_opthdr) + 3585 ipp.ipp_dstoptslen; 3586 icmp_opt |= IPPF_DSTOPTS; 3587 } 3588 if (((icmp->icmp_ipv6_recvdstopts && 3589 icmp->icmp_ipv6_recvrthdr && 3590 (ipp.ipp_fields & IPPF_RTHDR)) || 3591 icmp->icmp_ipv6_recvrtdstopts) && 3592 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3593 udi_size += sizeof (struct T_opthdr) + 3594 ipp.ipp_rtdstoptslen; 3595 icmp_opt |= IPPF_RTDSTOPTS; 3596 } 3597 if (icmp->icmp_ipv6_recvrthdr && 3598 (ipp.ipp_fields & IPPF_RTHDR)) { 3599 udi_size += sizeof (struct T_opthdr) + 3600 ipp.ipp_rthdrlen; 3601 icmp_opt |= IPPF_RTHDR; 3602 } 3603 if (icmp->icmp_ip_recvpktinfo && 3604 (ipp.ipp_fields & IPPF_IFINDEX)) { 3605 udi_size += sizeof (struct T_opthdr) + 3606 sizeof (struct in6_pktinfo); 3607 icmp_opt |= IPPF_IFINDEX; 3608 } 3609 } 3610 if (icmp->icmp_ipv6_recvhoplimit) { 3611 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3612 icmp_ipv6_recvhoplimit = B_TRUE; 3613 } 3614 3615 if (icmp->icmp_ipv6_recvtclass) 3616 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3617 3618 /* 3619 * If SO_TIMESTAMP is set allocate the appropriate sized 3620 * buffer. Since gethrestime() expects a pointer aligned 3621 * argument, we allocate space necessary for extra 3622 * alignment (even though it might not be used). 
3623 */ 3624 if (icmp->icmp_timestamp) { 3625 udi_size += sizeof (struct T_opthdr) + 3626 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3627 } 3628 3629 mp1 = allocb(udi_size, BPRI_MED); 3630 if (mp1 == NULL) { 3631 freemsg(mp); 3632 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3633 return; 3634 } 3635 mp1->b_cont = mp; 3636 mp = mp1; 3637 mp->b_datap->db_type = M_PROTO; 3638 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3639 mp->b_wptr = (uchar_t *)tudi + udi_size; 3640 tudi->PRIM_type = T_UNITDATA_IND; 3641 tudi->SRC_length = sizeof (sin6_t); 3642 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3643 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3644 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3645 tudi->OPT_length = udi_size; 3646 sin6 = (sin6_t *)&tudi[1]; 3647 sin6->sin6_port = 0; 3648 sin6->sin6_family = AF_INET6; 3649 3650 sin6->sin6_addr = ip6h->ip6_src; 3651 /* No sin6_flowinfo per API */ 3652 sin6->sin6_flowinfo = 0; 3653 /* For link-scope source pass up scope id */ 3654 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3655 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3656 sin6->sin6_scope_id = ipp.ipp_ifindex; 3657 else 3658 sin6->sin6_scope_id = 0; 3659 3660 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3661 icmp->icmp_zoneid, is->is_netstack); 3662 3663 if (udi_size != 0) { 3664 uchar_t *dstopt; 3665 3666 dstopt = (uchar_t *)&sin6[1]; 3667 if (icmp_opt & IPPF_IFINDEX) { 3668 struct T_opthdr *toh; 3669 struct in6_pktinfo *pkti; 3670 3671 toh = (struct T_opthdr *)dstopt; 3672 toh->level = IPPROTO_IPV6; 3673 toh->name = IPV6_PKTINFO; 3674 toh->len = sizeof (struct T_opthdr) + 3675 sizeof (*pkti); 3676 toh->status = 0; 3677 dstopt += sizeof (struct T_opthdr); 3678 pkti = (struct in6_pktinfo *)dstopt; 3679 pkti->ipi6_addr = ip6h->ip6_dst; 3680 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3681 dstopt += sizeof (*pkti); 3682 udi_size -= toh->len; 3683 } 3684 if (icmp_ipv6_recvhoplimit) { 3685 struct T_opthdr *toh; 3686 3687 toh = 
(struct T_opthdr *)dstopt; 3688 toh->level = IPPROTO_IPV6; 3689 toh->name = IPV6_HOPLIMIT; 3690 toh->len = sizeof (struct T_opthdr) + 3691 sizeof (uint_t); 3692 toh->status = 0; 3693 dstopt += sizeof (struct T_opthdr); 3694 *(uint_t *)dstopt = ip6h->ip6_hops; 3695 dstopt += sizeof (uint_t); 3696 udi_size -= toh->len; 3697 } 3698 if (icmp->icmp_ipv6_recvtclass) { 3699 struct T_opthdr *toh; 3700 3701 toh = (struct T_opthdr *)dstopt; 3702 toh->level = IPPROTO_IPV6; 3703 toh->name = IPV6_TCLASS; 3704 toh->len = sizeof (struct T_opthdr) + 3705 sizeof (uint_t); 3706 toh->status = 0; 3707 dstopt += sizeof (struct T_opthdr); 3708 *(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3709 dstopt += sizeof (uint_t); 3710 udi_size -= toh->len; 3711 } 3712 if (icmp->icmp_timestamp) { 3713 struct T_opthdr *toh; 3714 3715 toh = (struct T_opthdr *)dstopt; 3716 toh->level = SOL_SOCKET; 3717 toh->name = SCM_TIMESTAMP; 3718 toh->len = sizeof (struct T_opthdr) + 3719 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3720 toh->status = 0; 3721 dstopt += sizeof (struct T_opthdr); 3722 /* Align for gethrestime() */ 3723 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 3724 sizeof (intptr_t)); 3725 gethrestime((timestruc_t *)dstopt); 3726 dstopt = (uchar_t *)toh + toh->len; 3727 udi_size -= toh->len; 3728 } 3729 if (icmp_opt & IPPF_HOPOPTS) { 3730 struct T_opthdr *toh; 3731 3732 toh = (struct T_opthdr *)dstopt; 3733 toh->level = IPPROTO_IPV6; 3734 toh->name = IPV6_HOPOPTS; 3735 toh->len = sizeof (struct T_opthdr) + 3736 ipp.ipp_hopoptslen - hopstrip; 3737 toh->status = 0; 3738 dstopt += sizeof (struct T_opthdr); 3739 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 3740 ipp.ipp_hopoptslen - hopstrip); 3741 if (hopstrip > 0) { 3742 /* copy next header value and fake length */ 3743 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 3744 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 3745 hopstrip / 8; 3746 } 3747 dstopt += ipp.ipp_hopoptslen - hopstrip; 3748 udi_size -= toh->len; 3749 } 3750 if 
(icmp_opt & IPPF_RTDSTOPTS) { 3751 struct T_opthdr *toh; 3752 3753 toh = (struct T_opthdr *)dstopt; 3754 toh->level = IPPROTO_IPV6; 3755 toh->name = IPV6_DSTOPTS; 3756 toh->len = sizeof (struct T_opthdr) + 3757 ipp.ipp_rtdstoptslen; 3758 toh->status = 0; 3759 dstopt += sizeof (struct T_opthdr); 3760 bcopy(ipp.ipp_rtdstopts, dstopt, 3761 ipp.ipp_rtdstoptslen); 3762 dstopt += ipp.ipp_rtdstoptslen; 3763 udi_size -= toh->len; 3764 } 3765 if (icmp_opt & IPPF_RTHDR) { 3766 struct T_opthdr *toh; 3767 3768 toh = (struct T_opthdr *)dstopt; 3769 toh->level = IPPROTO_IPV6; 3770 toh->name = IPV6_RTHDR; 3771 toh->len = sizeof (struct T_opthdr) + 3772 ipp.ipp_rthdrlen; 3773 toh->status = 0; 3774 dstopt += sizeof (struct T_opthdr); 3775 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 3776 dstopt += ipp.ipp_rthdrlen; 3777 udi_size -= toh->len; 3778 } 3779 if (icmp_opt & IPPF_DSTOPTS) { 3780 struct T_opthdr *toh; 3781 3782 toh = (struct T_opthdr *)dstopt; 3783 toh->level = IPPROTO_IPV6; 3784 toh->name = IPV6_DSTOPTS; 3785 toh->len = sizeof (struct T_opthdr) + 3786 ipp.ipp_dstoptslen; 3787 toh->status = 0; 3788 dstopt += sizeof (struct T_opthdr); 3789 bcopy(ipp.ipp_dstopts, dstopt, 3790 ipp.ipp_dstoptslen); 3791 dstopt += ipp.ipp_dstoptslen; 3792 udi_size -= toh->len; 3793 } 3794 /* Consumed all of allocated space */ 3795 ASSERT(udi_size == 0); 3796 } 3797 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3798 putnext(connp->conn_rq, mp); 3799 } 3800 3801 /* 3802 * Handle the results of a T_BIND_REQ whether deferred by IP or handled 3803 * immediately. 3804 */ 3805 static void 3806 icmp_bind_result(conn_t *connp, mblk_t *mp) 3807 { 3808 struct T_error_ack *tea; 3809 3810 switch (mp->b_datap->db_type) { 3811 case M_PROTO: 3812 case M_PCPROTO: 3813 /* M_PROTO messages contain some type of TPI message. 
*/ 3814 if ((mp->b_wptr - mp->b_rptr) < sizeof (t_scalar_t)) { 3815 freemsg(mp); 3816 return; 3817 } 3818 tea = (struct T_error_ack *)mp->b_rptr; 3819 3820 switch (tea->PRIM_type) { 3821 case T_ERROR_ACK: 3822 switch (tea->ERROR_prim) { 3823 case O_T_BIND_REQ: 3824 case T_BIND_REQ: 3825 icmp_bind_error(connp, mp); 3826 return; 3827 default: 3828 break; 3829 } 3830 ASSERT(0); 3831 freemsg(mp); 3832 return; 3833 3834 case T_BIND_ACK: 3835 icmp_bind_ack(connp, mp); 3836 return; 3837 3838 default: 3839 break; 3840 } 3841 freemsg(mp); 3842 return; 3843 default: 3844 /* FIXME: other cases? */ 3845 ASSERT(0); 3846 freemsg(mp); 3847 return; 3848 } 3849 } 3850 3851 /* 3852 * Process a T_BIND_ACK 3853 */ 3854 static void 3855 icmp_bind_ack(conn_t *connp, mblk_t *mp) 3856 { 3857 icmp_t *icmp = connp->conn_icmp; 3858 mblk_t *mp1; 3859 ire_t *ire; 3860 struct T_bind_ack *tba; 3861 uchar_t *addrp; 3862 ipa_conn_t *ac; 3863 ipa6_conn_t *ac6; 3864 3865 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3866 /* 3867 * We know if headers are included or not so we can 3868 * safely do this. 3869 */ 3870 if (icmp->icmp_state == TS_UNBND) { 3871 /* 3872 * TPI has not yet bound - bind sent by 3873 * icmp_bind_proto. 3874 */ 3875 freemsg(mp); 3876 rw_exit(&icmp->icmp_rwlock); 3877 return; 3878 } 3879 ASSERT(icmp->icmp_pending_op != -1); 3880 3881 /* 3882 * If a broadcast/multicast address was bound set 3883 * the source address to 0. 3884 * This ensures no datagrams with broadcast address 3885 * as source address are emitted (which would violate 3886 * RFC1122 - Hosts requirements) 3887 * 3888 * Note that when connecting the returned IRE is 3889 * for the destination address and we only perform 3890 * the broadcast check for the source address (it 3891 * is OK to connect to a broadcast/multicast address.) 
3892 */ 3893 mp1 = mp->b_cont; 3894 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 3895 ire = (ire_t *)mp1->b_rptr; 3896 3897 /* 3898 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 3899 * local address. 3900 */ 3901 if (ire->ire_type == IRE_BROADCAST && 3902 icmp->icmp_state != TS_DATA_XFER) { 3903 ASSERT(icmp->icmp_pending_op == T_BIND_REQ || 3904 icmp->icmp_pending_op == O_T_BIND_REQ); 3905 /* This was just a local bind to a MC/broadcast addr */ 3906 V6_SET_ZERO(icmp->icmp_v6src); 3907 if (icmp->icmp_family == AF_INET6) 3908 (void) icmp_build_hdrs(icmp); 3909 } else if (V6_OR_V4_INADDR_ANY(icmp->icmp_v6src)) { 3910 /* 3911 * Local address not yet set - pick it from the 3912 * T_bind_ack 3913 */ 3914 tba = (struct T_bind_ack *)mp->b_rptr; 3915 addrp = &mp->b_rptr[tba->ADDR_offset]; 3916 switch (icmp->icmp_family) { 3917 case AF_INET: 3918 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 3919 ac = (ipa_conn_t *)addrp; 3920 } else { 3921 ASSERT(tba->ADDR_length == 3922 sizeof (ipa_conn_x_t)); 3923 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 3924 } 3925 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 3926 &icmp->icmp_v6src); 3927 break; 3928 case AF_INET6: 3929 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 3930 ac6 = (ipa6_conn_t *)addrp; 3931 } else { 3932 ASSERT(tba->ADDR_length == 3933 sizeof (ipa6_conn_x_t)); 3934 ac6 = &((ipa6_conn_x_t *) 3935 addrp)->ac6x_conn; 3936 } 3937 icmp->icmp_v6src = ac6->ac6_laddr; 3938 (void) icmp_build_hdrs(icmp); 3939 } 3940 } 3941 mp1 = mp1->b_cont; 3942 } 3943 icmp->icmp_pending_op = -1; 3944 rw_exit(&icmp->icmp_rwlock); 3945 /* 3946 * Look for one or more appended ACK message added by 3947 * icmp_connect or icmp_disconnect. 3948 * If none found just send up the T_BIND_ACK. 3949 * icmp_connect has appended a T_OK_ACK and a 3950 * T_CONN_CON. 3951 * icmp_disconnect has appended a T_OK_ACK. 
3952 */ 3953 if (mp1 != NULL) { 3954 if (mp->b_cont == mp1) 3955 mp->b_cont = NULL; 3956 else { 3957 ASSERT(mp->b_cont->b_cont == mp1); 3958 mp->b_cont->b_cont = NULL; 3959 } 3960 freemsg(mp); 3961 mp = mp1; 3962 while (mp != NULL) { 3963 mp1 = mp->b_cont; 3964 mp->b_cont = NULL; 3965 putnext(connp->conn_rq, mp); 3966 mp = mp1; 3967 } 3968 return; 3969 } 3970 freemsg(mp->b_cont); 3971 mp->b_cont = NULL; 3972 putnext(connp->conn_rq, mp); 3973 } 3974 3975 static void 3976 icmp_bind_error(conn_t *connp, mblk_t *mp) 3977 { 3978 icmp_t *icmp = connp->conn_icmp; 3979 struct T_error_ack *tea; 3980 3981 tea = (struct T_error_ack *)mp->b_rptr; 3982 /* 3983 * If our O_T_BIND_REQ/T_BIND_REQ fails, 3984 * clear out the source address before 3985 * passing the message upstream. 3986 * If this was caused by a T_CONN_REQ 3987 * revert back to bound state. 3988 */ 3989 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3990 if (icmp->icmp_state == TS_UNBND) { 3991 /* 3992 * TPI has not yet bound - bind sent by icmp_bind_proto. 
3993 */ 3994 freemsg(mp); 3995 rw_exit(&icmp->icmp_rwlock); 3996 return; 3997 } 3998 ASSERT(icmp->icmp_pending_op != -1); 3999 tea->ERROR_prim = icmp->icmp_pending_op; 4000 icmp->icmp_pending_op = -1; 4001 4002 switch (tea->ERROR_prim) { 4003 case T_CONN_REQ: 4004 ASSERT(icmp->icmp_state == TS_DATA_XFER); 4005 /* Connect failed */ 4006 /* Revert back to the bound source */ 4007 icmp->icmp_v6src = icmp->icmp_bound_v6src; 4008 icmp->icmp_state = TS_IDLE; 4009 if (icmp->icmp_family == AF_INET6) 4010 (void) icmp_build_hdrs(icmp); 4011 break; 4012 4013 case T_DISCON_REQ: 4014 case T_BIND_REQ: 4015 case O_T_BIND_REQ: 4016 V6_SET_ZERO(icmp->icmp_v6src); 4017 V6_SET_ZERO(icmp->icmp_bound_v6src); 4018 icmp->icmp_state = TS_UNBND; 4019 if (icmp->icmp_family == AF_INET6) 4020 (void) icmp_build_hdrs(icmp); 4021 break; 4022 default: 4023 break; 4024 } 4025 rw_exit(&icmp->icmp_rwlock); 4026 putnext(connp->conn_rq, mp); 4027 } 4028 4029 /* 4030 * return SNMP stuff in buffer in mpdata 4031 */ 4032 mblk_t * 4033 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 4034 { 4035 mblk_t *mpdata; 4036 struct opthdr *optp; 4037 conn_t *connp = Q_TO_CONN(q); 4038 icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 4039 mblk_t *mp2ctl; 4040 4041 /* 4042 * make a copy of the original message 4043 */ 4044 mp2ctl = copymsg(mpctl); 4045 4046 if (mpctl == NULL || 4047 (mpdata = mpctl->b_cont) == NULL) { 4048 freemsg(mpctl); 4049 freemsg(mp2ctl); 4050 return (0); 4051 } 4052 4053 /* fixed length structure for IPv4 and IPv6 counters */ 4054 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4055 optp->level = EXPER_RAWIP; 4056 optp->name = 0; 4057 (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, 4058 sizeof (is->is_rawip_mib)); 4059 optp->len = msgdsize(mpdata); 4060 qreply(q, mpctl); 4061 4062 return (mp2ctl); 4063 } 4064 4065 /* 4066 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 
4067 * TODO: If this ever actually tries to set anything, it needs to be 4068 * to do the appropriate locking. 4069 */ 4070 /* ARGSUSED */ 4071 int 4072 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4073 uchar_t *ptr, int len) 4074 { 4075 switch (level) { 4076 case EXPER_RAWIP: 4077 return (0); 4078 default: 4079 return (1); 4080 } 4081 } 4082 4083 /* Report for ndd "icmp_status" */ 4084 /* ARGSUSED */ 4085 static int 4086 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4087 { 4088 conn_t *connp; 4089 ip_stack_t *ipst; 4090 char laddrbuf[INET6_ADDRSTRLEN]; 4091 char faddrbuf[INET6_ADDRSTRLEN]; 4092 int i; 4093 4094 (void) mi_mpprintf(mp, 4095 "RAWIP " MI_COL_HDRPAD_STR 4096 /* 01234567[89ABCDEF] */ 4097 " src addr dest addr state"); 4098 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 4099 4100 connp = Q_TO_CONN(q); 4101 ipst = connp->conn_netstack->netstack_ip; 4102 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4103 connf_t *connfp; 4104 char *state; 4105 4106 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4107 connp = NULL; 4108 4109 while ((connp = ipcl_get_next_conn(connfp, connp, 4110 IPCL_RAWIPCONN)) != NULL) { 4111 icmp_t *icmp; 4112 4113 mutex_enter(&(connp)->conn_lock); 4114 icmp = connp->conn_icmp; 4115 4116 if (icmp->icmp_state == TS_UNBND) 4117 state = "UNBOUND"; 4118 else if (icmp->icmp_state == TS_IDLE) 4119 state = "IDLE"; 4120 else if (icmp->icmp_state == TS_DATA_XFER) 4121 state = "CONNECTED"; 4122 else 4123 state = "UnkState"; 4124 4125 (void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s", 4126 (void *)icmp, 4127 inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf, 4128 sizeof (faddrbuf)), 4129 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 4130 sizeof (laddrbuf)), 4131 state); 4132 mutex_exit(&(connp)->conn_lock); 4133 } 4134 } 4135 return (0); 4136 } 4137 4138 /* 4139 * This routine creates a T_UDERROR_IND message and passes it upstream. 
4140 * The address and options are copied from the T_UNITDATA_REQ message 4141 * passed in mp. This message is freed. 4142 */ 4143 static void 4144 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 4145 { 4146 mblk_t *mp1; 4147 uchar_t *rptr = mp->b_rptr; 4148 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 4149 4150 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 4151 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 4152 tudr->OPT_length, err); 4153 if (mp1) 4154 qreply(q, mp1); 4155 freemsg(mp); 4156 } 4157 4158 /* 4159 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 4160 * After some error checking, the message is passed downstream to ip. 4161 */ 4162 static void 4163 icmp_unbind(queue_t *q, mblk_t *mp) 4164 { 4165 icmp_t *icmp = Q_TO_ICMP(q); 4166 4167 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4168 /* If a bind has not been done, we can't unbind. */ 4169 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 4170 rw_exit(&icmp->icmp_rwlock); 4171 icmp_err_ack(q, mp, TOUTSTATE, 0); 4172 return; 4173 } 4174 icmp->icmp_pending_op = T_UNBIND_REQ; 4175 rw_exit(&icmp->icmp_rwlock); 4176 4177 /* 4178 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 4179 * and therefore ip_unbind must never return NULL. 4180 */ 4181 mp = ip_unbind(q, mp); 4182 ASSERT(mp != NULL); 4183 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4184 4185 /* 4186 * Once we're unbound from IP, the pending operation may be cleared 4187 * here. 4188 */ 4189 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4190 V6_SET_ZERO(icmp->icmp_v6src); 4191 V6_SET_ZERO(icmp->icmp_bound_v6src); 4192 icmp->icmp_pending_op = -1; 4193 icmp->icmp_state = TS_UNBND; 4194 if (icmp->icmp_family == AF_INET6) 4195 (void) icmp_build_hdrs(icmp); 4196 rw_exit(&icmp->icmp_rwlock); 4197 4198 qreply(q, mp); 4199 } 4200 4201 /* 4202 * Process IPv4 packets that already include an IP header. 
4203 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 4204 * IPPROTO_IGMP). 4205 */ 4206 static void 4207 icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop) 4208 { 4209 icmp_stack_t *is = icmp->icmp_is; 4210 ipha_t *ipha; 4211 int ip_hdr_length; 4212 int tp_hdr_len; 4213 mblk_t *mp1; 4214 uint_t pkt_len; 4215 ip_opt_info_t optinfo; 4216 conn_t *connp = icmp->icmp_connp; 4217 4218 optinfo.ip_opt_flags = 0; 4219 optinfo.ip_opt_ill_index = 0; 4220 ipha = (ipha_t *)mp->b_rptr; 4221 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4222 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 4223 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4224 ASSERT(icmp != NULL); 4225 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4226 freemsg(mp); 4227 return; 4228 } 4229 ipha = (ipha_t *)mp->b_rptr; 4230 } 4231 ipha->ipha_version_and_hdr_length = 4232 (IP_VERSION<<4) | (ip_hdr_length>>2); 4233 4234 /* 4235 * For the socket of SOCK_RAW type, the checksum is provided in the 4236 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4237 * tell IP that the application has sent a complete IP header and not 4238 * to compute the transport checksum nor change the DF flag. 4239 */ 4240 ipha->ipha_ident = IP_HDR_INCLUDED; 4241 ipha->ipha_hdr_checksum = 0; 4242 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 4243 /* Insert options if any */ 4244 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4245 /* 4246 * Put the IP header plus any transport header that is 4247 * checksumed by ip_wput into the first mblk. (ip_wput assumes 4248 * that at least the checksum field is in the first mblk.) 4249 */ 4250 switch (ipha->ipha_protocol) { 4251 case IPPROTO_UDP: 4252 tp_hdr_len = 8; 4253 break; 4254 case IPPROTO_TCP: 4255 tp_hdr_len = 20; 4256 break; 4257 default: 4258 tp_hdr_len = 0; 4259 break; 4260 } 4261 /* 4262 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 4263 * tp_hdr_len bytes will be in a single mblk. 
4264 */ 4265 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 4266 tp_hdr_len)) { 4267 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 4268 tp_hdr_len)) { 4269 BUMP_MIB(&is->is_rawip_mib, 4270 rawipOutErrors); 4271 freemsg(mp); 4272 return; 4273 } 4274 ipha = (ipha_t *)mp->b_rptr; 4275 } 4276 4277 /* 4278 * if the length is larger then the max allowed IP packet, 4279 * then send an error and abort the processing. 4280 */ 4281 pkt_len = ntohs(ipha->ipha_length) 4282 + icmp->icmp_ip_snd_options_len; 4283 if (pkt_len > IP_MAXPACKET) { 4284 icmp_ud_err(q, mp, EMSGSIZE); 4285 return; 4286 } 4287 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra + 4288 tp_hdr_len, BPRI_LO))) { 4289 icmp_ud_err(q, mp, ENOMEM); 4290 return; 4291 } 4292 mp1->b_rptr += is->is_wroff_extra; 4293 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4294 4295 ipha->ipha_length = htons((uint16_t)pkt_len); 4296 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4297 4298 /* Copy transport header if any */ 4299 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4300 mp1->b_wptr += tp_hdr_len; 4301 4302 /* Add options */ 4303 ipha = (ipha_t *)mp1->b_rptr; 4304 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 4305 icmp->icmp_ip_snd_options_len); 4306 4307 /* Drop IP header and transport header from original */ 4308 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4309 4310 mp1->b_cont = mp; 4311 mp = mp1; 4312 /* 4313 * Massage source route putting first source 4314 * route in ipha_dst. 
4315 */ 4316 (void) ip_massage_options(ipha, is->is_netstack); 4317 } 4318 4319 if (pktinfop != NULL) { 4320 /* 4321 * Over write the source address provided in the header 4322 */ 4323 if (pktinfop->ip4_addr != INADDR_ANY) { 4324 ipha->ipha_src = pktinfop->ip4_addr; 4325 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4326 } 4327 4328 if (pktinfop->ip4_ill_index != 0) { 4329 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4330 } 4331 } 4332 4333 mblk_setcred(mp, connp->conn_cred); 4334 ip_output_options(connp, mp, q, IP_WPUT, 4335 &optinfo); 4336 } 4337 4338 static boolean_t 4339 icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4340 { 4341 int err; 4342 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4343 icmp_stack_t *is = icmp->icmp_is; 4344 conn_t *connp = icmp->icmp_connp; 4345 4346 err = tsol_compute_label(DB_CREDDEF(mp, connp->conn_cred), dst, 4347 opt_storage, icmp->icmp_mac_exempt, 4348 is->is_netstack->netstack_ip); 4349 if (err == 0) { 4350 err = tsol_update_options(&icmp->icmp_ip_snd_options, 4351 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4352 opt_storage); 4353 } 4354 if (err != 0) { 4355 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4356 DTRACE_PROBE4( 4357 tx__ip__log__drop__updatelabel__icmp, 4358 char *, "queue(1) failed to update options(2) on mp(3)", 4359 queue_t *, q, char *, opt_storage, mblk_t *, mp); 4360 icmp_ud_err(q, mp, err); 4361 return (B_FALSE); 4362 } 4363 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4364 return (B_TRUE); 4365 } 4366 4367 /* 4368 * This routine handles all messages passed downstream. It either 4369 * consumes the message or passes it downstream; it never queues a 4370 * a message. 
4371 */ 4372 static void 4373 icmp_wput(queue_t *q, mblk_t *mp) 4374 { 4375 uchar_t *rptr = mp->b_rptr; 4376 ipha_t *ipha; 4377 mblk_t *mp1; 4378 int ip_hdr_length; 4379 #define tudr ((struct T_unitdata_req *)rptr) 4380 size_t ip_len; 4381 conn_t *connp = Q_TO_CONN(q); 4382 icmp_t *icmp = connp->conn_icmp; 4383 icmp_stack_t *is = icmp->icmp_is; 4384 sin6_t *sin6; 4385 sin_t *sin; 4386 ipaddr_t v4dst; 4387 ip4_pkt_t pktinfo; 4388 ip4_pkt_t *pktinfop = &pktinfo; 4389 ip_opt_info_t optinfo; 4390 4391 switch (mp->b_datap->db_type) { 4392 case M_DATA: 4393 if (icmp->icmp_hdrincl) { 4394 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4395 ipha = (ipha_t *)mp->b_rptr; 4396 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4397 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4398 BUMP_MIB(&is->is_rawip_mib, 4399 rawipOutErrors); 4400 freemsg(mp); 4401 return; 4402 } 4403 ipha = (ipha_t *)mp->b_rptr; 4404 } 4405 /* 4406 * If this connection was used for v6 (inconceivable!) 4407 * or if we have a new destination, then it's time to 4408 * figure a new label. 4409 */ 4410 if (is_system_labeled() && 4411 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4412 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4413 ipha->ipha_dst) && 4414 !icmp_update_label(q, icmp, mp, ipha->ipha_dst)) { 4415 return; 4416 } 4417 icmp_wput_hdrincl(q, mp, icmp, NULL); 4418 return; 4419 } 4420 freemsg(mp); 4421 return; 4422 case M_PROTO: 4423 case M_PCPROTO: 4424 ip_len = mp->b_wptr - rptr; 4425 if (ip_len >= sizeof (struct T_unitdata_req)) { 4426 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4427 if (((union T_primitives *)rptr)->type 4428 == T_UNITDATA_REQ) 4429 break; 4430 } 4431 /* FALLTHRU */ 4432 default: 4433 icmp_wput_other(q, mp); 4434 return; 4435 } 4436 4437 /* Handle T_UNITDATA_REQ messages here. */ 4438 4439 4440 4441 if (icmp->icmp_state == TS_UNBND) { 4442 /* If a port has not been bound to the stream, fail. 
*/ 4443 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4444 icmp_ud_err(q, mp, EPROTO); 4445 return; 4446 } 4447 mp1 = mp->b_cont; 4448 if (mp1 == NULL) { 4449 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4450 icmp_ud_err(q, mp, EPROTO); 4451 return; 4452 } 4453 4454 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4455 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4456 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4457 return; 4458 } 4459 4460 switch (icmp->icmp_family) { 4461 case AF_INET6: 4462 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4463 if (!OK_32PTR((char *)sin6) || 4464 tudr->DEST_length != sizeof (sin6_t) || 4465 sin6->sin6_family != AF_INET6) { 4466 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4467 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4468 return; 4469 } 4470 4471 /* No support for mapped addresses on raw sockets */ 4472 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4473 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4474 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4475 return; 4476 } 4477 4478 /* 4479 * Destination is a native IPv6 address. 4480 * Send out an IPv6 format packet. 
4481 */ 4482 icmp_wput_ipv6(q, mp, sin6, tudr->OPT_length); 4483 return; 4484 4485 case AF_INET: 4486 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4487 if (!OK_32PTR((char *)sin) || 4488 tudr->DEST_length != sizeof (sin_t) || 4489 sin->sin_family != AF_INET) { 4490 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4491 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4492 return; 4493 } 4494 /* Extract and ipaddr */ 4495 v4dst = sin->sin_addr.s_addr; 4496 break; 4497 4498 default: 4499 ASSERT(0); 4500 } 4501 4502 pktinfop->ip4_ill_index = 0; 4503 pktinfop->ip4_addr = INADDR_ANY; 4504 optinfo.ip_opt_flags = 0; 4505 optinfo.ip_opt_ill_index = 0; 4506 4507 4508 /* 4509 * If options passed in, feed it for verification and handling 4510 */ 4511 if (tudr->OPT_length != 0) { 4512 int error; 4513 4514 error = 0; 4515 if (icmp_unitdata_opt_process(q, mp, &error, 4516 (void *)pktinfop) < 0) { 4517 /* failure */ 4518 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4519 icmp_ud_err(q, mp, error); 4520 return; 4521 } 4522 ASSERT(error == 0); 4523 /* 4524 * Note: Success in processing options. 
4525 * mp option buffer represented by 4526 * OPT_length/offset now potentially modified 4527 * and contain option setting results 4528 */ 4529 4530 } 4531 4532 if (v4dst == INADDR_ANY) 4533 v4dst = htonl(INADDR_LOOPBACK); 4534 4535 /* Check if our saved options are valid; update if not */ 4536 if (is_system_labeled() && 4537 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4538 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst) && 4539 !icmp_update_label(q, icmp, mp, v4dst)) { 4540 return; 4541 } 4542 4543 /* Protocol 255 contains full IP headers */ 4544 if (icmp->icmp_hdrincl) { 4545 freeb(mp); 4546 icmp_wput_hdrincl(q, mp1, icmp, pktinfop); 4547 return; 4548 } 4549 4550 4551 /* Add an IP header */ 4552 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4553 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 4554 if ((uchar_t *)ipha < mp1->b_datap->db_base || 4555 mp1->b_datap->db_ref != 1 || 4556 !OK_32PTR(ipha)) { 4557 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, 4558 BPRI_LO))) { 4559 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4560 icmp_ud_err(q, mp, ENOMEM); 4561 return; 4562 } 4563 mp1->b_cont = mp->b_cont; 4564 ipha = (ipha_t *)mp1->b_datap->db_lim; 4565 mp1->b_wptr = (uchar_t *)ipha; 4566 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4567 } 4568 #ifdef _BIG_ENDIAN 4569 /* Set version, header length, and tos */ 4570 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4571 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4572 icmp->icmp_type_of_service); 4573 /* Set ttl and protocol */ 4574 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4575 #else 4576 /* Set version, header length, and tos */ 4577 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4578 ((icmp->icmp_type_of_service << 8) | 4579 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4580 /* Set ttl and protocol */ 4581 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4582 #endif 4583 if (pktinfop->ip4_addr != INADDR_ANY) { 4584 
ipha->ipha_src = pktinfop->ip4_addr; 4585 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4586 } else { 4587 4588 /* 4589 * Copy our address into the packet. If this is zero, 4590 * ip will fill in the real source address. 4591 */ 4592 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4593 } 4594 4595 ipha->ipha_fragment_offset_and_flags = 0; 4596 4597 if (pktinfop->ip4_ill_index != 0) { 4598 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4599 } 4600 4601 4602 /* 4603 * For the socket of SOCK_RAW type, the checksum is provided in the 4604 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4605 * tell IP that the application has sent a complete IP header and not 4606 * to compute the transport checksum nor change the DF flag. 4607 */ 4608 ipha->ipha_ident = IP_HDR_INCLUDED; 4609 4610 /* Finish common formatting of the packet. */ 4611 mp1->b_rptr = (uchar_t *)ipha; 4612 4613 ip_len = mp1->b_wptr - (uchar_t *)ipha; 4614 if (mp1->b_cont != NULL) 4615 ip_len += msgdsize(mp1->b_cont); 4616 4617 /* 4618 * Set the length into the IP header. 4619 * If the length is greater than the maximum allowed by IP, 4620 * then free the message and return. Do not try and send it 4621 * as this can cause problems in layers below. 4622 */ 4623 if (ip_len > IP_MAXPACKET) { 4624 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4625 icmp_ud_err(q, mp, EMSGSIZE); 4626 return; 4627 } 4628 ipha->ipha_length = htons((uint16_t)ip_len); 4629 /* 4630 * Copy in the destination address from the T_UNITDATA 4631 * request 4632 */ 4633 ipha->ipha_dst = v4dst; 4634 4635 /* 4636 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4637 */ 4638 if (CLASSD(v4dst)) 4639 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4640 4641 /* Copy in options if any */ 4642 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4643 bcopy(icmp->icmp_ip_snd_options, 4644 &ipha[1], icmp->icmp_ip_snd_options_len); 4645 /* 4646 * Massage source route putting first source route in ipha_dst. 
4647 * Ignore the destination in the T_unitdata_req. 4648 */ 4649 (void) ip_massage_options(ipha, is->is_netstack); 4650 } 4651 4652 freeb(mp); 4653 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 4654 mblk_setcred(mp1, connp->conn_cred); 4655 ip_output_options(Q_TO_CONN(q), mp1, q, IP_WPUT, &optinfo); 4656 #undef ipha 4657 #undef tudr 4658 } 4659 4660 static boolean_t 4661 icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4662 { 4663 int err; 4664 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4665 icmp_stack_t *is = icmp->icmp_is; 4666 conn_t *connp = icmp->icmp_connp; 4667 4668 err = tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), dst, 4669 opt_storage, icmp->icmp_mac_exempt, 4670 is->is_netstack->netstack_ip); 4671 if (err == 0) { 4672 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4673 &icmp->icmp_label_len_v6, opt_storage); 4674 } 4675 if (err != 0) { 4676 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4677 DTRACE_PROBE4( 4678 tx__ip__log__drop__updatelabel__icmp6, 4679 char *, "queue(1) failed to update options(2) on mp(3)", 4680 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4681 icmp_ud_err(wq, mp, err); 4682 return (B_FALSE); 4683 } 4684 4685 icmp->icmp_v6lastdst = *dst; 4686 return (B_TRUE); 4687 } 4688 4689 /* 4690 * icmp_wput_ipv6(): 4691 * Assumes that icmp_wput did some sanity checking on the destination 4692 * address, but that the label may not yet be correct. 
4693 */ 4694 void 4695 icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) 4696 { 4697 ip6_t *ip6h; 4698 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 4699 mblk_t *mp1; 4700 int ip_hdr_len = IPV6_HDR_LEN; 4701 size_t ip_len; 4702 icmp_t *icmp = Q_TO_ICMP(q); 4703 icmp_stack_t *is = icmp->icmp_is; 4704 ip6_pkt_t ipp_s; /* For ancillary data options */ 4705 ip6_pkt_t *ipp = &ipp_s; 4706 ip6_pkt_t *tipp; 4707 uint32_t csum = 0; 4708 uint_t ignore = 0; 4709 uint_t option_exists = 0, is_sticky = 0; 4710 uint8_t *cp; 4711 uint8_t *nxthdr_ptr; 4712 in6_addr_t ip6_dst; 4713 4714 /* 4715 * If the local address is a mapped address return 4716 * an error. 4717 * It would be possible to send an IPv6 packet but the 4718 * response would never make it back to the application 4719 * since it is bound to a mapped address. 4720 */ 4721 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4722 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4723 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4724 return; 4725 } 4726 4727 ipp->ipp_fields = 0; 4728 ipp->ipp_sticky_ignored = 0; 4729 4730 /* 4731 * If TPI options passed in, feed it for verification and handling 4732 */ 4733 if (tudr_optlen != 0) { 4734 int error; 4735 4736 if (icmp_unitdata_opt_process(q, mp, &error, 4737 (void *)ipp) < 0) { 4738 /* failure */ 4739 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4740 icmp_ud_err(q, mp, error); 4741 return; 4742 } 4743 ignore = ipp->ipp_sticky_ignored; 4744 ASSERT(error == 0); 4745 } 4746 4747 if (sin6->sin6_scope_id != 0 && 4748 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4749 /* 4750 * IPPF_SCOPE_ID is special. It's neither a sticky 4751 * option nor ancillary data. It needs to be 4752 * explicitly set in options_exists. 
4753 */ 4754 option_exists |= IPPF_SCOPE_ID; 4755 } 4756 4757 /* 4758 * Compute the destination address 4759 */ 4760 ip6_dst = sin6->sin6_addr; 4761 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4762 ip6_dst = ipv6_loopback; 4763 4764 /* 4765 * If we're not going to the same destination as last time, then 4766 * recompute the label required. This is done in a separate routine to 4767 * avoid blowing up our stack here. 4768 */ 4769 if (is_system_labeled() && 4770 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst) && 4771 !icmp_update_label_v6(q, icmp, mp, &ip6_dst)) { 4772 return; 4773 } 4774 4775 /* 4776 * If there's a security label here, then we ignore any options the 4777 * user may try to set. We keep the peer's label as a hidden sticky 4778 * option. 4779 */ 4780 if (icmp->icmp_label_len_v6 > 0) { 4781 ignore &= ~IPPF_HOPOPTS; 4782 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4783 } 4784 4785 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4786 (ipp->ipp_fields == 0)) { 4787 /* No sticky options nor ancillary data. */ 4788 goto no_options; 4789 } 4790 4791 /* 4792 * Go through the options figuring out where each is going to 4793 * come from and build two masks. The first mask indicates if 4794 * the option exists at all. The second mask indicates if the 4795 * option is sticky or ancillary. 
4796 */ 4797 if (!(ignore & IPPF_HOPOPTS)) { 4798 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4799 option_exists |= IPPF_HOPOPTS; 4800 ip_hdr_len += ipp->ipp_hopoptslen; 4801 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4802 option_exists |= IPPF_HOPOPTS; 4803 is_sticky |= IPPF_HOPOPTS; 4804 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4805 } 4806 } 4807 4808 if (!(ignore & IPPF_RTHDR)) { 4809 if (ipp->ipp_fields & IPPF_RTHDR) { 4810 option_exists |= IPPF_RTHDR; 4811 ip_hdr_len += ipp->ipp_rthdrlen; 4812 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4813 option_exists |= IPPF_RTHDR; 4814 is_sticky |= IPPF_RTHDR; 4815 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4816 } 4817 } 4818 4819 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4820 /* 4821 * Need to have a router header to use these. 4822 */ 4823 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4824 option_exists |= IPPF_RTDSTOPTS; 4825 ip_hdr_len += ipp->ipp_rtdstoptslen; 4826 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4827 option_exists |= IPPF_RTDSTOPTS; 4828 is_sticky |= IPPF_RTDSTOPTS; 4829 ip_hdr_len += 4830 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4831 } 4832 } 4833 4834 if (!(ignore & IPPF_DSTOPTS)) { 4835 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4836 option_exists |= IPPF_DSTOPTS; 4837 ip_hdr_len += ipp->ipp_dstoptslen; 4838 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4839 option_exists |= IPPF_DSTOPTS; 4840 is_sticky |= IPPF_DSTOPTS; 4841 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4842 } 4843 } 4844 4845 if (!(ignore & IPPF_IFINDEX)) { 4846 if (ipp->ipp_fields & IPPF_IFINDEX) { 4847 option_exists |= IPPF_IFINDEX; 4848 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4849 option_exists |= IPPF_IFINDEX; 4850 is_sticky |= IPPF_IFINDEX; 4851 } 4852 } 4853 4854 if (!(ignore & IPPF_ADDR)) { 4855 if (ipp->ipp_fields & IPPF_ADDR) { 4856 option_exists |= IPPF_ADDR; 4857 } else if 
(icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4858 option_exists |= IPPF_ADDR; 4859 is_sticky |= IPPF_ADDR; 4860 } 4861 } 4862 4863 if (!(ignore & IPPF_DONTFRAG)) { 4864 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4865 option_exists |= IPPF_DONTFRAG; 4866 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4867 option_exists |= IPPF_DONTFRAG; 4868 is_sticky |= IPPF_DONTFRAG; 4869 } 4870 } 4871 4872 if (!(ignore & IPPF_USE_MIN_MTU)) { 4873 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4874 option_exists |= IPPF_USE_MIN_MTU; 4875 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4876 IPPF_USE_MIN_MTU) { 4877 option_exists |= IPPF_USE_MIN_MTU; 4878 is_sticky |= IPPF_USE_MIN_MTU; 4879 } 4880 } 4881 4882 if (!(ignore & IPPF_NEXTHOP)) { 4883 if (ipp->ipp_fields & IPPF_NEXTHOP) { 4884 option_exists |= IPPF_NEXTHOP; 4885 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) { 4886 option_exists |= IPPF_NEXTHOP; 4887 is_sticky |= IPPF_NEXTHOP; 4888 } 4889 } 4890 4891 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 4892 option_exists |= IPPF_HOPLIMIT; 4893 /* IPV6_HOPLIMIT can never be sticky */ 4894 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 4895 4896 if (!(ignore & IPPF_UNICAST_HOPS) && 4897 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 4898 option_exists |= IPPF_UNICAST_HOPS; 4899 is_sticky |= IPPF_UNICAST_HOPS; 4900 } 4901 4902 if (!(ignore & IPPF_MULTICAST_HOPS) && 4903 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 4904 option_exists |= IPPF_MULTICAST_HOPS; 4905 is_sticky |= IPPF_MULTICAST_HOPS; 4906 } 4907 4908 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 4909 /* This is a sticky socket option only */ 4910 option_exists |= IPPF_NO_CKSUM; 4911 is_sticky |= IPPF_NO_CKSUM; 4912 } 4913 4914 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 4915 /* This is a sticky socket option only */ 4916 option_exists |= IPPF_RAW_CKSUM; 4917 is_sticky |= IPPF_RAW_CKSUM; 4918 } 4919 4920 if (!(ignore 
& IPPF_TCLASS)) { 4921 if (ipp->ipp_fields & IPPF_TCLASS) { 4922 option_exists |= IPPF_TCLASS; 4923 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 4924 option_exists |= IPPF_TCLASS; 4925 is_sticky |= IPPF_TCLASS; 4926 } 4927 } 4928 4929 no_options: 4930 4931 /* 4932 * If any options carried in the ip6i_t were specified, we 4933 * need to account for the ip6i_t in the data we'll be sending 4934 * down. 4935 */ 4936 if (option_exists & IPPF_HAS_IP6I) 4937 ip_hdr_len += sizeof (ip6i_t); 4938 4939 /* check/fix buffer config, setup pointers into it */ 4940 mp1 = mp->b_cont; 4941 ip6h = (ip6_t *)&mp1->b_rptr[-ip_hdr_len]; 4942 if ((mp1->b_datap->db_ref != 1) || 4943 ((unsigned char *)ip6h < mp1->b_datap->db_base) || 4944 !OK_32PTR(ip6h)) { 4945 /* Try to get everything in a single mblk next time */ 4946 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 4947 icmp->icmp_max_hdr_len = ip_hdr_len; 4948 (void) mi_set_sth_wroff(RD(q), 4949 icmp->icmp_max_hdr_len + is->is_wroff_extra); 4950 } 4951 mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); 4952 if (!mp1) { 4953 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4954 icmp_ud_err(q, mp, ENOMEM); 4955 return; 4956 } 4957 mp1->b_cont = mp->b_cont; 4958 mp1->b_wptr = mp1->b_datap->db_lim; 4959 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 4960 } 4961 mp1->b_rptr = (unsigned char *)ip6h; 4962 ip6i = (ip6i_t *)ip6h; 4963 4964 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 4965 if (option_exists & IPPF_HAS_IP6I) { 4966 ip6h = (ip6_t *)&ip6i[1]; 4967 ip6i->ip6i_flags = 0; 4968 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4969 4970 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 4971 if (option_exists & IPPF_SCOPE_ID) { 4972 ip6i->ip6i_flags |= IP6I_IFINDEX; 4973 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 4974 } else if (option_exists & IPPF_IFINDEX) { 4975 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 4976 ASSERT(tipp->ipp_ifindex != 0); 4977 ip6i->ip6i_flags |= IP6I_IFINDEX; 4978 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 4979 } 4980 4981 if (option_exists & IPPF_RAW_CKSUM) { 4982 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 4983 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 4984 } 4985 4986 if (option_exists & IPPF_NO_CKSUM) { 4987 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 4988 } 4989 4990 if (option_exists & IPPF_ADDR) { 4991 /* 4992 * Enable per-packet source address verification if 4993 * IPV6_PKTINFO specified the source address. 4994 * ip6_src is set in the transport's _wput function. 4995 */ 4996 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 4997 } 4998 4999 if (option_exists & IPPF_DONTFRAG) { 5000 ip6i->ip6i_flags |= IP6I_DONTFRAG; 5001 } 5002 5003 if (option_exists & IPPF_USE_MIN_MTU) { 5004 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 5005 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 5006 } 5007 5008 if (option_exists & IPPF_NEXTHOP) { 5009 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 5010 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 5011 ip6i->ip6i_flags |= IP6I_NEXTHOP; 5012 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 5013 } 5014 5015 /* 5016 * tell IP this is an ip6i_t private header 5017 */ 5018 ip6i->ip6i_nxt = IPPROTO_RAW; 5019 } 5020 5021 /* Initialize IPv6 header */ 5022 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5023 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 5024 5025 /* Set the hoplimit of the outgoing packet. 
*/ 5026 if (option_exists & IPPF_HOPLIMIT) { 5027 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 5028 ip6h->ip6_hops = ipp->ipp_hoplimit; 5029 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5030 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 5031 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 5032 if (option_exists & IPPF_MULTICAST_HOPS) 5033 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5034 } else { 5035 ip6h->ip6_hops = icmp->icmp_ttl; 5036 if (option_exists & IPPF_UNICAST_HOPS) 5037 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5038 } 5039 5040 if (option_exists & IPPF_ADDR) { 5041 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 5042 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 5043 ip6h->ip6_src = tipp->ipp_addr; 5044 } else { 5045 /* 5046 * The source address was not set using IPV6_PKTINFO. 5047 * First look at the bound source. 5048 * If unspecified fallback to __sin6_src_id. 5049 */ 5050 ip6h->ip6_src = icmp->icmp_v6src; 5051 if (sin6->__sin6_src_id != 0 && 5052 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 5053 ip_srcid_find_id(sin6->__sin6_src_id, 5054 &ip6h->ip6_src, icmp->icmp_zoneid, 5055 is->is_netstack); 5056 } 5057 } 5058 5059 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 5060 cp = (uint8_t *)&ip6h[1]; 5061 5062 /* 5063 * Here's where we have to start stringing together 5064 * any extension headers in the right order: 5065 * Hop-by-hop, destination, routing, and final destination opts. 
5066 */ 5067 if (option_exists & IPPF_HOPOPTS) { 5068 /* Hop-by-hop options */ 5069 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 5070 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 5071 5072 *nxthdr_ptr = IPPROTO_HOPOPTS; 5073 nxthdr_ptr = &hbh->ip6h_nxt; 5074 5075 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 5076 cp += tipp->ipp_hopoptslen; 5077 } 5078 /* 5079 * En-route destination options 5080 * Only do them if there's a routing header as well 5081 */ 5082 if (option_exists & IPPF_RTDSTOPTS) { 5083 ip6_dest_t *dst = (ip6_dest_t *)cp; 5084 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 5085 5086 *nxthdr_ptr = IPPROTO_DSTOPTS; 5087 nxthdr_ptr = &dst->ip6d_nxt; 5088 5089 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 5090 cp += tipp->ipp_rtdstoptslen; 5091 } 5092 /* 5093 * Routing header next 5094 */ 5095 if (option_exists & IPPF_RTHDR) { 5096 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 5097 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 5098 5099 *nxthdr_ptr = IPPROTO_ROUTING; 5100 nxthdr_ptr = &rt->ip6r_nxt; 5101 5102 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 5103 cp += tipp->ipp_rthdrlen; 5104 } 5105 /* 5106 * Do ultimate destination options 5107 */ 5108 if (option_exists & IPPF_DSTOPTS) { 5109 ip6_dest_t *dest = (ip6_dest_t *)cp; 5110 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 5111 5112 *nxthdr_ptr = IPPROTO_DSTOPTS; 5113 nxthdr_ptr = &dest->ip6d_nxt; 5114 5115 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 5116 cp += tipp->ipp_dstoptslen; 5117 } 5118 5119 /* 5120 * Now set the last header pointer to the proto passed in 5121 */ 5122 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 5123 *nxthdr_ptr = icmp->icmp_proto; 5124 5125 /* 5126 * Copy in the destination address 5127 */ 5128 ip6h->ip6_dst = ip6_dst; 5129 5130 ip6h->ip6_vcf = 5131 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 5132 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 5133 5134 if (option_exists & IPPF_TCLASS) { 5135 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 5136 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 5137 tipp->ipp_tclass); 5138 } 5139 if (option_exists & IPPF_RTHDR) { 5140 ip6_rthdr_t *rth; 5141 5142 /* 5143 * Perform any processing needed for source routing. 5144 * We know that all extension headers will be in the same mblk 5145 * as the IPv6 header. 5146 */ 5147 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 5148 if (rth != NULL && rth->ip6r_segleft != 0) { 5149 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 5150 /* 5151 * Drop packet - only support Type 0 routing. 5152 * Notify the application as well. 5153 */ 5154 icmp_ud_err(q, mp, EPROTO); 5155 BUMP_MIB(&is->is_rawip_mib, 5156 rawipOutErrors); 5157 return; 5158 } 5159 /* 5160 * rth->ip6r_len is twice the number of 5161 * addresses in the header 5162 */ 5163 if (rth->ip6r_len & 0x1) { 5164 icmp_ud_err(q, mp, EPROTO); 5165 BUMP_MIB(&is->is_rawip_mib, 5166 rawipOutErrors); 5167 return; 5168 } 5169 /* 5170 * Shuffle the routing header and ip6_dst 5171 * addresses, and get the checksum difference 5172 * between the first hop (in ip6_dst) and 5173 * the destination (in the last routing hdr entry). 5174 */ 5175 csum = ip_massage_options_v6(ip6h, rth, 5176 is->is_netstack); 5177 /* 5178 * Verify that the first hop isn't a mapped address. 5179 * Routers along the path need to do this verification 5180 * for subsequent hops. 5181 */ 5182 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 5183 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5184 BUMP_MIB(&is->is_rawip_mib, 5185 rawipOutErrors); 5186 return; 5187 } 5188 } 5189 } 5190 5191 ip_len = mp1->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 5192 if (mp1->b_cont != NULL) 5193 ip_len += msgdsize(mp1->b_cont); 5194 5195 /* 5196 * Set the length into the IP header. 5197 * If the length is greater than the maximum allowed by IP, 5198 * then free the message and return. Do not try and send it 5199 * as this can cause problems in layers below. 
5200 */ 5201 if (ip_len > IP_MAXPACKET) { 5202 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5203 icmp_ud_err(q, mp, EMSGSIZE); 5204 return; 5205 } 5206 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 5207 uint_t cksum_off; /* From ip6i == mp1->b_rptr */ 5208 uint16_t *cksum_ptr; 5209 uint_t ext_hdrs_len; 5210 5211 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 5212 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 5213 icmp->icmp_checksum_off == 2); 5214 5215 /* 5216 * We make it easy for IP to include our pseudo header 5217 * by putting our length in uh_checksum, modified (if 5218 * we have a routing header) by the checksum difference 5219 * between the ultimate destination and first hop addresses. 5220 * Note: ICMPv6 must always checksum the packet. 5221 */ 5222 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 5223 if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) { 5224 if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) { 5225 BUMP_MIB(&is->is_rawip_mib, 5226 rawipOutErrors); 5227 freemsg(mp); 5228 return; 5229 } 5230 ip6i = (ip6i_t *)mp1->b_rptr; 5231 if (ip6i->ip6i_nxt == IPPROTO_RAW) 5232 ip6h = (ip6_t *)&ip6i[1]; 5233 else 5234 ip6h = (ip6_t *)ip6i; 5235 } 5236 /* Add payload length to checksum */ 5237 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 5238 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 5239 csum += htons(ip_len - ext_hdrs_len); 5240 5241 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 5242 csum = (csum & 0xFFFF) + (csum >> 16); 5243 *cksum_ptr = (uint16_t)csum; 5244 } 5245 5246 #ifdef _LITTLE_ENDIAN 5247 ip_len = htons(ip_len); 5248 #endif 5249 ip6h->ip6_plen = (uint16_t)ip_len; 5250 5251 freeb(mp); 5252 5253 /* We're done. 
Pass the packet to IP */ 5254 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 5255 ip_output_v6(icmp->icmp_connp, mp1, q, IP_WPUT); 5256 } 5257 5258 static void 5259 icmp_wput_other(queue_t *q, mblk_t *mp) 5260 { 5261 uchar_t *rptr = mp->b_rptr; 5262 struct iocblk *iocp; 5263 #define tudr ((struct T_unitdata_req *)rptr) 5264 conn_t *connp = Q_TO_CONN(q); 5265 icmp_t *icmp = connp->conn_icmp; 5266 icmp_stack_t *is = icmp->icmp_is; 5267 cred_t *cr; 5268 5269 cr = DB_CREDDEF(mp, connp->conn_cred); 5270 5271 switch (mp->b_datap->db_type) { 5272 case M_PROTO: 5273 case M_PCPROTO: 5274 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5275 /* 5276 * If the message does not contain a PRIM_type, 5277 * throw it away. 5278 */ 5279 freemsg(mp); 5280 return; 5281 } 5282 switch (((union T_primitives *)rptr)->type) { 5283 case T_ADDR_REQ: 5284 icmp_addr_req(q, mp); 5285 return; 5286 case O_T_BIND_REQ: 5287 case T_BIND_REQ: 5288 icmp_bind(q, mp); 5289 return; 5290 case T_CONN_REQ: 5291 icmp_connect(q, mp); 5292 return; 5293 case T_CAPABILITY_REQ: 5294 icmp_capability_req(q, mp); 5295 return; 5296 case T_INFO_REQ: 5297 icmp_info_req(q, mp); 5298 return; 5299 case T_UNITDATA_REQ: 5300 /* 5301 * If a T_UNITDATA_REQ gets here, the address must 5302 * be bad. Valid T_UNITDATA_REQs are found above 5303 * and break to below this switch. 5304 */ 5305 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5306 return; 5307 case T_UNBIND_REQ: 5308 icmp_unbind(q, mp); 5309 return; 5310 5311 case T_SVR4_OPTMGMT_REQ: 5312 if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get, 5313 cr)) { 5314 /* Only IP can return anything meaningful */ 5315 (void) svr4_optcom_req(q, mp, cr, 5316 &icmp_opt_obj, B_TRUE); 5317 } 5318 return; 5319 5320 case T_OPTMGMT_REQ: 5321 /* Only IP can return anything meaningful */ 5322 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE); 5323 return; 5324 5325 case T_DISCON_REQ: 5326 icmp_disconnect(q, mp); 5327 return; 5328 5329 /* The following TPI message is not supported by icmp. 
*/ 5330 case O_T_CONN_RES: 5331 case T_CONN_RES: 5332 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5333 return; 5334 5335 /* The following 3 TPI requests are illegal for icmp. */ 5336 case T_DATA_REQ: 5337 case T_EXDATA_REQ: 5338 case T_ORDREL_REQ: 5339 freemsg(mp); 5340 (void) putctl1(RD(q), M_ERROR, EPROTO); 5341 return; 5342 default: 5343 break; 5344 } 5345 break; 5346 case M_IOCTL: 5347 iocp = (struct iocblk *)mp->b_rptr; 5348 switch (iocp->ioc_cmd) { 5349 case TI_GETPEERNAME: 5350 if (icmp->icmp_state != TS_DATA_XFER) { 5351 /* 5352 * If a default destination address has not 5353 * been associated with the stream, then we 5354 * don't know the peer's name. 5355 */ 5356 iocp->ioc_error = ENOTCONN; 5357 err_ret:; 5358 iocp->ioc_count = 0; 5359 mp->b_datap->db_type = M_IOCACK; 5360 qreply(q, mp); 5361 return; 5362 } 5363 /* FALLTHRU */ 5364 case TI_GETMYNAME: 5365 /* 5366 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5367 * need to copyin the user's strbuf structure. 5368 * Processing will continue in the M_IOCDATA case 5369 * below. 5370 */ 5371 mi_copyin(q, mp, NULL, 5372 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5373 return; 5374 case ND_SET: 5375 /* nd_getset performs the necessary error checking */ 5376 case ND_GET: 5377 if (nd_getset(q, is->is_nd, mp)) { 5378 qreply(q, mp); 5379 return; 5380 } 5381 break; 5382 default: 5383 break; 5384 } 5385 break; 5386 case M_IOCDATA: 5387 icmp_wput_iocdata(q, mp); 5388 return; 5389 default: 5390 break; 5391 } 5392 ip_wput(q, mp); 5393 } 5394 5395 /* 5396 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5397 * messages. 5398 */ 5399 static void 5400 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5401 { 5402 mblk_t *mp1; 5403 STRUCT_HANDLE(strbuf, sb); 5404 icmp_t *icmp; 5405 in6_addr_t v6addr; 5406 ipaddr_t v4addr; 5407 uint32_t flowinfo = 0; 5408 int addrlen; 5409 5410 /* Make sure it is one of ours. 
*/ 5411 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5412 case TI_GETMYNAME: 5413 case TI_GETPEERNAME: 5414 break; 5415 default: 5416 icmp = Q_TO_ICMP(q); 5417 ip_output(icmp->icmp_connp, mp, q, IP_WPUT); 5418 return; 5419 } 5420 switch (mi_copy_state(q, mp, &mp1)) { 5421 case -1: 5422 return; 5423 case MI_COPY_CASE(MI_COPY_IN, 1): 5424 break; 5425 case MI_COPY_CASE(MI_COPY_OUT, 1): 5426 /* 5427 * The address has been copied out, so now 5428 * copyout the strbuf. 5429 */ 5430 mi_copyout(q, mp); 5431 return; 5432 case MI_COPY_CASE(MI_COPY_OUT, 2): 5433 /* 5434 * The address and strbuf have been copied out. 5435 * We're done, so just acknowledge the original 5436 * M_IOCTL. 5437 */ 5438 mi_copy_done(q, mp, 0); 5439 return; 5440 default: 5441 /* 5442 * Something strange has happened, so acknowledge 5443 * the original M_IOCTL with an EPROTO error. 5444 */ 5445 mi_copy_done(q, mp, EPROTO); 5446 return; 5447 } 5448 /* 5449 * Now we have the strbuf structure for TI_GETMYNAME 5450 * and TI_GETPEERNAME. Next we copyout the requested 5451 * address and then we'll copyout the strbuf. 5452 */ 5453 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5454 (void *)mp1->b_rptr); 5455 icmp = Q_TO_ICMP(q); 5456 if (icmp->icmp_family == AF_INET) 5457 addrlen = sizeof (sin_t); 5458 else 5459 addrlen = sizeof (sin6_t); 5460 5461 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5462 mi_copy_done(q, mp, EINVAL); 5463 return; 5464 } 5465 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5466 case TI_GETMYNAME: 5467 if (icmp->icmp_family == AF_INET) { 5468 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5469 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 5470 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5471 v4addr = V4_PART_OF_V6(icmp->icmp_v6src); 5472 } else { 5473 /* 5474 * INADDR_ANY 5475 * icmp_v6src is not set, we might be bound to 5476 * broadcast/multicast. 
Use icmp_bound_v6src as 5477 * local address instead (that could 5478 * also still be INADDR_ANY) 5479 */ 5480 v4addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 5481 } 5482 } else { 5483 /* icmp->icmp_family == AF_INET6 */ 5484 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5485 v6addr = icmp->icmp_v6src; 5486 } else { 5487 /* 5488 * UNSPECIFIED 5489 * icmp_v6src is not set, we might be bound to 5490 * broadcast/multicast. Use icmp_bound_v6src as 5491 * local address instead (that could 5492 * also still be UNSPECIFIED) 5493 */ 5494 v6addr = icmp->icmp_bound_v6src; 5495 } 5496 } 5497 break; 5498 case TI_GETPEERNAME: 5499 if (icmp->icmp_family == AF_INET) { 5500 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5501 v4addr = V4_PART_OF_V6(icmp->icmp_v6dst); 5502 } else { 5503 /* icmp->icmp_family == AF_INET6) */ 5504 v6addr = icmp->icmp_v6dst; 5505 flowinfo = icmp->icmp_flowinfo; 5506 } 5507 break; 5508 default: 5509 mi_copy_done(q, mp, EPROTO); 5510 return; 5511 } 5512 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5513 if (!mp1) 5514 return; 5515 5516 if (icmp->icmp_family == AF_INET) { 5517 sin_t *sin; 5518 5519 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 5520 sin = (sin_t *)mp1->b_rptr; 5521 mp1->b_wptr = (uchar_t *)&sin[1]; 5522 *sin = sin_null; 5523 sin->sin_family = AF_INET; 5524 sin->sin_addr.s_addr = v4addr; 5525 } else { 5526 /* icmp->icmp_family == AF_INET6 */ 5527 sin6_t *sin6; 5528 5529 ASSERT(icmp->icmp_family == AF_INET6); 5530 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 5531 sin6 = (sin6_t *)mp1->b_rptr; 5532 mp1->b_wptr = (uchar_t *)&sin6[1]; 5533 *sin6 = sin6_null; 5534 sin6->sin6_family = AF_INET6; 5535 sin6->sin6_flowinfo = flowinfo; 5536 sin6->sin6_addr = v6addr; 5537 } 5538 /* Copy out the address */ 5539 mi_copyout(q, mp); 5540 } 5541 5542 static int 5543 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5544 void *thisdg_attrs) 5545 { 5546 conn_t *connp = Q_TO_CONN(q); 5547 struct T_unitdata_req *udreqp; 5548 
int is_absreq_failure; 5549 cred_t *cr; 5550 5551 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5552 *errorp = 0; 5553 5554 cr = DB_CREDDEF(mp, connp->conn_cred); 5555 5556 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5557 udreqp->OPT_offset, cr, &icmp_opt_obj, 5558 thisdg_attrs, &is_absreq_failure); 5559 5560 if (*errorp != 0) { 5561 /* 5562 * Note: No special action needed in this 5563 * module for "is_absreq_failure" 5564 */ 5565 return (-1); /* failure */ 5566 } 5567 ASSERT(is_absreq_failure == 0); 5568 return (0); /* success */ 5569 } 5570 5571 void 5572 icmp_ddi_init(void) 5573 { 5574 icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5575 icmp_opt_obj.odb_opt_arr_cnt); 5576 5577 /* 5578 * We want to be informed each time a stack is created or 5579 * destroyed in the kernel, so we can maintain the 5580 * set of icmp_stack_t's. 5581 */ 5582 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); 5583 } 5584 5585 void 5586 icmp_ddi_destroy(void) 5587 { 5588 netstack_unregister(NS_ICMP); 5589 } 5590 5591 /* 5592 * Initialize the ICMP stack instance. 5593 */ 5594 static void * 5595 rawip_stack_init(netstackid_t stackid, netstack_t *ns) 5596 { 5597 icmp_stack_t *is; 5598 icmpparam_t *pa; 5599 5600 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); 5601 is->is_netstack = ns; 5602 5603 pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); 5604 is->is_param_arr = pa; 5605 bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); 5606 5607 (void) icmp_param_register(&is->is_nd, 5608 is->is_param_arr, A_CNT(icmp_param_arr)); 5609 is->is_ksp = rawip_kstat_init(stackid); 5610 return (is); 5611 } 5612 5613 /* 5614 * Free the ICMP stack instance. 
5615 */ 5616 static void 5617 rawip_stack_fini(netstackid_t stackid, void *arg) 5618 { 5619 icmp_stack_t *is = (icmp_stack_t *)arg; 5620 5621 nd_free(&is->is_nd); 5622 kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); 5623 is->is_param_arr = NULL; 5624 5625 rawip_kstat_fini(stackid, is->is_ksp); 5626 is->is_ksp = NULL; 5627 kmem_free(is, sizeof (*is)); 5628 } 5629 5630 static void * 5631 rawip_kstat_init(netstackid_t stackid) { 5632 kstat_t *ksp; 5633 5634 rawip_named_kstat_t template = { 5635 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5636 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5637 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5638 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5639 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5640 }; 5641 5642 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", 5643 KSTAT_TYPE_NAMED, 5644 NUM_OF_FIELDS(rawip_named_kstat_t), 5645 0, stackid); 5646 if (ksp == NULL || ksp->ks_data == NULL) 5647 return (NULL); 5648 5649 bcopy(&template, ksp->ks_data, sizeof (template)); 5650 ksp->ks_update = rawip_kstat_update; 5651 ksp->ks_private = (void *)(uintptr_t)stackid; 5652 5653 kstat_install(ksp); 5654 return (ksp); 5655 } 5656 5657 static void 5658 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) 5659 { 5660 if (ksp != NULL) { 5661 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 5662 kstat_delete_netstack(ksp, stackid); 5663 } 5664 } 5665 5666 static int 5667 rawip_kstat_update(kstat_t *ksp, int rw) 5668 { 5669 rawip_named_kstat_t *rawipkp; 5670 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; 5671 netstack_t *ns; 5672 icmp_stack_t *is; 5673 5674 if ((ksp == NULL) || (ksp->ks_data == NULL)) 5675 return (EIO); 5676 5677 if (rw == KSTAT_WRITE) 5678 return (EACCES); 5679 5680 rawipkp = (rawip_named_kstat_t *)ksp->ks_data; 5681 5682 ns = netstack_find_by_stackid(stackid); 5683 if (ns == NULL) 5684 return (-1); 5685 is = ns->netstack_icmp; 5686 if (is == NULL) { 5687 netstack_rele(ns); 5688 return (-1); 5689 } 5690 
rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; 5691 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; 5692 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; 5693 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; 5694 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; 5695 netstack_rele(ns); 5696 return (0); 5697 } 5698