1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/stropts.h> 30 #include <sys/strlog.h> 31 #include <sys/strsun.h> 32 #define _SUN_TPI_VERSION 2 33 #include <sys/tihdr.h> 34 #include <sys/timod.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/kmem.h> 41 #include <sys/policy.h> 42 #include <sys/priv.h> 43 #include <sys/zone.h> 44 #include <sys/time.h> 45 46 #include <sys/socket.h> 47 #include <sys/isa_defs.h> 48 #include <sys/suntpi.h> 49 #include <sys/xti_inet.h> 50 #include <sys/netstack.h> 51 52 #include <net/route.h> 53 #include <net/if.h> 54 55 #include <netinet/in.h> 56 #include <netinet/ip6.h> 57 #include <netinet/icmp6.h> 58 #include <inet/common.h> 59 #include <inet/ip.h> 60 #include <inet/ip6.h> 61 #include <inet/mi.h> 62 #include <inet/nd.h> 63 #include <inet/optcom.h> 64 #include <inet/snmpcom.h> 65 #include <inet/kstatcom.h> 66 #include <inet/rawip_impl.h> 67 68 #include <netinet/ip_mroute.h> 69 #include <inet/tcp.h> 70 #include <net/pfkeyv2.h> 71 #include <inet/ipsec_info.h> 72 #include <inet/ipclassifier.h> 73 74 #include <sys/tsol/label.h> 75 #include <sys/tsol/tnet.h> 76 77 #include <inet/ip_ire.h> 78 #include <inet/ip_if.h> 79 80 #include <inet/ip_impl.h> 81 82 /* 83 * Synchronization notes: 84 * 85 * RAWIP is MT and uses the usual kernel synchronization primitives. There is 86 * one lock, which is icmp_rwlock. We also use conn_lock when updating things 87 * which affect the IP classifier lookup. 88 * The lock order is icmp_rwlock -> conn_lock. 89 * 90 * The icmp_rwlock: 91 * This protects most of the other fields in the icmp_t. The exact list of 92 * fields which are protected by each of the above locks is documented in 93 * the icmp_t structure definition. 94 * 95 * Plumbing notes: 96 * ICMP is always a device driver. 
For compatibility with mibopen() code 97 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough 98 * dummy module. 99 */ 100 101 static void icmp_addr_req(queue_t *q, mblk_t *mp); 102 static void icmp_bind(queue_t *q, mblk_t *mp); 103 static void icmp_bind_proto(queue_t *q); 104 static void icmp_bind_result(conn_t *, mblk_t *); 105 static void icmp_bind_ack(conn_t *, mblk_t *mp); 106 static void icmp_bind_error(conn_t *, mblk_t *mp); 107 static int icmp_build_hdrs(icmp_t *icmp); 108 static void icmp_capability_req(queue_t *q, mblk_t *mp); 109 static int icmp_close(queue_t *q); 110 static void icmp_connect(queue_t *q, mblk_t *mp); 111 static void icmp_disconnect(queue_t *q, mblk_t *mp); 112 static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 113 int sys_error); 114 static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 115 t_scalar_t t_error, int sys_error); 116 static void icmp_icmp_error(queue_t *q, mblk_t *mp); 117 static void icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 118 static void icmp_info_req(queue_t *q, mblk_t *mp); 119 static void icmp_input(void *, mblk_t *, void *); 120 static mblk_t *icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, 121 t_scalar_t addr_length, in_port_t); 122 static int icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, 123 cred_t *credp, boolean_t isv6); 124 static int icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 125 cred_t *credp); 126 static int icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 127 cred_t *credp); 128 static void icmp_output(queue_t *q, mblk_t *mp); 129 static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, 130 int *errorp, void *thisdg_attrs); 131 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 132 int icmp_opt_set(queue_t *q, uint_t optset_context, 133 int level, int name, uint_t inlen, 134 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 135 void *thisdg_attrs, cred_t *cr, mblk_t *mblk); 
136 int icmp_opt_get(queue_t *q, int level, int name, 137 uchar_t *ptr); 138 static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 139 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt); 140 static int icmp_param_set(queue_t *q, mblk_t *mp, char *value, 141 caddr_t cp, cred_t *cr); 142 static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 143 uchar_t *ptr, int len); 144 static int icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 145 cred_t *cr); 146 static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 147 static void icmp_unbind(queue_t *q, mblk_t *mp); 148 static void icmp_wput(queue_t *q, mblk_t *mp); 149 static void icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, 150 t_scalar_t tudr_optlen); 151 static void icmp_wput_other(queue_t *q, mblk_t *mp); 152 static void icmp_wput_iocdata(queue_t *q, mblk_t *mp); 153 static void icmp_wput_restricted(queue_t *q, mblk_t *mp); 154 155 static void *rawip_stack_init(netstackid_t stackid, netstack_t *ns); 156 static void rawip_stack_fini(netstackid_t stackid, void *arg); 157 158 static void *rawip_kstat_init(netstackid_t stackid); 159 static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp); 160 static int rawip_kstat_update(kstat_t *kp, int rw); 161 162 163 static struct module_info icmp_mod_info = { 164 5707, "icmp", 1, INFPSZ, 512, 128 165 }; 166 167 /* 168 * Entry points for ICMP as a device. 169 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices. 
170 */ 171 static struct qinit icmprinitv4 = { 172 NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info 173 }; 174 175 static struct qinit icmprinitv6 = { 176 NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info 177 }; 178 179 static struct qinit icmpwinit = { 180 (pfi_t)icmp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &icmp_mod_info 181 }; 182 183 /* For AF_INET aka /dev/icmp */ 184 struct streamtab icmpinfov4 = { 185 &icmprinitv4, &icmpwinit 186 }; 187 188 /* For AF_INET6 aka /dev/icmp6 */ 189 struct streamtab icmpinfov6 = { 190 &icmprinitv6, &icmpwinit 191 }; 192 193 static sin_t sin_null; /* Zero address for quick clears */ 194 static sin6_t sin6_null; /* Zero address for quick clears */ 195 196 /* Default structure copied into T_INFO_ACK messages */ 197 static struct T_info_ack icmp_g_t_info_ack = { 198 T_INFO_ACK, 199 IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */ 200 T_INVALID, /* ETSDU_size. icmp does not support expedited data. */ 201 T_INVALID, /* CDATA_size. icmp does not support connect data. */ 202 T_INVALID, /* DDATA_size. icmp does not support disconnect data. */ 203 0, /* ADDR_size - filled in later. */ 204 0, /* OPT_size - not initialized here */ 205 IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */ 206 T_CLTS, /* SERV_type. icmp supports connection-less. */ 207 TS_UNBND, /* CURRENT_state. This is set from icmp_state. */ 208 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 209 }; 210 211 /* 212 * Table of ND variables supported by icmp. These are loaded into is_nd 213 * when the stack instance is created. 214 * All of these are alterable, within the min/max values given, at run time. 
215 */ 216 static icmpparam_t icmp_param_arr[] = { 217 /* min max value name */ 218 { 0, 128, 32, "icmp_wroff_extra" }, 219 { 1, 255, 255, "icmp_ipv4_ttl" }, 220 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"}, 221 { 0, 1, 1, "icmp_bsd_compat" }, 222 { 4096, 65536, 8192, "icmp_xmit_hiwat"}, 223 { 0, 65536, 1024, "icmp_xmit_lowat"}, 224 { 4096, 65536, 8192, "icmp_recv_hiwat"}, 225 { 65536, 1024*1024*1024, 256*1024, "icmp_max_buf"}, 226 }; 227 #define is_wroff_extra is_param_arr[0].icmp_param_value 228 #define is_ipv4_ttl is_param_arr[1].icmp_param_value 229 #define is_ipv6_hoplimit is_param_arr[2].icmp_param_value 230 #define is_bsd_compat is_param_arr[3].icmp_param_value 231 #define is_xmit_hiwat is_param_arr[4].icmp_param_value 232 #define is_xmit_lowat is_param_arr[5].icmp_param_value 233 #define is_recv_hiwat is_param_arr[6].icmp_param_value 234 #define is_max_buf is_param_arr[7].icmp_param_value 235 236 /* 237 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 238 * passed to icmp_wput. 239 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP 240 * protocol type placed in the message following the address. A T_BIND_ACK 241 * message is returned by ip_bind_v4/v6. 
242 */ 243 static void 244 icmp_bind(queue_t *q, mblk_t *mp) 245 { 246 sin_t *sin; 247 sin6_t *sin6; 248 mblk_t *mp1; 249 struct T_bind_req *tbr; 250 icmp_t *icmp; 251 conn_t *connp = Q_TO_CONN(q); 252 253 icmp = connp->conn_icmp; 254 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 255 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 256 "icmp_bind: bad req, len %u", 257 (uint_t)(mp->b_wptr - mp->b_rptr)); 258 icmp_err_ack(q, mp, TPROTO, 0); 259 return; 260 } 261 if (icmp->icmp_state != TS_UNBND) { 262 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 263 "icmp_bind: bad state, %d", icmp->icmp_state); 264 icmp_err_ack(q, mp, TOUTSTATE, 0); 265 return; 266 } 267 /* 268 * Reallocate the message to make sure we have enough room for an 269 * address and the protocol type. 270 */ 271 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 272 if (!mp1) { 273 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 274 return; 275 } 276 mp = mp1; 277 tbr = (struct T_bind_req *)mp->b_rptr; 278 switch (tbr->ADDR_length) { 279 case 0: /* Generic request */ 280 tbr->ADDR_offset = sizeof (struct T_bind_req); 281 if (icmp->icmp_family == AF_INET) { 282 tbr->ADDR_length = sizeof (sin_t); 283 sin = (sin_t *)&tbr[1]; 284 *sin = sin_null; 285 sin->sin_family = AF_INET; 286 mp->b_wptr = (uchar_t *)&sin[1]; 287 } else { 288 ASSERT(icmp->icmp_family == AF_INET6); 289 tbr->ADDR_length = sizeof (sin6_t); 290 sin6 = (sin6_t *)&tbr[1]; 291 *sin6 = sin6_null; 292 sin6->sin6_family = AF_INET6; 293 mp->b_wptr = (uchar_t *)&sin6[1]; 294 } 295 break; 296 case sizeof (sin_t): /* Complete IP address */ 297 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 298 sizeof (sin_t)); 299 if (sin == NULL || !OK_32PTR((char *)sin)) { 300 icmp_err_ack(q, mp, TSYSERR, EINVAL); 301 return; 302 } 303 if (icmp->icmp_family != AF_INET || 304 sin->sin_family != AF_INET) { 305 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 306 return; 307 } 308 break; 309 case sizeof (sin6_t): /* Complete IP address */ 310 sin6 = (sin6_t 
*)mi_offset_param(mp, tbr->ADDR_offset, 311 sizeof (sin6_t)); 312 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 313 icmp_err_ack(q, mp, TSYSERR, EINVAL); 314 return; 315 } 316 if (icmp->icmp_family != AF_INET6 || 317 sin6->sin6_family != AF_INET6) { 318 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 319 return; 320 } 321 /* No support for mapped addresses on raw sockets */ 322 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 323 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 324 return; 325 } 326 break; 327 default: 328 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 329 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 330 icmp_err_ack(q, mp, TBADADDR, 0); 331 return; 332 } 333 334 /* 335 * The state must be TS_UNBND. TPI mandates that users must send 336 * TPI primitives only 1 at a time and wait for the response before 337 * sending the next primitive. 338 */ 339 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 340 if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) { 341 rw_exit(&icmp->icmp_rwlock); 342 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 343 "icmp_bind: bad state, %d", icmp->icmp_state); 344 icmp_err_ack(q, mp, TOUTSTATE, 0); 345 return; 346 } 347 348 icmp->icmp_pending_op = tbr->PRIM_type; 349 350 /* 351 * Copy the source address into our icmp structure. This address 352 * may still be zero; if so, ip will fill in the correct address 353 * each time an outbound packet is passed to it. 354 * If we are binding to a broadcast or multicast address then 355 * icmp_bind_ack will clear the source address when it receives 356 * the T_BIND_ACK. 
357 */ 358 icmp->icmp_state = TS_IDLE; 359 360 if (icmp->icmp_family == AF_INET) { 361 ASSERT(sin != NULL); 362 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 363 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 364 &icmp->icmp_v6src); 365 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 366 icmp->icmp_ip_snd_options_len; 367 icmp->icmp_bound_v6src = icmp->icmp_v6src; 368 } else { 369 int error; 370 371 ASSERT(sin6 != NULL); 372 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 373 icmp->icmp_v6src = sin6->sin6_addr; 374 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 375 icmp->icmp_bound_v6src = icmp->icmp_v6src; 376 377 /* Rebuild the header template */ 378 error = icmp_build_hdrs(icmp); 379 if (error != 0) { 380 icmp->icmp_pending_op = -1; 381 rw_exit(&icmp->icmp_rwlock); 382 icmp_err_ack(q, mp, TSYSERR, error); 383 return; 384 } 385 } 386 /* 387 * Place protocol type in the O_T_BIND_REQ/T_BIND_REQ following 388 * the address. 389 */ 390 *mp->b_wptr++ = icmp->icmp_proto; 391 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 392 /* 393 * Append a request for an IRE if src not 0 (INADDR_ANY) 394 */ 395 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 396 if (!mp->b_cont) { 397 icmp->icmp_pending_op = -1; 398 rw_exit(&icmp->icmp_rwlock); 399 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 400 return; 401 } 402 mp->b_cont->b_wptr += sizeof (ire_t); 403 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 404 } 405 rw_exit(&icmp->icmp_rwlock); 406 407 /* Pass the O_T_BIND_REQ/T_BIND_REQ to ip. */ 408 if (icmp->icmp_family == AF_INET6) 409 mp = ip_bind_v6(q, mp, connp, NULL); 410 else 411 mp = ip_bind_v4(q, mp, connp); 412 413 /* The above return NULL if the bind needs to be deferred */ 414 if (mp != NULL) 415 icmp_bind_result(connp, mp); 416 else 417 CONN_INC_REF(connp); 418 } 419 420 /* 421 * Send message to IP to just bind to the protocol. 
422 */ 423 static void 424 icmp_bind_proto(queue_t *q) 425 { 426 mblk_t *mp; 427 struct T_bind_req *tbr; 428 icmp_t *icmp; 429 conn_t *connp = Q_TO_CONN(q); 430 431 icmp = connp->conn_icmp; 432 433 mp = allocb(sizeof (struct T_bind_req) + sizeof (sin6_t) + 1, 434 BPRI_MED); 435 if (!mp) { 436 return; 437 } 438 mp->b_datap->db_type = M_PROTO; 439 tbr = (struct T_bind_req *)mp->b_rptr; 440 tbr->PRIM_type = O_T_BIND_REQ; /* change to T_BIND_REQ ? */ 441 tbr->ADDR_offset = sizeof (struct T_bind_req); 442 443 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 444 if (icmp->icmp_ipversion == IPV4_VERSION) { 445 sin_t *sin; 446 447 tbr->ADDR_length = sizeof (sin_t); 448 sin = (sin_t *)&tbr[1]; 449 *sin = sin_null; 450 sin->sin_family = AF_INET; 451 mp->b_wptr = (uchar_t *)&sin[1]; 452 } else { 453 sin6_t *sin6; 454 455 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 456 tbr->ADDR_length = sizeof (sin6_t); 457 sin6 = (sin6_t *)&tbr[1]; 458 *sin6 = sin6_null; 459 sin6->sin6_family = AF_INET6; 460 mp->b_wptr = (uchar_t *)&sin6[1]; 461 } 462 463 /* Place protocol type in the O_T_BIND_REQ following the address. */ 464 *mp->b_wptr++ = icmp->icmp_proto; 465 rw_exit(&icmp->icmp_rwlock); 466 467 /* Pass the O_T_BIND_REQ to ip. */ 468 if (icmp->icmp_family == AF_INET6) 469 mp = ip_bind_v6(q, mp, connp, NULL); 470 else 471 mp = ip_bind_v4(q, mp, connp); 472 473 /* The above return NULL if the bind needs to be deferred */ 474 if (mp != NULL) 475 icmp_bind_result(connp, mp); 476 else 477 CONN_INC_REF(connp); 478 } 479 480 /* 481 * This is called from ip_wput_nondata to handle the results of a 482 * deferred RAWIP bind. It is called once the bind has been completed. 483 */ 484 void 485 rawip_resume_bind(conn_t *connp, mblk_t *mp) 486 { 487 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 488 489 icmp_bind_result(connp, mp); 490 491 CONN_OPER_PENDING_DONE(connp); 492 } 493 494 /* 495 * This routine handles each T_CONN_REQ message passed to icmp. 
It 496 * associates a default destination address with the stream. 497 * 498 * This routine sends down a T_BIND_REQ to IP with the following mblks: 499 * T_BIND_REQ - specifying local and remote address. 500 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 501 * T_OK_ACK - for the T_CONN_REQ 502 * T_CONN_CON - to keep the TPI user happy 503 * 504 * The connect completes in icmp_bind_result. 505 * When a T_BIND_ACK is received information is extracted from the IRE 506 * and the two appended messages are sent to the TPI user. 507 * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 508 * convert it to an error ack for the appropriate primitive. 509 */ 510 static void 511 icmp_connect(queue_t *q, mblk_t *mp) 512 { 513 sin_t *sin; 514 sin6_t *sin6; 515 mblk_t *mp1, *mp2; 516 struct T_conn_req *tcr; 517 icmp_t *icmp; 518 ipaddr_t v4dst; 519 in6_addr_t v6dst; 520 uint32_t flowinfo; 521 conn_t *connp = Q_TO_CONN(q); 522 523 icmp = connp->conn_icmp; 524 tcr = (struct T_conn_req *)mp->b_rptr; 525 /* Sanity checks */ 526 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 527 icmp_err_ack(q, mp, TPROTO, 0); 528 return; 529 } 530 531 if (tcr->OPT_length != 0) { 532 icmp_err_ack(q, mp, TBADOPT, 0); 533 return; 534 } 535 536 switch (tcr->DEST_length) { 537 default: 538 icmp_err_ack(q, mp, TBADADDR, 0); 539 return; 540 541 case sizeof (sin_t): 542 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 543 sizeof (sin_t)); 544 if (sin == NULL || !OK_32PTR((char *)sin)) { 545 icmp_err_ack(q, mp, TSYSERR, EINVAL); 546 return; 547 } 548 if (icmp->icmp_family != AF_INET || 549 sin->sin_family != AF_INET) { 550 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 551 return; 552 } 553 v4dst = sin->sin_addr.s_addr; 554 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 555 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 556 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 557 icmp->icmp_ip_snd_options_len; 558 break; 559 560 case sizeof (sin6_t): 561 sin6 = 
(sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 562 sizeof (sin6_t)); 563 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 564 icmp_err_ack(q, mp, TSYSERR, EINVAL); 565 return; 566 } 567 if (icmp->icmp_family != AF_INET6 || 568 sin6->sin6_family != AF_INET6) { 569 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 570 return; 571 } 572 /* No support for mapped addresses on raw sockets */ 573 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 574 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 575 return; 576 } 577 v6dst = sin6->sin6_addr; 578 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 579 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 580 flowinfo = sin6->sin6_flowinfo; 581 break; 582 } 583 if (icmp->icmp_ipversion == IPV4_VERSION) { 584 /* 585 * Interpret a zero destination to mean loopback. 586 * Update the T_CONN_REQ (sin/sin6) since it is used to 587 * generate the T_CONN_CON. 588 */ 589 if (v4dst == INADDR_ANY) { 590 v4dst = htonl(INADDR_LOOPBACK); 591 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 592 if (icmp->icmp_family == AF_INET) { 593 sin->sin_addr.s_addr = v4dst; 594 } else { 595 sin6->sin6_addr = v6dst; 596 } 597 } 598 icmp->icmp_v6dst = v6dst; 599 icmp->icmp_flowinfo = 0; 600 601 /* 602 * If the destination address is multicast and 603 * an outgoing multicast interface has been set, 604 * use the address of that interface as our 605 * source address if no source address has been set. 606 */ 607 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 608 CLASSD(v4dst) && 609 icmp->icmp_multicast_if_addr != INADDR_ANY) { 610 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 611 &icmp->icmp_v6src); 612 } 613 } else { 614 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 615 /* 616 * Interpret a zero destination to mean loopback. 617 * Update the T_CONN_REQ (sin/sin6) since it is used to 618 * generate the T_CONN_CON. 
619 */ 620 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 621 v6dst = ipv6_loopback; 622 sin6->sin6_addr = v6dst; 623 } 624 icmp->icmp_v6dst = v6dst; 625 icmp->icmp_flowinfo = flowinfo; 626 /* 627 * If the destination address is multicast and 628 * an outgoing multicast interface has been set, 629 * then the ip bind logic will pick the correct source 630 * address (i.e. matching the outgoing multicast interface). 631 */ 632 } 633 634 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 635 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 636 rw_exit(&icmp->icmp_rwlock); 637 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 638 "icmp_connect: bad state, %d", icmp->icmp_state); 639 icmp_err_ack(q, mp, TOUTSTATE, 0); 640 return; 641 } 642 icmp->icmp_pending_op = T_CONN_REQ; 643 644 if (icmp->icmp_state == TS_DATA_XFER) { 645 /* Already connected - clear out state */ 646 icmp->icmp_v6src = icmp->icmp_bound_v6src; 647 icmp->icmp_state = TS_IDLE; 648 } 649 650 /* 651 * Send down bind to IP to verify that there is a route 652 * and to determine the source address. 653 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 654 */ 655 if (icmp->icmp_family == AF_INET) { 656 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa_conn_t), 657 sin->sin_port); 658 } else { 659 ASSERT(icmp->icmp_family == AF_INET6); 660 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa6_conn_t), 661 sin6->sin6_port); 662 } 663 if (mp1 == NULL) { 664 icmp->icmp_pending_op = -1; 665 rw_exit(&icmp->icmp_rwlock); 666 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 667 return; 668 } 669 670 /* 671 * We also have to send a connection confirmation to 672 * keep TLI happy. Prepare it for icmp_bind_result. 
673 */ 674 if (icmp->icmp_family == AF_INET) { 675 mp2 = mi_tpi_conn_con(NULL, (char *)sin, sizeof (*sin), NULL, 676 0); 677 } else { 678 ASSERT(icmp->icmp_family == AF_INET6); 679 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, sizeof (*sin6), NULL, 680 0); 681 } 682 if (mp2 == NULL) { 683 freemsg(mp1); 684 icmp->icmp_pending_op = -1; 685 rw_exit(&icmp->icmp_rwlock); 686 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 687 return; 688 } 689 690 mp = mi_tpi_ok_ack_alloc(mp); 691 if (mp == NULL) { 692 /* Unable to reuse the T_CONN_REQ for the ack. */ 693 freemsg(mp2); 694 icmp->icmp_pending_op = -1; 695 rw_exit(&icmp->icmp_rwlock); 696 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 697 return; 698 } 699 700 icmp->icmp_state = TS_DATA_XFER; 701 rw_exit(&icmp->icmp_rwlock); 702 703 /* Hang onto the T_OK_ACK and T_CONN_CON for later. */ 704 linkb(mp1, mp); 705 linkb(mp1, mp2); 706 707 mblk_setcred(mp1, connp->conn_cred); 708 if (icmp->icmp_family == AF_INET) 709 mp1 = ip_bind_v4(q, mp1, connp); 710 else 711 mp1 = ip_bind_v6(q, mp1, connp, NULL); 712 713 /* The above return NULL if the bind needs to be deferred */ 714 if (mp1 != NULL) 715 icmp_bind_result(connp, mp1); 716 else 717 CONN_INC_REF(connp); 718 } 719 720 static void 721 icmp_close_free(conn_t *connp) 722 { 723 icmp_t *icmp = connp->conn_icmp; 724 725 /* If there are any options associated with the stream, free them. 
*/ 726 if (icmp->icmp_ip_snd_options != NULL) { 727 mi_free((char *)icmp->icmp_ip_snd_options); 728 icmp->icmp_ip_snd_options = NULL; 729 icmp->icmp_ip_snd_options_len = 0; 730 } 731 732 if (icmp->icmp_filter != NULL) { 733 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 734 icmp->icmp_filter = NULL; 735 } 736 /* Free memory associated with sticky options */ 737 if (icmp->icmp_sticky_hdrs_len != 0) { 738 kmem_free(icmp->icmp_sticky_hdrs, 739 icmp->icmp_sticky_hdrs_len); 740 icmp->icmp_sticky_hdrs = NULL; 741 icmp->icmp_sticky_hdrs_len = 0; 742 } 743 ip6_pkt_free(&icmp->icmp_sticky_ipp); 744 745 /* 746 * Clear any fields which the kmem_cache constructor clears. 747 * Only icmp_connp needs to be preserved. 748 * TBD: We should make this more efficient to avoid clearing 749 * everything. 750 */ 751 ASSERT(icmp->icmp_connp == connp); 752 bzero(icmp, sizeof (icmp_t)); 753 icmp->icmp_connp = connp; 754 } 755 756 static int 757 icmp_close(queue_t *q) 758 { 759 conn_t *connp = (conn_t *)q->q_ptr; 760 761 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 762 763 ip_quiesce_conn(connp); 764 765 qprocsoff(connp->conn_rq); 766 767 icmp_close_free(connp); 768 769 /* 770 * Now we are truly single threaded on this stream, and can 771 * delete the things hanging off the connp, and finally the connp. 772 * We removed this connp from the fanout list, it cannot be 773 * accessed thru the fanouts, and we already waited for the 774 * conn_ref to drop to 0. We are already in close, so 775 * there cannot be any other thread from the top. qprocsoff 776 * has completed, and service has completed or won't run in 777 * future. 778 */ 779 ASSERT(connp->conn_ref == 1); 780 781 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 782 783 connp->conn_ref--; 784 ipcl_conn_destroy(connp); 785 786 q->q_ptr = WR(q)->q_ptr = NULL; 787 return (0); 788 } 789 790 /* 791 * This routine handles each T_DISCON_REQ message passed to icmp 792 * as an indicating that ICMP is no longer connected. 
This results 793 * in sending a T_BIND_REQ to IP to restore the binding to just 794 * the local address. 795 * 796 * This routine sends down a T_BIND_REQ to IP with the following mblks: 797 * T_BIND_REQ - specifying just the local address. 798 * T_OK_ACK - for the T_DISCON_REQ 799 * 800 * The disconnect completes in icmp_bind_result. 801 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 802 * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 803 * convert it to an error ack for the appropriate primitive. 804 */ 805 static void 806 icmp_disconnect(queue_t *q, mblk_t *mp) 807 { 808 icmp_t *icmp; 809 mblk_t *mp1; 810 conn_t *connp = Q_TO_CONN(q); 811 812 icmp = connp->conn_icmp; 813 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 814 if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) { 815 rw_exit(&icmp->icmp_rwlock); 816 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 817 "icmp_disconnect: bad state, %d", icmp->icmp_state); 818 icmp_err_ack(q, mp, TOUTSTATE, 0); 819 return; 820 } 821 icmp->icmp_pending_op = T_DISCON_REQ; 822 icmp->icmp_v6src = icmp->icmp_bound_v6src; 823 icmp->icmp_state = TS_IDLE; 824 825 /* 826 * Send down bind to IP to remove the full binding and revert 827 * to the local address binding. 828 */ 829 if (icmp->icmp_family == AF_INET) { 830 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin_t), 0); 831 } else { 832 ASSERT(icmp->icmp_family == AF_INET6); 833 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin6_t), 0); 834 } 835 if (mp1 == NULL) { 836 icmp->icmp_pending_op = -1; 837 rw_exit(&icmp->icmp_rwlock); 838 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 839 return; 840 } 841 mp = mi_tpi_ok_ack_alloc(mp); 842 if (mp == NULL) { 843 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 844 icmp->icmp_pending_op = -1; 845 rw_exit(&icmp->icmp_rwlock); 846 icmp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 847 return; 848 } 849 850 if (icmp->icmp_family == AF_INET6) { 851 int error; 852 853 /* Rebuild the header template */ 854 error = icmp_build_hdrs(icmp); 855 if (error != 0) { 856 icmp->icmp_pending_op = -1; 857 rw_exit(&icmp->icmp_rwlock); 858 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 859 freemsg(mp1); 860 return; 861 } 862 } 863 864 rw_exit(&icmp->icmp_rwlock); 865 /* Append the T_OK_ACK to the T_BIND_REQ for icmp_bind_result */ 866 linkb(mp1, mp); 867 868 if (icmp->icmp_family == AF_INET6) 869 mp1 = ip_bind_v6(q, mp1, connp, NULL); 870 else 871 mp1 = ip_bind_v4(q, mp1, connp); 872 873 /* The above return NULL if the bind needs to be deferred */ 874 if (mp1 != NULL) 875 icmp_bind_result(connp, mp1); 876 else 877 CONN_INC_REF(connp); 878 } 879 880 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 881 static void 882 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 883 { 884 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 885 qreply(q, mp); 886 } 887 888 /* Shorthand to generate and send TPI error acks to our client */ 889 static void 890 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 891 t_scalar_t t_error, int sys_error) 892 { 893 struct T_error_ack *teackp; 894 895 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 896 M_PCPROTO, T_ERROR_ACK)) != NULL) { 897 teackp = (struct T_error_ack *)mp->b_rptr; 898 teackp->ERROR_prim = primitive; 899 teackp->TLI_error = t_error; 900 teackp->UNIX_error = sys_error; 901 qreply(q, mp); 902 } 903 } 904 905 /* 906 * icmp_icmp_error is called by icmp_input to process ICMP 907 * messages passed up by IP. 908 * Generates the appropriate T_UDERROR_IND for permanent 909 * (non-transient) errors. 910 * Assumes that IP has pulled up everything up to and including 911 * the ICMP header. 
912 */ 913 static void 914 icmp_icmp_error(queue_t *q, mblk_t *mp) 915 { 916 icmph_t *icmph; 917 ipha_t *ipha; 918 int iph_hdr_length; 919 sin_t sin; 920 sin6_t sin6; 921 mblk_t *mp1; 922 int error = 0; 923 icmp_t *icmp = Q_TO_ICMP(q); 924 925 ipha = (ipha_t *)mp->b_rptr; 926 927 ASSERT(OK_32PTR(mp->b_rptr)); 928 929 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 930 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 931 icmp_icmp_error_ipv6(q, mp); 932 return; 933 } 934 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 935 936 /* Skip past the outer IP and ICMP headers */ 937 iph_hdr_length = IPH_HDR_LENGTH(ipha); 938 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 939 ipha = (ipha_t *)&icmph[1]; 940 iph_hdr_length = IPH_HDR_LENGTH(ipha); 941 942 switch (icmph->icmph_type) { 943 case ICMP_DEST_UNREACHABLE: 944 switch (icmph->icmph_code) { 945 case ICMP_FRAGMENTATION_NEEDED: 946 /* 947 * IP has already adjusted the path MTU. 948 */ 949 break; 950 case ICMP_PORT_UNREACHABLE: 951 case ICMP_PROTOCOL_UNREACHABLE: 952 error = ECONNREFUSED; 953 break; 954 default: 955 /* Transient errors */ 956 break; 957 } 958 break; 959 default: 960 /* Transient errors */ 961 break; 962 } 963 if (error == 0) { 964 freemsg(mp); 965 return; 966 } 967 968 /* 969 * Deliver T_UDERROR_IND when the application has asked for it. 970 * The socket layer enables this automatically when connected. 
971 */ 972 if (!icmp->icmp_dgram_errind) { 973 freemsg(mp); 974 return; 975 } 976 977 switch (icmp->icmp_family) { 978 case AF_INET: 979 sin = sin_null; 980 sin.sin_family = AF_INET; 981 sin.sin_addr.s_addr = ipha->ipha_dst; 982 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 983 error); 984 break; 985 case AF_INET6: 986 sin6 = sin6_null; 987 sin6.sin6_family = AF_INET6; 988 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 989 990 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 991 NULL, 0, error); 992 break; 993 } 994 if (mp1) 995 putnext(q, mp1); 996 freemsg(mp); 997 } 998 999 /* 1000 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 1001 * for IPv6 packets. 1002 * Send permanent (non-transient) errors upstream. 1003 * Assumes that IP has pulled up all the extension headers as well 1004 * as the ICMPv6 header. 1005 */ 1006 static void 1007 icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 1008 { 1009 icmp6_t *icmp6; 1010 ip6_t *ip6h, *outer_ip6h; 1011 uint16_t iph_hdr_length; 1012 uint8_t *nexthdrp; 1013 sin6_t sin6; 1014 mblk_t *mp1; 1015 int error = 0; 1016 icmp_t *icmp = Q_TO_ICMP(q); 1017 1018 outer_ip6h = (ip6_t *)mp->b_rptr; 1019 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1020 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1021 else 1022 iph_hdr_length = IPV6_HDR_LEN; 1023 1024 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1025 ip6h = (ip6_t *)&icmp6[1]; 1026 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1027 freemsg(mp); 1028 return; 1029 } 1030 1031 switch (icmp6->icmp6_type) { 1032 case ICMP6_DST_UNREACH: 1033 switch (icmp6->icmp6_code) { 1034 case ICMP6_DST_UNREACH_NOPORT: 1035 error = ECONNREFUSED; 1036 break; 1037 case ICMP6_DST_UNREACH_ADMIN: 1038 case ICMP6_DST_UNREACH_NOROUTE: 1039 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1040 case ICMP6_DST_UNREACH_ADDR: 1041 /* Transient errors */ 1042 break; 1043 default: 1044 break; 1045 } 1046 break; 1047 case ICMP6_PACKET_TOO_BIG: { 
1048 struct T_unitdata_ind *tudi; 1049 struct T_opthdr *toh; 1050 size_t udi_size; 1051 mblk_t *newmp; 1052 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1053 sizeof (struct ip6_mtuinfo); 1054 sin6_t *sin6; 1055 struct ip6_mtuinfo *mtuinfo; 1056 1057 /* 1058 * If the application has requested to receive path mtu 1059 * information, send up an empty message containing an 1060 * IPV6_PATHMTU ancillary data item. 1061 */ 1062 if (!icmp->icmp_ipv6_recvpathmtu) 1063 break; 1064 1065 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1066 opt_length; 1067 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1068 BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors); 1069 break; 1070 } 1071 1072 /* 1073 * newmp->b_cont is left to NULL on purpose. This is an 1074 * empty message containing only ancillary data. 1075 */ 1076 newmp->b_datap->db_type = M_PROTO; 1077 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1078 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1079 tudi->PRIM_type = T_UNITDATA_IND; 1080 tudi->SRC_length = sizeof (sin6_t); 1081 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1082 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1083 tudi->OPT_length = opt_length; 1084 1085 sin6 = (sin6_t *)&tudi[1]; 1086 bzero(sin6, sizeof (sin6_t)); 1087 sin6->sin6_family = AF_INET6; 1088 sin6->sin6_addr = icmp->icmp_v6dst; 1089 1090 toh = (struct T_opthdr *)&sin6[1]; 1091 toh->level = IPPROTO_IPV6; 1092 toh->name = IPV6_PATHMTU; 1093 toh->len = opt_length; 1094 toh->status = 0; 1095 1096 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1097 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1098 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1099 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1100 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1101 /* 1102 * We've consumed everything we need from the original 1103 * message. Free it, then send our empty message. 
1104 */ 1105 freemsg(mp); 1106 putnext(q, newmp); 1107 return; 1108 } 1109 case ICMP6_TIME_EXCEEDED: 1110 /* Transient errors */ 1111 break; 1112 case ICMP6_PARAM_PROB: 1113 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1114 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1115 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1116 (uchar_t *)nexthdrp) { 1117 error = ECONNREFUSED; 1118 break; 1119 } 1120 break; 1121 } 1122 if (error == 0) { 1123 freemsg(mp); 1124 return; 1125 } 1126 1127 /* 1128 * Deliver T_UDERROR_IND when the application has asked for it. 1129 * The socket layer enables this automatically when connected. 1130 */ 1131 if (!icmp->icmp_dgram_errind) { 1132 freemsg(mp); 1133 return; 1134 } 1135 1136 sin6 = sin6_null; 1137 sin6.sin6_family = AF_INET6; 1138 sin6.sin6_addr = ip6h->ip6_dst; 1139 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1140 1141 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 1142 error); 1143 if (mp1) 1144 putnext(q, mp1); 1145 freemsg(mp); 1146 } 1147 1148 /* 1149 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1150 * The local address is filled in if endpoint is bound. The remote address 1151 * is filled in if remote address has been precified ("connected endpoint") 1152 * (The concept of connected CLTS sockets is alien to published TPI 1153 * but we support it anyway). 
1154 */ 1155 static void 1156 icmp_addr_req(queue_t *q, mblk_t *mp) 1157 { 1158 icmp_t *icmp = Q_TO_ICMP(q); 1159 mblk_t *ackmp; 1160 struct T_addr_ack *taa; 1161 1162 /* Make it large enough for worst case */ 1163 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1164 2 * sizeof (sin6_t), 1); 1165 if (ackmp == NULL) { 1166 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1167 return; 1168 } 1169 taa = (struct T_addr_ack *)ackmp->b_rptr; 1170 1171 bzero(taa, sizeof (struct T_addr_ack)); 1172 ackmp->b_wptr = (uchar_t *)&taa[1]; 1173 1174 taa->PRIM_type = T_ADDR_ACK; 1175 ackmp->b_datap->db_type = M_PCPROTO; 1176 rw_enter(&icmp->icmp_rwlock, RW_READER); 1177 /* 1178 * Note: Following code assumes 32 bit alignment of basic 1179 * data structures like sin_t and struct T_addr_ack. 1180 */ 1181 if (icmp->icmp_state != TS_UNBND) { 1182 /* 1183 * Fill in local address 1184 */ 1185 taa->LOCADDR_offset = sizeof (*taa); 1186 if (icmp->icmp_family == AF_INET) { 1187 sin_t *sin; 1188 1189 taa->LOCADDR_length = sizeof (sin_t); 1190 sin = (sin_t *)&taa[1]; 1191 /* Fill zeroes and then intialize non-zero fields */ 1192 *sin = sin_null; 1193 sin->sin_family = AF_INET; 1194 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1195 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1196 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1197 sin->sin_addr.s_addr); 1198 } else { 1199 /* 1200 * INADDR_ANY 1201 * icmp_v6src is not set, we might be bound to 1202 * broadcast/multicast. 
Use icmp_bound_v6src as 1203 * local address instead (that could 1204 * also still be INADDR_ANY) 1205 */ 1206 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1207 sin->sin_addr.s_addr); 1208 } 1209 ackmp->b_wptr = (uchar_t *)&sin[1]; 1210 } else { 1211 sin6_t *sin6; 1212 1213 ASSERT(icmp->icmp_family == AF_INET6); 1214 taa->LOCADDR_length = sizeof (sin6_t); 1215 sin6 = (sin6_t *)&taa[1]; 1216 /* Fill zeroes and then intialize non-zero fields */ 1217 *sin6 = sin6_null; 1218 sin6->sin6_family = AF_INET6; 1219 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1220 sin6->sin6_addr = icmp->icmp_v6src; 1221 } else { 1222 /* 1223 * UNSPECIFIED 1224 * icmp_v6src is not set, we might be bound to 1225 * broadcast/multicast. Use icmp_bound_v6src as 1226 * local address instead (that could 1227 * also still be UNSPECIFIED) 1228 */ 1229 sin6->sin6_addr = icmp->icmp_bound_v6src; 1230 } 1231 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1232 } 1233 } 1234 rw_exit(&icmp->icmp_rwlock); 1235 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1236 qreply(q, ackmp); 1237 } 1238 1239 static void 1240 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1241 { 1242 *tap = icmp_g_t_info_ack; 1243 1244 if (icmp->icmp_family == AF_INET6) 1245 tap->ADDR_size = sizeof (sin6_t); 1246 else 1247 tap->ADDR_size = sizeof (sin_t); 1248 tap->CURRENT_state = icmp->icmp_state; 1249 tap->OPT_size = icmp_max_optsize; 1250 } 1251 1252 /* 1253 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1254 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1255 * icmp_g_t_info_ack. The current state of the stream is copied from 1256 * icmp_state. 
1257 */ 1258 static void 1259 icmp_capability_req(queue_t *q, mblk_t *mp) 1260 { 1261 icmp_t *icmp = Q_TO_ICMP(q); 1262 t_uscalar_t cap_bits1; 1263 struct T_capability_ack *tcap; 1264 1265 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1266 1267 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1268 mp->b_datap->db_type, T_CAPABILITY_ACK); 1269 if (!mp) 1270 return; 1271 1272 tcap = (struct T_capability_ack *)mp->b_rptr; 1273 tcap->CAP_bits1 = 0; 1274 1275 if (cap_bits1 & TC1_INFO) { 1276 icmp_copy_info(&tcap->INFO_ack, icmp); 1277 tcap->CAP_bits1 |= TC1_INFO; 1278 } 1279 1280 qreply(q, mp); 1281 } 1282 1283 /* 1284 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1285 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1286 * The current state of the stream is copied from icmp_state. 1287 */ 1288 static void 1289 icmp_info_req(queue_t *q, mblk_t *mp) 1290 { 1291 icmp_t *icmp = Q_TO_ICMP(q); 1292 1293 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1294 T_INFO_ACK); 1295 if (!mp) 1296 return; 1297 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1298 qreply(q, mp); 1299 } 1300 1301 /* 1302 * IP recognizes seven kinds of bind requests: 1303 * 1304 * - A zero-length address binds only to the protocol number. 1305 * 1306 * - A 4-byte address is treated as a request to 1307 * validate that the address is a valid local IPv4 1308 * address, appropriate for an application to bind to. 1309 * IP does the verification, but does not make any note 1310 * of the address at this time. 1311 * 1312 * - A 16-byte address contains is treated as a request 1313 * to validate a local IPv6 address, as the 4-byte 1314 * address case above. 1315 * 1316 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1317 * use it for the inbound fanout of packets. 1318 * 1319 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1320 * use it for the inbound fanout of packets. 
1321 * 1322 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1323 * information consisting of local and remote addresses 1324 * and ports (unused for raw sockets). In this case, the addresses are both 1325 * validated as appropriate for this operation, and, if 1326 * so, the information is retained for use in the 1327 * inbound fanout. 1328 * 1329 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1330 * fanout information, like the 12-byte case above. 1331 * 1332 * IP will also fill in the IRE request mblk with information 1333 * regarding our peer. In all cases, we notify IP of our protocol 1334 * type by appending a single protocol byte to the bind request. 1335 */ 1336 static mblk_t * 1337 icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length, 1338 in_port_t fport) 1339 { 1340 char *cp; 1341 mblk_t *mp; 1342 struct T_bind_req *tbr; 1343 ipa_conn_t *ac; 1344 ipa6_conn_t *ac6; 1345 sin_t *sin; 1346 sin6_t *sin6; 1347 1348 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1349 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 1350 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1351 if (mp == NULL) 1352 return (NULL); 1353 mp->b_datap->db_type = M_PROTO; 1354 tbr = (struct T_bind_req *)mp->b_rptr; 1355 tbr->PRIM_type = bind_prim; 1356 tbr->ADDR_offset = sizeof (*tbr); 1357 tbr->CONIND_number = 0; 1358 tbr->ADDR_length = addr_length; 1359 cp = (char *)&tbr[1]; 1360 switch (addr_length) { 1361 case sizeof (ipa_conn_t): 1362 ASSERT(icmp->icmp_family == AF_INET); 1363 /* Append a request for an IRE */ 1364 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1365 if (mp->b_cont == NULL) { 1366 freemsg(mp); 1367 return (NULL); 1368 } 1369 mp->b_cont->b_wptr += sizeof (ire_t); 1370 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1371 1372 /* cp known to be 32 bit aligned */ 1373 ac = (ipa_conn_t *)cp; 1374 ac->ac_laddr = V4_PART_OF_V6(icmp->icmp_v6src); 1375 ac->ac_faddr = V4_PART_OF_V6(icmp->icmp_v6dst); 1376 
ac->ac_fport = fport; 1377 ac->ac_lport = 0; 1378 break; 1379 1380 case sizeof (ipa6_conn_t): 1381 ASSERT(icmp->icmp_family == AF_INET6); 1382 /* Append a request for an IRE */ 1383 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1384 if (mp->b_cont == NULL) { 1385 freemsg(mp); 1386 return (NULL); 1387 } 1388 mp->b_cont->b_wptr += sizeof (ire_t); 1389 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1390 1391 /* cp known to be 32 bit aligned */ 1392 ac6 = (ipa6_conn_t *)cp; 1393 ac6->ac6_laddr = icmp->icmp_v6src; 1394 ac6->ac6_faddr = icmp->icmp_v6dst; 1395 ac6->ac6_fport = fport; 1396 ac6->ac6_lport = 0; 1397 break; 1398 1399 case sizeof (sin_t): 1400 ASSERT(icmp->icmp_family == AF_INET); 1401 /* Append a request for an IRE */ 1402 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1403 if (!mp->b_cont) { 1404 freemsg(mp); 1405 return (NULL); 1406 } 1407 mp->b_cont->b_wptr += sizeof (ire_t); 1408 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1409 1410 sin = (sin_t *)cp; 1411 *sin = sin_null; 1412 sin->sin_family = AF_INET; 1413 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 1414 break; 1415 1416 case sizeof (sin6_t): 1417 ASSERT(icmp->icmp_family == AF_INET6); 1418 /* Append a request for an IRE */ 1419 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1420 if (!mp->b_cont) { 1421 freemsg(mp); 1422 return (NULL); 1423 } 1424 mp->b_cont->b_wptr += sizeof (ire_t); 1425 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1426 1427 sin6 = (sin6_t *)cp; 1428 *sin6 = sin6_null; 1429 sin6->sin6_family = AF_INET6; 1430 sin6->sin6_addr = icmp->icmp_bound_v6src; 1431 break; 1432 } 1433 /* Add protocol number to end */ 1434 cp[addr_length] = icmp->icmp_proto; 1435 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1436 return (mp); 1437 } 1438 1439 /* For /dev/icmp aka AF_INET open */ 1440 static int 1441 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1442 { 1443 return (icmp_open(q, devp, flag, sflag, credp, B_FALSE)); 1444 } 1445 1446 /* For 
/dev/icmp6 aka AF_INET6 open */ 1447 static int 1448 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1449 { 1450 return (icmp_open(q, devp, flag, sflag, credp, B_TRUE)); 1451 } 1452 1453 /* 1454 * This is the open routine for icmp. It allocates a icmp_t structure for 1455 * the stream and, on the first open of the module, creates an ND table. 1456 */ 1457 /*ARGSUSED2*/ 1458 static int 1459 icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1460 boolean_t isv6) 1461 { 1462 int err; 1463 icmp_t *icmp; 1464 conn_t *connp; 1465 dev_t conn_dev; 1466 zoneid_t zoneid; 1467 netstack_t *ns; 1468 icmp_stack_t *is; 1469 1470 /* If the stream is already open, return immediately. */ 1471 if (q->q_ptr != NULL) 1472 return (0); 1473 1474 if (sflag == MODOPEN) 1475 return (EINVAL); 1476 1477 ns = netstack_find_by_cred(credp); 1478 ASSERT(ns != NULL); 1479 is = ns->netstack_icmp; 1480 ASSERT(is != NULL); 1481 1482 /* 1483 * For exclusive stacks we set the zoneid to zero 1484 * to make ICMP operate as if in the global zone. 1485 */ 1486 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 1487 zoneid = GLOBAL_ZONEID; 1488 else 1489 zoneid = crgetzoneid(credp); 1490 1491 /* 1492 * Since ICMP is not used so heavily, allocating from the small 1493 * arena should be sufficient. 1494 */ 1495 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 1496 netstack_rele(ns); 1497 return (EBUSY); 1498 } 1499 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1500 1501 connp = ipcl_conn_create(IPCL_RAWIPCONN, KM_SLEEP, ns); 1502 connp->conn_dev = conn_dev; 1503 connp->conn_minor_arena = ip_minor_arena_sa; 1504 icmp = connp->conn_icmp; 1505 1506 /* 1507 * ipcl_conn_create did a netstack_hold. Undo the hold that was 1508 * done by netstack_find_by_cred() 1509 */ 1510 netstack_rele(ns); 1511 1512 /* 1513 * Initialize the icmp_t structure for this stream. 
1514 */ 1515 q->q_ptr = connp; 1516 WR(q)->q_ptr = connp; 1517 connp->conn_rq = q; 1518 connp->conn_wq = WR(q); 1519 1520 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1521 ASSERT(connp->conn_ulp == IPPROTO_ICMP); 1522 ASSERT(connp->conn_icmp == icmp); 1523 ASSERT(icmp->icmp_connp == connp); 1524 1525 /* Set the initial state of the stream and the privilege status. */ 1526 icmp->icmp_state = TS_UNBND; 1527 if (isv6) { 1528 icmp->icmp_ipversion = IPV6_VERSION; 1529 icmp->icmp_family = AF_INET6; 1530 connp->conn_ulp = IPPROTO_ICMPV6; 1531 /* May be changed by a SO_PROTOTYPE socket option. */ 1532 icmp->icmp_proto = IPPROTO_ICMPV6; 1533 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1534 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1535 icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; 1536 connp->conn_af_isv6 = B_TRUE; 1537 connp->conn_flags |= IPCL_ISV6; 1538 } else { 1539 icmp->icmp_ipversion = IPV4_VERSION; 1540 icmp->icmp_family = AF_INET; 1541 /* May be changed by a SO_PROTOTYPE socket option. */ 1542 icmp->icmp_proto = IPPROTO_ICMP; 1543 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1544 icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; 1545 connp->conn_af_isv6 = B_FALSE; 1546 connp->conn_flags &= ~IPCL_ISV6; 1547 } 1548 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1549 icmp->icmp_pending_op = -1; 1550 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1551 connp->conn_zoneid = zoneid; 1552 1553 /* 1554 * If the caller has the process-wide flag set, then default to MAC 1555 * exempt mode. This allows read-down to unlabeled hosts. 
1556 */ 1557 if (getpflags(NET_MAC_AWARE, credp) != 0) 1558 connp->conn_mac_exempt = B_TRUE; 1559 1560 connp->conn_ulp_labeled = is_system_labeled(); 1561 1562 icmp->icmp_is = is; 1563 1564 q->q_hiwat = is->is_recv_hiwat; 1565 WR(q)->q_hiwat = is->is_xmit_hiwat; 1566 WR(q)->q_lowat = is->is_xmit_lowat; 1567 1568 connp->conn_recv = icmp_input; 1569 crhold(credp); 1570 connp->conn_cred = credp; 1571 1572 mutex_enter(&connp->conn_lock); 1573 connp->conn_state_flags &= ~CONN_INCIPIENT; 1574 mutex_exit(&connp->conn_lock); 1575 1576 qprocson(q); 1577 1578 if (icmp->icmp_family == AF_INET6) { 1579 /* Build initial header template for transmit */ 1580 if ((err = icmp_build_hdrs(icmp)) != 0) { 1581 rw_exit(&icmp->icmp_rwlock); 1582 qprocsoff(q); 1583 ipcl_conn_destroy(connp); 1584 return (err); 1585 } 1586 } 1587 rw_exit(&icmp->icmp_rwlock); 1588 1589 /* Set the Stream head write offset. */ 1590 (void) mi_set_sth_wroff(q, 1591 icmp->icmp_max_hdr_len + is->is_wroff_extra); 1592 (void) mi_set_sth_hiwat(q, q->q_hiwat); 1593 1594 return (0); 1595 } 1596 1597 /* 1598 * Which ICMP options OK to set through T_UNITDATA_REQ... 
1599 */ 1600 /* ARGSUSED */ 1601 static boolean_t 1602 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1603 { 1604 return (B_TRUE); 1605 } 1606 1607 /* 1608 * This routine gets default values of certain options whose default 1609 * values are maintained by protcol specific code 1610 */ 1611 /* ARGSUSED */ 1612 int 1613 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1614 { 1615 icmp_t *icmp = Q_TO_ICMP(q); 1616 icmp_stack_t *is = icmp->icmp_is; 1617 int *i1 = (int *)ptr; 1618 1619 switch (level) { 1620 case IPPROTO_IP: 1621 switch (name) { 1622 case IP_MULTICAST_TTL: 1623 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1624 return (sizeof (uchar_t)); 1625 case IP_MULTICAST_LOOP: 1626 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1627 return (sizeof (uchar_t)); 1628 } 1629 break; 1630 case IPPROTO_IPV6: 1631 switch (name) { 1632 case IPV6_MULTICAST_HOPS: 1633 *i1 = IP_DEFAULT_MULTICAST_TTL; 1634 return (sizeof (int)); 1635 case IPV6_MULTICAST_LOOP: 1636 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1637 return (sizeof (int)); 1638 case IPV6_UNICAST_HOPS: 1639 *i1 = is->is_ipv6_hoplimit; 1640 return (sizeof (int)); 1641 } 1642 break; 1643 case IPPROTO_ICMPV6: 1644 switch (name) { 1645 case ICMP6_FILTER: 1646 /* Make it look like "pass all" */ 1647 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1648 return (sizeof (icmp6_filter_t)); 1649 } 1650 break; 1651 } 1652 return (-1); 1653 } 1654 1655 /* 1656 * This routine retrieves the current status of socket options. 1657 * It returns the size of the option retrieved. 
1658 */ 1659 int 1660 icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr) 1661 { 1662 conn_t *connp = Q_TO_CONN(q); 1663 icmp_t *icmp = connp->conn_icmp; 1664 icmp_stack_t *is = icmp->icmp_is; 1665 int *i1 = (int *)ptr; 1666 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1667 1668 switch (level) { 1669 case SOL_SOCKET: 1670 switch (name) { 1671 case SO_DEBUG: 1672 *i1 = icmp->icmp_debug; 1673 break; 1674 case SO_TYPE: 1675 *i1 = SOCK_RAW; 1676 break; 1677 case SO_PROTOTYPE: 1678 *i1 = icmp->icmp_proto; 1679 break; 1680 case SO_REUSEADDR: 1681 *i1 = icmp->icmp_reuseaddr; 1682 break; 1683 1684 /* 1685 * The following three items are available here, 1686 * but are only meaningful to IP. 1687 */ 1688 case SO_DONTROUTE: 1689 *i1 = icmp->icmp_dontroute; 1690 break; 1691 case SO_USELOOPBACK: 1692 *i1 = icmp->icmp_useloopback; 1693 break; 1694 case SO_BROADCAST: 1695 *i1 = icmp->icmp_broadcast; 1696 break; 1697 1698 case SO_SNDBUF: 1699 ASSERT(q->q_hiwat <= INT_MAX); 1700 *i1 = (int)q->q_hiwat; 1701 break; 1702 case SO_RCVBUF: 1703 ASSERT(RD(q)->q_hiwat <= INT_MAX); 1704 *i1 = (int)RD(q)->q_hiwat; 1705 break; 1706 case SO_DGRAM_ERRIND: 1707 *i1 = icmp->icmp_dgram_errind; 1708 break; 1709 case SO_TIMESTAMP: 1710 *i1 = icmp->icmp_timestamp; 1711 break; 1712 case SO_MAC_EXEMPT: 1713 *i1 = connp->conn_mac_exempt; 1714 break; 1715 case SO_DOMAIN: 1716 *i1 = icmp->icmp_family; 1717 break; 1718 1719 /* 1720 * Following four not meaningful for icmp 1721 * Action is same as "default" to which we fallthrough 1722 * so we keep them in comments. 1723 * case SO_LINGER: 1724 * case SO_KEEPALIVE: 1725 * case SO_OOBINLINE: 1726 * case SO_ALLZONES: 1727 */ 1728 default: 1729 return (-1); 1730 } 1731 break; 1732 case IPPROTO_IP: 1733 /* 1734 * Only allow IPv4 option processing on IPv4 sockets. 
1735 */ 1736 if (icmp->icmp_family != AF_INET) 1737 return (-1); 1738 1739 switch (name) { 1740 case IP_OPTIONS: 1741 case T_IP_OPTIONS: 1742 /* Options are passed up with each packet */ 1743 return (0); 1744 case IP_HDRINCL: 1745 *i1 = (int)icmp->icmp_hdrincl; 1746 break; 1747 case IP_TOS: 1748 case T_IP_TOS: 1749 *i1 = (int)icmp->icmp_type_of_service; 1750 break; 1751 case IP_TTL: 1752 *i1 = (int)icmp->icmp_ttl; 1753 break; 1754 case IP_MULTICAST_IF: 1755 /* 0 address if not set */ 1756 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1757 return (sizeof (ipaddr_t)); 1758 case IP_MULTICAST_TTL: 1759 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1760 return (sizeof (uchar_t)); 1761 case IP_MULTICAST_LOOP: 1762 *ptr = connp->conn_multicast_loop; 1763 return (sizeof (uint8_t)); 1764 case IP_BOUND_IF: 1765 /* Zero if not set */ 1766 *i1 = icmp->icmp_bound_if; 1767 break; /* goto sizeof (int) option return */ 1768 case IP_UNSPEC_SRC: 1769 *ptr = icmp->icmp_unspec_source; 1770 break; /* goto sizeof (int) option return */ 1771 case IP_BROADCAST_TTL: 1772 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 1773 return (sizeof (uchar_t)); 1774 case IP_RECVIF: 1775 *ptr = icmp->icmp_recvif; 1776 break; /* goto sizeof (int) option return */ 1777 case IP_RECVPKTINFO: 1778 /* 1779 * This also handles IP_PKTINFO. 1780 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1781 * Differentiation is based on the size of the argument 1782 * passed in. 1783 * This option is handled in IP which will return an 1784 * error for IP_PKTINFO as it's not supported as a 1785 * sticky option. 1786 */ 1787 return (-EINVAL); 1788 /* 1789 * Cannot "get" the value of following options 1790 * at this level. Action is same as "default" to 1791 * which we fallthrough so we keep them in comments. 
1792 * 1793 * case IP_ADD_MEMBERSHIP: 1794 * case IP_DROP_MEMBERSHIP: 1795 * case IP_BLOCK_SOURCE: 1796 * case IP_UNBLOCK_SOURCE: 1797 * case IP_ADD_SOURCE_MEMBERSHIP: 1798 * case IP_DROP_SOURCE_MEMBERSHIP: 1799 * case MCAST_JOIN_GROUP: 1800 * case MCAST_LEAVE_GROUP: 1801 * case MCAST_BLOCK_SOURCE: 1802 * case MCAST_UNBLOCK_SOURCE: 1803 * case MCAST_JOIN_SOURCE_GROUP: 1804 * case MCAST_LEAVE_SOURCE_GROUP: 1805 * case MRT_INIT: 1806 * case MRT_DONE: 1807 * case MRT_ADD_VIF: 1808 * case MRT_DEL_VIF: 1809 * case MRT_ADD_MFC: 1810 * case MRT_DEL_MFC: 1811 * case MRT_VERSION: 1812 * case MRT_ASSERT: 1813 * case IP_SEC_OPT: 1814 * case IP_DONTFAILOVER_IF: 1815 * case IP_NEXTHOP: 1816 */ 1817 default: 1818 return (-1); 1819 } 1820 break; 1821 case IPPROTO_IPV6: 1822 /* 1823 * Only allow IPv6 option processing on native IPv6 sockets. 1824 */ 1825 if (icmp->icmp_family != AF_INET6) 1826 return (-1); 1827 switch (name) { 1828 case IPV6_UNICAST_HOPS: 1829 *i1 = (unsigned int)icmp->icmp_ttl; 1830 break; 1831 case IPV6_MULTICAST_IF: 1832 /* 0 index if not set */ 1833 *i1 = icmp->icmp_multicast_if_index; 1834 break; 1835 case IPV6_MULTICAST_HOPS: 1836 *i1 = icmp->icmp_multicast_ttl; 1837 break; 1838 case IPV6_MULTICAST_LOOP: 1839 *i1 = connp->conn_multicast_loop; 1840 break; 1841 case IPV6_BOUND_IF: 1842 /* Zero if not set */ 1843 *i1 = icmp->icmp_bound_if; 1844 break; 1845 case IPV6_UNSPEC_SRC: 1846 *i1 = icmp->icmp_unspec_source; 1847 break; 1848 case IPV6_CHECKSUM: 1849 /* 1850 * Return offset or -1 if no checksum offset. 
1851 * Does not apply to IPPROTO_ICMPV6 1852 */ 1853 if (icmp->icmp_proto == IPPROTO_ICMPV6) 1854 return (-1); 1855 1856 if (icmp->icmp_raw_checksum) { 1857 *i1 = icmp->icmp_checksum_off; 1858 } else { 1859 *i1 = -1; 1860 } 1861 break; 1862 case IPV6_JOIN_GROUP: 1863 case IPV6_LEAVE_GROUP: 1864 case MCAST_JOIN_GROUP: 1865 case MCAST_LEAVE_GROUP: 1866 case MCAST_BLOCK_SOURCE: 1867 case MCAST_UNBLOCK_SOURCE: 1868 case MCAST_JOIN_SOURCE_GROUP: 1869 case MCAST_LEAVE_SOURCE_GROUP: 1870 /* cannot "get" the value for these */ 1871 return (-1); 1872 case IPV6_RECVPKTINFO: 1873 *i1 = icmp->icmp_ip_recvpktinfo; 1874 break; 1875 case IPV6_RECVTCLASS: 1876 *i1 = icmp->icmp_ipv6_recvtclass; 1877 break; 1878 case IPV6_RECVPATHMTU: 1879 *i1 = icmp->icmp_ipv6_recvpathmtu; 1880 break; 1881 case IPV6_V6ONLY: 1882 *i1 = 1; 1883 break; 1884 case IPV6_RECVHOPLIMIT: 1885 *i1 = icmp->icmp_ipv6_recvhoplimit; 1886 break; 1887 case IPV6_RECVHOPOPTS: 1888 *i1 = icmp->icmp_ipv6_recvhopopts; 1889 break; 1890 case IPV6_RECVDSTOPTS: 1891 *i1 = icmp->icmp_ipv6_recvdstopts; 1892 break; 1893 case _OLD_IPV6_RECVDSTOPTS: 1894 *i1 = icmp->icmp_old_ipv6_recvdstopts; 1895 break; 1896 case IPV6_RECVRTHDRDSTOPTS: 1897 *i1 = icmp->icmp_ipv6_recvrtdstopts; 1898 break; 1899 case IPV6_RECVRTHDR: 1900 *i1 = icmp->icmp_ipv6_recvrthdr; 1901 break; 1902 case IPV6_PKTINFO: { 1903 /* XXX assumes that caller has room for max size! 
*/ 1904 struct in6_pktinfo *pkti; 1905 1906 pkti = (struct in6_pktinfo *)ptr; 1907 if (ipp->ipp_fields & IPPF_IFINDEX) 1908 pkti->ipi6_ifindex = ipp->ipp_ifindex; 1909 else 1910 pkti->ipi6_ifindex = 0; 1911 if (ipp->ipp_fields & IPPF_ADDR) 1912 pkti->ipi6_addr = ipp->ipp_addr; 1913 else 1914 pkti->ipi6_addr = ipv6_all_zeros; 1915 return (sizeof (struct in6_pktinfo)); 1916 } 1917 case IPV6_NEXTHOP: { 1918 sin6_t *sin6 = (sin6_t *)ptr; 1919 1920 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 1921 return (0); 1922 *sin6 = sin6_null; 1923 sin6->sin6_family = AF_INET6; 1924 sin6->sin6_addr = ipp->ipp_nexthop; 1925 return (sizeof (sin6_t)); 1926 } 1927 case IPV6_HOPOPTS: 1928 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 1929 return (0); 1930 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 1931 return (0); 1932 bcopy((char *)ipp->ipp_hopopts + 1933 icmp->icmp_label_len_v6, ptr, 1934 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1935 if (icmp->icmp_label_len_v6 > 0) { 1936 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 1937 ptr[1] = (ipp->ipp_hopoptslen - 1938 icmp->icmp_label_len_v6 + 7) / 8 - 1; 1939 } 1940 return (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1941 case IPV6_RTHDRDSTOPTS: 1942 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 1943 return (0); 1944 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 1945 return (ipp->ipp_rtdstoptslen); 1946 case IPV6_RTHDR: 1947 if (!(ipp->ipp_fields & IPPF_RTHDR)) 1948 return (0); 1949 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 1950 return (ipp->ipp_rthdrlen); 1951 case IPV6_DSTOPTS: 1952 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 1953 return (0); 1954 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 1955 return (ipp->ipp_dstoptslen); 1956 case IPV6_PATHMTU: 1957 if (!(ipp->ipp_fields & IPPF_PATHMTU)) 1958 return (0); 1959 1960 return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0, 1961 (struct ip6_mtuinfo *)ptr, is->is_netstack)); 1962 case IPV6_TCLASS: 1963 if (ipp->ipp_fields & IPPF_TCLASS) 1964 *i1 = ipp->ipp_tclass; 1965 else 1966 *i1 = 
IPV6_FLOW_TCLASS( 1967 IPV6_DEFAULT_VERS_AND_FLOW); 1968 break; 1969 default: 1970 return (-1); 1971 } 1972 break; 1973 case IPPROTO_ICMPV6: 1974 /* 1975 * Only allow IPv6 option processing on native IPv6 sockets. 1976 */ 1977 if (icmp->icmp_family != AF_INET6) 1978 return (-1); 1979 1980 if (icmp->icmp_proto != IPPROTO_ICMPV6) 1981 return (-1); 1982 1983 switch (name) { 1984 case ICMP6_FILTER: 1985 if (icmp->icmp_filter == NULL) { 1986 /* Make it look like "pass all" */ 1987 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1988 } else { 1989 (void) bcopy(icmp->icmp_filter, ptr, 1990 sizeof (icmp6_filter_t)); 1991 } 1992 return (sizeof (icmp6_filter_t)); 1993 default: 1994 return (-1); 1995 } 1996 default: 1997 return (-1); 1998 } 1999 return (sizeof (int)); 2000 } 2001 2002 /* 2003 * This routine retrieves the current status of socket options. 2004 * It returns the size of the option retrieved. 2005 */ 2006 int 2007 icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 2008 { 2009 icmp_t *icmp = Q_TO_ICMP(q); 2010 int err; 2011 2012 rw_enter(&icmp->icmp_rwlock, RW_READER); 2013 err = icmp_opt_get_locked(q, level, name, ptr); 2014 rw_exit(&icmp->icmp_rwlock); 2015 return (err); 2016 } 2017 2018 2019 /* This routine sets socket options. */ 2020 /* ARGSUSED */ 2021 int 2022 icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name, 2023 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2024 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2025 { 2026 conn_t *connp = Q_TO_CONN(q); 2027 icmp_t *icmp = connp->conn_icmp; 2028 icmp_stack_t *is = icmp->icmp_is; 2029 int *i1 = (int *)invalp; 2030 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2031 boolean_t checkonly; 2032 int error; 2033 2034 switch (optset_context) { 2035 case SETFN_OPTCOM_CHECKONLY: 2036 checkonly = B_TRUE; 2037 /* 2038 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 2039 * inlen != 0 implies value supplied and 2040 * we have to "pretend" to set it. 
2041 * inlen == 0 implies that there is no 2042 * value part in T_CHECK request and just validation 2043 * done elsewhere should be enough, we just return here. 2044 */ 2045 if (inlen == 0) { 2046 *outlenp = 0; 2047 return (0); 2048 } 2049 break; 2050 case SETFN_OPTCOM_NEGOTIATE: 2051 checkonly = B_FALSE; 2052 break; 2053 case SETFN_UD_NEGOTIATE: 2054 case SETFN_CONN_NEGOTIATE: 2055 checkonly = B_FALSE; 2056 /* 2057 * Negotiating local and "association-related" options 2058 * through T_UNITDATA_REQ. 2059 * 2060 * Following routine can filter out ones we do not 2061 * want to be "set" this way. 2062 */ 2063 if (!icmp_opt_allow_udr_set(level, name)) { 2064 *outlenp = 0; 2065 return (EINVAL); 2066 } 2067 break; 2068 default: 2069 /* 2070 * We should never get here 2071 */ 2072 *outlenp = 0; 2073 return (EINVAL); 2074 } 2075 2076 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 2077 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 2078 2079 /* 2080 * For fixed length options, no sanity check 2081 * of passed in length is done. It is assumed *_optcom_req() 2082 * routines do the right thing. 
2083 */ 2084 2085 switch (level) { 2086 case SOL_SOCKET: 2087 switch (name) { 2088 case SO_DEBUG: 2089 if (!checkonly) 2090 icmp->icmp_debug = onoff; 2091 break; 2092 case SO_PROTOTYPE: 2093 if ((*i1 & 0xFF) != IPPROTO_ICMP && 2094 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 2095 secpolicy_net_rawaccess(cr) != 0) { 2096 *outlenp = 0; 2097 return (EACCES); 2098 } 2099 /* Can't use IPPROTO_RAW with IPv6 */ 2100 if ((*i1 & 0xFF) == IPPROTO_RAW && 2101 icmp->icmp_family == AF_INET6) { 2102 *outlenp = 0; 2103 return (EPROTONOSUPPORT); 2104 } 2105 if (checkonly) { 2106 /* T_CHECK case */ 2107 *(int *)outvalp = (*i1 & 0xFF); 2108 break; 2109 } 2110 icmp->icmp_proto = *i1 & 0xFF; 2111 if ((icmp->icmp_proto == IPPROTO_RAW || 2112 icmp->icmp_proto == IPPROTO_IGMP) && 2113 icmp->icmp_family == AF_INET) 2114 icmp->icmp_hdrincl = 1; 2115 else 2116 icmp->icmp_hdrincl = 0; 2117 2118 if (icmp->icmp_family == AF_INET6 && 2119 icmp->icmp_proto == IPPROTO_ICMPV6) { 2120 /* Set offset for icmp6_cksum */ 2121 icmp->icmp_raw_checksum = 0; 2122 icmp->icmp_checksum_off = 2; 2123 } 2124 if (icmp->icmp_proto == IPPROTO_UDP || 2125 icmp->icmp_proto == IPPROTO_TCP || 2126 icmp->icmp_proto == IPPROTO_SCTP) { 2127 icmp->icmp_no_tp_cksum = 1; 2128 icmp->icmp_sticky_ipp.ipp_fields |= 2129 IPPF_NO_CKSUM; 2130 } else { 2131 icmp->icmp_no_tp_cksum = 0; 2132 icmp->icmp_sticky_ipp.ipp_fields &= 2133 ~IPPF_NO_CKSUM; 2134 } 2135 2136 if (icmp->icmp_filter != NULL && 2137 icmp->icmp_proto != IPPROTO_ICMPV6) { 2138 kmem_free(icmp->icmp_filter, 2139 sizeof (icmp6_filter_t)); 2140 icmp->icmp_filter = NULL; 2141 } 2142 2143 /* Rebuild the header template */ 2144 error = icmp_build_hdrs(icmp); 2145 if (error != 0) { 2146 *outlenp = 0; 2147 return (error); 2148 } 2149 2150 /* 2151 * For SCTP, we don't use icmp_bind_proto() for 2152 * raw socket binding. Note that we do not need 2153 * to set *outlenp. 2154 * FIXME: how does SCTP work? 
2155 */ 2156 if (icmp->icmp_proto == IPPROTO_SCTP) 2157 return (0); 2158 2159 *outlenp = sizeof (int); 2160 *(int *)outvalp = *i1 & 0xFF; 2161 2162 /* Drop lock across the bind operation */ 2163 rw_exit(&icmp->icmp_rwlock); 2164 icmp_bind_proto(q); 2165 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2166 return (0); 2167 case SO_REUSEADDR: 2168 if (!checkonly) 2169 icmp->icmp_reuseaddr = onoff; 2170 break; 2171 2172 /* 2173 * The following three items are available here, 2174 * but are only meaningful to IP. 2175 */ 2176 case SO_DONTROUTE: 2177 if (!checkonly) 2178 icmp->icmp_dontroute = onoff; 2179 break; 2180 case SO_USELOOPBACK: 2181 if (!checkonly) 2182 icmp->icmp_useloopback = onoff; 2183 break; 2184 case SO_BROADCAST: 2185 if (!checkonly) 2186 icmp->icmp_broadcast = onoff; 2187 break; 2188 2189 case SO_SNDBUF: 2190 if (*i1 > is->is_max_buf) { 2191 *outlenp = 0; 2192 return (ENOBUFS); 2193 } 2194 if (!checkonly) { 2195 q->q_hiwat = *i1; 2196 } 2197 break; 2198 case SO_RCVBUF: 2199 if (*i1 > is->is_max_buf) { 2200 *outlenp = 0; 2201 return (ENOBUFS); 2202 } 2203 if (!checkonly) { 2204 RD(q)->q_hiwat = *i1; 2205 rw_exit(&icmp->icmp_rwlock); 2206 (void) mi_set_sth_hiwat(RD(q), *i1); 2207 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2208 } 2209 break; 2210 case SO_DGRAM_ERRIND: 2211 if (!checkonly) 2212 icmp->icmp_dgram_errind = onoff; 2213 break; 2214 case SO_ALLZONES: 2215 /* 2216 * "soft" error (negative) 2217 * option not handled at this level 2218 * Note: Do not modify *outlenp 2219 */ 2220 return (-EINVAL); 2221 case SO_TIMESTAMP: 2222 if (!checkonly) { 2223 icmp->icmp_timestamp = onoff; 2224 } 2225 break; 2226 case SO_MAC_EXEMPT: 2227 /* 2228 * "soft" error (negative) 2229 * option not handled at this level 2230 * Note: Do not modify *outlenp 2231 */ 2232 return (-EINVAL); 2233 /* 2234 * Following three not meaningful for icmp 2235 * Action is same as "default" so we keep them 2236 * in comments. 
2237 * case SO_LINGER: 2238 * case SO_KEEPALIVE: 2239 * case SO_OOBINLINE: 2240 */ 2241 default: 2242 *outlenp = 0; 2243 return (EINVAL); 2244 } 2245 break; 2246 case IPPROTO_IP: 2247 /* 2248 * Only allow IPv4 option processing on IPv4 sockets. 2249 */ 2250 if (icmp->icmp_family != AF_INET) { 2251 *outlenp = 0; 2252 return (ENOPROTOOPT); 2253 } 2254 switch (name) { 2255 case IP_OPTIONS: 2256 case T_IP_OPTIONS: 2257 /* Save options for use by IP. */ 2258 if ((inlen & 0x3) || 2259 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2260 *outlenp = 0; 2261 return (EINVAL); 2262 } 2263 if (checkonly) 2264 break; 2265 2266 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2267 &icmp->icmp_ip_snd_options_len, 2268 icmp->icmp_label_len, invalp, inlen)) { 2269 *outlenp = 0; 2270 return (ENOMEM); 2271 } 2272 2273 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2274 icmp->icmp_ip_snd_options_len; 2275 rw_exit(&icmp->icmp_rwlock); 2276 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2277 is->is_wroff_extra); 2278 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2279 break; 2280 case IP_HDRINCL: 2281 if (!checkonly) 2282 icmp->icmp_hdrincl = onoff; 2283 break; 2284 case IP_TOS: 2285 case T_IP_TOS: 2286 if (!checkonly) { 2287 icmp->icmp_type_of_service = (uint8_t)*i1; 2288 } 2289 break; 2290 case IP_TTL: 2291 if (!checkonly) { 2292 icmp->icmp_ttl = (uint8_t)*i1; 2293 } 2294 break; 2295 case IP_MULTICAST_IF: 2296 /* 2297 * TODO should check OPTMGMT reply and undo this if 2298 * there is an error. 2299 */ 2300 if (!checkonly) 2301 icmp->icmp_multicast_if_addr = *i1; 2302 break; 2303 case IP_MULTICAST_TTL: 2304 if (!checkonly) 2305 icmp->icmp_multicast_ttl = *invalp; 2306 break; 2307 case IP_MULTICAST_LOOP: 2308 if (!checkonly) { 2309 connp->conn_multicast_loop = 2310 (*invalp == 0) ? 
0 : 1; 2311 } 2312 break; 2313 case IP_BOUND_IF: 2314 if (!checkonly) 2315 icmp->icmp_bound_if = *i1; 2316 break; 2317 case IP_UNSPEC_SRC: 2318 if (!checkonly) 2319 icmp->icmp_unspec_source = onoff; 2320 break; 2321 case IP_BROADCAST_TTL: 2322 if (!checkonly) 2323 connp->conn_broadcast_ttl = *invalp; 2324 break; 2325 case IP_RECVIF: 2326 if (!checkonly) 2327 icmp->icmp_recvif = onoff; 2328 /* 2329 * pass to ip 2330 */ 2331 return (-EINVAL); 2332 case IP_PKTINFO: { 2333 /* 2334 * This also handles IP_RECVPKTINFO. 2335 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2336 * Differentiation is based on the size of the argument 2337 * passed in. 2338 */ 2339 struct in_pktinfo *pktinfop; 2340 ip4_pkt_t *attr_pktinfop; 2341 2342 if (checkonly) 2343 break; 2344 2345 if (inlen == sizeof (int)) { 2346 /* 2347 * This is IP_RECVPKTINFO option. 2348 * Keep a local copy of wether this option is 2349 * set or not and pass it down to IP for 2350 * processing. 2351 */ 2352 icmp->icmp_ip_recvpktinfo = onoff; 2353 return (-EINVAL); 2354 } 2355 2356 2357 if (inlen != sizeof (struct in_pktinfo)) 2358 return (EINVAL); 2359 2360 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2361 == NULL) { 2362 /* 2363 * sticky option is not supported 2364 */ 2365 return (EINVAL); 2366 } 2367 2368 pktinfop = (struct in_pktinfo *)invalp; 2369 2370 /* 2371 * Atleast one of the values should be specified 2372 */ 2373 if (pktinfop->ipi_ifindex == 0 && 2374 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2375 return (EINVAL); 2376 } 2377 2378 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2379 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2380 } 2381 break; 2382 case IP_ADD_MEMBERSHIP: 2383 case IP_DROP_MEMBERSHIP: 2384 case IP_BLOCK_SOURCE: 2385 case IP_UNBLOCK_SOURCE: 2386 case IP_ADD_SOURCE_MEMBERSHIP: 2387 case IP_DROP_SOURCE_MEMBERSHIP: 2388 case MCAST_JOIN_GROUP: 2389 case MCAST_LEAVE_GROUP: 2390 case MCAST_BLOCK_SOURCE: 2391 case MCAST_UNBLOCK_SOURCE: 2392 case 
MCAST_JOIN_SOURCE_GROUP: 2393 case MCAST_LEAVE_SOURCE_GROUP: 2394 case MRT_INIT: 2395 case MRT_DONE: 2396 case MRT_ADD_VIF: 2397 case MRT_DEL_VIF: 2398 case MRT_ADD_MFC: 2399 case MRT_DEL_MFC: 2400 case MRT_VERSION: 2401 case MRT_ASSERT: 2402 case IP_SEC_OPT: 2403 case IP_DONTFAILOVER_IF: 2404 case IP_NEXTHOP: 2405 /* 2406 * "soft" error (negative) 2407 * option not handled at this level 2408 * Note: Do not modify *outlenp 2409 */ 2410 return (-EINVAL); 2411 default: 2412 *outlenp = 0; 2413 return (EINVAL); 2414 } 2415 break; 2416 case IPPROTO_IPV6: { 2417 ip6_pkt_t *ipp; 2418 boolean_t sticky; 2419 2420 if (icmp->icmp_family != AF_INET6) { 2421 *outlenp = 0; 2422 return (ENOPROTOOPT); 2423 } 2424 /* 2425 * Deal with both sticky options and ancillary data 2426 */ 2427 if (thisdg_attrs == NULL) { 2428 /* sticky options, or none */ 2429 ipp = &icmp->icmp_sticky_ipp; 2430 sticky = B_TRUE; 2431 } else { 2432 /* ancillary data */ 2433 ipp = (ip6_pkt_t *)thisdg_attrs; 2434 sticky = B_FALSE; 2435 } 2436 2437 switch (name) { 2438 case IPV6_MULTICAST_IF: 2439 if (!checkonly) 2440 icmp->icmp_multicast_if_index = *i1; 2441 break; 2442 case IPV6_UNICAST_HOPS: 2443 /* -1 means use default */ 2444 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2445 *outlenp = 0; 2446 return (EINVAL); 2447 } 2448 if (!checkonly) { 2449 if (*i1 == -1) { 2450 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2451 is->is_ipv6_hoplimit; 2452 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2453 /* Pass modified value to IP. 
*/ 2454 *i1 = ipp->ipp_hoplimit; 2455 } else { 2456 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2457 (uint8_t)*i1; 2458 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2459 } 2460 /* Rebuild the header template */ 2461 error = icmp_build_hdrs(icmp); 2462 if (error != 0) { 2463 *outlenp = 0; 2464 return (error); 2465 } 2466 } 2467 break; 2468 case IPV6_MULTICAST_HOPS: 2469 /* -1 means use default */ 2470 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2471 *outlenp = 0; 2472 return (EINVAL); 2473 } 2474 if (!checkonly) { 2475 if (*i1 == -1) { 2476 icmp->icmp_multicast_ttl = 2477 ipp->ipp_multicast_hops = 2478 IP_DEFAULT_MULTICAST_TTL; 2479 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2480 /* Pass modified value to IP. */ 2481 *i1 = icmp->icmp_multicast_ttl; 2482 } else { 2483 icmp->icmp_multicast_ttl = 2484 ipp->ipp_multicast_hops = 2485 (uint8_t)*i1; 2486 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2487 } 2488 } 2489 break; 2490 case IPV6_MULTICAST_LOOP: 2491 if (*i1 != 0 && *i1 != 1) { 2492 *outlenp = 0; 2493 return (EINVAL); 2494 } 2495 if (!checkonly) 2496 connp->conn_multicast_loop = *i1; 2497 break; 2498 case IPV6_CHECKSUM: 2499 /* 2500 * Integer offset into the user data of where the 2501 * checksum is located. 2502 * Offset of -1 disables option. 2503 * Does not apply to IPPROTO_ICMPV6. 
2504 */ 2505 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2506 *outlenp = 0; 2507 return (EINVAL); 2508 } 2509 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2510 /* Negative or not 16 bit aligned offset */ 2511 *outlenp = 0; 2512 return (EINVAL); 2513 } 2514 if (checkonly) 2515 break; 2516 2517 if (*i1 == -1) { 2518 icmp->icmp_raw_checksum = 0; 2519 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2520 } else { 2521 icmp->icmp_raw_checksum = 1; 2522 icmp->icmp_checksum_off = *i1; 2523 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2524 } 2525 /* Rebuild the header template */ 2526 error = icmp_build_hdrs(icmp); 2527 if (error != 0) { 2528 *outlenp = 0; 2529 return (error); 2530 } 2531 break; 2532 case IPV6_JOIN_GROUP: 2533 case IPV6_LEAVE_GROUP: 2534 case MCAST_JOIN_GROUP: 2535 case MCAST_LEAVE_GROUP: 2536 case MCAST_BLOCK_SOURCE: 2537 case MCAST_UNBLOCK_SOURCE: 2538 case MCAST_JOIN_SOURCE_GROUP: 2539 case MCAST_LEAVE_SOURCE_GROUP: 2540 /* 2541 * "soft" error (negative) 2542 * option not handled at this level 2543 * Note: Do not modify *outlenp 2544 */ 2545 return (-EINVAL); 2546 case IPV6_BOUND_IF: 2547 if (!checkonly) 2548 icmp->icmp_bound_if = *i1; 2549 break; 2550 case IPV6_UNSPEC_SRC: 2551 if (!checkonly) 2552 icmp->icmp_unspec_source = onoff; 2553 break; 2554 case IPV6_RECVTCLASS: 2555 if (!checkonly) 2556 icmp->icmp_ipv6_recvtclass = onoff; 2557 break; 2558 /* 2559 * Set boolean switches for ancillary data delivery 2560 */ 2561 case IPV6_RECVPKTINFO: 2562 if (!checkonly) 2563 icmp->icmp_ip_recvpktinfo = onoff; 2564 break; 2565 case IPV6_RECVPATHMTU: 2566 if (!checkonly) 2567 icmp->icmp_ipv6_recvpathmtu = onoff; 2568 break; 2569 case IPV6_RECVHOPLIMIT: 2570 if (!checkonly) 2571 icmp->icmp_ipv6_recvhoplimit = onoff; 2572 break; 2573 case IPV6_RECVHOPOPTS: 2574 if (!checkonly) 2575 icmp->icmp_ipv6_recvhopopts = onoff; 2576 break; 2577 case IPV6_RECVDSTOPTS: 2578 if (!checkonly) 2579 icmp->icmp_ipv6_recvdstopts = onoff; 2580 break; 2581 case _OLD_IPV6_RECVDSTOPTS: 2582 
if (!checkonly) 2583 icmp->icmp_old_ipv6_recvdstopts = onoff; 2584 break; 2585 case IPV6_RECVRTHDRDSTOPTS: 2586 if (!checkonly) 2587 icmp->icmp_ipv6_recvrtdstopts = onoff; 2588 break; 2589 case IPV6_RECVRTHDR: 2590 if (!checkonly) 2591 icmp->icmp_ipv6_recvrthdr = onoff; 2592 break; 2593 /* 2594 * Set sticky options or ancillary data. 2595 * If sticky options, (re)build any extension headers 2596 * that might be needed as a result. 2597 */ 2598 case IPV6_PKTINFO: 2599 /* 2600 * The source address and ifindex are verified 2601 * in ip_opt_set(). For ancillary data the 2602 * source address is checked in ip_wput_v6. 2603 */ 2604 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2605 return (EINVAL); 2606 if (checkonly) 2607 break; 2608 2609 if (inlen == 0) { 2610 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2611 ipp->ipp_sticky_ignored |= 2612 (IPPF_IFINDEX|IPPF_ADDR); 2613 } else { 2614 struct in6_pktinfo *pkti; 2615 2616 pkti = (struct in6_pktinfo *)invalp; 2617 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2618 ipp->ipp_addr = pkti->ipi6_addr; 2619 if (ipp->ipp_ifindex != 0) 2620 ipp->ipp_fields |= IPPF_IFINDEX; 2621 else 2622 ipp->ipp_fields &= ~IPPF_IFINDEX; 2623 if (!IN6_IS_ADDR_UNSPECIFIED( 2624 &ipp->ipp_addr)) 2625 ipp->ipp_fields |= IPPF_ADDR; 2626 else 2627 ipp->ipp_fields &= ~IPPF_ADDR; 2628 } 2629 if (sticky) { 2630 error = icmp_build_hdrs(icmp); 2631 if (error != 0) 2632 return (error); 2633 } 2634 break; 2635 case IPV6_HOPLIMIT: 2636 /* This option can only be used as ancillary data. 
*/ 2637 if (sticky) 2638 return (EINVAL); 2639 if (inlen != 0 && inlen != sizeof (int)) 2640 return (EINVAL); 2641 if (checkonly) 2642 break; 2643 2644 if (inlen == 0) { 2645 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2646 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2647 } else { 2648 if (*i1 > 255 || *i1 < -1) 2649 return (EINVAL); 2650 if (*i1 == -1) 2651 ipp->ipp_hoplimit = 2652 is->is_ipv6_hoplimit; 2653 else 2654 ipp->ipp_hoplimit = *i1; 2655 ipp->ipp_fields |= IPPF_HOPLIMIT; 2656 } 2657 break; 2658 case IPV6_TCLASS: 2659 /* 2660 * IPV6_RECVTCLASS accepts -1 as use kernel default 2661 * and [0, 255] as the actualy traffic class. 2662 */ 2663 if (inlen != 0 && inlen != sizeof (int)) 2664 return (EINVAL); 2665 if (checkonly) 2666 break; 2667 2668 if (inlen == 0) { 2669 ipp->ipp_fields &= ~IPPF_TCLASS; 2670 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2671 } else { 2672 if (*i1 >= 256 || *i1 < -1) 2673 return (EINVAL); 2674 if (*i1 == -1) { 2675 ipp->ipp_tclass = 2676 IPV6_FLOW_TCLASS( 2677 IPV6_DEFAULT_VERS_AND_FLOW); 2678 } else { 2679 ipp->ipp_tclass = *i1; 2680 } 2681 ipp->ipp_fields |= IPPF_TCLASS; 2682 } 2683 if (sticky) { 2684 error = icmp_build_hdrs(icmp); 2685 if (error != 0) 2686 return (error); 2687 } 2688 break; 2689 case IPV6_NEXTHOP: 2690 /* 2691 * IP will verify that the nexthop is reachable 2692 * and fail for sticky options. 
2693 */ 2694 if (inlen != 0 && inlen != sizeof (sin6_t)) 2695 return (EINVAL); 2696 if (checkonly) 2697 break; 2698 2699 if (inlen == 0) { 2700 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2701 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2702 } else { 2703 sin6_t *sin6 = (sin6_t *)invalp; 2704 2705 if (sin6->sin6_family != AF_INET6) 2706 return (EAFNOSUPPORT); 2707 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 2708 return (EADDRNOTAVAIL); 2709 ipp->ipp_nexthop = sin6->sin6_addr; 2710 if (!IN6_IS_ADDR_UNSPECIFIED( 2711 &ipp->ipp_nexthop)) 2712 ipp->ipp_fields |= IPPF_NEXTHOP; 2713 else 2714 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2715 } 2716 if (sticky) { 2717 error = icmp_build_hdrs(icmp); 2718 if (error != 0) 2719 return (error); 2720 } 2721 break; 2722 case IPV6_HOPOPTS: { 2723 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2724 /* 2725 * Sanity checks - minimum size, size a multiple of 2726 * eight bytes, and matching size passed in. 2727 */ 2728 if (inlen != 0 && 2729 inlen != (8 * (hopts->ip6h_len + 1))) 2730 return (EINVAL); 2731 2732 if (checkonly) 2733 break; 2734 error = optcom_pkt_set(invalp, inlen, sticky, 2735 (uchar_t **)&ipp->ipp_hopopts, 2736 &ipp->ipp_hopoptslen, 2737 sticky ? icmp->icmp_label_len_v6 : 0); 2738 if (error != 0) 2739 return (error); 2740 if (ipp->ipp_hopoptslen == 0) { 2741 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2742 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2743 } else { 2744 ipp->ipp_fields |= IPPF_HOPOPTS; 2745 } 2746 if (sticky) { 2747 error = icmp_build_hdrs(icmp); 2748 if (error != 0) 2749 return (error); 2750 } 2751 break; 2752 } 2753 case IPV6_RTHDRDSTOPTS: { 2754 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2755 2756 /* 2757 * Sanity checks - minimum size, size a multiple of 2758 * eight bytes, and matching size passed in. 
2759 */ 2760 if (inlen != 0 && 2761 inlen != (8 * (dopts->ip6d_len + 1))) 2762 return (EINVAL); 2763 2764 if (checkonly) 2765 break; 2766 2767 if (inlen == 0) { 2768 if (sticky && 2769 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2770 kmem_free(ipp->ipp_rtdstopts, 2771 ipp->ipp_rtdstoptslen); 2772 ipp->ipp_rtdstopts = NULL; 2773 ipp->ipp_rtdstoptslen = 0; 2774 } 2775 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2776 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2777 } else { 2778 error = optcom_pkt_set(invalp, inlen, sticky, 2779 (uchar_t **)&ipp->ipp_rtdstopts, 2780 &ipp->ipp_rtdstoptslen, 0); 2781 if (error != 0) 2782 return (error); 2783 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2784 } 2785 if (sticky) { 2786 error = icmp_build_hdrs(icmp); 2787 if (error != 0) 2788 return (error); 2789 } 2790 break; 2791 } 2792 case IPV6_DSTOPTS: { 2793 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2794 2795 /* 2796 * Sanity checks - minimum size, size a multiple of 2797 * eight bytes, and matching size passed in. 2798 */ 2799 if (inlen != 0 && 2800 inlen != (8 * (dopts->ip6d_len + 1))) 2801 return (EINVAL); 2802 2803 if (checkonly) 2804 break; 2805 2806 if (inlen == 0) { 2807 if (sticky && 2808 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2809 kmem_free(ipp->ipp_dstopts, 2810 ipp->ipp_dstoptslen); 2811 ipp->ipp_dstopts = NULL; 2812 ipp->ipp_dstoptslen = 0; 2813 } 2814 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2815 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2816 } else { 2817 error = optcom_pkt_set(invalp, inlen, sticky, 2818 (uchar_t **)&ipp->ipp_dstopts, 2819 &ipp->ipp_dstoptslen, 0); 2820 if (error != 0) 2821 return (error); 2822 ipp->ipp_fields |= IPPF_DSTOPTS; 2823 } 2824 if (sticky) { 2825 error = icmp_build_hdrs(icmp); 2826 if (error != 0) 2827 return (error); 2828 } 2829 break; 2830 } 2831 case IPV6_RTHDR: { 2832 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2833 2834 /* 2835 * Sanity checks - minimum size, size a multiple of 2836 * eight bytes, and matching size passed in. 
2837 */ 2838 if (inlen != 0 && 2839 inlen != (8 * (rt->ip6r_len + 1))) 2840 return (EINVAL); 2841 2842 if (checkonly) 2843 break; 2844 2845 if (inlen == 0) { 2846 if (sticky && 2847 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2848 kmem_free(ipp->ipp_rthdr, 2849 ipp->ipp_rthdrlen); 2850 ipp->ipp_rthdr = NULL; 2851 ipp->ipp_rthdrlen = 0; 2852 } 2853 ipp->ipp_fields &= ~IPPF_RTHDR; 2854 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2855 } else { 2856 error = optcom_pkt_set(invalp, inlen, sticky, 2857 (uchar_t **)&ipp->ipp_rthdr, 2858 &ipp->ipp_rthdrlen, 0); 2859 if (error != 0) 2860 return (error); 2861 ipp->ipp_fields |= IPPF_RTHDR; 2862 } 2863 if (sticky) { 2864 error = icmp_build_hdrs(icmp); 2865 if (error != 0) 2866 return (error); 2867 } 2868 break; 2869 } 2870 2871 case IPV6_DONTFRAG: 2872 if (checkonly) 2873 break; 2874 2875 if (onoff) { 2876 ipp->ipp_fields |= IPPF_DONTFRAG; 2877 } else { 2878 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2879 } 2880 break; 2881 2882 case IPV6_USE_MIN_MTU: 2883 if (inlen != sizeof (int)) 2884 return (EINVAL); 2885 2886 if (*i1 < -1 || *i1 > 1) 2887 return (EINVAL); 2888 2889 if (checkonly) 2890 break; 2891 2892 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 2893 ipp->ipp_use_min_mtu = *i1; 2894 break; 2895 2896 /* 2897 * This option can't be set. Its only returned via 2898 * getsockopt() or ancillary data. 2899 */ 2900 case IPV6_PATHMTU: 2901 return (EINVAL); 2902 2903 case IPV6_BOUND_PIF: 2904 case IPV6_SEC_OPT: 2905 case IPV6_DONTFAILOVER_IF: 2906 case IPV6_SRC_PREFERENCES: 2907 case IPV6_V6ONLY: 2908 /* Handled at IP level */ 2909 return (-EINVAL); 2910 default: 2911 *outlenp = 0; 2912 return (EINVAL); 2913 } 2914 break; 2915 } /* end IPPROTO_IPV6 */ 2916 2917 case IPPROTO_ICMPV6: 2918 /* 2919 * Only allow IPv6 option processing on IPv6 sockets. 
2920 */ 2921 if (icmp->icmp_family != AF_INET6) { 2922 *outlenp = 0; 2923 return (ENOPROTOOPT); 2924 } 2925 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2926 *outlenp = 0; 2927 return (ENOPROTOOPT); 2928 } 2929 switch (name) { 2930 case ICMP6_FILTER: 2931 if (!checkonly) { 2932 if ((inlen != 0) && 2933 (inlen != sizeof (icmp6_filter_t))) 2934 return (EINVAL); 2935 2936 if (inlen == 0) { 2937 if (icmp->icmp_filter != NULL) { 2938 kmem_free(icmp->icmp_filter, 2939 sizeof (icmp6_filter_t)); 2940 icmp->icmp_filter = NULL; 2941 } 2942 } else { 2943 if (icmp->icmp_filter == NULL) { 2944 icmp->icmp_filter = kmem_alloc( 2945 sizeof (icmp6_filter_t), 2946 KM_NOSLEEP); 2947 if (icmp->icmp_filter == NULL) { 2948 *outlenp = 0; 2949 return (ENOBUFS); 2950 } 2951 } 2952 (void) bcopy(invalp, icmp->icmp_filter, 2953 inlen); 2954 } 2955 } 2956 break; 2957 2958 default: 2959 *outlenp = 0; 2960 return (EINVAL); 2961 } 2962 break; 2963 default: 2964 *outlenp = 0; 2965 return (EINVAL); 2966 } 2967 /* 2968 * Common case of OK return with outval same as inval. 2969 */ 2970 if (invalp != outvalp) { 2971 /* don't trust bcopy for identical src/dst */ 2972 (void) bcopy(invalp, outvalp, inlen); 2973 } 2974 *outlenp = inlen; 2975 return (0); 2976 } 2977 /* This routine sets socket options. */ 2978 /* ARGSUSED */ 2979 int 2980 icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, 2981 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2982 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2983 { 2984 icmp_t *icmp; 2985 int err; 2986 2987 icmp = Q_TO_ICMP(q); 2988 2989 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2990 err = icmp_opt_set_locked(q, optset_context, level, name, inlen, invalp, 2991 outlenp, outvalp, thisdg_attrs, cr, mblk); 2992 rw_exit(&icmp->icmp_rwlock); 2993 return (err); 2994 } 2995 2996 /* 2997 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 2998 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 
2999 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 3000 * headers. 3001 * Returns failure if can't allocate memory. 3002 */ 3003 static int 3004 icmp_build_hdrs(icmp_t *icmp) 3005 { 3006 icmp_stack_t *is = icmp->icmp_is; 3007 uchar_t *hdrs; 3008 uint_t hdrs_len; 3009 ip6_t *ip6h; 3010 ip6i_t *ip6i; 3011 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 3012 3013 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 3014 hdrs_len = ip_total_hdrs_len_v6(ipp); 3015 ASSERT(hdrs_len != 0); 3016 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 3017 /* Need to reallocate */ 3018 if (hdrs_len != 0) { 3019 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3020 if (hdrs == NULL) 3021 return (ENOMEM); 3022 } else { 3023 hdrs = NULL; 3024 } 3025 if (icmp->icmp_sticky_hdrs_len != 0) { 3026 kmem_free(icmp->icmp_sticky_hdrs, 3027 icmp->icmp_sticky_hdrs_len); 3028 } 3029 icmp->icmp_sticky_hdrs = hdrs; 3030 icmp->icmp_sticky_hdrs_len = hdrs_len; 3031 } 3032 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 3033 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 3034 3035 /* Set header fields not in ipp */ 3036 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3037 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 3038 ip6h = (ip6_t *)&ip6i[1]; 3039 3040 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 3041 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 3042 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 3043 } 3044 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 3045 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 3046 } 3047 } else { 3048 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 3049 } 3050 3051 if (!(ipp->ipp_fields & IPPF_ADDR)) 3052 ip6h->ip6_src = icmp->icmp_v6src; 3053 3054 /* Try to get everything in a single mblk */ 3055 if (hdrs_len > icmp->icmp_max_hdr_len) { 3056 icmp->icmp_max_hdr_len = hdrs_len; 3057 rw_exit(&icmp->icmp_rwlock); 3058 (void) mi_set_sth_wroff(icmp->icmp_connp->conn_rq, 3059 icmp->icmp_max_hdr_len + is->is_wroff_extra); 3060 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3061 } 3062 return (0); 3063 } 3064 3065 /* 3066 * 
This routine retrieves the value of an ND variable in a icmpparam_t 3067 * structure. It is called through nd_getset when a user reads the 3068 * variable. 3069 */ 3070 /* ARGSUSED */ 3071 static int 3072 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3073 { 3074 icmpparam_t *icmppa = (icmpparam_t *)cp; 3075 3076 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 3077 return (0); 3078 } 3079 3080 /* 3081 * Walk through the param array specified registering each element with the 3082 * named dispatch (ND) handler. 3083 */ 3084 static boolean_t 3085 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) 3086 { 3087 for (; cnt-- > 0; icmppa++) { 3088 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 3089 if (!nd_load(ndp, icmppa->icmp_param_name, 3090 icmp_param_get, icmp_param_set, 3091 (caddr_t)icmppa)) { 3092 nd_free(ndp); 3093 return (B_FALSE); 3094 } 3095 } 3096 } 3097 if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL, 3098 NULL)) { 3099 nd_free(ndp); 3100 return (B_FALSE); 3101 } 3102 return (B_TRUE); 3103 } 3104 3105 /* This routine sets an ND variable in a icmpparam_t structure. */ 3106 /* ARGSUSED */ 3107 static int 3108 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3109 { 3110 long new_value; 3111 icmpparam_t *icmppa = (icmpparam_t *)cp; 3112 3113 /* 3114 * Fail the request if the new value does not lie within the 3115 * required bounds. 
3116 */ 3117 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3118 new_value < icmppa->icmp_param_min || 3119 new_value > icmppa->icmp_param_max) { 3120 return (EINVAL); 3121 } 3122 /* Set the new value */ 3123 icmppa->icmp_param_value = new_value; 3124 return (0); 3125 } 3126 /*ARGSUSED2*/ 3127 static void 3128 icmp_input(void *arg1, mblk_t *mp, void *arg2) 3129 { 3130 conn_t *connp = (conn_t *)arg1; 3131 struct T_unitdata_ind *tudi; 3132 uchar_t *rptr; 3133 icmp_t *icmp; 3134 icmp_stack_t *is; 3135 sin_t *sin; 3136 sin6_t *sin6; 3137 ip6_t *ip6h; 3138 ip6i_t *ip6i; 3139 mblk_t *mp1; 3140 int hdr_len; 3141 ipha_t *ipha; 3142 int udi_size; /* Size of T_unitdata_ind */ 3143 uint_t ipvers; 3144 ip6_pkt_t ipp; 3145 uint8_t nexthdr; 3146 ip_pktinfo_t *pinfo = NULL; 3147 mblk_t *options_mp = NULL; 3148 uint_t icmp_opt = 0; 3149 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 3150 uint_t hopstrip; 3151 3152 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 3153 3154 icmp = connp->conn_icmp; 3155 is = icmp->icmp_is; 3156 rptr = mp->b_rptr; 3157 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3158 ASSERT(OK_32PTR(rptr)); 3159 3160 /* 3161 * IP should have prepended the options data in an M_CTL 3162 * Check M_CTL "type" to make sure are not here bcos of 3163 * a valid ICMP message 3164 */ 3165 if (DB_TYPE(mp) == M_CTL) { 3166 /* 3167 * FIXME: does IP still do this? 3168 * IP sends up the IPSEC_IN message for handling IPSEC 3169 * policy at the TCP level. We don't need it here. 3170 */ 3171 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 3172 mp1 = mp->b_cont; 3173 freeb(mp); 3174 mp = mp1; 3175 rptr = mp->b_rptr; 3176 } else if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3177 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3178 IN_PKTINFO) { 3179 /* 3180 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3181 * has been prepended to the packet by IP. 
We need to 3182 * extract the mblk and adjust the rptr 3183 */ 3184 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3185 options_mp = mp; 3186 mp = mp->b_cont; 3187 rptr = mp->b_rptr; 3188 } else { 3189 /* 3190 * ICMP messages. 3191 */ 3192 icmp_icmp_error(connp->conn_rq, mp); 3193 return; 3194 } 3195 } 3196 3197 /* 3198 * Discard message if it is misaligned or smaller than the IP header. 3199 */ 3200 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3201 freemsg(mp); 3202 if (options_mp != NULL) 3203 freeb(options_mp); 3204 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3205 return; 3206 } 3207 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3208 3209 /* Handle M_DATA messages containing IP packets messages */ 3210 if (ipvers == IPV4_VERSION) { 3211 /* 3212 * Special case where IP attaches 3213 * the IRE needs to be handled so that we don't send up 3214 * IRE to the user land. 3215 */ 3216 ipha = (ipha_t *)rptr; 3217 hdr_len = IPH_HDR_LENGTH(ipha); 3218 3219 if (ipha->ipha_protocol == IPPROTO_TCP) { 3220 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3221 3222 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3223 TH_SYN) && mp->b_cont != NULL) { 3224 mp1 = mp->b_cont; 3225 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3226 freeb(mp1); 3227 mp->b_cont = NULL; 3228 } 3229 } 3230 } 3231 if (is->is_bsd_compat) { 3232 ushort_t len; 3233 len = ntohs(ipha->ipha_length); 3234 3235 if (mp->b_datap->db_ref > 1) { 3236 /* 3237 * Allocate a new IP header so that we can 3238 * modify ipha_length. 
3239 */ 3240 mblk_t *mp1; 3241 3242 mp1 = allocb(hdr_len, BPRI_MED); 3243 if (!mp1) { 3244 freemsg(mp); 3245 if (options_mp != NULL) 3246 freeb(options_mp); 3247 BUMP_MIB(&is->is_rawip_mib, 3248 rawipInErrors); 3249 return; 3250 } 3251 bcopy(rptr, mp1->b_rptr, hdr_len); 3252 mp->b_rptr = rptr + hdr_len; 3253 rptr = mp1->b_rptr; 3254 ipha = (ipha_t *)rptr; 3255 mp1->b_cont = mp; 3256 mp1->b_wptr = rptr + hdr_len; 3257 mp = mp1; 3258 } 3259 len -= hdr_len; 3260 ipha->ipha_length = htons(len); 3261 } 3262 } 3263 3264 /* 3265 * This is the inbound data path. Packets are passed upstream as 3266 * T_UNITDATA_IND messages with full IP headers still attached. 3267 */ 3268 if (icmp->icmp_family == AF_INET) { 3269 ASSERT(ipvers == IPV4_VERSION); 3270 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3271 if (icmp->icmp_recvif && (pinfo != NULL) && 3272 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3273 udi_size += sizeof (struct T_opthdr) + 3274 sizeof (uint_t); 3275 } 3276 3277 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3278 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3279 udi_size += sizeof (struct T_opthdr) + 3280 sizeof (struct in_pktinfo); 3281 } 3282 3283 /* 3284 * If SO_TIMESTAMP is set allocate the appropriate sized 3285 * buffer. Since gethrestime() expects a pointer aligned 3286 * argument, we allocate space necessary for extra 3287 * alignment (even though it might not be used). 
3288 */ 3289 if (icmp->icmp_timestamp) { 3290 udi_size += sizeof (struct T_opthdr) + 3291 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3292 } 3293 mp1 = allocb(udi_size, BPRI_MED); 3294 if (mp1 == NULL) { 3295 freemsg(mp); 3296 if (options_mp != NULL) 3297 freeb(options_mp); 3298 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3299 return; 3300 } 3301 mp1->b_cont = mp; 3302 mp = mp1; 3303 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3304 mp->b_datap->db_type = M_PROTO; 3305 mp->b_wptr = (uchar_t *)tudi + udi_size; 3306 tudi->PRIM_type = T_UNITDATA_IND; 3307 tudi->SRC_length = sizeof (sin_t); 3308 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3309 sin = (sin_t *)&tudi[1]; 3310 *sin = sin_null; 3311 sin->sin_family = AF_INET; 3312 sin->sin_addr.s_addr = ipha->ipha_src; 3313 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3314 sizeof (sin_t); 3315 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3316 tudi->OPT_length = udi_size; 3317 3318 /* 3319 * Add options if IP_RECVIF is set 3320 */ 3321 if (udi_size != 0) { 3322 char *dstopt; 3323 3324 dstopt = (char *)&sin[1]; 3325 if (icmp->icmp_recvif && (pinfo != NULL) && 3326 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3327 3328 struct T_opthdr *toh; 3329 uint_t *dstptr; 3330 3331 toh = (struct T_opthdr *)dstopt; 3332 toh->level = IPPROTO_IP; 3333 toh->name = IP_RECVIF; 3334 toh->len = sizeof (struct T_opthdr) + 3335 sizeof (uint_t); 3336 toh->status = 0; 3337 dstopt += sizeof (struct T_opthdr); 3338 dstptr = (uint_t *)dstopt; 3339 *dstptr = pinfo->ip_pkt_ifindex; 3340 dstopt += sizeof (uint_t); 3341 udi_size -= toh->len; 3342 } 3343 if (icmp->icmp_timestamp) { 3344 struct T_opthdr *toh; 3345 3346 toh = (struct T_opthdr *)dstopt; 3347 toh->level = SOL_SOCKET; 3348 toh->name = SCM_TIMESTAMP; 3349 toh->len = sizeof (struct T_opthdr) + 3350 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3351 toh->status = 0; 3352 dstopt += sizeof (struct T_opthdr); 3353 /* Align for gethrestime() */ 3354 dstopt = (char 
*)P2ROUNDUP((intptr_t)dstopt, 3355 sizeof (intptr_t)); 3356 gethrestime((timestruc_t *)dstopt); 3357 dstopt = (char *)toh + toh->len; 3358 udi_size -= toh->len; 3359 } 3360 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3361 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3362 struct T_opthdr *toh; 3363 struct in_pktinfo *pktinfop; 3364 3365 toh = (struct T_opthdr *)dstopt; 3366 toh->level = IPPROTO_IP; 3367 toh->name = IP_PKTINFO; 3368 toh->len = sizeof (struct T_opthdr) + 3369 sizeof (in_pktinfo_t); 3370 toh->status = 0; 3371 dstopt += sizeof (struct T_opthdr); 3372 pktinfop = (struct in_pktinfo *)dstopt; 3373 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3374 pktinfop->ipi_spec_dst = 3375 pinfo->ip_pkt_match_addr; 3376 3377 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3378 3379 dstopt += sizeof (struct in_pktinfo); 3380 udi_size -= toh->len; 3381 } 3382 3383 /* Consumed all of allocated space */ 3384 ASSERT(udi_size == 0); 3385 } 3386 3387 if (options_mp != NULL) 3388 freeb(options_mp); 3389 3390 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3391 putnext(connp->conn_rq, mp); 3392 return; 3393 } 3394 3395 /* 3396 * We don't need options_mp in the IPv6 path. 3397 */ 3398 if (options_mp != NULL) { 3399 freeb(options_mp); 3400 options_mp = NULL; 3401 } 3402 3403 /* 3404 * Discard message if it is smaller than the IPv6 header 3405 * or if the header is malformed. 3406 */ 3407 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3408 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3409 icmp->icmp_family != AF_INET6) { 3410 freemsg(mp); 3411 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3412 return; 3413 } 3414 3415 /* Initialize */ 3416 ipp.ipp_fields = 0; 3417 hopstrip = 0; 3418 3419 ip6h = (ip6_t *)rptr; 3420 /* 3421 * Call on ip_find_hdr_v6 which gets the total hdr len 3422 * as well as individual lenghts of ext hdrs (and ptrs to 3423 * them). 
3424 */ 3425 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3426 /* Look for ifindex information */ 3427 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3428 ip6i = (ip6i_t *)ip6h; 3429 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3430 ASSERT(ip6i->ip6i_ifindex != 0); 3431 ipp.ipp_fields |= IPPF_IFINDEX; 3432 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3433 } 3434 rptr = (uchar_t *)&ip6i[1]; 3435 mp->b_rptr = rptr; 3436 if (rptr == mp->b_wptr) { 3437 mp1 = mp->b_cont; 3438 freeb(mp); 3439 mp = mp1; 3440 rptr = mp->b_rptr; 3441 } 3442 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3443 ip6h = (ip6_t *)rptr; 3444 } 3445 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3446 3447 /* 3448 * We need to lie a bit to the user because users inside 3449 * labeled compartments should not see their own labels. We 3450 * assume that in all other respects IP has checked the label, 3451 * and that the label is always first among the options. (If 3452 * it's not first, then this code won't see it, and the option 3453 * will be passed along to the user.) 3454 * 3455 * If we had multilevel ICMP sockets, then the following code 3456 * should be skipped for them to allow the user to see the 3457 * label. 3458 * 3459 * Alignment restrictions in the definition of IP options 3460 * (namely, the requirement that the 4-octet DOI goes on a 3461 * 4-octet boundary) mean that we know exactly where the option 3462 * should start, but we're lenient for other hosts. 3463 * 3464 * Note that there are no multilevel ICMP or raw IP sockets 3465 * yet, thus nobody ever sees the IP6OPT_LS option. 
3466 */ 3467 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3468 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3469 const uchar_t *ucp = 3470 (const uchar_t *)ipp.ipp_hopopts + 2; 3471 int remlen = ipp.ipp_hopoptslen - 2; 3472 3473 while (remlen > 0) { 3474 if (*ucp == IP6OPT_PAD1) { 3475 remlen--; 3476 ucp++; 3477 } else if (*ucp == IP6OPT_PADN) { 3478 remlen -= ucp[1] + 2; 3479 ucp += ucp[1] + 2; 3480 } else if (*ucp == ip6opt_ls) { 3481 hopstrip = (ucp - 3482 (const uchar_t *)ipp.ipp_hopopts) + 3483 ucp[1] + 2; 3484 hopstrip = (hopstrip + 7) & ~7; 3485 break; 3486 } else { 3487 /* label option must be first */ 3488 break; 3489 } 3490 } 3491 } 3492 } else { 3493 hdr_len = IPV6_HDR_LEN; 3494 ip6i = NULL; 3495 nexthdr = ip6h->ip6_nxt; 3496 } 3497 /* 3498 * One special case where IP attaches the IRE needs to 3499 * be handled so that we don't send up IRE to the user land. 3500 */ 3501 if (nexthdr == IPPROTO_TCP) { 3502 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3503 3504 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3505 mp->b_cont != NULL) { 3506 mp1 = mp->b_cont; 3507 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3508 freeb(mp1); 3509 mp->b_cont = NULL; 3510 } 3511 } 3512 } 3513 /* 3514 * Check a filter for ICMPv6 types if needed. 3515 * Verify raw checksums if needed. 
3516 */ 3517 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3518 if (icmp->icmp_filter != NULL) { 3519 int type; 3520 3521 /* Assumes that IP has done the pullupmsg */ 3522 type = mp->b_rptr[hdr_len]; 3523 3524 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3525 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3526 freemsg(mp); 3527 return; 3528 } 3529 } else { 3530 /* Checksum */ 3531 uint16_t *up; 3532 uint32_t sum; 3533 int remlen; 3534 3535 up = (uint16_t *)&ip6h->ip6_src; 3536 3537 remlen = msgdsize(mp) - hdr_len; 3538 sum = htons(icmp->icmp_proto + remlen) 3539 + up[0] + up[1] + up[2] + up[3] 3540 + up[4] + up[5] + up[6] + up[7] 3541 + up[8] + up[9] + up[10] + up[11] 3542 + up[12] + up[13] + up[14] + up[15]; 3543 sum = (sum & 0xffff) + (sum >> 16); 3544 sum = IP_CSUM(mp, hdr_len, sum); 3545 if (sum != 0) { 3546 /* IPv6 RAW checksum failed */ 3547 ip0dbg(("icmp_rput: RAW checksum " 3548 "failed %x\n", sum)); 3549 freemsg(mp); 3550 BUMP_MIB(&is->is_rawip_mib, 3551 rawipInCksumErrs); 3552 return; 3553 } 3554 } 3555 } 3556 /* Skip all the IPv6 headers per API */ 3557 mp->b_rptr += hdr_len; 3558 3559 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3560 3561 /* 3562 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3563 * maintain state information, instead of relying on icmp_t 3564 * structure, since there arent any locks protecting these members 3565 * and there is a window where there might be a race between a 3566 * thread setting options on the write side and a thread reading 3567 * these options on the read size. 
3568 */ 3569 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3570 IPPF_RTHDR|IPPF_IFINDEX)) { 3571 if (icmp->icmp_ipv6_recvhopopts && 3572 (ipp.ipp_fields & IPPF_HOPOPTS) && 3573 ipp.ipp_hopoptslen > hopstrip) { 3574 udi_size += sizeof (struct T_opthdr) + 3575 ipp.ipp_hopoptslen - hopstrip; 3576 icmp_opt |= IPPF_HOPOPTS; 3577 } 3578 if ((icmp->icmp_ipv6_recvdstopts || 3579 icmp->icmp_old_ipv6_recvdstopts) && 3580 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3581 udi_size += sizeof (struct T_opthdr) + 3582 ipp.ipp_dstoptslen; 3583 icmp_opt |= IPPF_DSTOPTS; 3584 } 3585 if (((icmp->icmp_ipv6_recvdstopts && 3586 icmp->icmp_ipv6_recvrthdr && 3587 (ipp.ipp_fields & IPPF_RTHDR)) || 3588 icmp->icmp_ipv6_recvrtdstopts) && 3589 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3590 udi_size += sizeof (struct T_opthdr) + 3591 ipp.ipp_rtdstoptslen; 3592 icmp_opt |= IPPF_RTDSTOPTS; 3593 } 3594 if (icmp->icmp_ipv6_recvrthdr && 3595 (ipp.ipp_fields & IPPF_RTHDR)) { 3596 udi_size += sizeof (struct T_opthdr) + 3597 ipp.ipp_rthdrlen; 3598 icmp_opt |= IPPF_RTHDR; 3599 } 3600 if (icmp->icmp_ip_recvpktinfo && 3601 (ipp.ipp_fields & IPPF_IFINDEX)) { 3602 udi_size += sizeof (struct T_opthdr) + 3603 sizeof (struct in6_pktinfo); 3604 icmp_opt |= IPPF_IFINDEX; 3605 } 3606 } 3607 if (icmp->icmp_ipv6_recvhoplimit) { 3608 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3609 icmp_ipv6_recvhoplimit = B_TRUE; 3610 } 3611 3612 if (icmp->icmp_ipv6_recvtclass) 3613 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3614 3615 /* 3616 * If SO_TIMESTAMP is set allocate the appropriate sized 3617 * buffer. Since gethrestime() expects a pointer aligned 3618 * argument, we allocate space necessary for extra 3619 * alignment (even though it might not be used). 
3620 */ 3621 if (icmp->icmp_timestamp) { 3622 udi_size += sizeof (struct T_opthdr) + 3623 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3624 } 3625 3626 mp1 = allocb(udi_size, BPRI_MED); 3627 if (mp1 == NULL) { 3628 freemsg(mp); 3629 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3630 return; 3631 } 3632 mp1->b_cont = mp; 3633 mp = mp1; 3634 mp->b_datap->db_type = M_PROTO; 3635 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3636 mp->b_wptr = (uchar_t *)tudi + udi_size; 3637 tudi->PRIM_type = T_UNITDATA_IND; 3638 tudi->SRC_length = sizeof (sin6_t); 3639 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3640 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3641 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3642 tudi->OPT_length = udi_size; 3643 sin6 = (sin6_t *)&tudi[1]; 3644 sin6->sin6_port = 0; 3645 sin6->sin6_family = AF_INET6; 3646 3647 sin6->sin6_addr = ip6h->ip6_src; 3648 /* No sin6_flowinfo per API */ 3649 sin6->sin6_flowinfo = 0; 3650 /* For link-scope source pass up scope id */ 3651 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3652 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3653 sin6->sin6_scope_id = ipp.ipp_ifindex; 3654 else 3655 sin6->sin6_scope_id = 0; 3656 3657 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3658 icmp->icmp_zoneid, is->is_netstack); 3659 3660 if (udi_size != 0) { 3661 uchar_t *dstopt; 3662 3663 dstopt = (uchar_t *)&sin6[1]; 3664 if (icmp_opt & IPPF_IFINDEX) { 3665 struct T_opthdr *toh; 3666 struct in6_pktinfo *pkti; 3667 3668 toh = (struct T_opthdr *)dstopt; 3669 toh->level = IPPROTO_IPV6; 3670 toh->name = IPV6_PKTINFO; 3671 toh->len = sizeof (struct T_opthdr) + 3672 sizeof (*pkti); 3673 toh->status = 0; 3674 dstopt += sizeof (struct T_opthdr); 3675 pkti = (struct in6_pktinfo *)dstopt; 3676 pkti->ipi6_addr = ip6h->ip6_dst; 3677 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3678 dstopt += sizeof (*pkti); 3679 udi_size -= toh->len; 3680 } 3681 if (icmp_ipv6_recvhoplimit) { 3682 struct T_opthdr *toh; 3683 3684 toh = 
(struct T_opthdr *)dstopt; 3685 toh->level = IPPROTO_IPV6; 3686 toh->name = IPV6_HOPLIMIT; 3687 toh->len = sizeof (struct T_opthdr) + 3688 sizeof (uint_t); 3689 toh->status = 0; 3690 dstopt += sizeof (struct T_opthdr); 3691 *(uint_t *)dstopt = ip6h->ip6_hops; 3692 dstopt += sizeof (uint_t); 3693 udi_size -= toh->len; 3694 } 3695 if (icmp->icmp_ipv6_recvtclass) { 3696 struct T_opthdr *toh; 3697 3698 toh = (struct T_opthdr *)dstopt; 3699 toh->level = IPPROTO_IPV6; 3700 toh->name = IPV6_TCLASS; 3701 toh->len = sizeof (struct T_opthdr) + 3702 sizeof (uint_t); 3703 toh->status = 0; 3704 dstopt += sizeof (struct T_opthdr); 3705 *(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3706 dstopt += sizeof (uint_t); 3707 udi_size -= toh->len; 3708 } 3709 if (icmp->icmp_timestamp) { 3710 struct T_opthdr *toh; 3711 3712 toh = (struct T_opthdr *)dstopt; 3713 toh->level = SOL_SOCKET; 3714 toh->name = SCM_TIMESTAMP; 3715 toh->len = sizeof (struct T_opthdr) + 3716 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3717 toh->status = 0; 3718 dstopt += sizeof (struct T_opthdr); 3719 /* Align for gethrestime() */ 3720 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 3721 sizeof (intptr_t)); 3722 gethrestime((timestruc_t *)dstopt); 3723 dstopt = (uchar_t *)toh + toh->len; 3724 udi_size -= toh->len; 3725 } 3726 if (icmp_opt & IPPF_HOPOPTS) { 3727 struct T_opthdr *toh; 3728 3729 toh = (struct T_opthdr *)dstopt; 3730 toh->level = IPPROTO_IPV6; 3731 toh->name = IPV6_HOPOPTS; 3732 toh->len = sizeof (struct T_opthdr) + 3733 ipp.ipp_hopoptslen - hopstrip; 3734 toh->status = 0; 3735 dstopt += sizeof (struct T_opthdr); 3736 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 3737 ipp.ipp_hopoptslen - hopstrip); 3738 if (hopstrip > 0) { 3739 /* copy next header value and fake length */ 3740 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 3741 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 3742 hopstrip / 8; 3743 } 3744 dstopt += ipp.ipp_hopoptslen - hopstrip; 3745 udi_size -= toh->len; 3746 } 3747 if 
(icmp_opt & IPPF_RTDSTOPTS) { 3748 struct T_opthdr *toh; 3749 3750 toh = (struct T_opthdr *)dstopt; 3751 toh->level = IPPROTO_IPV6; 3752 toh->name = IPV6_DSTOPTS; 3753 toh->len = sizeof (struct T_opthdr) + 3754 ipp.ipp_rtdstoptslen; 3755 toh->status = 0; 3756 dstopt += sizeof (struct T_opthdr); 3757 bcopy(ipp.ipp_rtdstopts, dstopt, 3758 ipp.ipp_rtdstoptslen); 3759 dstopt += ipp.ipp_rtdstoptslen; 3760 udi_size -= toh->len; 3761 } 3762 if (icmp_opt & IPPF_RTHDR) { 3763 struct T_opthdr *toh; 3764 3765 toh = (struct T_opthdr *)dstopt; 3766 toh->level = IPPROTO_IPV6; 3767 toh->name = IPV6_RTHDR; 3768 toh->len = sizeof (struct T_opthdr) + 3769 ipp.ipp_rthdrlen; 3770 toh->status = 0; 3771 dstopt += sizeof (struct T_opthdr); 3772 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 3773 dstopt += ipp.ipp_rthdrlen; 3774 udi_size -= toh->len; 3775 } 3776 if (icmp_opt & IPPF_DSTOPTS) { 3777 struct T_opthdr *toh; 3778 3779 toh = (struct T_opthdr *)dstopt; 3780 toh->level = IPPROTO_IPV6; 3781 toh->name = IPV6_DSTOPTS; 3782 toh->len = sizeof (struct T_opthdr) + 3783 ipp.ipp_dstoptslen; 3784 toh->status = 0; 3785 dstopt += sizeof (struct T_opthdr); 3786 bcopy(ipp.ipp_dstopts, dstopt, 3787 ipp.ipp_dstoptslen); 3788 dstopt += ipp.ipp_dstoptslen; 3789 udi_size -= toh->len; 3790 } 3791 /* Consumed all of allocated space */ 3792 ASSERT(udi_size == 0); 3793 } 3794 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3795 putnext(connp->conn_rq, mp); 3796 } 3797 3798 /* 3799 * Handle the results of a T_BIND_REQ whether deferred by IP or handled 3800 * immediately. 3801 */ 3802 static void 3803 icmp_bind_result(conn_t *connp, mblk_t *mp) 3804 { 3805 struct T_error_ack *tea; 3806 3807 switch (mp->b_datap->db_type) { 3808 case M_PROTO: 3809 case M_PCPROTO: 3810 /* M_PROTO messages contain some type of TPI message. 
*/ 3811 if ((mp->b_wptr - mp->b_rptr) < sizeof (t_scalar_t)) { 3812 freemsg(mp); 3813 return; 3814 } 3815 tea = (struct T_error_ack *)mp->b_rptr; 3816 3817 switch (tea->PRIM_type) { 3818 case T_ERROR_ACK: 3819 switch (tea->ERROR_prim) { 3820 case O_T_BIND_REQ: 3821 case T_BIND_REQ: 3822 icmp_bind_error(connp, mp); 3823 return; 3824 default: 3825 break; 3826 } 3827 ASSERT(0); 3828 freemsg(mp); 3829 return; 3830 3831 case T_BIND_ACK: 3832 icmp_bind_ack(connp, mp); 3833 return; 3834 3835 default: 3836 break; 3837 } 3838 freemsg(mp); 3839 return; 3840 default: 3841 /* FIXME: other cases? */ 3842 ASSERT(0); 3843 freemsg(mp); 3844 return; 3845 } 3846 } 3847 3848 /* 3849 * Process a T_BIND_ACK 3850 */ 3851 static void 3852 icmp_bind_ack(conn_t *connp, mblk_t *mp) 3853 { 3854 icmp_t *icmp = connp->conn_icmp; 3855 mblk_t *mp1; 3856 ire_t *ire; 3857 struct T_bind_ack *tba; 3858 uchar_t *addrp; 3859 ipa_conn_t *ac; 3860 ipa6_conn_t *ac6; 3861 3862 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3863 /* 3864 * We know if headers are included or not so we can 3865 * safely do this. 3866 */ 3867 if (icmp->icmp_state == TS_UNBND) { 3868 /* 3869 * TPI has not yet bound - bind sent by 3870 * icmp_bind_proto. 3871 */ 3872 freemsg(mp); 3873 rw_exit(&icmp->icmp_rwlock); 3874 return; 3875 } 3876 ASSERT(icmp->icmp_pending_op != -1); 3877 3878 /* 3879 * If a broadcast/multicast address was bound set 3880 * the source address to 0. 3881 * This ensures no datagrams with broadcast address 3882 * as source address are emitted (which would violate 3883 * RFC1122 - Hosts requirements) 3884 * 3885 * Note that when connecting the returned IRE is 3886 * for the destination address and we only perform 3887 * the broadcast check for the source address (it 3888 * is OK to connect to a broadcast/multicast address.) 
3889 */ 3890 mp1 = mp->b_cont; 3891 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 3892 ire = (ire_t *)mp1->b_rptr; 3893 3894 /* 3895 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 3896 * local address. 3897 */ 3898 if (ire->ire_type == IRE_BROADCAST && 3899 icmp->icmp_state != TS_DATA_XFER) { 3900 ASSERT(icmp->icmp_pending_op == T_BIND_REQ || 3901 icmp->icmp_pending_op == O_T_BIND_REQ); 3902 /* This was just a local bind to a MC/broadcast addr */ 3903 V6_SET_ZERO(icmp->icmp_v6src); 3904 if (icmp->icmp_family == AF_INET6) 3905 (void) icmp_build_hdrs(icmp); 3906 } else if (V6_OR_V4_INADDR_ANY(icmp->icmp_v6src)) { 3907 /* 3908 * Local address not yet set - pick it from the 3909 * T_bind_ack 3910 */ 3911 tba = (struct T_bind_ack *)mp->b_rptr; 3912 addrp = &mp->b_rptr[tba->ADDR_offset]; 3913 switch (icmp->icmp_family) { 3914 case AF_INET: 3915 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 3916 ac = (ipa_conn_t *)addrp; 3917 } else { 3918 ASSERT(tba->ADDR_length == 3919 sizeof (ipa_conn_x_t)); 3920 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 3921 } 3922 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 3923 &icmp->icmp_v6src); 3924 break; 3925 case AF_INET6: 3926 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 3927 ac6 = (ipa6_conn_t *)addrp; 3928 } else { 3929 ASSERT(tba->ADDR_length == 3930 sizeof (ipa6_conn_x_t)); 3931 ac6 = &((ipa6_conn_x_t *) 3932 addrp)->ac6x_conn; 3933 } 3934 icmp->icmp_v6src = ac6->ac6_laddr; 3935 (void) icmp_build_hdrs(icmp); 3936 } 3937 } 3938 mp1 = mp1->b_cont; 3939 } 3940 icmp->icmp_pending_op = -1; 3941 rw_exit(&icmp->icmp_rwlock); 3942 /* 3943 * Look for one or more appended ACK message added by 3944 * icmp_connect or icmp_disconnect. 3945 * If none found just send up the T_BIND_ACK. 3946 * icmp_connect has appended a T_OK_ACK and a 3947 * T_CONN_CON. 3948 * icmp_disconnect has appended a T_OK_ACK. 
3949 */ 3950 if (mp1 != NULL) { 3951 if (mp->b_cont == mp1) 3952 mp->b_cont = NULL; 3953 else { 3954 ASSERT(mp->b_cont->b_cont == mp1); 3955 mp->b_cont->b_cont = NULL; 3956 } 3957 freemsg(mp); 3958 mp = mp1; 3959 while (mp != NULL) { 3960 mp1 = mp->b_cont; 3961 mp->b_cont = NULL; 3962 putnext(connp->conn_rq, mp); 3963 mp = mp1; 3964 } 3965 return; 3966 } 3967 freemsg(mp->b_cont); 3968 mp->b_cont = NULL; 3969 putnext(connp->conn_rq, mp); 3970 } 3971 3972 static void 3973 icmp_bind_error(conn_t *connp, mblk_t *mp) 3974 { 3975 icmp_t *icmp = connp->conn_icmp; 3976 struct T_error_ack *tea; 3977 3978 tea = (struct T_error_ack *)mp->b_rptr; 3979 /* 3980 * If our O_T_BIND_REQ/T_BIND_REQ fails, 3981 * clear out the source address before 3982 * passing the message upstream. 3983 * If this was caused by a T_CONN_REQ 3984 * revert back to bound state. 3985 */ 3986 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3987 if (icmp->icmp_state == TS_UNBND) { 3988 /* 3989 * TPI has not yet bound - bind sent by icmp_bind_proto. 
3990 */ 3991 freemsg(mp); 3992 rw_exit(&icmp->icmp_rwlock); 3993 return; 3994 } 3995 ASSERT(icmp->icmp_pending_op != -1); 3996 tea->ERROR_prim = icmp->icmp_pending_op; 3997 icmp->icmp_pending_op = -1; 3998 3999 switch (tea->ERROR_prim) { 4000 case T_CONN_REQ: 4001 ASSERT(icmp->icmp_state == TS_DATA_XFER); 4002 /* Connect failed */ 4003 /* Revert back to the bound source */ 4004 icmp->icmp_v6src = icmp->icmp_bound_v6src; 4005 icmp->icmp_state = TS_IDLE; 4006 if (icmp->icmp_family == AF_INET6) 4007 (void) icmp_build_hdrs(icmp); 4008 break; 4009 4010 case T_DISCON_REQ: 4011 case T_BIND_REQ: 4012 case O_T_BIND_REQ: 4013 V6_SET_ZERO(icmp->icmp_v6src); 4014 V6_SET_ZERO(icmp->icmp_bound_v6src); 4015 icmp->icmp_state = TS_UNBND; 4016 if (icmp->icmp_family == AF_INET6) 4017 (void) icmp_build_hdrs(icmp); 4018 break; 4019 default: 4020 break; 4021 } 4022 rw_exit(&icmp->icmp_rwlock); 4023 putnext(connp->conn_rq, mp); 4024 } 4025 4026 /* 4027 * return SNMP stuff in buffer in mpdata 4028 */ 4029 mblk_t * 4030 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 4031 { 4032 mblk_t *mpdata; 4033 struct opthdr *optp; 4034 conn_t *connp = Q_TO_CONN(q); 4035 icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 4036 mblk_t *mp2ctl; 4037 4038 /* 4039 * make a copy of the original message 4040 */ 4041 mp2ctl = copymsg(mpctl); 4042 4043 if (mpctl == NULL || 4044 (mpdata = mpctl->b_cont) == NULL) { 4045 freemsg(mpctl); 4046 freemsg(mp2ctl); 4047 return (0); 4048 } 4049 4050 /* fixed length structure for IPv4 and IPv6 counters */ 4051 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4052 optp->level = EXPER_RAWIP; 4053 optp->name = 0; 4054 (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, 4055 sizeof (is->is_rawip_mib)); 4056 optp->len = msgdsize(mpdata); 4057 qreply(q, mpctl); 4058 4059 return (mp2ctl); 4060 } 4061 4062 /* 4063 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 
4064 * TODO: If this ever actually tries to set anything, it needs to be 4065 * to do the appropriate locking. 4066 */ 4067 /* ARGSUSED */ 4068 int 4069 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4070 uchar_t *ptr, int len) 4071 { 4072 switch (level) { 4073 case EXPER_RAWIP: 4074 return (0); 4075 default: 4076 return (1); 4077 } 4078 } 4079 4080 /* Report for ndd "icmp_status" */ 4081 /* ARGSUSED */ 4082 static int 4083 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4084 { 4085 conn_t *connp; 4086 ip_stack_t *ipst; 4087 char laddrbuf[INET6_ADDRSTRLEN]; 4088 char faddrbuf[INET6_ADDRSTRLEN]; 4089 int i; 4090 4091 (void) mi_mpprintf(mp, 4092 "RAWIP " MI_COL_HDRPAD_STR 4093 /* 01234567[89ABCDEF] */ 4094 " src addr dest addr state"); 4095 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 4096 4097 connp = Q_TO_CONN(q); 4098 ipst = connp->conn_netstack->netstack_ip; 4099 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4100 connf_t *connfp; 4101 char *state; 4102 4103 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4104 connp = NULL; 4105 4106 while ((connp = ipcl_get_next_conn(connfp, connp, 4107 IPCL_RAWIPCONN)) != NULL) { 4108 icmp_t *icmp; 4109 4110 mutex_enter(&(connp)->conn_lock); 4111 icmp = connp->conn_icmp; 4112 4113 if (icmp->icmp_state == TS_UNBND) 4114 state = "UNBOUND"; 4115 else if (icmp->icmp_state == TS_IDLE) 4116 state = "IDLE"; 4117 else if (icmp->icmp_state == TS_DATA_XFER) 4118 state = "CONNECTED"; 4119 else 4120 state = "UnkState"; 4121 4122 (void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s", 4123 (void *)icmp, 4124 inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf, 4125 sizeof (faddrbuf)), 4126 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 4127 sizeof (laddrbuf)), 4128 state); 4129 mutex_exit(&(connp)->conn_lock); 4130 } 4131 } 4132 return (0); 4133 } 4134 4135 /* 4136 * This routine creates a T_UDERROR_IND message and passes it upstream. 
4137 * The address and options are copied from the T_UNITDATA_REQ message 4138 * passed in mp. This message is freed. 4139 */ 4140 static void 4141 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 4142 { 4143 mblk_t *mp1; 4144 uchar_t *rptr = mp->b_rptr; 4145 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 4146 4147 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 4148 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 4149 tudr->OPT_length, err); 4150 if (mp1) 4151 qreply(q, mp1); 4152 freemsg(mp); 4153 } 4154 4155 /* 4156 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 4157 * After some error checking, the message is passed downstream to ip. 4158 */ 4159 static void 4160 icmp_unbind(queue_t *q, mblk_t *mp) 4161 { 4162 icmp_t *icmp = Q_TO_ICMP(q); 4163 4164 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4165 /* If a bind has not been done, we can't unbind. */ 4166 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 4167 rw_exit(&icmp->icmp_rwlock); 4168 icmp_err_ack(q, mp, TOUTSTATE, 0); 4169 return; 4170 } 4171 icmp->icmp_pending_op = T_UNBIND_REQ; 4172 rw_exit(&icmp->icmp_rwlock); 4173 4174 /* 4175 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 4176 * and therefore ip_unbind must never return NULL. 4177 */ 4178 mp = ip_unbind(q, mp); 4179 ASSERT(mp != NULL); 4180 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4181 4182 /* 4183 * Once we're unbound from IP, the pending operation may be cleared 4184 * here. 4185 */ 4186 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4187 V6_SET_ZERO(icmp->icmp_v6src); 4188 V6_SET_ZERO(icmp->icmp_bound_v6src); 4189 icmp->icmp_pending_op = -1; 4190 icmp->icmp_state = TS_UNBND; 4191 if (icmp->icmp_family == AF_INET6) 4192 (void) icmp_build_hdrs(icmp); 4193 rw_exit(&icmp->icmp_rwlock); 4194 4195 qreply(q, mp); 4196 } 4197 4198 /* 4199 * Process IPv4 packets that already include an IP header. 
4200 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 4201 * IPPROTO_IGMP). 4202 */ 4203 static void 4204 icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop) 4205 { 4206 icmp_stack_t *is = icmp->icmp_is; 4207 ipha_t *ipha; 4208 int ip_hdr_length; 4209 int tp_hdr_len; 4210 mblk_t *mp1; 4211 uint_t pkt_len; 4212 ip_opt_info_t optinfo; 4213 conn_t *connp = icmp->icmp_connp; 4214 4215 optinfo.ip_opt_flags = 0; 4216 optinfo.ip_opt_ill_index = 0; 4217 ipha = (ipha_t *)mp->b_rptr; 4218 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4219 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 4220 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4221 ASSERT(icmp != NULL); 4222 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4223 freemsg(mp); 4224 return; 4225 } 4226 ipha = (ipha_t *)mp->b_rptr; 4227 } 4228 ipha->ipha_version_and_hdr_length = 4229 (IP_VERSION<<4) | (ip_hdr_length>>2); 4230 4231 /* 4232 * For the socket of SOCK_RAW type, the checksum is provided in the 4233 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4234 * tell IP that the application has sent a complete IP header and not 4235 * to compute the transport checksum nor change the DF flag. 4236 */ 4237 ipha->ipha_ident = IP_HDR_INCLUDED; 4238 ipha->ipha_hdr_checksum = 0; 4239 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 4240 /* Insert options if any */ 4241 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4242 /* 4243 * Put the IP header plus any transport header that is 4244 * checksumed by ip_wput into the first mblk. (ip_wput assumes 4245 * that at least the checksum field is in the first mblk.) 4246 */ 4247 switch (ipha->ipha_protocol) { 4248 case IPPROTO_UDP: 4249 tp_hdr_len = 8; 4250 break; 4251 case IPPROTO_TCP: 4252 tp_hdr_len = 20; 4253 break; 4254 default: 4255 tp_hdr_len = 0; 4256 break; 4257 } 4258 /* 4259 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 4260 * tp_hdr_len bytes will be in a single mblk. 
4261 */ 4262 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 4263 tp_hdr_len)) { 4264 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 4265 tp_hdr_len)) { 4266 BUMP_MIB(&is->is_rawip_mib, 4267 rawipOutErrors); 4268 freemsg(mp); 4269 return; 4270 } 4271 ipha = (ipha_t *)mp->b_rptr; 4272 } 4273 4274 /* 4275 * if the length is larger then the max allowed IP packet, 4276 * then send an error and abort the processing. 4277 */ 4278 pkt_len = ntohs(ipha->ipha_length) 4279 + icmp->icmp_ip_snd_options_len; 4280 if (pkt_len > IP_MAXPACKET) { 4281 icmp_ud_err(q, mp, EMSGSIZE); 4282 return; 4283 } 4284 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra + 4285 tp_hdr_len, BPRI_LO))) { 4286 icmp_ud_err(q, mp, ENOMEM); 4287 return; 4288 } 4289 mp1->b_rptr += is->is_wroff_extra; 4290 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4291 4292 ipha->ipha_length = htons((uint16_t)pkt_len); 4293 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4294 4295 /* Copy transport header if any */ 4296 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4297 mp1->b_wptr += tp_hdr_len; 4298 4299 /* Add options */ 4300 ipha = (ipha_t *)mp1->b_rptr; 4301 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 4302 icmp->icmp_ip_snd_options_len); 4303 4304 /* Drop IP header and transport header from original */ 4305 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4306 4307 mp1->b_cont = mp; 4308 mp = mp1; 4309 /* 4310 * Massage source route putting first source 4311 * route in ipha_dst. 
4312 */ 4313 (void) ip_massage_options(ipha, is->is_netstack); 4314 } 4315 4316 if (pktinfop != NULL) { 4317 /* 4318 * Over write the source address provided in the header 4319 */ 4320 if (pktinfop->ip4_addr != INADDR_ANY) { 4321 ipha->ipha_src = pktinfop->ip4_addr; 4322 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4323 } 4324 4325 if (pktinfop->ip4_ill_index != 0) { 4326 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4327 } 4328 } 4329 4330 mblk_setcred(mp, connp->conn_cred); 4331 ip_output_options(connp, mp, q, IP_WPUT, &optinfo); 4332 } 4333 4334 static boolean_t 4335 icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4336 { 4337 int err; 4338 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4339 icmp_stack_t *is = icmp->icmp_is; 4340 conn_t *connp = icmp->icmp_connp; 4341 4342 err = tsol_compute_label(DB_CREDDEF(mp, connp->conn_cred), dst, 4343 opt_storage, connp->conn_mac_exempt, 4344 is->is_netstack->netstack_ip); 4345 if (err == 0) { 4346 err = tsol_update_options(&icmp->icmp_ip_snd_options, 4347 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4348 opt_storage); 4349 } 4350 if (err != 0) { 4351 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4352 DTRACE_PROBE4( 4353 tx__ip__log__drop__updatelabel__icmp, 4354 char *, "queue(1) failed to update options(2) on mp(3)", 4355 queue_t *, q, char *, opt_storage, mblk_t *, mp); 4356 icmp_ud_err(q, mp, err); 4357 return (B_FALSE); 4358 } 4359 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4360 return (B_TRUE); 4361 } 4362 4363 /* 4364 * This routine handles all messages passed downstream. It either 4365 * consumes the message or passes it downstream; it never queues a 4366 * a message. 
4367 */ 4368 static void 4369 icmp_wput(queue_t *q, mblk_t *mp) 4370 { 4371 uchar_t *rptr = mp->b_rptr; 4372 ipha_t *ipha; 4373 mblk_t *mp1; 4374 int ip_hdr_length; 4375 #define tudr ((struct T_unitdata_req *)rptr) 4376 size_t ip_len; 4377 conn_t *connp = Q_TO_CONN(q); 4378 icmp_t *icmp = connp->conn_icmp; 4379 icmp_stack_t *is = icmp->icmp_is; 4380 sin6_t *sin6; 4381 sin_t *sin; 4382 ipaddr_t v4dst; 4383 ip4_pkt_t pktinfo; 4384 ip4_pkt_t *pktinfop = &pktinfo; 4385 ip_opt_info_t optinfo; 4386 4387 switch (mp->b_datap->db_type) { 4388 case M_DATA: 4389 if (icmp->icmp_hdrincl) { 4390 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4391 ipha = (ipha_t *)mp->b_rptr; 4392 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4393 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4394 BUMP_MIB(&is->is_rawip_mib, 4395 rawipOutErrors); 4396 freemsg(mp); 4397 return; 4398 } 4399 ipha = (ipha_t *)mp->b_rptr; 4400 } 4401 /* 4402 * If this connection was used for v6 (inconceivable!) 4403 * or if we have a new destination, then it's time to 4404 * figure a new label. 4405 */ 4406 if (is_system_labeled() && 4407 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4408 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4409 ipha->ipha_dst) && 4410 !icmp_update_label(q, icmp, mp, ipha->ipha_dst)) { 4411 return; 4412 } 4413 icmp_wput_hdrincl(q, mp, icmp, NULL); 4414 return; 4415 } 4416 freemsg(mp); 4417 return; 4418 case M_PROTO: 4419 case M_PCPROTO: 4420 ip_len = mp->b_wptr - rptr; 4421 if (ip_len >= sizeof (struct T_unitdata_req)) { 4422 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4423 if (((union T_primitives *)rptr)->type 4424 == T_UNITDATA_REQ) 4425 break; 4426 } 4427 /* FALLTHRU */ 4428 default: 4429 icmp_wput_other(q, mp); 4430 return; 4431 } 4432 4433 /* Handle T_UNITDATA_REQ messages here. */ 4434 4435 4436 4437 if (icmp->icmp_state == TS_UNBND) { 4438 /* If a port has not been bound to the stream, fail. 
*/ 4439 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4440 icmp_ud_err(q, mp, EPROTO); 4441 return; 4442 } 4443 mp1 = mp->b_cont; 4444 if (mp1 == NULL) { 4445 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4446 icmp_ud_err(q, mp, EPROTO); 4447 return; 4448 } 4449 4450 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4451 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4452 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4453 return; 4454 } 4455 4456 switch (icmp->icmp_family) { 4457 case AF_INET6: 4458 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4459 if (!OK_32PTR((char *)sin6) || 4460 tudr->DEST_length != sizeof (sin6_t) || 4461 sin6->sin6_family != AF_INET6) { 4462 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4463 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4464 return; 4465 } 4466 4467 /* No support for mapped addresses on raw sockets */ 4468 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4469 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4470 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4471 return; 4472 } 4473 4474 /* 4475 * Destination is a native IPv6 address. 4476 * Send out an IPv6 format packet. 
4477 */ 4478 icmp_wput_ipv6(q, mp, sin6, tudr->OPT_length); 4479 return; 4480 4481 case AF_INET: 4482 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4483 if (!OK_32PTR((char *)sin) || 4484 tudr->DEST_length != sizeof (sin_t) || 4485 sin->sin_family != AF_INET) { 4486 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4487 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4488 return; 4489 } 4490 /* Extract and ipaddr */ 4491 v4dst = sin->sin_addr.s_addr; 4492 break; 4493 4494 default: 4495 ASSERT(0); 4496 } 4497 4498 pktinfop->ip4_ill_index = 0; 4499 pktinfop->ip4_addr = INADDR_ANY; 4500 optinfo.ip_opt_flags = 0; 4501 optinfo.ip_opt_ill_index = 0; 4502 4503 4504 /* 4505 * If options passed in, feed it for verification and handling 4506 */ 4507 if (tudr->OPT_length != 0) { 4508 int error; 4509 4510 error = 0; 4511 if (icmp_unitdata_opt_process(q, mp, &error, 4512 (void *)pktinfop) < 0) { 4513 /* failure */ 4514 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4515 icmp_ud_err(q, mp, error); 4516 return; 4517 } 4518 ASSERT(error == 0); 4519 /* 4520 * Note: Success in processing options. 
4521 * mp option buffer represented by 4522 * OPT_length/offset now potentially modified 4523 * and contain option setting results 4524 */ 4525 4526 } 4527 4528 if (v4dst == INADDR_ANY) 4529 v4dst = htonl(INADDR_LOOPBACK); 4530 4531 /* Check if our saved options are valid; update if not */ 4532 if (is_system_labeled() && 4533 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4534 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst) && 4535 !icmp_update_label(q, icmp, mp, v4dst)) { 4536 return; 4537 } 4538 4539 /* Protocol 255 contains full IP headers */ 4540 if (icmp->icmp_hdrincl) { 4541 freeb(mp); 4542 icmp_wput_hdrincl(q, mp1, icmp, pktinfop); 4543 return; 4544 } 4545 4546 4547 /* Add an IP header */ 4548 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4549 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 4550 if ((uchar_t *)ipha < mp1->b_datap->db_base || 4551 mp1->b_datap->db_ref != 1 || 4552 !OK_32PTR(ipha)) { 4553 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, 4554 BPRI_LO))) { 4555 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4556 icmp_ud_err(q, mp, ENOMEM); 4557 return; 4558 } 4559 mp1->b_cont = mp->b_cont; 4560 ipha = (ipha_t *)mp1->b_datap->db_lim; 4561 mp1->b_wptr = (uchar_t *)ipha; 4562 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4563 } 4564 #ifdef _BIG_ENDIAN 4565 /* Set version, header length, and tos */ 4566 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4567 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4568 icmp->icmp_type_of_service); 4569 /* Set ttl and protocol */ 4570 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4571 #else 4572 /* Set version, header length, and tos */ 4573 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4574 ((icmp->icmp_type_of_service << 8) | 4575 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4576 /* Set ttl and protocol */ 4577 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4578 #endif 4579 if (pktinfop->ip4_addr != INADDR_ANY) { 4580 
ipha->ipha_src = pktinfop->ip4_addr; 4581 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4582 } else { 4583 4584 /* 4585 * Copy our address into the packet. If this is zero, 4586 * ip will fill in the real source address. 4587 */ 4588 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4589 } 4590 4591 ipha->ipha_fragment_offset_and_flags = 0; 4592 4593 if (pktinfop->ip4_ill_index != 0) { 4594 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4595 } 4596 4597 4598 /* 4599 * For the socket of SOCK_RAW type, the checksum is provided in the 4600 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4601 * tell IP that the application has sent a complete IP header and not 4602 * to compute the transport checksum nor change the DF flag. 4603 */ 4604 ipha->ipha_ident = IP_HDR_INCLUDED; 4605 4606 /* Finish common formatting of the packet. */ 4607 mp1->b_rptr = (uchar_t *)ipha; 4608 4609 ip_len = mp1->b_wptr - (uchar_t *)ipha; 4610 if (mp1->b_cont != NULL) 4611 ip_len += msgdsize(mp1->b_cont); 4612 4613 /* 4614 * Set the length into the IP header. 4615 * If the length is greater than the maximum allowed by IP, 4616 * then free the message and return. Do not try and send it 4617 * as this can cause problems in layers below. 4618 */ 4619 if (ip_len > IP_MAXPACKET) { 4620 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4621 icmp_ud_err(q, mp, EMSGSIZE); 4622 return; 4623 } 4624 ipha->ipha_length = htons((uint16_t)ip_len); 4625 /* 4626 * Copy in the destination address from the T_UNITDATA 4627 * request 4628 */ 4629 ipha->ipha_dst = v4dst; 4630 4631 /* 4632 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4633 */ 4634 if (CLASSD(v4dst)) 4635 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4636 4637 /* Copy in options if any */ 4638 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4639 bcopy(icmp->icmp_ip_snd_options, 4640 &ipha[1], icmp->icmp_ip_snd_options_len); 4641 /* 4642 * Massage source route putting first source route in ipha_dst. 
4643 * Ignore the destination in the T_unitdata_req. 4644 */ 4645 (void) ip_massage_options(ipha, is->is_netstack); 4646 } 4647 4648 freeb(mp); 4649 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 4650 mblk_setcred(mp1, connp->conn_cred); 4651 ip_output_options(Q_TO_CONN(q), mp1, q, IP_WPUT, &optinfo); 4652 #undef ipha 4653 #undef tudr 4654 } 4655 4656 static boolean_t 4657 icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4658 { 4659 int err; 4660 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4661 icmp_stack_t *is = icmp->icmp_is; 4662 conn_t *connp = icmp->icmp_connp; 4663 4664 err = tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), dst, 4665 opt_storage, connp->conn_mac_exempt, 4666 is->is_netstack->netstack_ip); 4667 if (err == 0) { 4668 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4669 &icmp->icmp_label_len_v6, opt_storage); 4670 } 4671 if (err != 0) { 4672 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4673 DTRACE_PROBE4( 4674 tx__ip__log__drop__updatelabel__icmp6, 4675 char *, "queue(1) failed to update options(2) on mp(3)", 4676 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4677 icmp_ud_err(wq, mp, err); 4678 return (B_FALSE); 4679 } 4680 4681 icmp->icmp_v6lastdst = *dst; 4682 return (B_TRUE); 4683 } 4684 4685 /* 4686 * icmp_wput_ipv6(): 4687 * Assumes that icmp_wput did some sanity checking on the destination 4688 * address, but that the label may not yet be correct. 
4689 */ 4690 void 4691 icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) 4692 { 4693 ip6_t *ip6h; 4694 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 4695 mblk_t *mp1; 4696 int ip_hdr_len = IPV6_HDR_LEN; 4697 size_t ip_len; 4698 icmp_t *icmp = Q_TO_ICMP(q); 4699 icmp_stack_t *is = icmp->icmp_is; 4700 ip6_pkt_t ipp_s; /* For ancillary data options */ 4701 ip6_pkt_t *ipp = &ipp_s; 4702 ip6_pkt_t *tipp; 4703 uint32_t csum = 0; 4704 uint_t ignore = 0; 4705 uint_t option_exists = 0, is_sticky = 0; 4706 uint8_t *cp; 4707 uint8_t *nxthdr_ptr; 4708 in6_addr_t ip6_dst; 4709 4710 /* 4711 * If the local address is a mapped address return 4712 * an error. 4713 * It would be possible to send an IPv6 packet but the 4714 * response would never make it back to the application 4715 * since it is bound to a mapped address. 4716 */ 4717 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4718 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4719 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4720 return; 4721 } 4722 4723 ipp->ipp_fields = 0; 4724 ipp->ipp_sticky_ignored = 0; 4725 4726 /* 4727 * If TPI options passed in, feed it for verification and handling 4728 */ 4729 if (tudr_optlen != 0) { 4730 int error; 4731 4732 if (icmp_unitdata_opt_process(q, mp, &error, 4733 (void *)ipp) < 0) { 4734 /* failure */ 4735 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4736 icmp_ud_err(q, mp, error); 4737 return; 4738 } 4739 ignore = ipp->ipp_sticky_ignored; 4740 ASSERT(error == 0); 4741 } 4742 4743 if (sin6->sin6_scope_id != 0 && 4744 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4745 /* 4746 * IPPF_SCOPE_ID is special. It's neither a sticky 4747 * option nor ancillary data. It needs to be 4748 * explicitly set in options_exists. 
4749 */ 4750 option_exists |= IPPF_SCOPE_ID; 4751 } 4752 4753 /* 4754 * Compute the destination address 4755 */ 4756 ip6_dst = sin6->sin6_addr; 4757 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4758 ip6_dst = ipv6_loopback; 4759 4760 /* 4761 * If we're not going to the same destination as last time, then 4762 * recompute the label required. This is done in a separate routine to 4763 * avoid blowing up our stack here. 4764 */ 4765 if (is_system_labeled() && 4766 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst) && 4767 !icmp_update_label_v6(q, icmp, mp, &ip6_dst)) { 4768 return; 4769 } 4770 4771 /* 4772 * If there's a security label here, then we ignore any options the 4773 * user may try to set. We keep the peer's label as a hidden sticky 4774 * option. 4775 */ 4776 if (icmp->icmp_label_len_v6 > 0) { 4777 ignore &= ~IPPF_HOPOPTS; 4778 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4779 } 4780 4781 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4782 (ipp->ipp_fields == 0)) { 4783 /* No sticky options nor ancillary data. */ 4784 goto no_options; 4785 } 4786 4787 /* 4788 * Go through the options figuring out where each is going to 4789 * come from and build two masks. The first mask indicates if 4790 * the option exists at all. The second mask indicates if the 4791 * option is sticky or ancillary. 
4792 */ 4793 if (!(ignore & IPPF_HOPOPTS)) { 4794 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4795 option_exists |= IPPF_HOPOPTS; 4796 ip_hdr_len += ipp->ipp_hopoptslen; 4797 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4798 option_exists |= IPPF_HOPOPTS; 4799 is_sticky |= IPPF_HOPOPTS; 4800 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4801 } 4802 } 4803 4804 if (!(ignore & IPPF_RTHDR)) { 4805 if (ipp->ipp_fields & IPPF_RTHDR) { 4806 option_exists |= IPPF_RTHDR; 4807 ip_hdr_len += ipp->ipp_rthdrlen; 4808 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4809 option_exists |= IPPF_RTHDR; 4810 is_sticky |= IPPF_RTHDR; 4811 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4812 } 4813 } 4814 4815 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4816 /* 4817 * Need to have a router header to use these. 4818 */ 4819 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4820 option_exists |= IPPF_RTDSTOPTS; 4821 ip_hdr_len += ipp->ipp_rtdstoptslen; 4822 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4823 option_exists |= IPPF_RTDSTOPTS; 4824 is_sticky |= IPPF_RTDSTOPTS; 4825 ip_hdr_len += 4826 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4827 } 4828 } 4829 4830 if (!(ignore & IPPF_DSTOPTS)) { 4831 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4832 option_exists |= IPPF_DSTOPTS; 4833 ip_hdr_len += ipp->ipp_dstoptslen; 4834 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4835 option_exists |= IPPF_DSTOPTS; 4836 is_sticky |= IPPF_DSTOPTS; 4837 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4838 } 4839 } 4840 4841 if (!(ignore & IPPF_IFINDEX)) { 4842 if (ipp->ipp_fields & IPPF_IFINDEX) { 4843 option_exists |= IPPF_IFINDEX; 4844 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4845 option_exists |= IPPF_IFINDEX; 4846 is_sticky |= IPPF_IFINDEX; 4847 } 4848 } 4849 4850 if (!(ignore & IPPF_ADDR)) { 4851 if (ipp->ipp_fields & IPPF_ADDR) { 4852 option_exists |= IPPF_ADDR; 4853 } else if 
(icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4854 option_exists |= IPPF_ADDR; 4855 is_sticky |= IPPF_ADDR; 4856 } 4857 } 4858 4859 if (!(ignore & IPPF_DONTFRAG)) { 4860 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4861 option_exists |= IPPF_DONTFRAG; 4862 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4863 option_exists |= IPPF_DONTFRAG; 4864 is_sticky |= IPPF_DONTFRAG; 4865 } 4866 } 4867 4868 if (!(ignore & IPPF_USE_MIN_MTU)) { 4869 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4870 option_exists |= IPPF_USE_MIN_MTU; 4871 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4872 IPPF_USE_MIN_MTU) { 4873 option_exists |= IPPF_USE_MIN_MTU; 4874 is_sticky |= IPPF_USE_MIN_MTU; 4875 } 4876 } 4877 4878 if (!(ignore & IPPF_NEXTHOP)) { 4879 if (ipp->ipp_fields & IPPF_NEXTHOP) { 4880 option_exists |= IPPF_NEXTHOP; 4881 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) { 4882 option_exists |= IPPF_NEXTHOP; 4883 is_sticky |= IPPF_NEXTHOP; 4884 } 4885 } 4886 4887 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 4888 option_exists |= IPPF_HOPLIMIT; 4889 /* IPV6_HOPLIMIT can never be sticky */ 4890 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 4891 4892 if (!(ignore & IPPF_UNICAST_HOPS) && 4893 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 4894 option_exists |= IPPF_UNICAST_HOPS; 4895 is_sticky |= IPPF_UNICAST_HOPS; 4896 } 4897 4898 if (!(ignore & IPPF_MULTICAST_HOPS) && 4899 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 4900 option_exists |= IPPF_MULTICAST_HOPS; 4901 is_sticky |= IPPF_MULTICAST_HOPS; 4902 } 4903 4904 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 4905 /* This is a sticky socket option only */ 4906 option_exists |= IPPF_NO_CKSUM; 4907 is_sticky |= IPPF_NO_CKSUM; 4908 } 4909 4910 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 4911 /* This is a sticky socket option only */ 4912 option_exists |= IPPF_RAW_CKSUM; 4913 is_sticky |= IPPF_RAW_CKSUM; 4914 } 4915 4916 if (!(ignore 
& IPPF_TCLASS)) { 4917 if (ipp->ipp_fields & IPPF_TCLASS) { 4918 option_exists |= IPPF_TCLASS; 4919 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 4920 option_exists |= IPPF_TCLASS; 4921 is_sticky |= IPPF_TCLASS; 4922 } 4923 } 4924 4925 no_options: 4926 4927 /* 4928 * If any options carried in the ip6i_t were specified, we 4929 * need to account for the ip6i_t in the data we'll be sending 4930 * down. 4931 */ 4932 if (option_exists & IPPF_HAS_IP6I) 4933 ip_hdr_len += sizeof (ip6i_t); 4934 4935 /* check/fix buffer config, setup pointers into it */ 4936 mp1 = mp->b_cont; 4937 ip6h = (ip6_t *)&mp1->b_rptr[-ip_hdr_len]; 4938 if ((mp1->b_datap->db_ref != 1) || 4939 ((unsigned char *)ip6h < mp1->b_datap->db_base) || 4940 !OK_32PTR(ip6h)) { 4941 /* Try to get everything in a single mblk next time */ 4942 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 4943 icmp->icmp_max_hdr_len = ip_hdr_len; 4944 (void) mi_set_sth_wroff(RD(q), 4945 icmp->icmp_max_hdr_len + is->is_wroff_extra); 4946 } 4947 mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); 4948 if (!mp1) { 4949 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4950 icmp_ud_err(q, mp, ENOMEM); 4951 return; 4952 } 4953 mp1->b_cont = mp->b_cont; 4954 mp1->b_wptr = mp1->b_datap->db_lim; 4955 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 4956 } 4957 mp1->b_rptr = (unsigned char *)ip6h; 4958 ip6i = (ip6i_t *)ip6h; 4959 4960 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 4961 if (option_exists & IPPF_HAS_IP6I) { 4962 ip6h = (ip6_t *)&ip6i[1]; 4963 ip6i->ip6i_flags = 0; 4964 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4965 4966 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 4967 if (option_exists & IPPF_SCOPE_ID) { 4968 ip6i->ip6i_flags |= IP6I_IFINDEX; 4969 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 4970 } else if (option_exists & IPPF_IFINDEX) { 4971 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 4972 ASSERT(tipp->ipp_ifindex != 0); 4973 ip6i->ip6i_flags |= IP6I_IFINDEX; 4974 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 4975 } 4976 4977 if (option_exists & IPPF_RAW_CKSUM) { 4978 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 4979 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 4980 } 4981 4982 if (option_exists & IPPF_NO_CKSUM) { 4983 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 4984 } 4985 4986 if (option_exists & IPPF_ADDR) { 4987 /* 4988 * Enable per-packet source address verification if 4989 * IPV6_PKTINFO specified the source address. 4990 * ip6_src is set in the transport's _wput function. 4991 */ 4992 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 4993 } 4994 4995 if (option_exists & IPPF_DONTFRAG) { 4996 ip6i->ip6i_flags |= IP6I_DONTFRAG; 4997 } 4998 4999 if (option_exists & IPPF_USE_MIN_MTU) { 5000 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 5001 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 5002 } 5003 5004 if (option_exists & IPPF_NEXTHOP) { 5005 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 5006 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 5007 ip6i->ip6i_flags |= IP6I_NEXTHOP; 5008 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 5009 } 5010 5011 /* 5012 * tell IP this is an ip6i_t private header 5013 */ 5014 ip6i->ip6i_nxt = IPPROTO_RAW; 5015 } 5016 5017 /* Initialize IPv6 header */ 5018 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5019 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 5020 5021 /* Set the hoplimit of the outgoing packet. 
*/ 5022 if (option_exists & IPPF_HOPLIMIT) { 5023 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 5024 ip6h->ip6_hops = ipp->ipp_hoplimit; 5025 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5026 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 5027 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 5028 if (option_exists & IPPF_MULTICAST_HOPS) 5029 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5030 } else { 5031 ip6h->ip6_hops = icmp->icmp_ttl; 5032 if (option_exists & IPPF_UNICAST_HOPS) 5033 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5034 } 5035 5036 if (option_exists & IPPF_ADDR) { 5037 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 5038 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 5039 ip6h->ip6_src = tipp->ipp_addr; 5040 } else { 5041 /* 5042 * The source address was not set using IPV6_PKTINFO. 5043 * First look at the bound source. 5044 * If unspecified fallback to __sin6_src_id. 5045 */ 5046 ip6h->ip6_src = icmp->icmp_v6src; 5047 if (sin6->__sin6_src_id != 0 && 5048 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 5049 ip_srcid_find_id(sin6->__sin6_src_id, 5050 &ip6h->ip6_src, icmp->icmp_zoneid, 5051 is->is_netstack); 5052 } 5053 } 5054 5055 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 5056 cp = (uint8_t *)&ip6h[1]; 5057 5058 /* 5059 * Here's where we have to start stringing together 5060 * any extension headers in the right order: 5061 * Hop-by-hop, destination, routing, and final destination opts. 
5062 */ 5063 if (option_exists & IPPF_HOPOPTS) { 5064 /* Hop-by-hop options */ 5065 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 5066 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 5067 5068 *nxthdr_ptr = IPPROTO_HOPOPTS; 5069 nxthdr_ptr = &hbh->ip6h_nxt; 5070 5071 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 5072 cp += tipp->ipp_hopoptslen; 5073 } 5074 /* 5075 * En-route destination options 5076 * Only do them if there's a routing header as well 5077 */ 5078 if (option_exists & IPPF_RTDSTOPTS) { 5079 ip6_dest_t *dst = (ip6_dest_t *)cp; 5080 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 5081 5082 *nxthdr_ptr = IPPROTO_DSTOPTS; 5083 nxthdr_ptr = &dst->ip6d_nxt; 5084 5085 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 5086 cp += tipp->ipp_rtdstoptslen; 5087 } 5088 /* 5089 * Routing header next 5090 */ 5091 if (option_exists & IPPF_RTHDR) { 5092 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 5093 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 5094 5095 *nxthdr_ptr = IPPROTO_ROUTING; 5096 nxthdr_ptr = &rt->ip6r_nxt; 5097 5098 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 5099 cp += tipp->ipp_rthdrlen; 5100 } 5101 /* 5102 * Do ultimate destination options 5103 */ 5104 if (option_exists & IPPF_DSTOPTS) { 5105 ip6_dest_t *dest = (ip6_dest_t *)cp; 5106 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 5107 5108 *nxthdr_ptr = IPPROTO_DSTOPTS; 5109 nxthdr_ptr = &dest->ip6d_nxt; 5110 5111 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 5112 cp += tipp->ipp_dstoptslen; 5113 } 5114 5115 /* 5116 * Now set the last header pointer to the proto passed in 5117 */ 5118 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 5119 *nxthdr_ptr = icmp->icmp_proto; 5120 5121 /* 5122 * Copy in the destination address 5123 */ 5124 ip6h->ip6_dst = ip6_dst; 5125 5126 ip6h->ip6_vcf = 5127 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 5128 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 5129 5130 if (option_exists & IPPF_TCLASS) { 5131 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 5132 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 5133 tipp->ipp_tclass); 5134 } 5135 if (option_exists & IPPF_RTHDR) { 5136 ip6_rthdr_t *rth; 5137 5138 /* 5139 * Perform any processing needed for source routing. 5140 * We know that all extension headers will be in the same mblk 5141 * as the IPv6 header. 5142 */ 5143 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 5144 if (rth != NULL && rth->ip6r_segleft != 0) { 5145 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 5146 /* 5147 * Drop packet - only support Type 0 routing. 5148 * Notify the application as well. 5149 */ 5150 icmp_ud_err(q, mp, EPROTO); 5151 BUMP_MIB(&is->is_rawip_mib, 5152 rawipOutErrors); 5153 return; 5154 } 5155 /* 5156 * rth->ip6r_len is twice the number of 5157 * addresses in the header 5158 */ 5159 if (rth->ip6r_len & 0x1) { 5160 icmp_ud_err(q, mp, EPROTO); 5161 BUMP_MIB(&is->is_rawip_mib, 5162 rawipOutErrors); 5163 return; 5164 } 5165 /* 5166 * Shuffle the routing header and ip6_dst 5167 * addresses, and get the checksum difference 5168 * between the first hop (in ip6_dst) and 5169 * the destination (in the last routing hdr entry). 5170 */ 5171 csum = ip_massage_options_v6(ip6h, rth, 5172 is->is_netstack); 5173 /* 5174 * Verify that the first hop isn't a mapped address. 5175 * Routers along the path need to do this verification 5176 * for subsequent hops. 5177 */ 5178 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 5179 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5180 BUMP_MIB(&is->is_rawip_mib, 5181 rawipOutErrors); 5182 return; 5183 } 5184 } 5185 } 5186 5187 ip_len = mp1->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 5188 if (mp1->b_cont != NULL) 5189 ip_len += msgdsize(mp1->b_cont); 5190 5191 /* 5192 * Set the length into the IP header. 5193 * If the length is greater than the maximum allowed by IP, 5194 * then free the message and return. Do not try and send it 5195 * as this can cause problems in layers below. 
5196 */ 5197 if (ip_len > IP_MAXPACKET) { 5198 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5199 icmp_ud_err(q, mp, EMSGSIZE); 5200 return; 5201 } 5202 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 5203 uint_t cksum_off; /* From ip6i == mp1->b_rptr */ 5204 uint16_t *cksum_ptr; 5205 uint_t ext_hdrs_len; 5206 5207 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 5208 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 5209 icmp->icmp_checksum_off == 2); 5210 5211 /* 5212 * We make it easy for IP to include our pseudo header 5213 * by putting our length in uh_checksum, modified (if 5214 * we have a routing header) by the checksum difference 5215 * between the ultimate destination and first hop addresses. 5216 * Note: ICMPv6 must always checksum the packet. 5217 */ 5218 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 5219 if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) { 5220 if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) { 5221 BUMP_MIB(&is->is_rawip_mib, 5222 rawipOutErrors); 5223 freemsg(mp); 5224 return; 5225 } 5226 ip6i = (ip6i_t *)mp1->b_rptr; 5227 if (ip6i->ip6i_nxt == IPPROTO_RAW) 5228 ip6h = (ip6_t *)&ip6i[1]; 5229 else 5230 ip6h = (ip6_t *)ip6i; 5231 } 5232 /* Add payload length to checksum */ 5233 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 5234 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 5235 csum += htons(ip_len - ext_hdrs_len); 5236 5237 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 5238 csum = (csum & 0xFFFF) + (csum >> 16); 5239 *cksum_ptr = (uint16_t)csum; 5240 } 5241 5242 #ifdef _LITTLE_ENDIAN 5243 ip_len = htons(ip_len); 5244 #endif 5245 ip6h->ip6_plen = (uint16_t)ip_len; 5246 5247 freeb(mp); 5248 5249 /* We're done. 
Pass the packet to IP */ 5250 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 5251 ip_output_v6(icmp->icmp_connp, mp1, q, IP_WPUT); 5252 } 5253 5254 static void 5255 icmp_wput_other(queue_t *q, mblk_t *mp) 5256 { 5257 uchar_t *rptr = mp->b_rptr; 5258 struct iocblk *iocp; 5259 #define tudr ((struct T_unitdata_req *)rptr) 5260 conn_t *connp = Q_TO_CONN(q); 5261 icmp_t *icmp = connp->conn_icmp; 5262 icmp_stack_t *is = icmp->icmp_is; 5263 cred_t *cr; 5264 5265 cr = DB_CREDDEF(mp, connp->conn_cred); 5266 5267 switch (mp->b_datap->db_type) { 5268 case M_PROTO: 5269 case M_PCPROTO: 5270 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5271 /* 5272 * If the message does not contain a PRIM_type, 5273 * throw it away. 5274 */ 5275 freemsg(mp); 5276 return; 5277 } 5278 switch (((union T_primitives *)rptr)->type) { 5279 case T_ADDR_REQ: 5280 icmp_addr_req(q, mp); 5281 return; 5282 case O_T_BIND_REQ: 5283 case T_BIND_REQ: 5284 icmp_bind(q, mp); 5285 return; 5286 case T_CONN_REQ: 5287 icmp_connect(q, mp); 5288 return; 5289 case T_CAPABILITY_REQ: 5290 icmp_capability_req(q, mp); 5291 return; 5292 case T_INFO_REQ: 5293 icmp_info_req(q, mp); 5294 return; 5295 case T_UNITDATA_REQ: 5296 /* 5297 * If a T_UNITDATA_REQ gets here, the address must 5298 * be bad. Valid T_UNITDATA_REQs are found above 5299 * and break to below this switch. 5300 */ 5301 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5302 return; 5303 case T_UNBIND_REQ: 5304 icmp_unbind(q, mp); 5305 return; 5306 5307 case T_SVR4_OPTMGMT_REQ: 5308 if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get, 5309 cr)) { 5310 /* Only IP can return anything meaningful */ 5311 (void) svr4_optcom_req(q, mp, cr, 5312 &icmp_opt_obj, B_TRUE); 5313 } 5314 return; 5315 5316 case T_OPTMGMT_REQ: 5317 /* Only IP can return anything meaningful */ 5318 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE); 5319 return; 5320 5321 case T_DISCON_REQ: 5322 icmp_disconnect(q, mp); 5323 return; 5324 5325 /* The following TPI message is not supported by icmp. 
*/ 5326 case O_T_CONN_RES: 5327 case T_CONN_RES: 5328 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5329 return; 5330 5331 /* The following 3 TPI requests are illegal for icmp. */ 5332 case T_DATA_REQ: 5333 case T_EXDATA_REQ: 5334 case T_ORDREL_REQ: 5335 freemsg(mp); 5336 (void) putctl1(RD(q), M_ERROR, EPROTO); 5337 return; 5338 default: 5339 break; 5340 } 5341 break; 5342 case M_IOCTL: 5343 iocp = (struct iocblk *)mp->b_rptr; 5344 switch (iocp->ioc_cmd) { 5345 case TI_GETPEERNAME: 5346 if (icmp->icmp_state != TS_DATA_XFER) { 5347 /* 5348 * If a default destination address has not 5349 * been associated with the stream, then we 5350 * don't know the peer's name. 5351 */ 5352 iocp->ioc_error = ENOTCONN; 5353 err_ret:; 5354 iocp->ioc_count = 0; 5355 mp->b_datap->db_type = M_IOCACK; 5356 qreply(q, mp); 5357 return; 5358 } 5359 /* FALLTHRU */ 5360 case TI_GETMYNAME: 5361 /* 5362 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5363 * need to copyin the user's strbuf structure. 5364 * Processing will continue in the M_IOCDATA case 5365 * below. 5366 */ 5367 mi_copyin(q, mp, NULL, 5368 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5369 return; 5370 case ND_SET: 5371 /* nd_getset performs the necessary error checking */ 5372 case ND_GET: 5373 if (nd_getset(q, is->is_nd, mp)) { 5374 qreply(q, mp); 5375 return; 5376 } 5377 break; 5378 default: 5379 break; 5380 } 5381 break; 5382 case M_IOCDATA: 5383 icmp_wput_iocdata(q, mp); 5384 return; 5385 default: 5386 break; 5387 } 5388 ip_wput(q, mp); 5389 } 5390 5391 /* 5392 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5393 * messages. 5394 */ 5395 static void 5396 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5397 { 5398 mblk_t *mp1; 5399 STRUCT_HANDLE(strbuf, sb); 5400 icmp_t *icmp; 5401 in6_addr_t v6addr; 5402 ipaddr_t v4addr; 5403 uint32_t flowinfo = 0; 5404 int addrlen; 5405 5406 /* Make sure it is one of ours. 
*/ 5407 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5408 case TI_GETMYNAME: 5409 case TI_GETPEERNAME: 5410 break; 5411 default: 5412 icmp = Q_TO_ICMP(q); 5413 ip_output(icmp->icmp_connp, mp, q, IP_WPUT); 5414 return; 5415 } 5416 switch (mi_copy_state(q, mp, &mp1)) { 5417 case -1: 5418 return; 5419 case MI_COPY_CASE(MI_COPY_IN, 1): 5420 break; 5421 case MI_COPY_CASE(MI_COPY_OUT, 1): 5422 /* 5423 * The address has been copied out, so now 5424 * copyout the strbuf. 5425 */ 5426 mi_copyout(q, mp); 5427 return; 5428 case MI_COPY_CASE(MI_COPY_OUT, 2): 5429 /* 5430 * The address and strbuf have been copied out. 5431 * We're done, so just acknowledge the original 5432 * M_IOCTL. 5433 */ 5434 mi_copy_done(q, mp, 0); 5435 return; 5436 default: 5437 /* 5438 * Something strange has happened, so acknowledge 5439 * the original M_IOCTL with an EPROTO error. 5440 */ 5441 mi_copy_done(q, mp, EPROTO); 5442 return; 5443 } 5444 /* 5445 * Now we have the strbuf structure for TI_GETMYNAME 5446 * and TI_GETPEERNAME. Next we copyout the requested 5447 * address and then we'll copyout the strbuf. 5448 */ 5449 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5450 (void *)mp1->b_rptr); 5451 icmp = Q_TO_ICMP(q); 5452 if (icmp->icmp_family == AF_INET) 5453 addrlen = sizeof (sin_t); 5454 else 5455 addrlen = sizeof (sin6_t); 5456 5457 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5458 mi_copy_done(q, mp, EINVAL); 5459 return; 5460 } 5461 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5462 case TI_GETMYNAME: 5463 if (icmp->icmp_family == AF_INET) { 5464 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5465 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 5466 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5467 v4addr = V4_PART_OF_V6(icmp->icmp_v6src); 5468 } else { 5469 /* 5470 * INADDR_ANY 5471 * icmp_v6src is not set, we might be bound to 5472 * broadcast/multicast. 
Use icmp_bound_v6src as 5473 * local address instead (that could 5474 * also still be INADDR_ANY) 5475 */ 5476 v4addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 5477 } 5478 } else { 5479 /* icmp->icmp_family == AF_INET6 */ 5480 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5481 v6addr = icmp->icmp_v6src; 5482 } else { 5483 /* 5484 * UNSPECIFIED 5485 * icmp_v6src is not set, we might be bound to 5486 * broadcast/multicast. Use icmp_bound_v6src as 5487 * local address instead (that could 5488 * also still be UNSPECIFIED) 5489 */ 5490 v6addr = icmp->icmp_bound_v6src; 5491 } 5492 } 5493 break; 5494 case TI_GETPEERNAME: 5495 if (icmp->icmp_family == AF_INET) { 5496 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5497 v4addr = V4_PART_OF_V6(icmp->icmp_v6dst); 5498 } else { 5499 /* icmp->icmp_family == AF_INET6) */ 5500 v6addr = icmp->icmp_v6dst; 5501 flowinfo = icmp->icmp_flowinfo; 5502 } 5503 break; 5504 default: 5505 mi_copy_done(q, mp, EPROTO); 5506 return; 5507 } 5508 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5509 if (!mp1) 5510 return; 5511 5512 if (icmp->icmp_family == AF_INET) { 5513 sin_t *sin; 5514 5515 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 5516 sin = (sin_t *)mp1->b_rptr; 5517 mp1->b_wptr = (uchar_t *)&sin[1]; 5518 *sin = sin_null; 5519 sin->sin_family = AF_INET; 5520 sin->sin_addr.s_addr = v4addr; 5521 } else { 5522 /* icmp->icmp_family == AF_INET6 */ 5523 sin6_t *sin6; 5524 5525 ASSERT(icmp->icmp_family == AF_INET6); 5526 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 5527 sin6 = (sin6_t *)mp1->b_rptr; 5528 mp1->b_wptr = (uchar_t *)&sin6[1]; 5529 *sin6 = sin6_null; 5530 sin6->sin6_family = AF_INET6; 5531 sin6->sin6_flowinfo = flowinfo; 5532 sin6->sin6_addr = v6addr; 5533 } 5534 /* Copy out the address */ 5535 mi_copyout(q, mp); 5536 } 5537 5538 static int 5539 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5540 void *thisdg_attrs) 5541 { 5542 conn_t *connp = Q_TO_CONN(q); 5543 struct T_unitdata_req *udreqp; 5544 
int is_absreq_failure; 5545 cred_t *cr; 5546 5547 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5548 *errorp = 0; 5549 5550 cr = DB_CREDDEF(mp, connp->conn_cred); 5551 5552 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5553 udreqp->OPT_offset, cr, &icmp_opt_obj, 5554 thisdg_attrs, &is_absreq_failure); 5555 5556 if (*errorp != 0) { 5557 /* 5558 * Note: No special action needed in this 5559 * module for "is_absreq_failure" 5560 */ 5561 return (-1); /* failure */ 5562 } 5563 ASSERT(is_absreq_failure == 0); 5564 return (0); /* success */ 5565 } 5566 5567 void 5568 icmp_ddi_init(void) 5569 { 5570 icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5571 icmp_opt_obj.odb_opt_arr_cnt); 5572 5573 /* 5574 * We want to be informed each time a stack is created or 5575 * destroyed in the kernel, so we can maintain the 5576 * set of icmp_stack_t's. 5577 */ 5578 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); 5579 } 5580 5581 void 5582 icmp_ddi_destroy(void) 5583 { 5584 netstack_unregister(NS_ICMP); 5585 } 5586 5587 /* 5588 * Initialize the ICMP stack instance. 5589 */ 5590 static void * 5591 rawip_stack_init(netstackid_t stackid, netstack_t *ns) 5592 { 5593 icmp_stack_t *is; 5594 icmpparam_t *pa; 5595 5596 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); 5597 is->is_netstack = ns; 5598 5599 pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); 5600 is->is_param_arr = pa; 5601 bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); 5602 5603 (void) icmp_param_register(&is->is_nd, 5604 is->is_param_arr, A_CNT(icmp_param_arr)); 5605 is->is_ksp = rawip_kstat_init(stackid); 5606 return (is); 5607 } 5608 5609 /* 5610 * Free the ICMP stack instance. 
5611 */ 5612 static void 5613 rawip_stack_fini(netstackid_t stackid, void *arg) 5614 { 5615 icmp_stack_t *is = (icmp_stack_t *)arg; 5616 5617 nd_free(&is->is_nd); 5618 kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); 5619 is->is_param_arr = NULL; 5620 5621 rawip_kstat_fini(stackid, is->is_ksp); 5622 is->is_ksp = NULL; 5623 kmem_free(is, sizeof (*is)); 5624 } 5625 5626 static void * 5627 rawip_kstat_init(netstackid_t stackid) { 5628 kstat_t *ksp; 5629 5630 rawip_named_kstat_t template = { 5631 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5632 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5633 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5634 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5635 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5636 }; 5637 5638 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", 5639 KSTAT_TYPE_NAMED, 5640 NUM_OF_FIELDS(rawip_named_kstat_t), 5641 0, stackid); 5642 if (ksp == NULL || ksp->ks_data == NULL) 5643 return (NULL); 5644 5645 bcopy(&template, ksp->ks_data, sizeof (template)); 5646 ksp->ks_update = rawip_kstat_update; 5647 ksp->ks_private = (void *)(uintptr_t)stackid; 5648 5649 kstat_install(ksp); 5650 return (ksp); 5651 } 5652 5653 static void 5654 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) 5655 { 5656 if (ksp != NULL) { 5657 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 5658 kstat_delete_netstack(ksp, stackid); 5659 } 5660 } 5661 5662 static int 5663 rawip_kstat_update(kstat_t *ksp, int rw) 5664 { 5665 rawip_named_kstat_t *rawipkp; 5666 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; 5667 netstack_t *ns; 5668 icmp_stack_t *is; 5669 5670 if ((ksp == NULL) || (ksp->ks_data == NULL)) 5671 return (EIO); 5672 5673 if (rw == KSTAT_WRITE) 5674 return (EACCES); 5675 5676 rawipkp = (rawip_named_kstat_t *)ksp->ks_data; 5677 5678 ns = netstack_find_by_stackid(stackid); 5679 if (ns == NULL) 5680 return (-1); 5681 is = ns->netstack_icmp; 5682 if (is == NULL) { 5683 netstack_rele(ns); 5684 return (-1); 5685 } 5686 
rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; 5687 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; 5688 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; 5689 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; 5690 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; 5691 netstack_rele(ns); 5692 return (0); 5693 } 5694