1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/types.h> 31 #include <sys/stream.h> 32 #include <sys/stropts.h> 33 #include <sys/strlog.h> 34 #include <sys/strsun.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/priv.h> 46 #include <sys/zone.h> 47 #include <sys/time.h> 48 49 #include <sys/socket.h> 50 #include <sys/isa_defs.h> 51 #include <sys/suntpi.h> 52 #include <sys/xti_inet.h> 53 #include <sys/netstack.h> 54 55 #include <net/route.h> 56 #include <net/if.h> 57 58 #include <netinet/in.h> 59 #include <netinet/ip6.h> 60 #include <netinet/icmp6.h> 61 #include <inet/common.h> 62 #include <inet/ip.h> 63 #include <inet/ip6.h> 64 #include <inet/mi.h> 65 #include <inet/nd.h> 66 #include <inet/optcom.h> 67 #include <inet/snmpcom.h> 68 #include <inet/kstatcom.h> 69 #include <inet/rawip_impl.h> 70 71 #include <netinet/ip_mroute.h> 72 #include <inet/tcp.h> 73 #include <net/pfkeyv2.h> 74 #include <inet/ipsec_info.h> 75 #include <inet/ipclassifier.h> 76 77 #include <sys/tsol/label.h> 78 #include <sys/tsol/tnet.h> 79 80 #include <inet/ip_ire.h> 81 #include <inet/ip_if.h> 82 83 #include <inet/ip_impl.h> 84 85 /* 86 * Synchronization notes: 87 * 88 * RAWIP is MT and uses the usual kernel synchronization primitives. There is 89 * one lock, icmp_rwlock. We also use conn_lock when updating things 90 * which affect the IP classifier lookup. 91 * The lock order is icmp_rwlock -> conn_lock. 92 * 93 * The icmp_rwlock: 94 * This protects most of the other fields in the icmp_t. The exact list of 95 * fields which are protected by each of the above locks is documented in 96 * the icmp_t structure definition. 97 * 98 * Plumbing notes: 99 * ICMP is always a device driver. 
For compatibility with mibopen() code 100 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough 101 * dummy module. 102 */ 103 104 static void icmp_addr_req(queue_t *q, mblk_t *mp); 105 static void icmp_bind(queue_t *q, mblk_t *mp); 106 static void icmp_bind_proto(queue_t *q); 107 static void icmp_bind_result(conn_t *, mblk_t *); 108 static void icmp_bind_ack(conn_t *, mblk_t *mp); 109 static void icmp_bind_error(conn_t *, mblk_t *mp); 110 static int icmp_build_hdrs(icmp_t *icmp); 111 static void icmp_capability_req(queue_t *q, mblk_t *mp); 112 static int icmp_close(queue_t *q); 113 static void icmp_connect(queue_t *q, mblk_t *mp); 114 static void icmp_disconnect(queue_t *q, mblk_t *mp); 115 static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 116 int sys_error); 117 static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 118 t_scalar_t t_error, int sys_error); 119 static void icmp_icmp_error(queue_t *q, mblk_t *mp); 120 static void icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 121 static void icmp_info_req(queue_t *q, mblk_t *mp); 122 static void icmp_input(void *, mblk_t *, void *); 123 static mblk_t *icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, 124 t_scalar_t addr_length, in_port_t); 125 static int icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, 126 cred_t *credp, boolean_t isv6); 127 static int icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 128 cred_t *credp); 129 static int icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 130 cred_t *credp); 131 static void icmp_output(queue_t *q, mblk_t *mp); 132 static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, 133 int *errorp, void *thisdg_attrs); 134 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 135 int icmp_opt_set(queue_t *q, uint_t optset_context, 136 int level, int name, uint_t inlen, 137 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 138 void *thisdg_attrs, cred_t *cr, mblk_t 
*mblk); 139 int icmp_opt_get(queue_t *q, int level, int name, 140 uchar_t *ptr); 141 static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 142 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt); 143 static int icmp_param_set(queue_t *q, mblk_t *mp, char *value, 144 caddr_t cp, cred_t *cr); 145 static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 146 uchar_t *ptr, int len); 147 static int icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 148 cred_t *cr); 149 static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 150 static void icmp_unbind(queue_t *q, mblk_t *mp); 151 static void icmp_wput(queue_t *q, mblk_t *mp); 152 static void icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, 153 t_scalar_t tudr_optlen); 154 static void icmp_wput_other(queue_t *q, mblk_t *mp); 155 static void icmp_wput_iocdata(queue_t *q, mblk_t *mp); 156 static void icmp_wput_restricted(queue_t *q, mblk_t *mp); 157 158 static void *rawip_stack_init(netstackid_t stackid, netstack_t *ns); 159 static void rawip_stack_fini(netstackid_t stackid, void *arg); 160 161 static void *rawip_kstat_init(netstackid_t stackid); 162 static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp); 163 static int rawip_kstat_update(kstat_t *kp, int rw); 164 165 166 static struct module_info icmp_mod_info = { 167 5707, "icmp", 1, INFPSZ, 512, 128 168 }; 169 170 /* 171 * Entry points for ICMP as a device. 172 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices. 
173 */ 174 static struct qinit icmprinitv4 = { 175 NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info 176 }; 177 178 static struct qinit icmprinitv6 = { 179 NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info 180 }; 181 182 static struct qinit icmpwinit = { 183 (pfi_t)icmp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &icmp_mod_info 184 }; 185 186 /* For AF_INET aka /dev/icmp */ 187 struct streamtab icmpinfov4 = { 188 &icmprinitv4, &icmpwinit 189 }; 190 191 /* For AF_INET6 aka /dev/icmp6 */ 192 struct streamtab icmpinfov6 = { 193 &icmprinitv6, &icmpwinit 194 }; 195 196 static sin_t sin_null; /* Zero address for quick clears */ 197 static sin6_t sin6_null; /* Zero address for quick clears */ 198 199 /* Default structure copied into T_INFO_ACK messages */ 200 static struct T_info_ack icmp_g_t_info_ack = { 201 T_INFO_ACK, 202 IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */ 203 T_INVALID, /* ETSDU_size. icmp does not support expedited data. */ 204 T_INVALID, /* CDATA_size. icmp does not support connect data. */ 205 T_INVALID, /* DDATA_size. icmp does not support disconnect data. */ 206 0, /* ADDR_size - filled in later. */ 207 0, /* OPT_size - not initialized here */ 208 IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */ 209 T_CLTS, /* SERV_type. icmp supports connection-less. */ 210 TS_UNBND, /* CURRENT_state. This is set from icmp_state. */ 211 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 212 }; 213 214 /* 215 * Table of ND variables supported by icmp. These are loaded into is_nd 216 * when the stack instance is created. 217 * All of these are alterable, within the min/max values given, at run time. 
218 */ 219 static icmpparam_t icmp_param_arr[] = { 220 /* min max value name */ 221 { 0, 128, 32, "icmp_wroff_extra" }, 222 { 1, 255, 255, "icmp_ipv4_ttl" }, 223 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"}, 224 { 0, 1, 1, "icmp_bsd_compat" }, 225 { 4096, 65536, 8192, "icmp_xmit_hiwat"}, 226 { 0, 65536, 1024, "icmp_xmit_lowat"}, 227 { 4096, 65536, 8192, "icmp_recv_hiwat"}, 228 { 65536, 1024*1024*1024, 256*1024, "icmp_max_buf"}, 229 }; 230 #define is_wroff_extra is_param_arr[0].icmp_param_value 231 #define is_ipv4_ttl is_param_arr[1].icmp_param_value 232 #define is_ipv6_hoplimit is_param_arr[2].icmp_param_value 233 #define is_bsd_compat is_param_arr[3].icmp_param_value 234 #define is_xmit_hiwat is_param_arr[4].icmp_param_value 235 #define is_xmit_lowat is_param_arr[5].icmp_param_value 236 #define is_recv_hiwat is_param_arr[6].icmp_param_value 237 #define is_max_buf is_param_arr[7].icmp_param_value 238 239 /* 240 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 241 * passed to icmp_wput. 242 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP 243 * protocol type placed in the message following the address. A T_BIND_ACK 244 * message is returned by ip_bind_v4/v6. 
245 */ 246 static void 247 icmp_bind(queue_t *q, mblk_t *mp) 248 { 249 sin_t *sin; 250 sin6_t *sin6; 251 mblk_t *mp1; 252 struct T_bind_req *tbr; 253 icmp_t *icmp; 254 conn_t *connp = Q_TO_CONN(q); 255 256 icmp = connp->conn_icmp; 257 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 258 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 259 "icmp_bind: bad req, len %u", 260 (uint_t)(mp->b_wptr - mp->b_rptr)); 261 icmp_err_ack(q, mp, TPROTO, 0); 262 return; 263 } 264 if (icmp->icmp_state != TS_UNBND) { 265 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 266 "icmp_bind: bad state, %d", icmp->icmp_state); 267 icmp_err_ack(q, mp, TOUTSTATE, 0); 268 return; 269 } 270 /* 271 * Reallocate the message to make sure we have enough room for an 272 * address and the protocol type. 273 */ 274 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 275 if (!mp1) { 276 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 277 return; 278 } 279 mp = mp1; 280 tbr = (struct T_bind_req *)mp->b_rptr; 281 switch (tbr->ADDR_length) { 282 case 0: /* Generic request */ 283 tbr->ADDR_offset = sizeof (struct T_bind_req); 284 if (icmp->icmp_family == AF_INET) { 285 tbr->ADDR_length = sizeof (sin_t); 286 sin = (sin_t *)&tbr[1]; 287 *sin = sin_null; 288 sin->sin_family = AF_INET; 289 mp->b_wptr = (uchar_t *)&sin[1]; 290 } else { 291 ASSERT(icmp->icmp_family == AF_INET6); 292 tbr->ADDR_length = sizeof (sin6_t); 293 sin6 = (sin6_t *)&tbr[1]; 294 *sin6 = sin6_null; 295 sin6->sin6_family = AF_INET6; 296 mp->b_wptr = (uchar_t *)&sin6[1]; 297 } 298 break; 299 case sizeof (sin_t): /* Complete IP address */ 300 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 301 sizeof (sin_t)); 302 if (sin == NULL || !OK_32PTR((char *)sin)) { 303 icmp_err_ack(q, mp, TSYSERR, EINVAL); 304 return; 305 } 306 if (icmp->icmp_family != AF_INET || 307 sin->sin_family != AF_INET) { 308 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 309 return; 310 } 311 break; 312 case sizeof (sin6_t): /* Complete IP address */ 313 sin6 = (sin6_t 
*)mi_offset_param(mp, tbr->ADDR_offset, 314 sizeof (sin6_t)); 315 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 316 icmp_err_ack(q, mp, TSYSERR, EINVAL); 317 return; 318 } 319 if (icmp->icmp_family != AF_INET6 || 320 sin6->sin6_family != AF_INET6) { 321 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 322 return; 323 } 324 /* No support for mapped addresses on raw sockets */ 325 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 326 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 327 return; 328 } 329 break; 330 default: 331 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 332 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 333 icmp_err_ack(q, mp, TBADADDR, 0); 334 return; 335 } 336 337 /* 338 * The state must be TS_UNBND. TPI mandates that users must send 339 * TPI primitives only 1 at a time and wait for the response before 340 * sending the next primitive. 341 */ 342 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 343 if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) { 344 rw_exit(&icmp->icmp_rwlock); 345 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 346 "icmp_bind: bad state, %d", icmp->icmp_state); 347 icmp_err_ack(q, mp, TOUTSTATE, 0); 348 return; 349 } 350 351 icmp->icmp_pending_op = tbr->PRIM_type; 352 353 /* 354 * Copy the source address into our icmp structure. This address 355 * may still be zero; if so, ip will fill in the correct address 356 * each time an outbound packet is passed to it. 357 * If we are binding to a broadcast or multicast address then 358 * icmp_bind_ack will clear the source address when it receives 359 * the T_BIND_ACK. 
360 */ 361 icmp->icmp_state = TS_IDLE; 362 363 if (icmp->icmp_family == AF_INET) { 364 ASSERT(sin != NULL); 365 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 366 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 367 &icmp->icmp_v6src); 368 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 369 icmp->icmp_ip_snd_options_len; 370 icmp->icmp_bound_v6src = icmp->icmp_v6src; 371 } else { 372 int error; 373 374 ASSERT(sin6 != NULL); 375 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 376 icmp->icmp_v6src = sin6->sin6_addr; 377 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 378 icmp->icmp_bound_v6src = icmp->icmp_v6src; 379 380 /* Rebuild the header template */ 381 error = icmp_build_hdrs(icmp); 382 if (error != 0) { 383 icmp->icmp_pending_op = -1; 384 rw_exit(&icmp->icmp_rwlock); 385 icmp_err_ack(q, mp, TSYSERR, error); 386 return; 387 } 388 } 389 /* 390 * Place protocol type in the O_T_BIND_REQ/T_BIND_REQ following 391 * the address. 392 */ 393 *mp->b_wptr++ = icmp->icmp_proto; 394 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 395 /* 396 * Append a request for an IRE if src not 0 (INADDR_ANY) 397 */ 398 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 399 if (!mp->b_cont) { 400 icmp->icmp_pending_op = -1; 401 rw_exit(&icmp->icmp_rwlock); 402 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 403 return; 404 } 405 mp->b_cont->b_wptr += sizeof (ire_t); 406 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 407 } 408 rw_exit(&icmp->icmp_rwlock); 409 410 /* Pass the O_T_BIND_REQ/T_BIND_REQ to ip. */ 411 if (icmp->icmp_family == AF_INET6) 412 mp = ip_bind_v6(q, mp, connp, NULL); 413 else 414 mp = ip_bind_v4(q, mp, connp); 415 416 /* The above return NULL if the bind needs to be deferred */ 417 if (mp != NULL) 418 icmp_bind_result(connp, mp); 419 else 420 CONN_INC_REF(connp); 421 } 422 423 /* 424 * Send message to IP to just bind to the protocol. 
425 */ 426 static void 427 icmp_bind_proto(queue_t *q) 428 { 429 mblk_t *mp; 430 struct T_bind_req *tbr; 431 icmp_t *icmp; 432 conn_t *connp = Q_TO_CONN(q); 433 434 icmp = connp->conn_icmp; 435 436 mp = allocb(sizeof (struct T_bind_req) + sizeof (sin6_t) + 1, 437 BPRI_MED); 438 if (!mp) { 439 return; 440 } 441 mp->b_datap->db_type = M_PROTO; 442 tbr = (struct T_bind_req *)mp->b_rptr; 443 tbr->PRIM_type = O_T_BIND_REQ; /* change to T_BIND_REQ ? */ 444 tbr->ADDR_offset = sizeof (struct T_bind_req); 445 446 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 447 if (icmp->icmp_ipversion == IPV4_VERSION) { 448 sin_t *sin; 449 450 tbr->ADDR_length = sizeof (sin_t); 451 sin = (sin_t *)&tbr[1]; 452 *sin = sin_null; 453 sin->sin_family = AF_INET; 454 mp->b_wptr = (uchar_t *)&sin[1]; 455 } else { 456 sin6_t *sin6; 457 458 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 459 tbr->ADDR_length = sizeof (sin6_t); 460 sin6 = (sin6_t *)&tbr[1]; 461 *sin6 = sin6_null; 462 sin6->sin6_family = AF_INET6; 463 mp->b_wptr = (uchar_t *)&sin6[1]; 464 } 465 466 /* Place protocol type in the O_T_BIND_REQ following the address. */ 467 *mp->b_wptr++ = icmp->icmp_proto; 468 rw_exit(&icmp->icmp_rwlock); 469 470 /* Pass the O_T_BIND_REQ to ip. */ 471 if (icmp->icmp_family == AF_INET6) 472 mp = ip_bind_v6(q, mp, connp, NULL); 473 else 474 mp = ip_bind_v4(q, mp, connp); 475 476 /* The above return NULL if the bind needs to be deferred */ 477 if (mp != NULL) 478 icmp_bind_result(connp, mp); 479 else 480 CONN_INC_REF(connp); 481 } 482 483 /* 484 * This is called from ip_wput_nondata to handle the results of a 485 * deferred RAWIP bind. It is called once the bind has been completed. 486 */ 487 void 488 rawip_resume_bind(conn_t *connp, mblk_t *mp) 489 { 490 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 491 492 icmp_bind_result(connp, mp); 493 494 CONN_OPER_PENDING_DONE(connp); 495 } 496 497 /* 498 * This routine handles each T_CONN_REQ message passed to icmp. 
It 499 * associates a default destination address with the stream. 500 * 501 * This routine sends down a T_BIND_REQ to IP with the following mblks: 502 * T_BIND_REQ - specifying local and remote address. 503 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 504 * T_OK_ACK - for the T_CONN_REQ 505 * T_CONN_CON - to keep the TPI user happy 506 * 507 * The connect completes in icmp_bind_result. 508 * When a T_BIND_ACK is received information is extracted from the IRE 509 * and the two appended messages are sent to the TPI user. 510 * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 511 * convert it to an error ack for the appropriate primitive. 512 */ 513 static void 514 icmp_connect(queue_t *q, mblk_t *mp) 515 { 516 sin_t *sin; 517 sin6_t *sin6; 518 mblk_t *mp1, *mp2; 519 struct T_conn_req *tcr; 520 icmp_t *icmp; 521 ipaddr_t v4dst; 522 in6_addr_t v6dst; 523 uint32_t flowinfo; 524 conn_t *connp = Q_TO_CONN(q); 525 526 icmp = connp->conn_icmp; 527 tcr = (struct T_conn_req *)mp->b_rptr; 528 /* Sanity checks */ 529 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 530 icmp_err_ack(q, mp, TPROTO, 0); 531 return; 532 } 533 534 if (tcr->OPT_length != 0) { 535 icmp_err_ack(q, mp, TBADOPT, 0); 536 return; 537 } 538 539 switch (tcr->DEST_length) { 540 default: 541 icmp_err_ack(q, mp, TBADADDR, 0); 542 return; 543 544 case sizeof (sin_t): 545 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 546 sizeof (sin_t)); 547 if (sin == NULL || !OK_32PTR((char *)sin)) { 548 icmp_err_ack(q, mp, TSYSERR, EINVAL); 549 return; 550 } 551 if (icmp->icmp_family != AF_INET || 552 sin->sin_family != AF_INET) { 553 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 554 return; 555 } 556 v4dst = sin->sin_addr.s_addr; 557 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 558 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 559 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 560 icmp->icmp_ip_snd_options_len; 561 break; 562 563 case sizeof (sin6_t): 564 sin6 = 
(sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 565 sizeof (sin6_t)); 566 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 567 icmp_err_ack(q, mp, TSYSERR, EINVAL); 568 return; 569 } 570 if (icmp->icmp_family != AF_INET6 || 571 sin6->sin6_family != AF_INET6) { 572 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 573 return; 574 } 575 /* No support for mapped addresses on raw sockets */ 576 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 577 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 578 return; 579 } 580 v6dst = sin6->sin6_addr; 581 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 582 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 583 flowinfo = sin6->sin6_flowinfo; 584 break; 585 } 586 if (icmp->icmp_ipversion == IPV4_VERSION) { 587 /* 588 * Interpret a zero destination to mean loopback. 589 * Update the T_CONN_REQ (sin/sin6) since it is used to 590 * generate the T_CONN_CON. 591 */ 592 if (v4dst == INADDR_ANY) { 593 v4dst = htonl(INADDR_LOOPBACK); 594 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 595 if (icmp->icmp_family == AF_INET) { 596 sin->sin_addr.s_addr = v4dst; 597 } else { 598 sin6->sin6_addr = v6dst; 599 } 600 } 601 icmp->icmp_v6dst = v6dst; 602 icmp->icmp_flowinfo = 0; 603 604 /* 605 * If the destination address is multicast and 606 * an outgoing multicast interface has been set, 607 * use the address of that interface as our 608 * source address if no source address has been set. 609 */ 610 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 611 CLASSD(v4dst) && 612 icmp->icmp_multicast_if_addr != INADDR_ANY) { 613 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 614 &icmp->icmp_v6src); 615 } 616 } else { 617 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 618 /* 619 * Interpret a zero destination to mean loopback. 620 * Update the T_CONN_REQ (sin/sin6) since it is used to 621 * generate the T_CONN_CON. 
622 */ 623 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 624 v6dst = ipv6_loopback; 625 sin6->sin6_addr = v6dst; 626 } 627 icmp->icmp_v6dst = v6dst; 628 icmp->icmp_flowinfo = flowinfo; 629 /* 630 * If the destination address is multicast and 631 * an outgoing multicast interface has been set, 632 * then the ip bind logic will pick the correct source 633 * address (i.e. matching the outgoing multicast interface). 634 */ 635 } 636 637 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 638 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 639 rw_exit(&icmp->icmp_rwlock); 640 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 641 "icmp_connect: bad state, %d", icmp->icmp_state); 642 icmp_err_ack(q, mp, TOUTSTATE, 0); 643 return; 644 } 645 icmp->icmp_pending_op = T_CONN_REQ; 646 647 if (icmp->icmp_state == TS_DATA_XFER) { 648 /* Already connected - clear out state */ 649 icmp->icmp_v6src = icmp->icmp_bound_v6src; 650 icmp->icmp_state = TS_IDLE; 651 } 652 653 /* 654 * Send down bind to IP to verify that there is a route 655 * and to determine the source address. 656 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 657 */ 658 if (icmp->icmp_family == AF_INET) { 659 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa_conn_t), 660 sin->sin_port); 661 } else { 662 ASSERT(icmp->icmp_family == AF_INET6); 663 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa6_conn_t), 664 sin6->sin6_port); 665 } 666 if (mp1 == NULL) { 667 icmp->icmp_pending_op = -1; 668 rw_exit(&icmp->icmp_rwlock); 669 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 670 return; 671 } 672 673 /* 674 * We also have to send a connection confirmation to 675 * keep TLI happy. Prepare it for icmp_bind_result. 
676 */ 677 if (icmp->icmp_family == AF_INET) { 678 mp2 = mi_tpi_conn_con(NULL, (char *)sin, sizeof (*sin), NULL, 679 0); 680 } else { 681 ASSERT(icmp->icmp_family == AF_INET6); 682 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, sizeof (*sin6), NULL, 683 0); 684 } 685 if (mp2 == NULL) { 686 freemsg(mp1); 687 icmp->icmp_pending_op = -1; 688 rw_exit(&icmp->icmp_rwlock); 689 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 690 return; 691 } 692 693 mp = mi_tpi_ok_ack_alloc(mp); 694 if (mp == NULL) { 695 /* Unable to reuse the T_CONN_REQ for the ack. */ 696 freemsg(mp2); 697 icmp->icmp_pending_op = -1; 698 rw_exit(&icmp->icmp_rwlock); 699 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 700 return; 701 } 702 703 icmp->icmp_state = TS_DATA_XFER; 704 rw_exit(&icmp->icmp_rwlock); 705 706 /* Hang onto the T_OK_ACK and T_CONN_CON for later. */ 707 linkb(mp1, mp); 708 linkb(mp1, mp2); 709 710 mblk_setcred(mp1, connp->conn_cred); 711 if (icmp->icmp_family == AF_INET) 712 mp1 = ip_bind_v4(q, mp1, connp); 713 else 714 mp1 = ip_bind_v6(q, mp1, connp, NULL); 715 716 /* The above return NULL if the bind needs to be deferred */ 717 if (mp1 != NULL) 718 icmp_bind_result(connp, mp1); 719 else 720 CONN_INC_REF(connp); 721 } 722 723 static void 724 icmp_close_free(conn_t *connp) 725 { 726 icmp_t *icmp = connp->conn_icmp; 727 728 /* If there are any options associated with the stream, free them. 
*/ 729 if (icmp->icmp_ip_snd_options) 730 mi_free((char *)icmp->icmp_ip_snd_options); 731 732 if (icmp->icmp_filter != NULL) 733 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 734 735 /* Free memory associated with sticky options */ 736 if (icmp->icmp_sticky_hdrs_len != 0) { 737 kmem_free(icmp->icmp_sticky_hdrs, 738 icmp->icmp_sticky_hdrs_len); 739 icmp->icmp_sticky_hdrs = NULL; 740 icmp->icmp_sticky_hdrs_len = 0; 741 } 742 ip6_pkt_free(&icmp->icmp_sticky_ipp); 743 } 744 745 static int 746 icmp_close(queue_t *q) 747 { 748 conn_t *connp = (conn_t *)q->q_ptr; 749 750 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 751 752 ip_quiesce_conn(connp); 753 754 qprocsoff(connp->conn_rq); 755 756 icmp_close_free(connp); 757 758 /* 759 * Now we are truly single threaded on this stream, and can 760 * delete the things hanging off the connp, and finally the connp. 761 * We removed this connp from the fanout list, it cannot be 762 * accessed thru the fanouts, and we already waited for the 763 * conn_ref to drop to 0. We are already in close, so 764 * there cannot be any other thread from the top. qprocsoff 765 * has completed, and service has completed or won't run in 766 * future. 767 */ 768 ASSERT(connp->conn_ref == 1); 769 770 inet_minor_free(ip_minor_arena, connp->conn_dev); 771 772 connp->conn_ref--; 773 ipcl_conn_destroy(connp); 774 775 q->q_ptr = WR(q)->q_ptr = NULL; 776 return (0); 777 } 778 779 /* 780 * This routine handles each T_DISCON_REQ message passed to icmp 781 * as an indicating that ICMP is no longer connected. This results 782 * in sending a T_BIND_REQ to IP to restore the binding to just 783 * the local address. 784 * 785 * This routine sends down a T_BIND_REQ to IP with the following mblks: 786 * T_BIND_REQ - specifying just the local address. 787 * T_OK_ACK - for the T_DISCON_REQ 788 * 789 * The disconnect completes in icmp_bind_result. 790 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 
791 * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 792 * convert it to an error ack for the appropriate primitive. 793 */ 794 static void 795 icmp_disconnect(queue_t *q, mblk_t *mp) 796 { 797 icmp_t *icmp; 798 mblk_t *mp1; 799 conn_t *connp = Q_TO_CONN(q); 800 801 icmp = connp->conn_icmp; 802 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 803 if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) { 804 rw_exit(&icmp->icmp_rwlock); 805 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 806 "icmp_disconnect: bad state, %d", icmp->icmp_state); 807 icmp_err_ack(q, mp, TOUTSTATE, 0); 808 return; 809 } 810 icmp->icmp_pending_op = T_DISCON_REQ; 811 icmp->icmp_v6src = icmp->icmp_bound_v6src; 812 icmp->icmp_state = TS_IDLE; 813 814 /* 815 * Send down bind to IP to remove the full binding and revert 816 * to the local address binding. 817 */ 818 if (icmp->icmp_family == AF_INET) { 819 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin_t), 0); 820 } else { 821 ASSERT(icmp->icmp_family == AF_INET6); 822 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin6_t), 0); 823 } 824 if (mp1 == NULL) { 825 icmp->icmp_pending_op = -1; 826 rw_exit(&icmp->icmp_rwlock); 827 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 828 return; 829 } 830 mp = mi_tpi_ok_ack_alloc(mp); 831 if (mp == NULL) { 832 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 833 icmp->icmp_pending_op = -1; 834 rw_exit(&icmp->icmp_rwlock); 835 icmp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 836 return; 837 } 838 839 if (icmp->icmp_family == AF_INET6) { 840 int error; 841 842 /* Rebuild the header template */ 843 error = icmp_build_hdrs(icmp); 844 if (error != 0) { 845 icmp->icmp_pending_op = -1; 846 rw_exit(&icmp->icmp_rwlock); 847 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 848 freemsg(mp1); 849 return; 850 } 851 } 852 853 rw_exit(&icmp->icmp_rwlock); 854 /* Append the T_OK_ACK to the T_BIND_REQ for icmp_bind_result */ 855 linkb(mp1, mp); 856 857 if (icmp->icmp_family == AF_INET6) 858 mp1 = ip_bind_v6(q, mp1, connp, NULL); 859 else 860 mp1 = ip_bind_v4(q, mp1, connp); 861 862 /* The above return NULL if the bind needs to be deferred */ 863 if (mp1 != NULL) 864 icmp_bind_result(connp, mp1); 865 else 866 CONN_INC_REF(connp); 867 } 868 869 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 870 static void 871 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 872 { 873 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 874 qreply(q, mp); 875 } 876 877 /* Shorthand to generate and send TPI error acks to our client */ 878 static void 879 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 880 t_scalar_t t_error, int sys_error) 881 { 882 struct T_error_ack *teackp; 883 884 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 885 M_PCPROTO, T_ERROR_ACK)) != NULL) { 886 teackp = (struct T_error_ack *)mp->b_rptr; 887 teackp->ERROR_prim = primitive; 888 teackp->TLI_error = t_error; 889 teackp->UNIX_error = sys_error; 890 qreply(q, mp); 891 } 892 } 893 894 /* 895 * icmp_icmp_error is called by icmp_input to process ICMP 896 * messages passed up by IP. 897 * Generates the appropriate T_UDERROR_IND for permanent 898 * (non-transient) errors. 899 * Assumes that IP has pulled up everything up to and including 900 * the ICMP header. 
901 */ 902 static void 903 icmp_icmp_error(queue_t *q, mblk_t *mp) 904 { 905 icmph_t *icmph; 906 ipha_t *ipha; 907 int iph_hdr_length; 908 sin_t sin; 909 sin6_t sin6; 910 mblk_t *mp1; 911 int error = 0; 912 icmp_t *icmp = Q_TO_ICMP(q); 913 914 ipha = (ipha_t *)mp->b_rptr; 915 916 ASSERT(OK_32PTR(mp->b_rptr)); 917 918 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 919 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 920 icmp_icmp_error_ipv6(q, mp); 921 return; 922 } 923 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 924 925 /* Skip past the outer IP and ICMP headers */ 926 iph_hdr_length = IPH_HDR_LENGTH(ipha); 927 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 928 ipha = (ipha_t *)&icmph[1]; 929 iph_hdr_length = IPH_HDR_LENGTH(ipha); 930 931 switch (icmph->icmph_type) { 932 case ICMP_DEST_UNREACHABLE: 933 switch (icmph->icmph_code) { 934 case ICMP_FRAGMENTATION_NEEDED: 935 /* 936 * IP has already adjusted the path MTU. 937 */ 938 break; 939 case ICMP_PORT_UNREACHABLE: 940 case ICMP_PROTOCOL_UNREACHABLE: 941 error = ECONNREFUSED; 942 break; 943 default: 944 /* Transient errors */ 945 break; 946 } 947 break; 948 default: 949 /* Transient errors */ 950 break; 951 } 952 if (error == 0) { 953 freemsg(mp); 954 return; 955 } 956 957 /* 958 * Deliver T_UDERROR_IND when the application has asked for it. 959 * The socket layer enables this automatically when connected. 
960 */ 961 if (!icmp->icmp_dgram_errind) { 962 freemsg(mp); 963 return; 964 } 965 966 switch (icmp->icmp_family) { 967 case AF_INET: 968 sin = sin_null; 969 sin.sin_family = AF_INET; 970 sin.sin_addr.s_addr = ipha->ipha_dst; 971 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 972 error); 973 break; 974 case AF_INET6: 975 sin6 = sin6_null; 976 sin6.sin6_family = AF_INET6; 977 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 978 979 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 980 NULL, 0, error); 981 break; 982 } 983 if (mp1) 984 putnext(q, mp1); 985 freemsg(mp); 986 } 987 988 /* 989 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 990 * for IPv6 packets. 991 * Send permanent (non-transient) errors upstream. 992 * Assumes that IP has pulled up all the extension headers as well 993 * as the ICMPv6 header. 994 */ 995 static void 996 icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 997 { 998 icmp6_t *icmp6; 999 ip6_t *ip6h, *outer_ip6h; 1000 uint16_t iph_hdr_length; 1001 uint8_t *nexthdrp; 1002 sin6_t sin6; 1003 mblk_t *mp1; 1004 int error = 0; 1005 icmp_t *icmp = Q_TO_ICMP(q); 1006 1007 outer_ip6h = (ip6_t *)mp->b_rptr; 1008 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1009 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1010 else 1011 iph_hdr_length = IPV6_HDR_LEN; 1012 1013 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1014 ip6h = (ip6_t *)&icmp6[1]; 1015 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1016 freemsg(mp); 1017 return; 1018 } 1019 1020 switch (icmp6->icmp6_type) { 1021 case ICMP6_DST_UNREACH: 1022 switch (icmp6->icmp6_code) { 1023 case ICMP6_DST_UNREACH_NOPORT: 1024 error = ECONNREFUSED; 1025 break; 1026 case ICMP6_DST_UNREACH_ADMIN: 1027 case ICMP6_DST_UNREACH_NOROUTE: 1028 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1029 case ICMP6_DST_UNREACH_ADDR: 1030 /* Transient errors */ 1031 break; 1032 default: 1033 break; 1034 } 1035 break; 1036 case ICMP6_PACKET_TOO_BIG: { 1037 struct 
T_unitdata_ind *tudi; 1038 struct T_opthdr *toh; 1039 size_t udi_size; 1040 mblk_t *newmp; 1041 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1042 sizeof (struct ip6_mtuinfo); 1043 sin6_t *sin6; 1044 struct ip6_mtuinfo *mtuinfo; 1045 1046 /* 1047 * If the application has requested to receive path mtu 1048 * information, send up an empty message containing an 1049 * IPV6_PATHMTU ancillary data item. 1050 */ 1051 if (!icmp->icmp_ipv6_recvpathmtu) 1052 break; 1053 1054 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1055 opt_length; 1056 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1057 BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors); 1058 break; 1059 } 1060 1061 /* 1062 * newmp->b_cont is left to NULL on purpose. This is an 1063 * empty message containing only ancillary data. 1064 */ 1065 newmp->b_datap->db_type = M_PROTO; 1066 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1067 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1068 tudi->PRIM_type = T_UNITDATA_IND; 1069 tudi->SRC_length = sizeof (sin6_t); 1070 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1071 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1072 tudi->OPT_length = opt_length; 1073 1074 sin6 = (sin6_t *)&tudi[1]; 1075 bzero(sin6, sizeof (sin6_t)); 1076 sin6->sin6_family = AF_INET6; 1077 sin6->sin6_addr = icmp->icmp_v6dst; 1078 1079 toh = (struct T_opthdr *)&sin6[1]; 1080 toh->level = IPPROTO_IPV6; 1081 toh->name = IPV6_PATHMTU; 1082 toh->len = opt_length; 1083 toh->status = 0; 1084 1085 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1086 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1087 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1088 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1089 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1090 /* 1091 * We've consumed everything we need from the original 1092 * message. Free it, then send our empty message. 
1093 */ 1094 freemsg(mp); 1095 putnext(q, newmp); 1096 return; 1097 } 1098 case ICMP6_TIME_EXCEEDED: 1099 /* Transient errors */ 1100 break; 1101 case ICMP6_PARAM_PROB: 1102 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1103 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1104 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1105 (uchar_t *)nexthdrp) { 1106 error = ECONNREFUSED; 1107 break; 1108 } 1109 break; 1110 } 1111 if (error == 0) { 1112 freemsg(mp); 1113 return; 1114 } 1115 1116 /* 1117 * Deliver T_UDERROR_IND when the application has asked for it. 1118 * The socket layer enables this automatically when connected. 1119 */ 1120 if (!icmp->icmp_dgram_errind) { 1121 freemsg(mp); 1122 return; 1123 } 1124 1125 sin6 = sin6_null; 1126 sin6.sin6_family = AF_INET6; 1127 sin6.sin6_addr = ip6h->ip6_dst; 1128 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1129 1130 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 1131 error); 1132 if (mp1) 1133 putnext(q, mp1); 1134 freemsg(mp); 1135 } 1136 1137 /* 1138 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1139 * The local address is filled in if endpoint is bound. The remote address 1140 * is filled in if remote address has been precified ("connected endpoint") 1141 * (The concept of connected CLTS sockets is alien to published TPI 1142 * but we support it anyway). 
1143 */ 1144 static void 1145 icmp_addr_req(queue_t *q, mblk_t *mp) 1146 { 1147 icmp_t *icmp = Q_TO_ICMP(q); 1148 mblk_t *ackmp; 1149 struct T_addr_ack *taa; 1150 1151 /* Make it large enough for worst case */ 1152 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1153 2 * sizeof (sin6_t), 1); 1154 if (ackmp == NULL) { 1155 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1156 return; 1157 } 1158 taa = (struct T_addr_ack *)ackmp->b_rptr; 1159 1160 bzero(taa, sizeof (struct T_addr_ack)); 1161 ackmp->b_wptr = (uchar_t *)&taa[1]; 1162 1163 taa->PRIM_type = T_ADDR_ACK; 1164 ackmp->b_datap->db_type = M_PCPROTO; 1165 rw_enter(&icmp->icmp_rwlock, RW_READER); 1166 /* 1167 * Note: Following code assumes 32 bit alignment of basic 1168 * data structures like sin_t and struct T_addr_ack. 1169 */ 1170 if (icmp->icmp_state != TS_UNBND) { 1171 /* 1172 * Fill in local address 1173 */ 1174 taa->LOCADDR_offset = sizeof (*taa); 1175 if (icmp->icmp_family == AF_INET) { 1176 sin_t *sin; 1177 1178 taa->LOCADDR_length = sizeof (sin_t); 1179 sin = (sin_t *)&taa[1]; 1180 /* Fill zeroes and then intialize non-zero fields */ 1181 *sin = sin_null; 1182 sin->sin_family = AF_INET; 1183 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1184 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1185 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1186 sin->sin_addr.s_addr); 1187 } else { 1188 /* 1189 * INADDR_ANY 1190 * icmp_v6src is not set, we might be bound to 1191 * broadcast/multicast. 
Use icmp_bound_v6src as 1192 * local address instead (that could 1193 * also still be INADDR_ANY) 1194 */ 1195 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1196 sin->sin_addr.s_addr); 1197 } 1198 ackmp->b_wptr = (uchar_t *)&sin[1]; 1199 } else { 1200 sin6_t *sin6; 1201 1202 ASSERT(icmp->icmp_family == AF_INET6); 1203 taa->LOCADDR_length = sizeof (sin6_t); 1204 sin6 = (sin6_t *)&taa[1]; 1205 /* Fill zeroes and then intialize non-zero fields */ 1206 *sin6 = sin6_null; 1207 sin6->sin6_family = AF_INET6; 1208 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1209 sin6->sin6_addr = icmp->icmp_v6src; 1210 } else { 1211 /* 1212 * UNSPECIFIED 1213 * icmp_v6src is not set, we might be bound to 1214 * broadcast/multicast. Use icmp_bound_v6src as 1215 * local address instead (that could 1216 * also still be UNSPECIFIED) 1217 */ 1218 sin6->sin6_addr = icmp->icmp_bound_v6src; 1219 } 1220 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1221 } 1222 } 1223 rw_exit(&icmp->icmp_rwlock); 1224 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1225 qreply(q, ackmp); 1226 } 1227 1228 static void 1229 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1230 { 1231 *tap = icmp_g_t_info_ack; 1232 1233 if (icmp->icmp_family == AF_INET6) 1234 tap->ADDR_size = sizeof (sin6_t); 1235 else 1236 tap->ADDR_size = sizeof (sin_t); 1237 tap->CURRENT_state = icmp->icmp_state; 1238 tap->OPT_size = icmp_max_optsize; 1239 } 1240 1241 /* 1242 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1243 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1244 * icmp_g_t_info_ack. The current state of the stream is copied from 1245 * icmp_state. 
1246 */ 1247 static void 1248 icmp_capability_req(queue_t *q, mblk_t *mp) 1249 { 1250 icmp_t *icmp = Q_TO_ICMP(q); 1251 t_uscalar_t cap_bits1; 1252 struct T_capability_ack *tcap; 1253 1254 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1255 1256 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1257 mp->b_datap->db_type, T_CAPABILITY_ACK); 1258 if (!mp) 1259 return; 1260 1261 tcap = (struct T_capability_ack *)mp->b_rptr; 1262 tcap->CAP_bits1 = 0; 1263 1264 if (cap_bits1 & TC1_INFO) { 1265 icmp_copy_info(&tcap->INFO_ack, icmp); 1266 tcap->CAP_bits1 |= TC1_INFO; 1267 } 1268 1269 qreply(q, mp); 1270 } 1271 1272 /* 1273 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1274 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1275 * The current state of the stream is copied from icmp_state. 1276 */ 1277 static void 1278 icmp_info_req(queue_t *q, mblk_t *mp) 1279 { 1280 icmp_t *icmp = Q_TO_ICMP(q); 1281 1282 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1283 T_INFO_ACK); 1284 if (!mp) 1285 return; 1286 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1287 qreply(q, mp); 1288 } 1289 1290 /* 1291 * IP recognizes seven kinds of bind requests: 1292 * 1293 * - A zero-length address binds only to the protocol number. 1294 * 1295 * - A 4-byte address is treated as a request to 1296 * validate that the address is a valid local IPv4 1297 * address, appropriate for an application to bind to. 1298 * IP does the verification, but does not make any note 1299 * of the address at this time. 1300 * 1301 * - A 16-byte address contains is treated as a request 1302 * to validate a local IPv6 address, as the 4-byte 1303 * address case above. 1304 * 1305 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1306 * use it for the inbound fanout of packets. 1307 * 1308 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1309 * use it for the inbound fanout of packets. 
1310 * 1311 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1312 * information consisting of local and remote addresses 1313 * and ports (unused for raw sockets). In this case, the addresses are both 1314 * validated as appropriate for this operation, and, if 1315 * so, the information is retained for use in the 1316 * inbound fanout. 1317 * 1318 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1319 * fanout information, like the 12-byte case above. 1320 * 1321 * IP will also fill in the IRE request mblk with information 1322 * regarding our peer. In all cases, we notify IP of our protocol 1323 * type by appending a single protocol byte to the bind request. 1324 */ 1325 static mblk_t * 1326 icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length, 1327 in_port_t fport) 1328 { 1329 char *cp; 1330 mblk_t *mp; 1331 struct T_bind_req *tbr; 1332 ipa_conn_t *ac; 1333 ipa6_conn_t *ac6; 1334 sin_t *sin; 1335 sin6_t *sin6; 1336 1337 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1338 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 1339 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1340 if (mp == NULL) 1341 return (NULL); 1342 mp->b_datap->db_type = M_PROTO; 1343 tbr = (struct T_bind_req *)mp->b_rptr; 1344 tbr->PRIM_type = bind_prim; 1345 tbr->ADDR_offset = sizeof (*tbr); 1346 tbr->CONIND_number = 0; 1347 tbr->ADDR_length = addr_length; 1348 cp = (char *)&tbr[1]; 1349 switch (addr_length) { 1350 case sizeof (ipa_conn_t): 1351 ASSERT(icmp->icmp_family == AF_INET); 1352 /* Append a request for an IRE */ 1353 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1354 if (mp->b_cont == NULL) { 1355 freemsg(mp); 1356 return (NULL); 1357 } 1358 mp->b_cont->b_wptr += sizeof (ire_t); 1359 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1360 1361 /* cp known to be 32 bit aligned */ 1362 ac = (ipa_conn_t *)cp; 1363 ac->ac_laddr = V4_PART_OF_V6(icmp->icmp_v6src); 1364 ac->ac_faddr = V4_PART_OF_V6(icmp->icmp_v6dst); 1365 
ac->ac_fport = fport; 1366 ac->ac_lport = 0; 1367 break; 1368 1369 case sizeof (ipa6_conn_t): 1370 ASSERT(icmp->icmp_family == AF_INET6); 1371 /* Append a request for an IRE */ 1372 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1373 if (mp->b_cont == NULL) { 1374 freemsg(mp); 1375 return (NULL); 1376 } 1377 mp->b_cont->b_wptr += sizeof (ire_t); 1378 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1379 1380 /* cp known to be 32 bit aligned */ 1381 ac6 = (ipa6_conn_t *)cp; 1382 ac6->ac6_laddr = icmp->icmp_v6src; 1383 ac6->ac6_faddr = icmp->icmp_v6dst; 1384 ac6->ac6_fport = fport; 1385 ac6->ac6_lport = 0; 1386 break; 1387 1388 case sizeof (sin_t): 1389 ASSERT(icmp->icmp_family == AF_INET); 1390 /* Append a request for an IRE */ 1391 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1392 if (!mp->b_cont) { 1393 freemsg(mp); 1394 return (NULL); 1395 } 1396 mp->b_cont->b_wptr += sizeof (ire_t); 1397 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1398 1399 sin = (sin_t *)cp; 1400 *sin = sin_null; 1401 sin->sin_family = AF_INET; 1402 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 1403 break; 1404 1405 case sizeof (sin6_t): 1406 ASSERT(icmp->icmp_family == AF_INET6); 1407 /* Append a request for an IRE */ 1408 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1409 if (!mp->b_cont) { 1410 freemsg(mp); 1411 return (NULL); 1412 } 1413 mp->b_cont->b_wptr += sizeof (ire_t); 1414 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1415 1416 sin6 = (sin6_t *)cp; 1417 *sin6 = sin6_null; 1418 sin6->sin6_family = AF_INET6; 1419 sin6->sin6_addr = icmp->icmp_bound_v6src; 1420 break; 1421 } 1422 /* Add protocol number to end */ 1423 cp[addr_length] = icmp->icmp_proto; 1424 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1425 return (mp); 1426 } 1427 1428 /* For /dev/icmp aka AF_INET open */ 1429 static int 1430 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1431 { 1432 return (icmp_open(q, devp, flag, sflag, credp, B_FALSE)); 1433 } 1434 1435 /* For 
/dev/icmp6 aka AF_INET6 open */ 1436 static int 1437 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1438 { 1439 return (icmp_open(q, devp, flag, sflag, credp, B_TRUE)); 1440 } 1441 1442 /* 1443 * This is the open routine for icmp. It allocates a icmp_t structure for 1444 * the stream and, on the first open of the module, creates an ND table. 1445 */ 1446 /*ARGSUSED2*/ 1447 static int 1448 icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1449 boolean_t isv6) 1450 { 1451 int err; 1452 icmp_t *icmp; 1453 conn_t *connp; 1454 dev_t conn_dev; 1455 zoneid_t zoneid; 1456 netstack_t *ns; 1457 icmp_stack_t *is; 1458 1459 /* If the stream is already open, return immediately. */ 1460 if (q->q_ptr != NULL) 1461 return (0); 1462 1463 if (sflag == MODOPEN) 1464 return (EINVAL); 1465 1466 ns = netstack_find_by_cred(credp); 1467 ASSERT(ns != NULL); 1468 is = ns->netstack_icmp; 1469 ASSERT(is != NULL); 1470 1471 /* 1472 * For exclusive stacks we set the zoneid to zero 1473 * to make ICMP operate as if in the global zone. 1474 */ 1475 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 1476 zoneid = GLOBAL_ZONEID; 1477 else 1478 zoneid = crgetzoneid(credp); 1479 1480 if ((conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) { 1481 netstack_rele(ns); 1482 return (EBUSY); 1483 } 1484 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1485 1486 connp = ipcl_conn_create(IPCL_RAWIPCONN, KM_SLEEP, ns); 1487 connp->conn_dev = conn_dev; 1488 icmp = connp->conn_icmp; 1489 1490 /* 1491 * ipcl_conn_create did a netstack_hold. Undo the hold that was 1492 * done by netstack_find_by_cred() 1493 */ 1494 netstack_rele(ns); 1495 1496 /* 1497 * Initialize the icmp_t structure for this stream. 
1498 */ 1499 q->q_ptr = connp; 1500 WR(q)->q_ptr = connp; 1501 connp->conn_rq = q; 1502 connp->conn_wq = WR(q); 1503 1504 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1505 ASSERT(connp->conn_ulp == IPPROTO_ICMP); 1506 ASSERT(connp->conn_icmp == icmp); 1507 ASSERT(icmp->icmp_connp == connp); 1508 1509 /* Set the initial state of the stream and the privilege status. */ 1510 icmp->icmp_state = TS_UNBND; 1511 if (isv6) { 1512 icmp->icmp_ipversion = IPV6_VERSION; 1513 icmp->icmp_family = AF_INET6; 1514 connp->conn_ulp = IPPROTO_ICMPV6; 1515 /* May be changed by a SO_PROTOTYPE socket option. */ 1516 icmp->icmp_proto = IPPROTO_ICMPV6; 1517 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1518 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1519 icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; 1520 connp->conn_af_isv6 = B_TRUE; 1521 connp->conn_flags |= IPCL_ISV6; 1522 } else { 1523 icmp->icmp_ipversion = IPV4_VERSION; 1524 icmp->icmp_family = AF_INET; 1525 /* May be changed by a SO_PROTOTYPE socket option. */ 1526 icmp->icmp_proto = IPPROTO_ICMP; 1527 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1528 icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; 1529 connp->conn_af_isv6 = B_FALSE; 1530 connp->conn_flags &= ~IPCL_ISV6; 1531 } 1532 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1533 icmp->icmp_pending_op = -1; 1534 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1535 connp->conn_zoneid = zoneid; 1536 1537 /* 1538 * If the caller has the process-wide flag set, then default to MAC 1539 * exempt mode. This allows read-down to unlabeled hosts. 
1540 */ 1541 if (getpflags(NET_MAC_AWARE, credp) != 0) 1542 icmp->icmp_mac_exempt = B_TRUE; 1543 1544 connp->conn_ulp_labeled = is_system_labeled(); 1545 1546 icmp->icmp_is = is; 1547 1548 q->q_hiwat = is->is_recv_hiwat; 1549 WR(q)->q_hiwat = is->is_xmit_hiwat; 1550 WR(q)->q_lowat = is->is_xmit_lowat; 1551 1552 connp->conn_recv = icmp_input; 1553 crhold(credp); 1554 connp->conn_cred = credp; 1555 1556 mutex_enter(&connp->conn_lock); 1557 connp->conn_state_flags &= ~CONN_INCIPIENT; 1558 mutex_exit(&connp->conn_lock); 1559 1560 qprocson(q); 1561 1562 if (icmp->icmp_family == AF_INET6) { 1563 /* Build initial header template for transmit */ 1564 if ((err = icmp_build_hdrs(icmp)) != 0) { 1565 rw_exit(&icmp->icmp_rwlock); 1566 qprocsoff(q); 1567 ipcl_conn_destroy(connp); 1568 return (err); 1569 } 1570 } 1571 rw_exit(&icmp->icmp_rwlock); 1572 1573 /* Set the Stream head write offset. */ 1574 (void) mi_set_sth_wroff(q, 1575 icmp->icmp_max_hdr_len + is->is_wroff_extra); 1576 (void) mi_set_sth_hiwat(q, q->q_hiwat); 1577 1578 return (0); 1579 } 1580 1581 /* 1582 * Which ICMP options OK to set through T_UNITDATA_REQ... 
1583 */ 1584 /* ARGSUSED */ 1585 static boolean_t 1586 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1587 { 1588 return (B_TRUE); 1589 } 1590 1591 /* 1592 * This routine gets default values of certain options whose default 1593 * values are maintained by protcol specific code 1594 */ 1595 /* ARGSUSED */ 1596 int 1597 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1598 { 1599 icmp_t *icmp = Q_TO_ICMP(q); 1600 icmp_stack_t *is = icmp->icmp_is; 1601 int *i1 = (int *)ptr; 1602 1603 switch (level) { 1604 case IPPROTO_IP: 1605 switch (name) { 1606 case IP_MULTICAST_TTL: 1607 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1608 return (sizeof (uchar_t)); 1609 case IP_MULTICAST_LOOP: 1610 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1611 return (sizeof (uchar_t)); 1612 } 1613 break; 1614 case IPPROTO_IPV6: 1615 switch (name) { 1616 case IPV6_MULTICAST_HOPS: 1617 *i1 = IP_DEFAULT_MULTICAST_TTL; 1618 return (sizeof (int)); 1619 case IPV6_MULTICAST_LOOP: 1620 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1621 return (sizeof (int)); 1622 case IPV6_UNICAST_HOPS: 1623 *i1 = is->is_ipv6_hoplimit; 1624 return (sizeof (int)); 1625 } 1626 break; 1627 case IPPROTO_ICMPV6: 1628 switch (name) { 1629 case ICMP6_FILTER: 1630 /* Make it look like "pass all" */ 1631 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1632 return (sizeof (icmp6_filter_t)); 1633 } 1634 break; 1635 } 1636 return (-1); 1637 } 1638 1639 /* 1640 * This routine retrieves the current status of socket options. 1641 * It returns the size of the option retrieved. 
1642 */ 1643 int 1644 icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr) 1645 { 1646 conn_t *connp = Q_TO_CONN(q); 1647 icmp_t *icmp = connp->conn_icmp; 1648 icmp_stack_t *is = icmp->icmp_is; 1649 int *i1 = (int *)ptr; 1650 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1651 1652 switch (level) { 1653 case SOL_SOCKET: 1654 switch (name) { 1655 case SO_DEBUG: 1656 *i1 = icmp->icmp_debug; 1657 break; 1658 case SO_TYPE: 1659 *i1 = SOCK_RAW; 1660 break; 1661 case SO_PROTOTYPE: 1662 *i1 = icmp->icmp_proto; 1663 break; 1664 case SO_REUSEADDR: 1665 *i1 = icmp->icmp_reuseaddr; 1666 break; 1667 1668 /* 1669 * The following three items are available here, 1670 * but are only meaningful to IP. 1671 */ 1672 case SO_DONTROUTE: 1673 *i1 = icmp->icmp_dontroute; 1674 break; 1675 case SO_USELOOPBACK: 1676 *i1 = icmp->icmp_useloopback; 1677 break; 1678 case SO_BROADCAST: 1679 *i1 = icmp->icmp_broadcast; 1680 break; 1681 1682 case SO_SNDBUF: 1683 ASSERT(q->q_hiwat <= INT_MAX); 1684 *i1 = (int)q->q_hiwat; 1685 break; 1686 case SO_RCVBUF: 1687 ASSERT(RD(q)->q_hiwat <= INT_MAX); 1688 *i1 = (int)RD(q)->q_hiwat; 1689 break; 1690 case SO_DGRAM_ERRIND: 1691 *i1 = icmp->icmp_dgram_errind; 1692 break; 1693 case SO_TIMESTAMP: 1694 *i1 = icmp->icmp_timestamp; 1695 break; 1696 case SO_MAC_EXEMPT: 1697 *i1 = icmp->icmp_mac_exempt; 1698 break; 1699 case SO_DOMAIN: 1700 *i1 = icmp->icmp_family; 1701 break; 1702 1703 /* 1704 * Following four not meaningful for icmp 1705 * Action is same as "default" to which we fallthrough 1706 * so we keep them in comments. 1707 * case SO_LINGER: 1708 * case SO_KEEPALIVE: 1709 * case SO_OOBINLINE: 1710 * case SO_ALLZONES: 1711 */ 1712 default: 1713 return (-1); 1714 } 1715 break; 1716 case IPPROTO_IP: 1717 /* 1718 * Only allow IPv4 option processing on IPv4 sockets. 
1719 */ 1720 if (icmp->icmp_family != AF_INET) 1721 return (-1); 1722 1723 switch (name) { 1724 case IP_OPTIONS: 1725 case T_IP_OPTIONS: 1726 /* Options are passed up with each packet */ 1727 return (0); 1728 case IP_HDRINCL: 1729 *i1 = (int)icmp->icmp_hdrincl; 1730 break; 1731 case IP_TOS: 1732 case T_IP_TOS: 1733 *i1 = (int)icmp->icmp_type_of_service; 1734 break; 1735 case IP_TTL: 1736 *i1 = (int)icmp->icmp_ttl; 1737 break; 1738 case IP_MULTICAST_IF: 1739 /* 0 address if not set */ 1740 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1741 return (sizeof (ipaddr_t)); 1742 case IP_MULTICAST_TTL: 1743 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1744 return (sizeof (uchar_t)); 1745 case IP_MULTICAST_LOOP: 1746 *ptr = connp->conn_multicast_loop; 1747 return (sizeof (uint8_t)); 1748 case IP_BOUND_IF: 1749 /* Zero if not set */ 1750 *i1 = icmp->icmp_bound_if; 1751 break; /* goto sizeof (int) option return */ 1752 case IP_UNSPEC_SRC: 1753 *ptr = icmp->icmp_unspec_source; 1754 break; /* goto sizeof (int) option return */ 1755 case IP_XMIT_IF: 1756 *i1 = icmp->icmp_xmit_if; 1757 break; /* goto sizeof (int) option return */ 1758 case IP_RECVIF: 1759 *ptr = icmp->icmp_recvif; 1760 break; /* goto sizeof (int) option return */ 1761 case IP_RECVPKTINFO: 1762 /* 1763 * This also handles IP_PKTINFO. 1764 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1765 * Differentiation is based on the size of the argument 1766 * passed in. 1767 * This option is handled in IP which will return an 1768 * error for IP_PKTINFO as it's not supported as a 1769 * sticky option. 1770 */ 1771 return (-EINVAL); 1772 /* 1773 * Cannot "get" the value of following options 1774 * at this level. Action is same as "default" to 1775 * which we fallthrough so we keep them in comments. 
1776 * 1777 * case IP_ADD_MEMBERSHIP: 1778 * case IP_DROP_MEMBERSHIP: 1779 * case IP_BLOCK_SOURCE: 1780 * case IP_UNBLOCK_SOURCE: 1781 * case IP_ADD_SOURCE_MEMBERSHIP: 1782 * case IP_DROP_SOURCE_MEMBERSHIP: 1783 * case MCAST_JOIN_GROUP: 1784 * case MCAST_LEAVE_GROUP: 1785 * case MCAST_BLOCK_SOURCE: 1786 * case MCAST_UNBLOCK_SOURCE: 1787 * case MCAST_JOIN_SOURCE_GROUP: 1788 * case MCAST_LEAVE_SOURCE_GROUP: 1789 * case MRT_INIT: 1790 * case MRT_DONE: 1791 * case MRT_ADD_VIF: 1792 * case MRT_DEL_VIF: 1793 * case MRT_ADD_MFC: 1794 * case MRT_DEL_MFC: 1795 * case MRT_VERSION: 1796 * case MRT_ASSERT: 1797 * case IP_SEC_OPT: 1798 * case IP_DONTFAILOVER_IF: 1799 * case IP_NEXTHOP: 1800 */ 1801 default: 1802 return (-1); 1803 } 1804 break; 1805 case IPPROTO_IPV6: 1806 /* 1807 * Only allow IPv6 option processing on native IPv6 sockets. 1808 */ 1809 if (icmp->icmp_family != AF_INET6) 1810 return (-1); 1811 switch (name) { 1812 case IPV6_UNICAST_HOPS: 1813 *i1 = (unsigned int)icmp->icmp_ttl; 1814 break; 1815 case IPV6_MULTICAST_IF: 1816 /* 0 index if not set */ 1817 *i1 = icmp->icmp_multicast_if_index; 1818 break; 1819 case IPV6_MULTICAST_HOPS: 1820 *i1 = icmp->icmp_multicast_ttl; 1821 break; 1822 case IPV6_MULTICAST_LOOP: 1823 *i1 = connp->conn_multicast_loop; 1824 break; 1825 case IPV6_BOUND_IF: 1826 /* Zero if not set */ 1827 *i1 = icmp->icmp_bound_if; 1828 break; 1829 case IPV6_UNSPEC_SRC: 1830 *i1 = icmp->icmp_unspec_source; 1831 break; 1832 case IPV6_CHECKSUM: 1833 /* 1834 * Return offset or -1 if no checksum offset. 
1835 * Does not apply to IPPROTO_ICMPV6 1836 */ 1837 if (icmp->icmp_proto == IPPROTO_ICMPV6) 1838 return (-1); 1839 1840 if (icmp->icmp_raw_checksum) { 1841 *i1 = icmp->icmp_checksum_off; 1842 } else { 1843 *i1 = -1; 1844 } 1845 break; 1846 case IPV6_JOIN_GROUP: 1847 case IPV6_LEAVE_GROUP: 1848 case MCAST_JOIN_GROUP: 1849 case MCAST_LEAVE_GROUP: 1850 case MCAST_BLOCK_SOURCE: 1851 case MCAST_UNBLOCK_SOURCE: 1852 case MCAST_JOIN_SOURCE_GROUP: 1853 case MCAST_LEAVE_SOURCE_GROUP: 1854 /* cannot "get" the value for these */ 1855 return (-1); 1856 case IPV6_RECVPKTINFO: 1857 *i1 = icmp->icmp_ip_recvpktinfo; 1858 break; 1859 case IPV6_RECVTCLASS: 1860 *i1 = icmp->icmp_ipv6_recvtclass; 1861 break; 1862 case IPV6_RECVPATHMTU: 1863 *i1 = icmp->icmp_ipv6_recvpathmtu; 1864 break; 1865 case IPV6_V6ONLY: 1866 *i1 = 1; 1867 break; 1868 case IPV6_RECVHOPLIMIT: 1869 *i1 = icmp->icmp_ipv6_recvhoplimit; 1870 break; 1871 case IPV6_RECVHOPOPTS: 1872 *i1 = icmp->icmp_ipv6_recvhopopts; 1873 break; 1874 case IPV6_RECVDSTOPTS: 1875 *i1 = icmp->icmp_ipv6_recvdstopts; 1876 break; 1877 case _OLD_IPV6_RECVDSTOPTS: 1878 *i1 = icmp->icmp_old_ipv6_recvdstopts; 1879 break; 1880 case IPV6_RECVRTHDRDSTOPTS: 1881 *i1 = icmp->icmp_ipv6_recvrtdstopts; 1882 break; 1883 case IPV6_RECVRTHDR: 1884 *i1 = icmp->icmp_ipv6_recvrthdr; 1885 break; 1886 case IPV6_PKTINFO: { 1887 /* XXX assumes that caller has room for max size! 
*/ 1888 struct in6_pktinfo *pkti; 1889 1890 pkti = (struct in6_pktinfo *)ptr; 1891 if (ipp->ipp_fields & IPPF_IFINDEX) 1892 pkti->ipi6_ifindex = ipp->ipp_ifindex; 1893 else 1894 pkti->ipi6_ifindex = 0; 1895 if (ipp->ipp_fields & IPPF_ADDR) 1896 pkti->ipi6_addr = ipp->ipp_addr; 1897 else 1898 pkti->ipi6_addr = ipv6_all_zeros; 1899 return (sizeof (struct in6_pktinfo)); 1900 } 1901 case IPV6_NEXTHOP: { 1902 sin6_t *sin6 = (sin6_t *)ptr; 1903 1904 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 1905 return (0); 1906 *sin6 = sin6_null; 1907 sin6->sin6_family = AF_INET6; 1908 sin6->sin6_addr = ipp->ipp_nexthop; 1909 return (sizeof (sin6_t)); 1910 } 1911 case IPV6_HOPOPTS: 1912 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 1913 return (0); 1914 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 1915 return (0); 1916 bcopy((char *)ipp->ipp_hopopts + 1917 icmp->icmp_label_len_v6, ptr, 1918 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1919 if (icmp->icmp_label_len_v6 > 0) { 1920 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 1921 ptr[1] = (ipp->ipp_hopoptslen - 1922 icmp->icmp_label_len_v6 + 7) / 8 - 1; 1923 } 1924 return (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1925 case IPV6_RTHDRDSTOPTS: 1926 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 1927 return (0); 1928 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 1929 return (ipp->ipp_rtdstoptslen); 1930 case IPV6_RTHDR: 1931 if (!(ipp->ipp_fields & IPPF_RTHDR)) 1932 return (0); 1933 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 1934 return (ipp->ipp_rthdrlen); 1935 case IPV6_DSTOPTS: 1936 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 1937 return (0); 1938 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 1939 return (ipp->ipp_dstoptslen); 1940 case IPV6_PATHMTU: 1941 if (!(ipp->ipp_fields & IPPF_PATHMTU)) 1942 return (0); 1943 1944 return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0, 1945 (struct ip6_mtuinfo *)ptr, 1946 is->is_netstack)); 1947 case IPV6_TCLASS: 1948 if (ipp->ipp_fields & IPPF_TCLASS) 1949 *i1 = ipp->ipp_tclass; 1950 else 1951 *i1 = 
IPV6_FLOW_TCLASS( 1952 IPV6_DEFAULT_VERS_AND_FLOW); 1953 break; 1954 default: 1955 return (-1); 1956 } 1957 break; 1958 case IPPROTO_ICMPV6: 1959 /* 1960 * Only allow IPv6 option processing on native IPv6 sockets. 1961 */ 1962 if (icmp->icmp_family != AF_INET6) 1963 return (-1); 1964 1965 if (icmp->icmp_proto != IPPROTO_ICMPV6) 1966 return (-1); 1967 1968 switch (name) { 1969 case ICMP6_FILTER: 1970 if (icmp->icmp_filter == NULL) { 1971 /* Make it look like "pass all" */ 1972 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1973 } else { 1974 (void) bcopy(icmp->icmp_filter, ptr, 1975 sizeof (icmp6_filter_t)); 1976 } 1977 return (sizeof (icmp6_filter_t)); 1978 default: 1979 return (-1); 1980 } 1981 default: 1982 return (-1); 1983 } 1984 return (sizeof (int)); 1985 } 1986 1987 /* 1988 * This routine retrieves the current status of socket options. 1989 * It returns the size of the option retrieved. 1990 */ 1991 int 1992 icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 1993 { 1994 icmp_t *icmp = Q_TO_ICMP(q); 1995 int err; 1996 1997 rw_enter(&icmp->icmp_rwlock, RW_READER); 1998 err = icmp_opt_get_locked(q, level, name, ptr); 1999 rw_exit(&icmp->icmp_rwlock); 2000 return (err); 2001 } 2002 2003 2004 /* This routine sets socket options. */ 2005 /* ARGSUSED */ 2006 int 2007 icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name, 2008 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2009 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2010 { 2011 conn_t *connp = Q_TO_CONN(q); 2012 icmp_t *icmp = connp->conn_icmp; 2013 icmp_stack_t *is = icmp->icmp_is; 2014 int *i1 = (int *)invalp; 2015 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2016 boolean_t checkonly; 2017 int error; 2018 2019 switch (optset_context) { 2020 case SETFN_OPTCOM_CHECKONLY: 2021 checkonly = B_TRUE; 2022 /* 2023 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 2024 * inlen != 0 implies value supplied and 2025 * we have to "pretend" to set it. 
2026 * inlen == 0 implies that there is no 2027 * value part in T_CHECK request and just validation 2028 * done elsewhere should be enough, we just return here. 2029 */ 2030 if (inlen == 0) { 2031 *outlenp = 0; 2032 return (0); 2033 } 2034 break; 2035 case SETFN_OPTCOM_NEGOTIATE: 2036 checkonly = B_FALSE; 2037 break; 2038 case SETFN_UD_NEGOTIATE: 2039 case SETFN_CONN_NEGOTIATE: 2040 checkonly = B_FALSE; 2041 /* 2042 * Negotiating local and "association-related" options 2043 * through T_UNITDATA_REQ. 2044 * 2045 * Following routine can filter out ones we do not 2046 * want to be "set" this way. 2047 */ 2048 if (!icmp_opt_allow_udr_set(level, name)) { 2049 *outlenp = 0; 2050 return (EINVAL); 2051 } 2052 break; 2053 default: 2054 /* 2055 * We should never get here 2056 */ 2057 *outlenp = 0; 2058 return (EINVAL); 2059 } 2060 2061 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 2062 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 2063 2064 /* 2065 * For fixed length options, no sanity check 2066 * of passed in length is done. It is assumed *_optcom_req() 2067 * routines do the right thing. 
2068 */ 2069 2070 switch (level) { 2071 case SOL_SOCKET: 2072 switch (name) { 2073 case SO_DEBUG: 2074 if (!checkonly) 2075 icmp->icmp_debug = onoff; 2076 break; 2077 case SO_PROTOTYPE: 2078 if ((*i1 & 0xFF) != IPPROTO_ICMP && 2079 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 2080 secpolicy_net_rawaccess(cr) != 0) { 2081 *outlenp = 0; 2082 return (EACCES); 2083 } 2084 /* Can't use IPPROTO_RAW with IPv6 */ 2085 if ((*i1 & 0xFF) == IPPROTO_RAW && 2086 icmp->icmp_family == AF_INET6) { 2087 *outlenp = 0; 2088 return (EPROTONOSUPPORT); 2089 } 2090 if (checkonly) { 2091 /* T_CHECK case */ 2092 *(int *)outvalp = (*i1 & 0xFF); 2093 break; 2094 } 2095 icmp->icmp_proto = *i1 & 0xFF; 2096 if ((icmp->icmp_proto == IPPROTO_RAW || 2097 icmp->icmp_proto == IPPROTO_IGMP) && 2098 icmp->icmp_family == AF_INET) 2099 icmp->icmp_hdrincl = 1; 2100 else 2101 icmp->icmp_hdrincl = 0; 2102 2103 if (icmp->icmp_family == AF_INET6 && 2104 icmp->icmp_proto == IPPROTO_ICMPV6) { 2105 /* Set offset for icmp6_cksum */ 2106 icmp->icmp_raw_checksum = 0; 2107 icmp->icmp_checksum_off = 2; 2108 } 2109 if (icmp->icmp_proto == IPPROTO_UDP || 2110 icmp->icmp_proto == IPPROTO_TCP || 2111 icmp->icmp_proto == IPPROTO_SCTP) { 2112 icmp->icmp_no_tp_cksum = 1; 2113 icmp->icmp_sticky_ipp.ipp_fields |= 2114 IPPF_NO_CKSUM; 2115 } else { 2116 icmp->icmp_no_tp_cksum = 0; 2117 icmp->icmp_sticky_ipp.ipp_fields &= 2118 ~IPPF_NO_CKSUM; 2119 } 2120 2121 if (icmp->icmp_filter != NULL && 2122 icmp->icmp_proto != IPPROTO_ICMPV6) { 2123 kmem_free(icmp->icmp_filter, 2124 sizeof (icmp6_filter_t)); 2125 icmp->icmp_filter = NULL; 2126 } 2127 2128 /* Rebuild the header template */ 2129 error = icmp_build_hdrs(icmp); 2130 if (error != 0) { 2131 *outlenp = 0; 2132 return (error); 2133 } 2134 2135 /* 2136 * For SCTP, we don't use icmp_bind_proto() for 2137 * raw socket binding. Note that we do not need 2138 * to set *outlenp. 2139 * FIXME: how does SCTP work? 
2140 */ 2141 if (icmp->icmp_proto == IPPROTO_SCTP) 2142 return (0); 2143 2144 *outlenp = sizeof (int); 2145 *(int *)outvalp = *i1 & 0xFF; 2146 2147 /* Drop lock across the bind operation */ 2148 rw_exit(&icmp->icmp_rwlock); 2149 icmp_bind_proto(q); 2150 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2151 return (0); 2152 case SO_REUSEADDR: 2153 if (!checkonly) 2154 icmp->icmp_reuseaddr = onoff; 2155 break; 2156 2157 /* 2158 * The following three items are available here, 2159 * but are only meaningful to IP. 2160 */ 2161 case SO_DONTROUTE: 2162 if (!checkonly) 2163 icmp->icmp_dontroute = onoff; 2164 break; 2165 case SO_USELOOPBACK: 2166 if (!checkonly) 2167 icmp->icmp_useloopback = onoff; 2168 break; 2169 case SO_BROADCAST: 2170 if (!checkonly) 2171 icmp->icmp_broadcast = onoff; 2172 break; 2173 2174 case SO_SNDBUF: 2175 if (*i1 > is->is_max_buf) { 2176 *outlenp = 0; 2177 return (ENOBUFS); 2178 } 2179 if (!checkonly) { 2180 q->q_hiwat = *i1; 2181 } 2182 break; 2183 case SO_RCVBUF: 2184 if (*i1 > is->is_max_buf) { 2185 *outlenp = 0; 2186 return (ENOBUFS); 2187 } 2188 if (!checkonly) { 2189 RD(q)->q_hiwat = *i1; 2190 rw_exit(&icmp->icmp_rwlock); 2191 (void) mi_set_sth_hiwat(RD(q), *i1); 2192 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2193 } 2194 break; 2195 case SO_DGRAM_ERRIND: 2196 if (!checkonly) 2197 icmp->icmp_dgram_errind = onoff; 2198 break; 2199 case SO_ALLZONES: 2200 /* 2201 * "soft" error (negative) 2202 * option not handled at this level 2203 * Note: Do not modify *outlenp 2204 */ 2205 return (-EINVAL); 2206 case SO_TIMESTAMP: 2207 if (!checkonly) { 2208 icmp->icmp_timestamp = onoff; 2209 } 2210 break; 2211 case SO_MAC_EXEMPT: 2212 if (secpolicy_net_mac_aware(cr) != 0 || 2213 icmp->icmp_state != TS_UNBND) 2214 return (EACCES); 2215 if (!checkonly) 2216 icmp->icmp_mac_exempt = onoff; 2217 break; 2218 /* 2219 * Following three not meaningful for icmp 2220 * Action is same as "default" so we keep them 2221 * in comments. 
2222 * case SO_LINGER: 2223 * case SO_KEEPALIVE: 2224 * case SO_OOBINLINE: 2225 */ 2226 default: 2227 *outlenp = 0; 2228 return (EINVAL); 2229 } 2230 break; 2231 case IPPROTO_IP: 2232 /* 2233 * Only allow IPv4 option processing on IPv4 sockets. 2234 */ 2235 if (icmp->icmp_family != AF_INET) { 2236 *outlenp = 0; 2237 return (ENOPROTOOPT); 2238 } 2239 switch (name) { 2240 case IP_OPTIONS: 2241 case T_IP_OPTIONS: 2242 /* Save options for use by IP. */ 2243 if ((inlen & 0x3) || 2244 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2245 *outlenp = 0; 2246 return (EINVAL); 2247 } 2248 if (checkonly) 2249 break; 2250 2251 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2252 &icmp->icmp_ip_snd_options_len, 2253 icmp->icmp_label_len, invalp, inlen)) { 2254 *outlenp = 0; 2255 return (ENOMEM); 2256 } 2257 2258 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2259 icmp->icmp_ip_snd_options_len; 2260 rw_exit(&icmp->icmp_rwlock); 2261 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2262 is->is_wroff_extra); 2263 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2264 break; 2265 case IP_HDRINCL: 2266 if (!checkonly) 2267 icmp->icmp_hdrincl = onoff; 2268 break; 2269 case IP_TOS: 2270 case T_IP_TOS: 2271 if (!checkonly) { 2272 icmp->icmp_type_of_service = (uint8_t)*i1; 2273 } 2274 break; 2275 case IP_TTL: 2276 if (!checkonly) { 2277 icmp->icmp_ttl = (uint8_t)*i1; 2278 } 2279 break; 2280 case IP_MULTICAST_IF: 2281 /* 2282 * TODO should check OPTMGMT reply and undo this if 2283 * there is an error. 2284 */ 2285 if (!checkonly) 2286 icmp->icmp_multicast_if_addr = *i1; 2287 break; 2288 case IP_MULTICAST_TTL: 2289 if (!checkonly) 2290 icmp->icmp_multicast_ttl = *invalp; 2291 break; 2292 case IP_MULTICAST_LOOP: 2293 if (!checkonly) { 2294 connp->conn_multicast_loop = 2295 (*invalp == 0) ? 
0 : 1; 2296 } 2297 break; 2298 case IP_BOUND_IF: 2299 if (!checkonly) 2300 icmp->icmp_bound_if = *i1; 2301 break; 2302 case IP_UNSPEC_SRC: 2303 if (!checkonly) 2304 icmp->icmp_unspec_source = onoff; 2305 break; 2306 case IP_XMIT_IF: 2307 if (!checkonly) 2308 icmp->icmp_xmit_if = *i1; 2309 break; 2310 case IP_RECVIF: 2311 if (!checkonly) 2312 icmp->icmp_recvif = onoff; 2313 /* 2314 * pass to ip 2315 */ 2316 return (-EINVAL); 2317 case IP_PKTINFO: { 2318 /* 2319 * This also handles IP_RECVPKTINFO. 2320 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2321 * Differentiation is based on the size of the argument 2322 * passed in. 2323 */ 2324 struct in_pktinfo *pktinfop; 2325 ip4_pkt_t *attr_pktinfop; 2326 2327 if (checkonly) 2328 break; 2329 2330 if (inlen == sizeof (int)) { 2331 /* 2332 * This is IP_RECVPKTINFO option. 2333 * Keep a local copy of wether this option is 2334 * set or not and pass it down to IP for 2335 * processing. 2336 */ 2337 icmp->icmp_ip_recvpktinfo = onoff; 2338 return (-EINVAL); 2339 } 2340 2341 2342 if (inlen != sizeof (struct in_pktinfo)) 2343 return (EINVAL); 2344 2345 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2346 == NULL) { 2347 /* 2348 * sticky option is not supported 2349 */ 2350 return (EINVAL); 2351 } 2352 2353 pktinfop = (struct in_pktinfo *)invalp; 2354 2355 /* 2356 * Atleast one of the values should be specified 2357 */ 2358 if (pktinfop->ipi_ifindex == 0 && 2359 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2360 return (EINVAL); 2361 } 2362 2363 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2364 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2365 } 2366 break; 2367 case IP_ADD_MEMBERSHIP: 2368 case IP_DROP_MEMBERSHIP: 2369 case IP_BLOCK_SOURCE: 2370 case IP_UNBLOCK_SOURCE: 2371 case IP_ADD_SOURCE_MEMBERSHIP: 2372 case IP_DROP_SOURCE_MEMBERSHIP: 2373 case MCAST_JOIN_GROUP: 2374 case MCAST_LEAVE_GROUP: 2375 case MCAST_BLOCK_SOURCE: 2376 case MCAST_UNBLOCK_SOURCE: 2377 case MCAST_JOIN_SOURCE_GROUP: 2378 case 
MCAST_LEAVE_SOURCE_GROUP: 2379 case MRT_INIT: 2380 case MRT_DONE: 2381 case MRT_ADD_VIF: 2382 case MRT_DEL_VIF: 2383 case MRT_ADD_MFC: 2384 case MRT_DEL_MFC: 2385 case MRT_VERSION: 2386 case MRT_ASSERT: 2387 case IP_SEC_OPT: 2388 case IP_DONTFAILOVER_IF: 2389 case IP_NEXTHOP: 2390 /* 2391 * "soft" error (negative) 2392 * option not handled at this level 2393 * Note: Do not modify *outlenp 2394 */ 2395 return (-EINVAL); 2396 default: 2397 *outlenp = 0; 2398 return (EINVAL); 2399 } 2400 break; 2401 case IPPROTO_IPV6: { 2402 ip6_pkt_t *ipp; 2403 boolean_t sticky; 2404 2405 if (icmp->icmp_family != AF_INET6) { 2406 *outlenp = 0; 2407 return (ENOPROTOOPT); 2408 } 2409 /* 2410 * Deal with both sticky options and ancillary data 2411 */ 2412 if (thisdg_attrs == NULL) { 2413 /* sticky options, or none */ 2414 ipp = &icmp->icmp_sticky_ipp; 2415 sticky = B_TRUE; 2416 } else { 2417 /* ancillary data */ 2418 ipp = (ip6_pkt_t *)thisdg_attrs; 2419 sticky = B_FALSE; 2420 } 2421 2422 switch (name) { 2423 case IPV6_MULTICAST_IF: 2424 if (!checkonly) 2425 icmp->icmp_multicast_if_index = *i1; 2426 break; 2427 case IPV6_UNICAST_HOPS: 2428 /* -1 means use default */ 2429 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2430 *outlenp = 0; 2431 return (EINVAL); 2432 } 2433 if (!checkonly) { 2434 if (*i1 == -1) { 2435 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2436 is->is_ipv6_hoplimit; 2437 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2438 /* Pass modified value to IP. 
*/ 2439 *i1 = ipp->ipp_hoplimit; 2440 } else { 2441 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2442 (uint8_t)*i1; 2443 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2444 } 2445 /* Rebuild the header template */ 2446 error = icmp_build_hdrs(icmp); 2447 if (error != 0) { 2448 *outlenp = 0; 2449 return (error); 2450 } 2451 } 2452 break; 2453 case IPV6_MULTICAST_HOPS: 2454 /* -1 means use default */ 2455 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2456 *outlenp = 0; 2457 return (EINVAL); 2458 } 2459 if (!checkonly) { 2460 if (*i1 == -1) { 2461 icmp->icmp_multicast_ttl = 2462 ipp->ipp_multicast_hops = 2463 IP_DEFAULT_MULTICAST_TTL; 2464 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2465 /* Pass modified value to IP. */ 2466 *i1 = icmp->icmp_multicast_ttl; 2467 } else { 2468 icmp->icmp_multicast_ttl = 2469 ipp->ipp_multicast_hops = 2470 (uint8_t)*i1; 2471 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2472 } 2473 } 2474 break; 2475 case IPV6_MULTICAST_LOOP: 2476 if (*i1 != 0 && *i1 != 1) { 2477 *outlenp = 0; 2478 return (EINVAL); 2479 } 2480 if (!checkonly) 2481 connp->conn_multicast_loop = *i1; 2482 break; 2483 case IPV6_CHECKSUM: 2484 /* 2485 * Integer offset into the user data of where the 2486 * checksum is located. 2487 * Offset of -1 disables option. 2488 * Does not apply to IPPROTO_ICMPV6. 
2489 */ 2490 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2491 *outlenp = 0; 2492 return (EINVAL); 2493 } 2494 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2495 /* Negative or not 16 bit aligned offset */ 2496 *outlenp = 0; 2497 return (EINVAL); 2498 } 2499 if (checkonly) 2500 break; 2501 2502 if (*i1 == -1) { 2503 icmp->icmp_raw_checksum = 0; 2504 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2505 } else { 2506 icmp->icmp_raw_checksum = 1; 2507 icmp->icmp_checksum_off = *i1; 2508 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2509 } 2510 /* Rebuild the header template */ 2511 error = icmp_build_hdrs(icmp); 2512 if (error != 0) { 2513 *outlenp = 0; 2514 return (error); 2515 } 2516 break; 2517 case IPV6_JOIN_GROUP: 2518 case IPV6_LEAVE_GROUP: 2519 case MCAST_JOIN_GROUP: 2520 case MCAST_LEAVE_GROUP: 2521 case MCAST_BLOCK_SOURCE: 2522 case MCAST_UNBLOCK_SOURCE: 2523 case MCAST_JOIN_SOURCE_GROUP: 2524 case MCAST_LEAVE_SOURCE_GROUP: 2525 /* 2526 * "soft" error (negative) 2527 * option not handled at this level 2528 * Note: Do not modify *outlenp 2529 */ 2530 return (-EINVAL); 2531 case IPV6_BOUND_IF: 2532 if (!checkonly) 2533 icmp->icmp_bound_if = *i1; 2534 break; 2535 case IPV6_UNSPEC_SRC: 2536 if (!checkonly) 2537 icmp->icmp_unspec_source = onoff; 2538 break; 2539 case IPV6_RECVTCLASS: 2540 if (!checkonly) 2541 icmp->icmp_ipv6_recvtclass = onoff; 2542 break; 2543 /* 2544 * Set boolean switches for ancillary data delivery 2545 */ 2546 case IPV6_RECVPKTINFO: 2547 if (!checkonly) 2548 icmp->icmp_ip_recvpktinfo = onoff; 2549 break; 2550 case IPV6_RECVPATHMTU: 2551 if (!checkonly) 2552 icmp->icmp_ipv6_recvpathmtu = onoff; 2553 break; 2554 case IPV6_RECVHOPLIMIT: 2555 if (!checkonly) 2556 icmp->icmp_ipv6_recvhoplimit = onoff; 2557 break; 2558 case IPV6_RECVHOPOPTS: 2559 if (!checkonly) 2560 icmp->icmp_ipv6_recvhopopts = onoff; 2561 break; 2562 case IPV6_RECVDSTOPTS: 2563 if (!checkonly) 2564 icmp->icmp_ipv6_recvdstopts = onoff; 2565 break; 2566 case _OLD_IPV6_RECVDSTOPTS: 2567 
if (!checkonly) 2568 icmp->icmp_old_ipv6_recvdstopts = onoff; 2569 break; 2570 case IPV6_RECVRTHDRDSTOPTS: 2571 if (!checkonly) 2572 icmp->icmp_ipv6_recvrtdstopts = onoff; 2573 break; 2574 case IPV6_RECVRTHDR: 2575 if (!checkonly) 2576 icmp->icmp_ipv6_recvrthdr = onoff; 2577 break; 2578 /* 2579 * Set sticky options or ancillary data. 2580 * If sticky options, (re)build any extension headers 2581 * that might be needed as a result. 2582 */ 2583 case IPV6_PKTINFO: 2584 /* 2585 * The source address and ifindex are verified 2586 * in ip_opt_set(). For ancillary data the 2587 * source address is checked in ip_wput_v6. 2588 */ 2589 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2590 return (EINVAL); 2591 if (checkonly) 2592 break; 2593 2594 if (inlen == 0) { 2595 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2596 ipp->ipp_sticky_ignored |= 2597 (IPPF_IFINDEX|IPPF_ADDR); 2598 } else { 2599 struct in6_pktinfo *pkti; 2600 2601 pkti = (struct in6_pktinfo *)invalp; 2602 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2603 ipp->ipp_addr = pkti->ipi6_addr; 2604 if (ipp->ipp_ifindex != 0) 2605 ipp->ipp_fields |= IPPF_IFINDEX; 2606 else 2607 ipp->ipp_fields &= ~IPPF_IFINDEX; 2608 if (!IN6_IS_ADDR_UNSPECIFIED( 2609 &ipp->ipp_addr)) 2610 ipp->ipp_fields |= IPPF_ADDR; 2611 else 2612 ipp->ipp_fields &= ~IPPF_ADDR; 2613 } 2614 if (sticky) { 2615 error = icmp_build_hdrs(icmp); 2616 if (error != 0) 2617 return (error); 2618 } 2619 break; 2620 case IPV6_HOPLIMIT: 2621 /* This option can only be used as ancillary data. 
*/ 2622 if (sticky) 2623 return (EINVAL); 2624 if (inlen != 0 && inlen != sizeof (int)) 2625 return (EINVAL); 2626 if (checkonly) 2627 break; 2628 2629 if (inlen == 0) { 2630 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2631 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2632 } else { 2633 if (*i1 > 255 || *i1 < -1) 2634 return (EINVAL); 2635 if (*i1 == -1) 2636 ipp->ipp_hoplimit = 2637 is->is_ipv6_hoplimit; 2638 else 2639 ipp->ipp_hoplimit = *i1; 2640 ipp->ipp_fields |= IPPF_HOPLIMIT; 2641 } 2642 break; 2643 case IPV6_TCLASS: 2644 /* 2645 * IPV6_RECVTCLASS accepts -1 as use kernel default 2646 * and [0, 255] as the actualy traffic class. 2647 */ 2648 if (inlen != 0 && inlen != sizeof (int)) 2649 return (EINVAL); 2650 if (checkonly) 2651 break; 2652 2653 if (inlen == 0) { 2654 ipp->ipp_fields &= ~IPPF_TCLASS; 2655 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2656 } else { 2657 if (*i1 >= 256 || *i1 < -1) 2658 return (EINVAL); 2659 if (*i1 == -1) { 2660 ipp->ipp_tclass = 2661 IPV6_FLOW_TCLASS( 2662 IPV6_DEFAULT_VERS_AND_FLOW); 2663 } else { 2664 ipp->ipp_tclass = *i1; 2665 } 2666 ipp->ipp_fields |= IPPF_TCLASS; 2667 } 2668 if (sticky) { 2669 error = icmp_build_hdrs(icmp); 2670 if (error != 0) 2671 return (error); 2672 } 2673 break; 2674 case IPV6_NEXTHOP: 2675 /* 2676 * IP will verify that the nexthop is reachable 2677 * and fail for sticky options. 
2678 */ 2679 if (inlen != 0 && inlen != sizeof (sin6_t)) 2680 return (EINVAL); 2681 if (checkonly) 2682 break; 2683 2684 if (inlen == 0) { 2685 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2686 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2687 } else { 2688 sin6_t *sin6 = (sin6_t *)invalp; 2689 2690 if (sin6->sin6_family != AF_INET6) 2691 return (EAFNOSUPPORT); 2692 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 2693 return (EADDRNOTAVAIL); 2694 ipp->ipp_nexthop = sin6->sin6_addr; 2695 if (!IN6_IS_ADDR_UNSPECIFIED( 2696 &ipp->ipp_nexthop)) 2697 ipp->ipp_fields |= IPPF_NEXTHOP; 2698 else 2699 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2700 } 2701 if (sticky) { 2702 error = icmp_build_hdrs(icmp); 2703 if (error != 0) 2704 return (error); 2705 } 2706 break; 2707 case IPV6_HOPOPTS: { 2708 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2709 /* 2710 * Sanity checks - minimum size, size a multiple of 2711 * eight bytes, and matching size passed in. 2712 */ 2713 if (inlen != 0 && 2714 inlen != (8 * (hopts->ip6h_len + 1))) 2715 return (EINVAL); 2716 2717 if (checkonly) 2718 break; 2719 error = optcom_pkt_set(invalp, inlen, sticky, 2720 (uchar_t **)&ipp->ipp_hopopts, 2721 &ipp->ipp_hopoptslen, 2722 sticky ? icmp->icmp_label_len_v6 : 0); 2723 if (error != 0) 2724 return (error); 2725 if (ipp->ipp_hopoptslen == 0) { 2726 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2727 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2728 } else { 2729 ipp->ipp_fields |= IPPF_HOPOPTS; 2730 } 2731 if (sticky) { 2732 error = icmp_build_hdrs(icmp); 2733 if (error != 0) 2734 return (error); 2735 } 2736 break; 2737 } 2738 case IPV6_RTHDRDSTOPTS: { 2739 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2740 2741 /* 2742 * Sanity checks - minimum size, size a multiple of 2743 * eight bytes, and matching size passed in. 
2744 */ 2745 if (inlen != 0 && 2746 inlen != (8 * (dopts->ip6d_len + 1))) 2747 return (EINVAL); 2748 2749 if (checkonly) 2750 break; 2751 2752 if (inlen == 0) { 2753 if (sticky && 2754 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2755 kmem_free(ipp->ipp_rtdstopts, 2756 ipp->ipp_rtdstoptslen); 2757 ipp->ipp_rtdstopts = NULL; 2758 ipp->ipp_rtdstoptslen = 0; 2759 } 2760 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2761 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2762 } else { 2763 error = optcom_pkt_set(invalp, inlen, sticky, 2764 (uchar_t **)&ipp->ipp_rtdstopts, 2765 &ipp->ipp_rtdstoptslen, 0); 2766 if (error != 0) 2767 return (error); 2768 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2769 } 2770 if (sticky) { 2771 error = icmp_build_hdrs(icmp); 2772 if (error != 0) 2773 return (error); 2774 } 2775 break; 2776 } 2777 case IPV6_DSTOPTS: { 2778 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2779 2780 /* 2781 * Sanity checks - minimum size, size a multiple of 2782 * eight bytes, and matching size passed in. 2783 */ 2784 if (inlen != 0 && 2785 inlen != (8 * (dopts->ip6d_len + 1))) 2786 return (EINVAL); 2787 2788 if (checkonly) 2789 break; 2790 2791 if (inlen == 0) { 2792 if (sticky && 2793 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2794 kmem_free(ipp->ipp_dstopts, 2795 ipp->ipp_dstoptslen); 2796 ipp->ipp_dstopts = NULL; 2797 ipp->ipp_dstoptslen = 0; 2798 } 2799 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2800 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2801 } else { 2802 error = optcom_pkt_set(invalp, inlen, sticky, 2803 (uchar_t **)&ipp->ipp_dstopts, 2804 &ipp->ipp_dstoptslen, 0); 2805 if (error != 0) 2806 return (error); 2807 ipp->ipp_fields |= IPPF_DSTOPTS; 2808 } 2809 if (sticky) { 2810 error = icmp_build_hdrs(icmp); 2811 if (error != 0) 2812 return (error); 2813 } 2814 break; 2815 } 2816 case IPV6_RTHDR: { 2817 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2818 2819 /* 2820 * Sanity checks - minimum size, size a multiple of 2821 * eight bytes, and matching size passed in. 
2822 */ 2823 if (inlen != 0 && 2824 inlen != (8 * (rt->ip6r_len + 1))) 2825 return (EINVAL); 2826 2827 if (checkonly) 2828 break; 2829 2830 if (inlen == 0) { 2831 if (sticky && 2832 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2833 kmem_free(ipp->ipp_rthdr, 2834 ipp->ipp_rthdrlen); 2835 ipp->ipp_rthdr = NULL; 2836 ipp->ipp_rthdrlen = 0; 2837 } 2838 ipp->ipp_fields &= ~IPPF_RTHDR; 2839 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2840 } else { 2841 error = optcom_pkt_set(invalp, inlen, sticky, 2842 (uchar_t **)&ipp->ipp_rthdr, 2843 &ipp->ipp_rthdrlen, 0); 2844 if (error != 0) 2845 return (error); 2846 ipp->ipp_fields |= IPPF_RTHDR; 2847 } 2848 if (sticky) { 2849 error = icmp_build_hdrs(icmp); 2850 if (error != 0) 2851 return (error); 2852 } 2853 break; 2854 } 2855 2856 case IPV6_DONTFRAG: 2857 if (checkonly) 2858 break; 2859 2860 if (onoff) { 2861 ipp->ipp_fields |= IPPF_DONTFRAG; 2862 } else { 2863 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2864 } 2865 break; 2866 2867 case IPV6_USE_MIN_MTU: 2868 if (inlen != sizeof (int)) 2869 return (EINVAL); 2870 2871 if (*i1 < -1 || *i1 > 1) 2872 return (EINVAL); 2873 2874 if (checkonly) 2875 break; 2876 2877 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 2878 ipp->ipp_use_min_mtu = *i1; 2879 break; 2880 2881 /* 2882 * This option can't be set. Its only returned via 2883 * getsockopt() or ancillary data. 2884 */ 2885 case IPV6_PATHMTU: 2886 return (EINVAL); 2887 2888 case IPV6_BOUND_PIF: 2889 case IPV6_SEC_OPT: 2890 case IPV6_DONTFAILOVER_IF: 2891 case IPV6_SRC_PREFERENCES: 2892 case IPV6_V6ONLY: 2893 /* Handled at IP level */ 2894 return (-EINVAL); 2895 default: 2896 *outlenp = 0; 2897 return (EINVAL); 2898 } 2899 break; 2900 } /* end IPPROTO_IPV6 */ 2901 2902 case IPPROTO_ICMPV6: 2903 /* 2904 * Only allow IPv6 option processing on IPv6 sockets. 
2905 */ 2906 if (icmp->icmp_family != AF_INET6) { 2907 *outlenp = 0; 2908 return (ENOPROTOOPT); 2909 } 2910 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2911 *outlenp = 0; 2912 return (ENOPROTOOPT); 2913 } 2914 switch (name) { 2915 case ICMP6_FILTER: 2916 if (!checkonly) { 2917 if ((inlen != 0) && 2918 (inlen != sizeof (icmp6_filter_t))) 2919 return (EINVAL); 2920 2921 if (inlen == 0) { 2922 if (icmp->icmp_filter != NULL) { 2923 kmem_free(icmp->icmp_filter, 2924 sizeof (icmp6_filter_t)); 2925 icmp->icmp_filter = NULL; 2926 } 2927 } else { 2928 if (icmp->icmp_filter == NULL) { 2929 icmp->icmp_filter = kmem_alloc( 2930 sizeof (icmp6_filter_t), 2931 KM_NOSLEEP); 2932 if (icmp->icmp_filter == NULL) { 2933 *outlenp = 0; 2934 return (ENOBUFS); 2935 } 2936 } 2937 (void) bcopy(invalp, icmp->icmp_filter, 2938 inlen); 2939 } 2940 } 2941 break; 2942 2943 default: 2944 *outlenp = 0; 2945 return (EINVAL); 2946 } 2947 break; 2948 default: 2949 *outlenp = 0; 2950 return (EINVAL); 2951 } 2952 /* 2953 * Common case of OK return with outval same as inval. 2954 */ 2955 if (invalp != outvalp) { 2956 /* don't trust bcopy for identical src/dst */ 2957 (void) bcopy(invalp, outvalp, inlen); 2958 } 2959 *outlenp = inlen; 2960 return (0); 2961 } 2962 /* This routine sets socket options. */ 2963 /* ARGSUSED */ 2964 int 2965 icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, 2966 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2967 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2968 { 2969 icmp_t *icmp; 2970 int err; 2971 2972 icmp = Q_TO_ICMP(q); 2973 2974 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2975 err = icmp_opt_set_locked(q, optset_context, level, name, inlen, invalp, 2976 outlenp, outvalp, thisdg_attrs, cr, mblk); 2977 rw_exit(&icmp->icmp_rwlock); 2978 return (err); 2979 } 2980 2981 /* 2982 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 2983 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 
2984 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 2985 * headers. 2986 * Returns failure if can't allocate memory. 2987 */ 2988 static int 2989 icmp_build_hdrs(icmp_t *icmp) 2990 { 2991 icmp_stack_t *is = icmp->icmp_is; 2992 uchar_t *hdrs; 2993 uint_t hdrs_len; 2994 ip6_t *ip6h; 2995 ip6i_t *ip6i; 2996 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 2997 2998 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 2999 hdrs_len = ip_total_hdrs_len_v6(ipp); 3000 ASSERT(hdrs_len != 0); 3001 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 3002 /* Need to reallocate */ 3003 if (hdrs_len != 0) { 3004 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3005 if (hdrs == NULL) 3006 return (ENOMEM); 3007 } else { 3008 hdrs = NULL; 3009 } 3010 if (icmp->icmp_sticky_hdrs_len != 0) { 3011 kmem_free(icmp->icmp_sticky_hdrs, 3012 icmp->icmp_sticky_hdrs_len); 3013 } 3014 icmp->icmp_sticky_hdrs = hdrs; 3015 icmp->icmp_sticky_hdrs_len = hdrs_len; 3016 } 3017 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 3018 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 3019 3020 /* Set header fields not in ipp */ 3021 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3022 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 3023 ip6h = (ip6_t *)&ip6i[1]; 3024 3025 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 3026 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 3027 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 3028 } 3029 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 3030 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 3031 } 3032 } else { 3033 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 3034 } 3035 3036 if (!(ipp->ipp_fields & IPPF_ADDR)) 3037 ip6h->ip6_src = icmp->icmp_v6src; 3038 3039 /* Try to get everything in a single mblk */ 3040 if (hdrs_len > icmp->icmp_max_hdr_len) { 3041 icmp->icmp_max_hdr_len = hdrs_len; 3042 rw_exit(&icmp->icmp_rwlock); 3043 (void) mi_set_sth_wroff(icmp->icmp_connp->conn_rq, 3044 icmp->icmp_max_hdr_len + is->is_wroff_extra); 3045 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3046 } 3047 return (0); 3048 } 3049 3050 /* 3051 * 
This routine retrieves the value of an ND variable in a icmpparam_t 3052 * structure. It is called through nd_getset when a user reads the 3053 * variable. 3054 */ 3055 /* ARGSUSED */ 3056 static int 3057 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3058 { 3059 icmpparam_t *icmppa = (icmpparam_t *)cp; 3060 3061 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 3062 return (0); 3063 } 3064 3065 /* 3066 * Walk through the param array specified registering each element with the 3067 * named dispatch (ND) handler. 3068 */ 3069 static boolean_t 3070 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) 3071 { 3072 for (; cnt-- > 0; icmppa++) { 3073 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 3074 if (!nd_load(ndp, icmppa->icmp_param_name, 3075 icmp_param_get, icmp_param_set, 3076 (caddr_t)icmppa)) { 3077 nd_free(ndp); 3078 return (B_FALSE); 3079 } 3080 } 3081 } 3082 if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL, 3083 NULL)) { 3084 nd_free(ndp); 3085 return (B_FALSE); 3086 } 3087 return (B_TRUE); 3088 } 3089 3090 /* This routine sets an ND variable in a icmpparam_t structure. */ 3091 /* ARGSUSED */ 3092 static int 3093 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3094 { 3095 long new_value; 3096 icmpparam_t *icmppa = (icmpparam_t *)cp; 3097 3098 /* 3099 * Fail the request if the new value does not lie within the 3100 * required bounds. 
3101 */ 3102 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3103 new_value < icmppa->icmp_param_min || 3104 new_value > icmppa->icmp_param_max) { 3105 return (EINVAL); 3106 } 3107 /* Set the new value */ 3108 icmppa->icmp_param_value = new_value; 3109 return (0); 3110 } 3111 /*ARGSUSED2*/ 3112 static void 3113 icmp_input(void *arg1, mblk_t *mp, void *arg2) 3114 { 3115 conn_t *connp = (conn_t *)arg1; 3116 struct T_unitdata_ind *tudi; 3117 uchar_t *rptr; 3118 icmp_t *icmp; 3119 icmp_stack_t *is; 3120 sin_t *sin; 3121 sin6_t *sin6; 3122 ip6_t *ip6h; 3123 ip6i_t *ip6i; 3124 mblk_t *mp1; 3125 int hdr_len; 3126 ipha_t *ipha; 3127 int udi_size; /* Size of T_unitdata_ind */ 3128 uint_t ipvers; 3129 ip6_pkt_t ipp; 3130 uint8_t nexthdr; 3131 ip_pktinfo_t *pinfo = NULL; 3132 mblk_t *options_mp = NULL; 3133 uint_t icmp_opt = 0; 3134 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 3135 uint_t hopstrip; 3136 3137 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 3138 3139 icmp = connp->conn_icmp; 3140 is = icmp->icmp_is; 3141 rptr = mp->b_rptr; 3142 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3143 ASSERT(OK_32PTR(rptr)); 3144 3145 /* 3146 * IP should have prepended the options data in an M_CTL 3147 * Check M_CTL "type" to make sure are not here bcos of 3148 * a valid ICMP message 3149 */ 3150 if (DB_TYPE(mp) == M_CTL) { 3151 /* 3152 * FIXME: does IP still do this? 3153 * IP sends up the IPSEC_IN message for handling IPSEC 3154 * policy at the TCP level. We don't need it here. 3155 */ 3156 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 3157 mp1 = mp->b_cont; 3158 freeb(mp); 3159 mp = mp1; 3160 rptr = mp->b_rptr; 3161 } else if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3162 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3163 IN_PKTINFO) { 3164 /* 3165 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3166 * has been prepended to the packet by IP. 
We need to 3167 * extract the mblk and adjust the rptr 3168 */ 3169 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3170 options_mp = mp; 3171 mp = mp->b_cont; 3172 rptr = mp->b_rptr; 3173 } else { 3174 /* 3175 * ICMP messages. 3176 */ 3177 icmp_icmp_error(connp->conn_rq, mp); 3178 return; 3179 } 3180 } 3181 3182 /* 3183 * Discard message if it is misaligned or smaller than the IP header. 3184 */ 3185 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3186 freemsg(mp); 3187 if (options_mp != NULL) 3188 freeb(options_mp); 3189 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3190 return; 3191 } 3192 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3193 3194 /* Handle M_DATA messages containing IP packets messages */ 3195 if (ipvers == IPV4_VERSION) { 3196 /* 3197 * Special case where IP attaches 3198 * the IRE needs to be handled so that we don't send up 3199 * IRE to the user land. 3200 */ 3201 ipha = (ipha_t *)rptr; 3202 hdr_len = IPH_HDR_LENGTH(ipha); 3203 3204 if (ipha->ipha_protocol == IPPROTO_TCP) { 3205 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3206 3207 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3208 TH_SYN) && mp->b_cont != NULL) { 3209 mp1 = mp->b_cont; 3210 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3211 freeb(mp1); 3212 mp->b_cont = NULL; 3213 } 3214 } 3215 } 3216 if (is->is_bsd_compat) { 3217 ushort_t len; 3218 len = ntohs(ipha->ipha_length); 3219 3220 if (mp->b_datap->db_ref > 1) { 3221 /* 3222 * Allocate a new IP header so that we can 3223 * modify ipha_length. 
3224 */ 3225 mblk_t *mp1; 3226 3227 mp1 = allocb(hdr_len, BPRI_MED); 3228 if (!mp1) { 3229 freemsg(mp); 3230 if (options_mp != NULL) 3231 freeb(options_mp); 3232 BUMP_MIB(&is->is_rawip_mib, 3233 rawipInErrors); 3234 return; 3235 } 3236 bcopy(rptr, mp1->b_rptr, hdr_len); 3237 mp->b_rptr = rptr + hdr_len; 3238 rptr = mp1->b_rptr; 3239 ipha = (ipha_t *)rptr; 3240 mp1->b_cont = mp; 3241 mp1->b_wptr = rptr + hdr_len; 3242 mp = mp1; 3243 } 3244 len -= hdr_len; 3245 ipha->ipha_length = htons(len); 3246 } 3247 } 3248 3249 /* 3250 * This is the inbound data path. Packets are passed upstream as 3251 * T_UNITDATA_IND messages with full IP headers still attached. 3252 */ 3253 if (icmp->icmp_family == AF_INET) { 3254 ASSERT(ipvers == IPV4_VERSION); 3255 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3256 if (icmp->icmp_recvif && (pinfo != NULL) && 3257 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3258 udi_size += sizeof (struct T_opthdr) + 3259 sizeof (uint_t); 3260 } 3261 3262 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3263 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3264 udi_size += sizeof (struct T_opthdr) + 3265 sizeof (struct in_pktinfo); 3266 } 3267 3268 /* 3269 * If SO_TIMESTAMP is set allocate the appropriate sized 3270 * buffer. Since gethrestime() expects a pointer aligned 3271 * argument, we allocate space necessary for extra 3272 * alignment (even though it might not be used). 
3273 */ 3274 if (icmp->icmp_timestamp) { 3275 udi_size += sizeof (struct T_opthdr) + 3276 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3277 } 3278 mp1 = allocb(udi_size, BPRI_MED); 3279 if (mp1 == NULL) { 3280 freemsg(mp); 3281 if (options_mp != NULL) 3282 freeb(options_mp); 3283 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3284 return; 3285 } 3286 mp1->b_cont = mp; 3287 mp = mp1; 3288 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3289 mp->b_datap->db_type = M_PROTO; 3290 mp->b_wptr = (uchar_t *)tudi + udi_size; 3291 tudi->PRIM_type = T_UNITDATA_IND; 3292 tudi->SRC_length = sizeof (sin_t); 3293 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3294 sin = (sin_t *)&tudi[1]; 3295 *sin = sin_null; 3296 sin->sin_family = AF_INET; 3297 sin->sin_addr.s_addr = ipha->ipha_src; 3298 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3299 sizeof (sin_t); 3300 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3301 tudi->OPT_length = udi_size; 3302 3303 /* 3304 * Add options if IP_RECVIF is set 3305 */ 3306 if (udi_size != 0) { 3307 char *dstopt; 3308 3309 dstopt = (char *)&sin[1]; 3310 if (icmp->icmp_recvif && (pinfo != NULL) && 3311 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3312 3313 struct T_opthdr *toh; 3314 uint_t *dstptr; 3315 3316 toh = (struct T_opthdr *)dstopt; 3317 toh->level = IPPROTO_IP; 3318 toh->name = IP_RECVIF; 3319 toh->len = sizeof (struct T_opthdr) + 3320 sizeof (uint_t); 3321 toh->status = 0; 3322 dstopt += sizeof (struct T_opthdr); 3323 dstptr = (uint_t *)dstopt; 3324 *dstptr = pinfo->ip_pkt_ifindex; 3325 dstopt += sizeof (uint_t); 3326 udi_size -= toh->len; 3327 } 3328 if (icmp->icmp_timestamp) { 3329 struct T_opthdr *toh; 3330 3331 toh = (struct T_opthdr *)dstopt; 3332 toh->level = SOL_SOCKET; 3333 toh->name = SCM_TIMESTAMP; 3334 toh->len = sizeof (struct T_opthdr) + 3335 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3336 toh->status = 0; 3337 dstopt += sizeof (struct T_opthdr); 3338 /* Align for gethrestime() */ 3339 dstopt = (char 
*)P2ROUNDUP((intptr_t)dstopt, 3340 sizeof (intptr_t)); 3341 gethrestime((timestruc_t *)dstopt); 3342 dstopt = (char *)toh + toh->len; 3343 udi_size -= toh->len; 3344 } 3345 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3346 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3347 struct T_opthdr *toh; 3348 struct in_pktinfo *pktinfop; 3349 3350 toh = (struct T_opthdr *)dstopt; 3351 toh->level = IPPROTO_IP; 3352 toh->name = IP_PKTINFO; 3353 toh->len = sizeof (struct T_opthdr) + 3354 sizeof (in_pktinfo_t); 3355 toh->status = 0; 3356 dstopt += sizeof (struct T_opthdr); 3357 pktinfop = (struct in_pktinfo *)dstopt; 3358 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3359 pktinfop->ipi_spec_dst = 3360 pinfo->ip_pkt_match_addr; 3361 3362 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3363 3364 dstopt += sizeof (struct in_pktinfo); 3365 udi_size -= toh->len; 3366 } 3367 3368 /* Consumed all of allocated space */ 3369 ASSERT(udi_size == 0); 3370 } 3371 3372 if (options_mp != NULL) 3373 freeb(options_mp); 3374 3375 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3376 putnext(connp->conn_rq, mp); 3377 return; 3378 } 3379 3380 /* 3381 * We don't need options_mp in the IPv6 path. 3382 */ 3383 if (options_mp != NULL) { 3384 freeb(options_mp); 3385 options_mp = NULL; 3386 } 3387 3388 /* 3389 * Discard message if it is smaller than the IPv6 header 3390 * or if the header is malformed. 3391 */ 3392 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3393 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3394 icmp->icmp_family != AF_INET6) { 3395 freemsg(mp); 3396 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3397 return; 3398 } 3399 3400 /* Initialize */ 3401 ipp.ipp_fields = 0; 3402 hopstrip = 0; 3403 3404 ip6h = (ip6_t *)rptr; 3405 /* 3406 * Call on ip_find_hdr_v6 which gets the total hdr len 3407 * as well as individual lenghts of ext hdrs (and ptrs to 3408 * them). 
3409 */ 3410 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3411 /* Look for ifindex information */ 3412 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3413 ip6i = (ip6i_t *)ip6h; 3414 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3415 ASSERT(ip6i->ip6i_ifindex != 0); 3416 ipp.ipp_fields |= IPPF_IFINDEX; 3417 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3418 } 3419 rptr = (uchar_t *)&ip6i[1]; 3420 mp->b_rptr = rptr; 3421 if (rptr == mp->b_wptr) { 3422 mp1 = mp->b_cont; 3423 freeb(mp); 3424 mp = mp1; 3425 rptr = mp->b_rptr; 3426 } 3427 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3428 ip6h = (ip6_t *)rptr; 3429 } 3430 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3431 3432 /* 3433 * We need to lie a bit to the user because users inside 3434 * labeled compartments should not see their own labels. We 3435 * assume that in all other respects IP has checked the label, 3436 * and that the label is always first among the options. (If 3437 * it's not first, then this code won't see it, and the option 3438 * will be passed along to the user.) 3439 * 3440 * If we had multilevel ICMP sockets, then the following code 3441 * should be skipped for them to allow the user to see the 3442 * label. 3443 * 3444 * Alignment restrictions in the definition of IP options 3445 * (namely, the requirement that the 4-octet DOI goes on a 3446 * 4-octet boundary) mean that we know exactly where the option 3447 * should start, but we're lenient for other hosts. 3448 * 3449 * Note that there are no multilevel ICMP or raw IP sockets 3450 * yet, thus nobody ever sees the IP6OPT_LS option. 
3451 */ 3452 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3453 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3454 const uchar_t *ucp = 3455 (const uchar_t *)ipp.ipp_hopopts + 2; 3456 int remlen = ipp.ipp_hopoptslen - 2; 3457 3458 while (remlen > 0) { 3459 if (*ucp == IP6OPT_PAD1) { 3460 remlen--; 3461 ucp++; 3462 } else if (*ucp == IP6OPT_PADN) { 3463 remlen -= ucp[1] + 2; 3464 ucp += ucp[1] + 2; 3465 } else if (*ucp == ip6opt_ls) { 3466 hopstrip = (ucp - 3467 (const uchar_t *)ipp.ipp_hopopts) + 3468 ucp[1] + 2; 3469 hopstrip = (hopstrip + 7) & ~7; 3470 break; 3471 } else { 3472 /* label option must be first */ 3473 break; 3474 } 3475 } 3476 } 3477 } else { 3478 hdr_len = IPV6_HDR_LEN; 3479 ip6i = NULL; 3480 nexthdr = ip6h->ip6_nxt; 3481 } 3482 /* 3483 * One special case where IP attaches the IRE needs to 3484 * be handled so that we don't send up IRE to the user land. 3485 */ 3486 if (nexthdr == IPPROTO_TCP) { 3487 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3488 3489 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3490 mp->b_cont != NULL) { 3491 mp1 = mp->b_cont; 3492 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3493 freeb(mp1); 3494 mp->b_cont = NULL; 3495 } 3496 } 3497 } 3498 /* 3499 * Check a filter for ICMPv6 types if needed. 3500 * Verify raw checksums if needed. 
3501 */ 3502 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3503 if (icmp->icmp_filter != NULL) { 3504 int type; 3505 3506 /* Assumes that IP has done the pullupmsg */ 3507 type = mp->b_rptr[hdr_len]; 3508 3509 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3510 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3511 freemsg(mp); 3512 return; 3513 } 3514 } else { 3515 /* Checksum */ 3516 uint16_t *up; 3517 uint32_t sum; 3518 int remlen; 3519 3520 up = (uint16_t *)&ip6h->ip6_src; 3521 3522 remlen = msgdsize(mp) - hdr_len; 3523 sum = htons(icmp->icmp_proto + remlen) 3524 + up[0] + up[1] + up[2] + up[3] 3525 + up[4] + up[5] + up[6] + up[7] 3526 + up[8] + up[9] + up[10] + up[11] 3527 + up[12] + up[13] + up[14] + up[15]; 3528 sum = (sum & 0xffff) + (sum >> 16); 3529 sum = IP_CSUM(mp, hdr_len, sum); 3530 if (sum != 0) { 3531 /* IPv6 RAW checksum failed */ 3532 ip0dbg(("icmp_rput: RAW checksum " 3533 "failed %x\n", sum)); 3534 freemsg(mp); 3535 BUMP_MIB(&is->is_rawip_mib, 3536 rawipInCksumErrs); 3537 return; 3538 } 3539 } 3540 } 3541 /* Skip all the IPv6 headers per API */ 3542 mp->b_rptr += hdr_len; 3543 3544 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3545 3546 /* 3547 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3548 * maintain state information, instead of relying on icmp_t 3549 * structure, since there arent any locks protecting these members 3550 * and there is a window where there might be a race between a 3551 * thread setting options on the write side and a thread reading 3552 * these options on the read size. 
3553 */ 3554 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3555 IPPF_RTHDR|IPPF_IFINDEX)) { 3556 if (icmp->icmp_ipv6_recvhopopts && 3557 (ipp.ipp_fields & IPPF_HOPOPTS) && 3558 ipp.ipp_hopoptslen > hopstrip) { 3559 udi_size += sizeof (struct T_opthdr) + 3560 ipp.ipp_hopoptslen - hopstrip; 3561 icmp_opt |= IPPF_HOPOPTS; 3562 } 3563 if ((icmp->icmp_ipv6_recvdstopts || 3564 icmp->icmp_old_ipv6_recvdstopts) && 3565 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3566 udi_size += sizeof (struct T_opthdr) + 3567 ipp.ipp_dstoptslen; 3568 icmp_opt |= IPPF_DSTOPTS; 3569 } 3570 if (((icmp->icmp_ipv6_recvdstopts && 3571 icmp->icmp_ipv6_recvrthdr && 3572 (ipp.ipp_fields & IPPF_RTHDR)) || 3573 icmp->icmp_ipv6_recvrtdstopts) && 3574 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3575 udi_size += sizeof (struct T_opthdr) + 3576 ipp.ipp_rtdstoptslen; 3577 icmp_opt |= IPPF_RTDSTOPTS; 3578 } 3579 if (icmp->icmp_ipv6_recvrthdr && 3580 (ipp.ipp_fields & IPPF_RTHDR)) { 3581 udi_size += sizeof (struct T_opthdr) + 3582 ipp.ipp_rthdrlen; 3583 icmp_opt |= IPPF_RTHDR; 3584 } 3585 if (icmp->icmp_ip_recvpktinfo && 3586 (ipp.ipp_fields & IPPF_IFINDEX)) { 3587 udi_size += sizeof (struct T_opthdr) + 3588 sizeof (struct in6_pktinfo); 3589 icmp_opt |= IPPF_IFINDEX; 3590 } 3591 } 3592 if (icmp->icmp_ipv6_recvhoplimit) { 3593 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3594 icmp_ipv6_recvhoplimit = B_TRUE; 3595 } 3596 3597 if (icmp->icmp_ipv6_recvtclass) 3598 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3599 3600 mp1 = allocb(udi_size, BPRI_MED); 3601 if (mp1 == NULL) { 3602 freemsg(mp); 3603 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3604 return; 3605 } 3606 mp1->b_cont = mp; 3607 mp = mp1; 3608 mp->b_datap->db_type = M_PROTO; 3609 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3610 mp->b_wptr = (uchar_t *)tudi + udi_size; 3611 tudi->PRIM_type = T_UNITDATA_IND; 3612 tudi->SRC_length = sizeof (sin6_t); 3613 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3614 tudi->OPT_offset = sizeof 
(struct T_unitdata_ind) + sizeof (sin6_t); 3615 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3616 tudi->OPT_length = udi_size; 3617 sin6 = (sin6_t *)&tudi[1]; 3618 sin6->sin6_port = 0; 3619 sin6->sin6_family = AF_INET6; 3620 3621 sin6->sin6_addr = ip6h->ip6_src; 3622 /* No sin6_flowinfo per API */ 3623 sin6->sin6_flowinfo = 0; 3624 /* For link-scope source pass up scope id */ 3625 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3626 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3627 sin6->sin6_scope_id = ipp.ipp_ifindex; 3628 else 3629 sin6->sin6_scope_id = 0; 3630 3631 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3632 icmp->icmp_zoneid, is->is_netstack); 3633 3634 if (udi_size != 0) { 3635 uchar_t *dstopt; 3636 3637 dstopt = (uchar_t *)&sin6[1]; 3638 if (icmp_opt & IPPF_IFINDEX) { 3639 struct T_opthdr *toh; 3640 struct in6_pktinfo *pkti; 3641 3642 toh = (struct T_opthdr *)dstopt; 3643 toh->level = IPPROTO_IPV6; 3644 toh->name = IPV6_PKTINFO; 3645 toh->len = sizeof (struct T_opthdr) + 3646 sizeof (*pkti); 3647 toh->status = 0; 3648 dstopt += sizeof (struct T_opthdr); 3649 pkti = (struct in6_pktinfo *)dstopt; 3650 pkti->ipi6_addr = ip6h->ip6_dst; 3651 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3652 dstopt += sizeof (*pkti); 3653 udi_size -= toh->len; 3654 } 3655 if (icmp_ipv6_recvhoplimit) { 3656 struct T_opthdr *toh; 3657 3658 toh = (struct T_opthdr *)dstopt; 3659 toh->level = IPPROTO_IPV6; 3660 toh->name = IPV6_HOPLIMIT; 3661 toh->len = sizeof (struct T_opthdr) + 3662 sizeof (uint_t); 3663 toh->status = 0; 3664 dstopt += sizeof (struct T_opthdr); 3665 *(uint_t *)dstopt = ip6h->ip6_hops; 3666 dstopt += sizeof (uint_t); 3667 udi_size -= toh->len; 3668 } 3669 if (icmp->icmp_ipv6_recvtclass) { 3670 struct T_opthdr *toh; 3671 3672 toh = (struct T_opthdr *)dstopt; 3673 toh->level = IPPROTO_IPV6; 3674 toh->name = IPV6_TCLASS; 3675 toh->len = sizeof (struct T_opthdr) + 3676 sizeof (uint_t); 3677 toh->status = 0; 3678 dstopt += sizeof (struct T_opthdr); 3679 
*(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3680 dstopt += sizeof (uint_t); 3681 udi_size -= toh->len; 3682 } 3683 if (icmp_opt & IPPF_HOPOPTS) { 3684 struct T_opthdr *toh; 3685 3686 toh = (struct T_opthdr *)dstopt; 3687 toh->level = IPPROTO_IPV6; 3688 toh->name = IPV6_HOPOPTS; 3689 toh->len = sizeof (struct T_opthdr) + 3690 ipp.ipp_hopoptslen - hopstrip; 3691 toh->status = 0; 3692 dstopt += sizeof (struct T_opthdr); 3693 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 3694 ipp.ipp_hopoptslen - hopstrip); 3695 if (hopstrip > 0) { 3696 /* copy next header value and fake length */ 3697 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 3698 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 3699 hopstrip / 8; 3700 } 3701 dstopt += ipp.ipp_hopoptslen - hopstrip; 3702 udi_size -= toh->len; 3703 } 3704 if (icmp_opt & IPPF_RTDSTOPTS) { 3705 struct T_opthdr *toh; 3706 3707 toh = (struct T_opthdr *)dstopt; 3708 toh->level = IPPROTO_IPV6; 3709 toh->name = IPV6_DSTOPTS; 3710 toh->len = sizeof (struct T_opthdr) + 3711 ipp.ipp_rtdstoptslen; 3712 toh->status = 0; 3713 dstopt += sizeof (struct T_opthdr); 3714 bcopy(ipp.ipp_rtdstopts, dstopt, 3715 ipp.ipp_rtdstoptslen); 3716 dstopt += ipp.ipp_rtdstoptslen; 3717 udi_size -= toh->len; 3718 } 3719 if (icmp_opt & IPPF_RTHDR) { 3720 struct T_opthdr *toh; 3721 3722 toh = (struct T_opthdr *)dstopt; 3723 toh->level = IPPROTO_IPV6; 3724 toh->name = IPV6_RTHDR; 3725 toh->len = sizeof (struct T_opthdr) + 3726 ipp.ipp_rthdrlen; 3727 toh->status = 0; 3728 dstopt += sizeof (struct T_opthdr); 3729 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 3730 dstopt += ipp.ipp_rthdrlen; 3731 udi_size -= toh->len; 3732 } 3733 if (icmp_opt & IPPF_DSTOPTS) { 3734 struct T_opthdr *toh; 3735 3736 toh = (struct T_opthdr *)dstopt; 3737 toh->level = IPPROTO_IPV6; 3738 toh->name = IPV6_DSTOPTS; 3739 toh->len = sizeof (struct T_opthdr) + 3740 ipp.ipp_dstoptslen; 3741 toh->status = 0; 3742 dstopt += sizeof (struct T_opthdr); 3743 bcopy(ipp.ipp_dstopts, dstopt, 
3744 ipp.ipp_dstoptslen); 3745 dstopt += ipp.ipp_dstoptslen; 3746 udi_size -= toh->len; 3747 } 3748 /* Consumed all of allocated space */ 3749 ASSERT(udi_size == 0); 3750 } 3751 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3752 putnext(connp->conn_rq, mp); 3753 } 3754 3755 /* 3756 * Handle the results of a T_BIND_REQ whether deferred by IP or handled 3757 * immediately. 3758 */ 3759 static void 3760 icmp_bind_result(conn_t *connp, mblk_t *mp) 3761 { 3762 struct T_error_ack *tea; 3763 3764 switch (mp->b_datap->db_type) { 3765 case M_PROTO: 3766 case M_PCPROTO: 3767 /* M_PROTO messages contain some type of TPI message. */ 3768 if ((mp->b_wptr - mp->b_rptr) < sizeof (t_scalar_t)) { 3769 freemsg(mp); 3770 return; 3771 } 3772 tea = (struct T_error_ack *)mp->b_rptr; 3773 3774 switch (tea->PRIM_type) { 3775 case T_ERROR_ACK: 3776 switch (tea->ERROR_prim) { 3777 case O_T_BIND_REQ: 3778 case T_BIND_REQ: 3779 icmp_bind_error(connp, mp); 3780 return; 3781 default: 3782 break; 3783 } 3784 ASSERT(0); 3785 freemsg(mp); 3786 return; 3787 3788 case T_BIND_ACK: 3789 icmp_bind_ack(connp, mp); 3790 return; 3791 3792 default: 3793 break; 3794 } 3795 freemsg(mp); 3796 return; 3797 default: 3798 /* FIXME: other cases? */ 3799 ASSERT(0); 3800 freemsg(mp); 3801 return; 3802 } 3803 } 3804 3805 /* 3806 * Process a T_BIND_ACK 3807 */ 3808 static void 3809 icmp_bind_ack(conn_t *connp, mblk_t *mp) 3810 { 3811 icmp_t *icmp = connp->conn_icmp; 3812 mblk_t *mp1; 3813 ire_t *ire; 3814 struct T_bind_ack *tba; 3815 uchar_t *addrp; 3816 ipa_conn_t *ac; 3817 ipa6_conn_t *ac6; 3818 3819 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3820 /* 3821 * We know if headers are included or not so we can 3822 * safely do this. 3823 */ 3824 if (icmp->icmp_state == TS_UNBND) { 3825 /* 3826 * TPI has not yet bound - bind sent by 3827 * icmp_bind_proto. 
3828 */ 3829 freemsg(mp); 3830 rw_exit(&icmp->icmp_rwlock); 3831 return; 3832 } 3833 ASSERT(icmp->icmp_pending_op != -1); 3834 3835 /* 3836 * If a broadcast/multicast address was bound set 3837 * the source address to 0. 3838 * This ensures no datagrams with broadcast address 3839 * as source address are emitted (which would violate 3840 * RFC1122 - Hosts requirements) 3841 * 3842 * Note that when connecting the returned IRE is 3843 * for the destination address and we only perform 3844 * the broadcast check for the source address (it 3845 * is OK to connect to a broadcast/multicast address.) 3846 */ 3847 mp1 = mp->b_cont; 3848 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 3849 ire = (ire_t *)mp1->b_rptr; 3850 3851 /* 3852 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 3853 * local address. 3854 */ 3855 if (ire->ire_type == IRE_BROADCAST && 3856 icmp->icmp_state != TS_DATA_XFER) { 3857 ASSERT(icmp->icmp_pending_op == T_BIND_REQ || 3858 icmp->icmp_pending_op == O_T_BIND_REQ); 3859 /* This was just a local bind to a MC/broadcast addr */ 3860 V6_SET_ZERO(icmp->icmp_v6src); 3861 if (icmp->icmp_family == AF_INET6) 3862 (void) icmp_build_hdrs(icmp); 3863 } else if (V6_OR_V4_INADDR_ANY(icmp->icmp_v6src)) { 3864 /* 3865 * Local address not yet set - pick it from the 3866 * T_bind_ack 3867 */ 3868 tba = (struct T_bind_ack *)mp->b_rptr; 3869 addrp = &mp->b_rptr[tba->ADDR_offset]; 3870 switch (icmp->icmp_family) { 3871 case AF_INET: 3872 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 3873 ac = (ipa_conn_t *)addrp; 3874 } else { 3875 ASSERT(tba->ADDR_length == 3876 sizeof (ipa_conn_x_t)); 3877 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 3878 } 3879 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 3880 &icmp->icmp_v6src); 3881 break; 3882 case AF_INET6: 3883 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 3884 ac6 = (ipa6_conn_t *)addrp; 3885 } else { 3886 ASSERT(tba->ADDR_length == 3887 sizeof (ipa6_conn_x_t)); 3888 ac6 = &((ipa6_conn_x_t *) 3889 addrp)->ac6x_conn; 
3890 } 3891 icmp->icmp_v6src = ac6->ac6_laddr; 3892 (void) icmp_build_hdrs(icmp); 3893 } 3894 } 3895 mp1 = mp1->b_cont; 3896 } 3897 icmp->icmp_pending_op = -1; 3898 rw_exit(&icmp->icmp_rwlock); 3899 /* 3900 * Look for one or more appended ACK message added by 3901 * icmp_connect or icmp_disconnect. 3902 * If none found just send up the T_BIND_ACK. 3903 * icmp_connect has appended a T_OK_ACK and a 3904 * T_CONN_CON. 3905 * icmp_disconnect has appended a T_OK_ACK. 3906 */ 3907 if (mp1 != NULL) { 3908 if (mp->b_cont == mp1) 3909 mp->b_cont = NULL; 3910 else { 3911 ASSERT(mp->b_cont->b_cont == mp1); 3912 mp->b_cont->b_cont = NULL; 3913 } 3914 freemsg(mp); 3915 mp = mp1; 3916 while (mp != NULL) { 3917 mp1 = mp->b_cont; 3918 mp->b_cont = NULL; 3919 putnext(connp->conn_rq, mp); 3920 mp = mp1; 3921 } 3922 return; 3923 } 3924 freemsg(mp->b_cont); 3925 mp->b_cont = NULL; 3926 putnext(connp->conn_rq, mp); 3927 } 3928 3929 static void 3930 icmp_bind_error(conn_t *connp, mblk_t *mp) 3931 { 3932 icmp_t *icmp = connp->conn_icmp; 3933 struct T_error_ack *tea; 3934 3935 tea = (struct T_error_ack *)mp->b_rptr; 3936 /* 3937 * If our O_T_BIND_REQ/T_BIND_REQ fails, 3938 * clear out the source address before 3939 * passing the message upstream. 3940 * If this was caused by a T_CONN_REQ 3941 * revert back to bound state. 3942 */ 3943 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3944 if (icmp->icmp_state == TS_UNBND) { 3945 /* 3946 * TPI has not yet bound - bind sent by icmp_bind_proto. 
3947 */ 3948 freemsg(mp); 3949 rw_exit(&icmp->icmp_rwlock); 3950 return; 3951 } 3952 ASSERT(icmp->icmp_pending_op != -1); 3953 tea->ERROR_prim = icmp->icmp_pending_op; 3954 icmp->icmp_pending_op = -1; 3955 3956 switch (tea->ERROR_prim) { 3957 case T_CONN_REQ: 3958 ASSERT(icmp->icmp_state == TS_DATA_XFER); 3959 /* Connect failed */ 3960 /* Revert back to the bound source */ 3961 icmp->icmp_v6src = icmp->icmp_bound_v6src; 3962 icmp->icmp_state = TS_IDLE; 3963 if (icmp->icmp_family == AF_INET6) 3964 (void) icmp_build_hdrs(icmp); 3965 break; 3966 3967 case T_DISCON_REQ: 3968 case T_BIND_REQ: 3969 case O_T_BIND_REQ: 3970 V6_SET_ZERO(icmp->icmp_v6src); 3971 V6_SET_ZERO(icmp->icmp_bound_v6src); 3972 icmp->icmp_state = TS_UNBND; 3973 if (icmp->icmp_family == AF_INET6) 3974 (void) icmp_build_hdrs(icmp); 3975 break; 3976 default: 3977 break; 3978 } 3979 rw_exit(&icmp->icmp_rwlock); 3980 putnext(connp->conn_rq, mp); 3981 } 3982 3983 /* 3984 * return SNMP stuff in buffer in mpdata 3985 */ 3986 mblk_t * 3987 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 3988 { 3989 mblk_t *mpdata; 3990 struct opthdr *optp; 3991 conn_t *connp = Q_TO_CONN(q); 3992 icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 3993 mblk_t *mp2ctl; 3994 3995 /* 3996 * make a copy of the original message 3997 */ 3998 mp2ctl = copymsg(mpctl); 3999 4000 if (mpctl == NULL || 4001 (mpdata = mpctl->b_cont) == NULL) { 4002 freemsg(mpctl); 4003 freemsg(mp2ctl); 4004 return (0); 4005 } 4006 4007 /* fixed length structure for IPv4 and IPv6 counters */ 4008 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4009 optp->level = EXPER_RAWIP; 4010 optp->name = 0; 4011 (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, 4012 sizeof (is->is_rawip_mib)); 4013 optp->len = msgdsize(mpdata); 4014 qreply(q, mpctl); 4015 4016 return (mp2ctl); 4017 } 4018 4019 /* 4020 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 
4021 * TODO: If this ever actually tries to set anything, it needs to be 4022 * to do the appropriate locking. 4023 */ 4024 /* ARGSUSED */ 4025 int 4026 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4027 uchar_t *ptr, int len) 4028 { 4029 switch (level) { 4030 case EXPER_RAWIP: 4031 return (0); 4032 default: 4033 return (1); 4034 } 4035 } 4036 4037 /* Report for ndd "icmp_status" */ 4038 /* ARGSUSED */ 4039 static int 4040 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4041 { 4042 conn_t *connp; 4043 ip_stack_t *ipst; 4044 char laddrbuf[INET6_ADDRSTRLEN]; 4045 char faddrbuf[INET6_ADDRSTRLEN]; 4046 int i; 4047 4048 (void) mi_mpprintf(mp, 4049 "RAWIP " MI_COL_HDRPAD_STR 4050 /* 01234567[89ABCDEF] */ 4051 " src addr dest addr state"); 4052 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 4053 4054 connp = Q_TO_CONN(q); 4055 ipst = connp->conn_netstack->netstack_ip; 4056 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4057 connf_t *connfp; 4058 char *state; 4059 4060 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4061 connp = NULL; 4062 4063 while ((connp = ipcl_get_next_conn(connfp, connp, 4064 IPCL_RAWIPCONN)) != NULL) { 4065 icmp_t *icmp; 4066 4067 mutex_enter(&(connp)->conn_lock); 4068 icmp = connp->conn_icmp; 4069 4070 if (icmp->icmp_state == TS_UNBND) 4071 state = "UNBOUND"; 4072 else if (icmp->icmp_state == TS_IDLE) 4073 state = "IDLE"; 4074 else if (icmp->icmp_state == TS_DATA_XFER) 4075 state = "CONNECTED"; 4076 else 4077 state = "UnkState"; 4078 4079 (void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s", 4080 (void *)icmp, 4081 inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf, 4082 sizeof (faddrbuf)), 4083 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 4084 sizeof (laddrbuf)), 4085 state); 4086 mutex_exit(&(connp)->conn_lock); 4087 } 4088 } 4089 return (0); 4090 } 4091 4092 /* 4093 * This routine creates a T_UDERROR_IND message and passes it upstream. 
4094 * The address and options are copied from the T_UNITDATA_REQ message 4095 * passed in mp. This message is freed. 4096 */ 4097 static void 4098 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 4099 { 4100 mblk_t *mp1; 4101 uchar_t *rptr = mp->b_rptr; 4102 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 4103 4104 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 4105 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 4106 tudr->OPT_length, err); 4107 if (mp1) 4108 qreply(q, mp1); 4109 freemsg(mp); 4110 } 4111 4112 /* 4113 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 4114 * After some error checking, the message is passed downstream to ip. 4115 */ 4116 static void 4117 icmp_unbind(queue_t *q, mblk_t *mp) 4118 { 4119 icmp_t *icmp = Q_TO_ICMP(q); 4120 4121 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4122 /* If a bind has not been done, we can't unbind. */ 4123 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 4124 rw_exit(&icmp->icmp_rwlock); 4125 icmp_err_ack(q, mp, TOUTSTATE, 0); 4126 return; 4127 } 4128 icmp->icmp_pending_op = T_UNBIND_REQ; 4129 rw_exit(&icmp->icmp_rwlock); 4130 4131 /* 4132 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 4133 * and therefore ip_unbind must never return NULL. 4134 */ 4135 mp = ip_unbind(q, mp); 4136 ASSERT(mp != NULL); 4137 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4138 4139 /* 4140 * Once we're unbound from IP, the pending operation may be cleared 4141 * here. 4142 */ 4143 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4144 V6_SET_ZERO(icmp->icmp_v6src); 4145 V6_SET_ZERO(icmp->icmp_bound_v6src); 4146 icmp->icmp_pending_op = -1; 4147 icmp->icmp_state = TS_UNBND; 4148 if (icmp->icmp_family == AF_INET6) 4149 (void) icmp_build_hdrs(icmp); 4150 rw_exit(&icmp->icmp_rwlock); 4151 4152 qreply(q, mp); 4153 } 4154 4155 /* 4156 * Process IPv4 packets that already include an IP header. 
4157 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 4158 * IPPROTO_IGMP). 4159 */ 4160 static void 4161 icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop) 4162 { 4163 icmp_stack_t *is = icmp->icmp_is; 4164 ipha_t *ipha; 4165 int ip_hdr_length; 4166 int tp_hdr_len; 4167 mblk_t *mp1; 4168 uint_t pkt_len; 4169 ip_opt_info_t optinfo; 4170 conn_t *connp = icmp->icmp_connp; 4171 4172 optinfo.ip_opt_flags = 0; 4173 optinfo.ip_opt_ill_index = 0; 4174 ipha = (ipha_t *)mp->b_rptr; 4175 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4176 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 4177 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4178 ASSERT(icmp != NULL); 4179 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4180 freemsg(mp); 4181 return; 4182 } 4183 ipha = (ipha_t *)mp->b_rptr; 4184 } 4185 ipha->ipha_version_and_hdr_length = 4186 (IP_VERSION<<4) | (ip_hdr_length>>2); 4187 4188 /* 4189 * For the socket of SOCK_RAW type, the checksum is provided in the 4190 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4191 * tell IP that the application has sent a complete IP header and not 4192 * to compute the transport checksum nor change the DF flag. 4193 */ 4194 ipha->ipha_ident = IP_HDR_INCLUDED; 4195 ipha->ipha_hdr_checksum = 0; 4196 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 4197 /* Insert options if any */ 4198 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4199 /* 4200 * Put the IP header plus any transport header that is 4201 * checksumed by ip_wput into the first mblk. (ip_wput assumes 4202 * that at least the checksum field is in the first mblk.) 4203 */ 4204 switch (ipha->ipha_protocol) { 4205 case IPPROTO_UDP: 4206 tp_hdr_len = 8; 4207 break; 4208 case IPPROTO_TCP: 4209 tp_hdr_len = 20; 4210 break; 4211 default: 4212 tp_hdr_len = 0; 4213 break; 4214 } 4215 /* 4216 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 4217 * tp_hdr_len bytes will be in a single mblk. 
4218 */ 4219 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 4220 tp_hdr_len)) { 4221 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 4222 tp_hdr_len)) { 4223 BUMP_MIB(&is->is_rawip_mib, 4224 rawipOutErrors); 4225 freemsg(mp); 4226 return; 4227 } 4228 ipha = (ipha_t *)mp->b_rptr; 4229 } 4230 4231 /* 4232 * if the length is larger then the max allowed IP packet, 4233 * then send an error and abort the processing. 4234 */ 4235 pkt_len = ntohs(ipha->ipha_length) 4236 + icmp->icmp_ip_snd_options_len; 4237 if (pkt_len > IP_MAXPACKET) { 4238 icmp_ud_err(q, mp, EMSGSIZE); 4239 return; 4240 } 4241 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra + 4242 tp_hdr_len, BPRI_LO))) { 4243 icmp_ud_err(q, mp, ENOMEM); 4244 return; 4245 } 4246 mp1->b_rptr += is->is_wroff_extra; 4247 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4248 4249 ipha->ipha_length = htons((uint16_t)pkt_len); 4250 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4251 4252 /* Copy transport header if any */ 4253 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4254 mp1->b_wptr += tp_hdr_len; 4255 4256 /* Add options */ 4257 ipha = (ipha_t *)mp1->b_rptr; 4258 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 4259 icmp->icmp_ip_snd_options_len); 4260 4261 /* Drop IP header and transport header from original */ 4262 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4263 4264 mp1->b_cont = mp; 4265 mp = mp1; 4266 /* 4267 * Massage source route putting first source 4268 * route in ipha_dst. 
4269 */ 4270 (void) ip_massage_options(ipha, is->is_netstack); 4271 } 4272 4273 if (pktinfop != NULL) { 4274 /* 4275 * Over write the source address provided in the header 4276 */ 4277 if (pktinfop->ip4_addr != INADDR_ANY) { 4278 ipha->ipha_src = pktinfop->ip4_addr; 4279 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4280 } 4281 4282 if (pktinfop->ip4_ill_index != 0) { 4283 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4284 } 4285 } 4286 4287 mblk_setcred(mp, connp->conn_cred); 4288 ip_output_options(connp, mp, q, IP_WPUT, 4289 &optinfo); 4290 } 4291 4292 static boolean_t 4293 icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4294 { 4295 int err; 4296 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4297 icmp_stack_t *is = icmp->icmp_is; 4298 conn_t *connp = icmp->icmp_connp; 4299 4300 err = tsol_compute_label(DB_CREDDEF(mp, connp->conn_cred), dst, 4301 opt_storage, icmp->icmp_mac_exempt, 4302 is->is_netstack->netstack_ip); 4303 if (err == 0) { 4304 err = tsol_update_options(&icmp->icmp_ip_snd_options, 4305 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4306 opt_storage); 4307 } 4308 if (err != 0) { 4309 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4310 DTRACE_PROBE4( 4311 tx__ip__log__drop__updatelabel__icmp, 4312 char *, "queue(1) failed to update options(2) on mp(3)", 4313 queue_t *, q, char *, opt_storage, mblk_t *, mp); 4314 icmp_ud_err(q, mp, err); 4315 return (B_FALSE); 4316 } 4317 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4318 return (B_TRUE); 4319 } 4320 4321 /* 4322 * This routine handles all messages passed downstream. It either 4323 * consumes the message or passes it downstream; it never queues a 4324 * a message. 
4325 */ 4326 static void 4327 icmp_wput(queue_t *q, mblk_t *mp) 4328 { 4329 uchar_t *rptr = mp->b_rptr; 4330 ipha_t *ipha; 4331 mblk_t *mp1; 4332 int ip_hdr_length; 4333 #define tudr ((struct T_unitdata_req *)rptr) 4334 size_t ip_len; 4335 conn_t *connp = Q_TO_CONN(q); 4336 icmp_t *icmp = connp->conn_icmp; 4337 icmp_stack_t *is = icmp->icmp_is; 4338 sin6_t *sin6; 4339 sin_t *sin; 4340 ipaddr_t v4dst; 4341 ip4_pkt_t pktinfo; 4342 ip4_pkt_t *pktinfop = &pktinfo; 4343 ip_opt_info_t optinfo; 4344 4345 switch (mp->b_datap->db_type) { 4346 case M_DATA: 4347 if (icmp->icmp_hdrincl) { 4348 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4349 ipha = (ipha_t *)mp->b_rptr; 4350 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4351 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4352 BUMP_MIB(&is->is_rawip_mib, 4353 rawipOutErrors); 4354 freemsg(mp); 4355 return; 4356 } 4357 ipha = (ipha_t *)mp->b_rptr; 4358 } 4359 /* 4360 * If this connection was used for v6 (inconceivable!) 4361 * or if we have a new destination, then it's time to 4362 * figure a new label. 4363 */ 4364 if (is_system_labeled() && 4365 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4366 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4367 ipha->ipha_dst) && 4368 !icmp_update_label(q, icmp, mp, ipha->ipha_dst)) { 4369 return; 4370 } 4371 icmp_wput_hdrincl(q, mp, icmp, NULL); 4372 return; 4373 } 4374 freemsg(mp); 4375 return; 4376 case M_PROTO: 4377 case M_PCPROTO: 4378 ip_len = mp->b_wptr - rptr; 4379 if (ip_len >= sizeof (struct T_unitdata_req)) { 4380 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4381 if (((union T_primitives *)rptr)->type 4382 == T_UNITDATA_REQ) 4383 break; 4384 } 4385 /* FALLTHRU */ 4386 default: 4387 icmp_wput_other(q, mp); 4388 return; 4389 } 4390 4391 /* Handle T_UNITDATA_REQ messages here. */ 4392 4393 4394 4395 if (icmp->icmp_state == TS_UNBND) { 4396 /* If a port has not been bound to the stream, fail. 
*/ 4397 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4398 icmp_ud_err(q, mp, EPROTO); 4399 return; 4400 } 4401 mp1 = mp->b_cont; 4402 if (mp1 == NULL) { 4403 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4404 icmp_ud_err(q, mp, EPROTO); 4405 return; 4406 } 4407 4408 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4409 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4410 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4411 return; 4412 } 4413 4414 switch (icmp->icmp_family) { 4415 case AF_INET6: 4416 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4417 if (!OK_32PTR((char *)sin6) || 4418 tudr->DEST_length != sizeof (sin6_t) || 4419 sin6->sin6_family != AF_INET6) { 4420 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4421 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4422 return; 4423 } 4424 4425 /* No support for mapped addresses on raw sockets */ 4426 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4427 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4428 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4429 return; 4430 } 4431 4432 /* 4433 * Destination is a native IPv6 address. 4434 * Send out an IPv6 format packet. 
4435 */ 4436 icmp_wput_ipv6(q, mp, sin6, tudr->OPT_length); 4437 return; 4438 4439 case AF_INET: 4440 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4441 if (!OK_32PTR((char *)sin) || 4442 tudr->DEST_length != sizeof (sin_t) || 4443 sin->sin_family != AF_INET) { 4444 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4445 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4446 return; 4447 } 4448 /* Extract and ipaddr */ 4449 v4dst = sin->sin_addr.s_addr; 4450 break; 4451 4452 default: 4453 ASSERT(0); 4454 } 4455 4456 pktinfop->ip4_ill_index = 0; 4457 pktinfop->ip4_addr = INADDR_ANY; 4458 optinfo.ip_opt_flags = 0; 4459 optinfo.ip_opt_ill_index = 0; 4460 4461 4462 /* 4463 * If options passed in, feed it for verification and handling 4464 */ 4465 if (tudr->OPT_length != 0) { 4466 int error; 4467 4468 error = 0; 4469 if (icmp_unitdata_opt_process(q, mp, &error, 4470 (void *)pktinfop) < 0) { 4471 /* failure */ 4472 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4473 icmp_ud_err(q, mp, error); 4474 return; 4475 } 4476 ASSERT(error == 0); 4477 /* 4478 * Note: Success in processing options. 
4479 * mp option buffer represented by 4480 * OPT_length/offset now potentially modified 4481 * and contain option setting results 4482 */ 4483 4484 } 4485 4486 if (v4dst == INADDR_ANY) 4487 v4dst = htonl(INADDR_LOOPBACK); 4488 4489 /* Check if our saved options are valid; update if not */ 4490 if (is_system_labeled() && 4491 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4492 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst) && 4493 !icmp_update_label(q, icmp, mp, v4dst)) { 4494 return; 4495 } 4496 4497 /* Protocol 255 contains full IP headers */ 4498 if (icmp->icmp_hdrincl) { 4499 freeb(mp); 4500 icmp_wput_hdrincl(q, mp1, icmp, pktinfop); 4501 return; 4502 } 4503 4504 4505 /* Add an IP header */ 4506 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4507 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 4508 if ((uchar_t *)ipha < mp1->b_datap->db_base || 4509 mp1->b_datap->db_ref != 1 || 4510 !OK_32PTR(ipha)) { 4511 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, 4512 BPRI_LO))) { 4513 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4514 icmp_ud_err(q, mp, ENOMEM); 4515 return; 4516 } 4517 mp1->b_cont = mp->b_cont; 4518 ipha = (ipha_t *)mp1->b_datap->db_lim; 4519 mp1->b_wptr = (uchar_t *)ipha; 4520 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4521 } 4522 #ifdef _BIG_ENDIAN 4523 /* Set version, header length, and tos */ 4524 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4525 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4526 icmp->icmp_type_of_service); 4527 /* Set ttl and protocol */ 4528 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4529 #else 4530 /* Set version, header length, and tos */ 4531 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4532 ((icmp->icmp_type_of_service << 8) | 4533 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4534 /* Set ttl and protocol */ 4535 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4536 #endif 4537 if (pktinfop->ip4_addr != INADDR_ANY) { 4538 
ipha->ipha_src = pktinfop->ip4_addr; 4539 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4540 } else { 4541 4542 /* 4543 * Copy our address into the packet. If this is zero, 4544 * ip will fill in the real source address. 4545 */ 4546 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4547 } 4548 4549 ipha->ipha_fragment_offset_and_flags = 0; 4550 4551 if (pktinfop->ip4_ill_index != 0) { 4552 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4553 } 4554 4555 4556 /* 4557 * For the socket of SOCK_RAW type, the checksum is provided in the 4558 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4559 * tell IP that the application has sent a complete IP header and not 4560 * to compute the transport checksum nor change the DF flag. 4561 */ 4562 ipha->ipha_ident = IP_HDR_INCLUDED; 4563 4564 /* Finish common formatting of the packet. */ 4565 mp1->b_rptr = (uchar_t *)ipha; 4566 4567 ip_len = mp1->b_wptr - (uchar_t *)ipha; 4568 if (mp1->b_cont != NULL) 4569 ip_len += msgdsize(mp1->b_cont); 4570 4571 /* 4572 * Set the length into the IP header. 4573 * If the length is greater than the maximum allowed by IP, 4574 * then free the message and return. Do not try and send it 4575 * as this can cause problems in layers below. 4576 */ 4577 if (ip_len > IP_MAXPACKET) { 4578 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4579 icmp_ud_err(q, mp, EMSGSIZE); 4580 return; 4581 } 4582 ipha->ipha_length = htons((uint16_t)ip_len); 4583 /* 4584 * Copy in the destination address from the T_UNITDATA 4585 * request 4586 */ 4587 ipha->ipha_dst = v4dst; 4588 4589 /* 4590 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4591 */ 4592 if (CLASSD(v4dst)) 4593 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4594 4595 /* Copy in options if any */ 4596 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4597 bcopy(icmp->icmp_ip_snd_options, 4598 &ipha[1], icmp->icmp_ip_snd_options_len); 4599 /* 4600 * Massage source route putting first source route in ipha_dst. 
4601 * Ignore the destination in the T_unitdata_req. 4602 */ 4603 (void) ip_massage_options(ipha, is->is_netstack); 4604 } 4605 4606 freeb(mp); 4607 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 4608 mblk_setcred(mp1, connp->conn_cred); 4609 ip_output_options(Q_TO_CONN(q), mp1, q, IP_WPUT, &optinfo); 4610 #undef ipha 4611 #undef tudr 4612 } 4613 4614 static boolean_t 4615 icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4616 { 4617 int err; 4618 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4619 icmp_stack_t *is = icmp->icmp_is; 4620 conn_t *connp = icmp->icmp_connp; 4621 4622 err = tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), dst, 4623 opt_storage, icmp->icmp_mac_exempt, 4624 is->is_netstack->netstack_ip); 4625 if (err == 0) { 4626 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4627 &icmp->icmp_label_len_v6, opt_storage); 4628 } 4629 if (err != 0) { 4630 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4631 DTRACE_PROBE4( 4632 tx__ip__log__drop__updatelabel__icmp6, 4633 char *, "queue(1) failed to update options(2) on mp(3)", 4634 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4635 icmp_ud_err(wq, mp, err); 4636 return (B_FALSE); 4637 } 4638 4639 icmp->icmp_v6lastdst = *dst; 4640 return (B_TRUE); 4641 } 4642 4643 /* 4644 * icmp_wput_ipv6(): 4645 * Assumes that icmp_wput did some sanity checking on the destination 4646 * address, but that the label may not yet be correct. 
4647 */ 4648 void 4649 icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) 4650 { 4651 ip6_t *ip6h; 4652 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 4653 mblk_t *mp1; 4654 int ip_hdr_len = IPV6_HDR_LEN; 4655 size_t ip_len; 4656 icmp_t *icmp = Q_TO_ICMP(q); 4657 icmp_stack_t *is = icmp->icmp_is; 4658 ip6_pkt_t ipp_s; /* For ancillary data options */ 4659 ip6_pkt_t *ipp = &ipp_s; 4660 ip6_pkt_t *tipp; 4661 uint32_t csum = 0; 4662 uint_t ignore = 0; 4663 uint_t option_exists = 0, is_sticky = 0; 4664 uint8_t *cp; 4665 uint8_t *nxthdr_ptr; 4666 in6_addr_t ip6_dst; 4667 4668 /* 4669 * If the local address is a mapped address return 4670 * an error. 4671 * It would be possible to send an IPv6 packet but the 4672 * response would never make it back to the application 4673 * since it is bound to a mapped address. 4674 */ 4675 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4676 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4677 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4678 return; 4679 } 4680 4681 ipp->ipp_fields = 0; 4682 ipp->ipp_sticky_ignored = 0; 4683 4684 /* 4685 * If TPI options passed in, feed it for verification and handling 4686 */ 4687 if (tudr_optlen != 0) { 4688 int error; 4689 4690 if (icmp_unitdata_opt_process(q, mp, &error, 4691 (void *)ipp) < 0) { 4692 /* failure */ 4693 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4694 icmp_ud_err(q, mp, error); 4695 return; 4696 } 4697 ignore = ipp->ipp_sticky_ignored; 4698 ASSERT(error == 0); 4699 } 4700 4701 if (sin6->sin6_scope_id != 0 && 4702 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4703 /* 4704 * IPPF_SCOPE_ID is special. It's neither a sticky 4705 * option nor ancillary data. It needs to be 4706 * explicitly set in options_exists. 
4707 */ 4708 option_exists |= IPPF_SCOPE_ID; 4709 } 4710 4711 /* 4712 * Compute the destination address 4713 */ 4714 ip6_dst = sin6->sin6_addr; 4715 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4716 ip6_dst = ipv6_loopback; 4717 4718 /* 4719 * If we're not going to the same destination as last time, then 4720 * recompute the label required. This is done in a separate routine to 4721 * avoid blowing up our stack here. 4722 */ 4723 if (is_system_labeled() && 4724 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst) && 4725 !icmp_update_label_v6(q, icmp, mp, &ip6_dst)) { 4726 return; 4727 } 4728 4729 /* 4730 * If there's a security label here, then we ignore any options the 4731 * user may try to set. We keep the peer's label as a hidden sticky 4732 * option. 4733 */ 4734 if (icmp->icmp_label_len_v6 > 0) { 4735 ignore &= ~IPPF_HOPOPTS; 4736 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4737 } 4738 4739 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4740 (ipp->ipp_fields == 0)) { 4741 /* No sticky options nor ancillary data. */ 4742 goto no_options; 4743 } 4744 4745 /* 4746 * Go through the options figuring out where each is going to 4747 * come from and build two masks. The first mask indicates if 4748 * the option exists at all. The second mask indicates if the 4749 * option is sticky or ancillary. 
4750 */ 4751 if (!(ignore & IPPF_HOPOPTS)) { 4752 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4753 option_exists |= IPPF_HOPOPTS; 4754 ip_hdr_len += ipp->ipp_hopoptslen; 4755 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4756 option_exists |= IPPF_HOPOPTS; 4757 is_sticky |= IPPF_HOPOPTS; 4758 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4759 } 4760 } 4761 4762 if (!(ignore & IPPF_RTHDR)) { 4763 if (ipp->ipp_fields & IPPF_RTHDR) { 4764 option_exists |= IPPF_RTHDR; 4765 ip_hdr_len += ipp->ipp_rthdrlen; 4766 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4767 option_exists |= IPPF_RTHDR; 4768 is_sticky |= IPPF_RTHDR; 4769 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4770 } 4771 } 4772 4773 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4774 /* 4775 * Need to have a router header to use these. 4776 */ 4777 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4778 option_exists |= IPPF_RTDSTOPTS; 4779 ip_hdr_len += ipp->ipp_rtdstoptslen; 4780 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4781 option_exists |= IPPF_RTDSTOPTS; 4782 is_sticky |= IPPF_RTDSTOPTS; 4783 ip_hdr_len += 4784 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4785 } 4786 } 4787 4788 if (!(ignore & IPPF_DSTOPTS)) { 4789 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4790 option_exists |= IPPF_DSTOPTS; 4791 ip_hdr_len += ipp->ipp_dstoptslen; 4792 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4793 option_exists |= IPPF_DSTOPTS; 4794 is_sticky |= IPPF_DSTOPTS; 4795 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4796 } 4797 } 4798 4799 if (!(ignore & IPPF_IFINDEX)) { 4800 if (ipp->ipp_fields & IPPF_IFINDEX) { 4801 option_exists |= IPPF_IFINDEX; 4802 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4803 option_exists |= IPPF_IFINDEX; 4804 is_sticky |= IPPF_IFINDEX; 4805 } 4806 } 4807 4808 if (!(ignore & IPPF_ADDR)) { 4809 if (ipp->ipp_fields & IPPF_ADDR) { 4810 option_exists |= IPPF_ADDR; 4811 } else if 
(icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4812 option_exists |= IPPF_ADDR; 4813 is_sticky |= IPPF_ADDR; 4814 } 4815 } 4816 4817 if (!(ignore & IPPF_DONTFRAG)) { 4818 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4819 option_exists |= IPPF_DONTFRAG; 4820 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4821 option_exists |= IPPF_DONTFRAG; 4822 is_sticky |= IPPF_DONTFRAG; 4823 } 4824 } 4825 4826 if (!(ignore & IPPF_USE_MIN_MTU)) { 4827 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4828 option_exists |= IPPF_USE_MIN_MTU; 4829 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4830 IPPF_USE_MIN_MTU) { 4831 option_exists |= IPPF_USE_MIN_MTU; 4832 is_sticky |= IPPF_USE_MIN_MTU; 4833 } 4834 } 4835 4836 if (!(ignore & IPPF_NEXTHOP)) { 4837 if (ipp->ipp_fields & IPPF_NEXTHOP) { 4838 option_exists |= IPPF_NEXTHOP; 4839 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) { 4840 option_exists |= IPPF_NEXTHOP; 4841 is_sticky |= IPPF_NEXTHOP; 4842 } 4843 } 4844 4845 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 4846 option_exists |= IPPF_HOPLIMIT; 4847 /* IPV6_HOPLIMIT can never be sticky */ 4848 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 4849 4850 if (!(ignore & IPPF_UNICAST_HOPS) && 4851 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 4852 option_exists |= IPPF_UNICAST_HOPS; 4853 is_sticky |= IPPF_UNICAST_HOPS; 4854 } 4855 4856 if (!(ignore & IPPF_MULTICAST_HOPS) && 4857 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 4858 option_exists |= IPPF_MULTICAST_HOPS; 4859 is_sticky |= IPPF_MULTICAST_HOPS; 4860 } 4861 4862 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 4863 /* This is a sticky socket option only */ 4864 option_exists |= IPPF_NO_CKSUM; 4865 is_sticky |= IPPF_NO_CKSUM; 4866 } 4867 4868 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 4869 /* This is a sticky socket option only */ 4870 option_exists |= IPPF_RAW_CKSUM; 4871 is_sticky |= IPPF_RAW_CKSUM; 4872 } 4873 4874 if (!(ignore 
& IPPF_TCLASS)) { 4875 if (ipp->ipp_fields & IPPF_TCLASS) { 4876 option_exists |= IPPF_TCLASS; 4877 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 4878 option_exists |= IPPF_TCLASS; 4879 is_sticky |= IPPF_TCLASS; 4880 } 4881 } 4882 4883 no_options: 4884 4885 /* 4886 * If any options carried in the ip6i_t were specified, we 4887 * need to account for the ip6i_t in the data we'll be sending 4888 * down. 4889 */ 4890 if (option_exists & IPPF_HAS_IP6I) 4891 ip_hdr_len += sizeof (ip6i_t); 4892 4893 /* check/fix buffer config, setup pointers into it */ 4894 mp1 = mp->b_cont; 4895 ip6h = (ip6_t *)&mp1->b_rptr[-ip_hdr_len]; 4896 if ((mp1->b_datap->db_ref != 1) || 4897 ((unsigned char *)ip6h < mp1->b_datap->db_base) || 4898 !OK_32PTR(ip6h)) { 4899 /* Try to get everything in a single mblk next time */ 4900 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 4901 icmp->icmp_max_hdr_len = ip_hdr_len; 4902 (void) mi_set_sth_wroff(RD(q), 4903 icmp->icmp_max_hdr_len + is->is_wroff_extra); 4904 } 4905 mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); 4906 if (!mp1) { 4907 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4908 icmp_ud_err(q, mp, ENOMEM); 4909 return; 4910 } 4911 mp1->b_cont = mp->b_cont; 4912 mp1->b_wptr = mp1->b_datap->db_lim; 4913 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 4914 } 4915 mp1->b_rptr = (unsigned char *)ip6h; 4916 ip6i = (ip6i_t *)ip6h; 4917 4918 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 4919 if (option_exists & IPPF_HAS_IP6I) { 4920 ip6h = (ip6_t *)&ip6i[1]; 4921 ip6i->ip6i_flags = 0; 4922 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4923 4924 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 4925 if (option_exists & IPPF_SCOPE_ID) { 4926 ip6i->ip6i_flags |= IP6I_IFINDEX; 4927 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 4928 } else if (option_exists & IPPF_IFINDEX) { 4929 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 4930 ASSERT(tipp->ipp_ifindex != 0); 4931 ip6i->ip6i_flags |= IP6I_IFINDEX; 4932 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 4933 } 4934 4935 if (option_exists & IPPF_RAW_CKSUM) { 4936 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 4937 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 4938 } 4939 4940 if (option_exists & IPPF_NO_CKSUM) { 4941 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 4942 } 4943 4944 if (option_exists & IPPF_ADDR) { 4945 /* 4946 * Enable per-packet source address verification if 4947 * IPV6_PKTINFO specified the source address. 4948 * ip6_src is set in the transport's _wput function. 4949 */ 4950 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 4951 } 4952 4953 if (option_exists & IPPF_DONTFRAG) { 4954 ip6i->ip6i_flags |= IP6I_DONTFRAG; 4955 } 4956 4957 if (option_exists & IPPF_USE_MIN_MTU) { 4958 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 4959 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 4960 } 4961 4962 if (option_exists & IPPF_NEXTHOP) { 4963 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 4964 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 4965 ip6i->ip6i_flags |= IP6I_NEXTHOP; 4966 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 4967 } 4968 4969 /* 4970 * tell IP this is an ip6i_t private header 4971 */ 4972 ip6i->ip6i_nxt = IPPROTO_RAW; 4973 } 4974 4975 /* Initialize IPv6 header */ 4976 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4977 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 4978 4979 /* Set the hoplimit of the outgoing packet. 
*/ 4980 if (option_exists & IPPF_HOPLIMIT) { 4981 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 4982 ip6h->ip6_hops = ipp->ipp_hoplimit; 4983 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4984 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 4985 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 4986 if (option_exists & IPPF_MULTICAST_HOPS) 4987 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4988 } else { 4989 ip6h->ip6_hops = icmp->icmp_ttl; 4990 if (option_exists & IPPF_UNICAST_HOPS) 4991 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4992 } 4993 4994 if (option_exists & IPPF_ADDR) { 4995 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 4996 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 4997 ip6h->ip6_src = tipp->ipp_addr; 4998 } else { 4999 /* 5000 * The source address was not set using IPV6_PKTINFO. 5001 * First look at the bound source. 5002 * If unspecified fallback to __sin6_src_id. 5003 */ 5004 ip6h->ip6_src = icmp->icmp_v6src; 5005 if (sin6->__sin6_src_id != 0 && 5006 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 5007 ip_srcid_find_id(sin6->__sin6_src_id, 5008 &ip6h->ip6_src, icmp->icmp_zoneid, 5009 is->is_netstack); 5010 } 5011 } 5012 5013 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 5014 cp = (uint8_t *)&ip6h[1]; 5015 5016 /* 5017 * Here's where we have to start stringing together 5018 * any extension headers in the right order: 5019 * Hop-by-hop, destination, routing, and final destination opts. 
5020 */ 5021 if (option_exists & IPPF_HOPOPTS) { 5022 /* Hop-by-hop options */ 5023 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 5024 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 5025 5026 *nxthdr_ptr = IPPROTO_HOPOPTS; 5027 nxthdr_ptr = &hbh->ip6h_nxt; 5028 5029 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 5030 cp += tipp->ipp_hopoptslen; 5031 } 5032 /* 5033 * En-route destination options 5034 * Only do them if there's a routing header as well 5035 */ 5036 if (option_exists & IPPF_RTDSTOPTS) { 5037 ip6_dest_t *dst = (ip6_dest_t *)cp; 5038 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 5039 5040 *nxthdr_ptr = IPPROTO_DSTOPTS; 5041 nxthdr_ptr = &dst->ip6d_nxt; 5042 5043 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 5044 cp += tipp->ipp_rtdstoptslen; 5045 } 5046 /* 5047 * Routing header next 5048 */ 5049 if (option_exists & IPPF_RTHDR) { 5050 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 5051 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 5052 5053 *nxthdr_ptr = IPPROTO_ROUTING; 5054 nxthdr_ptr = &rt->ip6r_nxt; 5055 5056 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 5057 cp += tipp->ipp_rthdrlen; 5058 } 5059 /* 5060 * Do ultimate destination options 5061 */ 5062 if (option_exists & IPPF_DSTOPTS) { 5063 ip6_dest_t *dest = (ip6_dest_t *)cp; 5064 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 5065 5066 *nxthdr_ptr = IPPROTO_DSTOPTS; 5067 nxthdr_ptr = &dest->ip6d_nxt; 5068 5069 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 5070 cp += tipp->ipp_dstoptslen; 5071 } 5072 5073 /* 5074 * Now set the last header pointer to the proto passed in 5075 */ 5076 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 5077 *nxthdr_ptr = icmp->icmp_proto; 5078 5079 /* 5080 * Copy in the destination address 5081 */ 5082 ip6h->ip6_dst = ip6_dst; 5083 5084 ip6h->ip6_vcf = 5085 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 5086 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 5087 5088 if (option_exists & IPPF_TCLASS) { 5089 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 5090 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 5091 tipp->ipp_tclass); 5092 } 5093 if (option_exists & IPPF_RTHDR) { 5094 ip6_rthdr_t *rth; 5095 5096 /* 5097 * Perform any processing needed for source routing. 5098 * We know that all extension headers will be in the same mblk 5099 * as the IPv6 header. 5100 */ 5101 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 5102 if (rth != NULL && rth->ip6r_segleft != 0) { 5103 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 5104 /* 5105 * Drop packet - only support Type 0 routing. 5106 * Notify the application as well. 5107 */ 5108 icmp_ud_err(q, mp, EPROTO); 5109 BUMP_MIB(&is->is_rawip_mib, 5110 rawipOutErrors); 5111 return; 5112 } 5113 /* 5114 * rth->ip6r_len is twice the number of 5115 * addresses in the header 5116 */ 5117 if (rth->ip6r_len & 0x1) { 5118 icmp_ud_err(q, mp, EPROTO); 5119 BUMP_MIB(&is->is_rawip_mib, 5120 rawipOutErrors); 5121 return; 5122 } 5123 /* 5124 * Shuffle the routing header and ip6_dst 5125 * addresses, and get the checksum difference 5126 * between the first hop (in ip6_dst) and 5127 * the destination (in the last routing hdr entry). 5128 */ 5129 csum = ip_massage_options_v6(ip6h, rth, 5130 is->is_netstack); 5131 /* 5132 * Verify that the first hop isn't a mapped address. 5133 * Routers along the path need to do this verification 5134 * for subsequent hops. 5135 */ 5136 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 5137 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5138 BUMP_MIB(&is->is_rawip_mib, 5139 rawipOutErrors); 5140 return; 5141 } 5142 } 5143 } 5144 5145 ip_len = mp1->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 5146 if (mp1->b_cont != NULL) 5147 ip_len += msgdsize(mp1->b_cont); 5148 5149 /* 5150 * Set the length into the IP header. 5151 * If the length is greater than the maximum allowed by IP, 5152 * then free the message and return. Do not try and send it 5153 * as this can cause problems in layers below. 
5154 */ 5155 if (ip_len > IP_MAXPACKET) { 5156 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5157 icmp_ud_err(q, mp, EMSGSIZE); 5158 return; 5159 } 5160 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 5161 uint_t cksum_off; /* From ip6i == mp1->b_rptr */ 5162 uint16_t *cksum_ptr; 5163 uint_t ext_hdrs_len; 5164 5165 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 5166 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 5167 icmp->icmp_checksum_off == 2); 5168 5169 /* 5170 * We make it easy for IP to include our pseudo header 5171 * by putting our length in uh_checksum, modified (if 5172 * we have a routing header) by the checksum difference 5173 * between the ultimate destination and first hop addresses. 5174 * Note: ICMPv6 must always checksum the packet. 5175 */ 5176 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 5177 if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) { 5178 if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) { 5179 BUMP_MIB(&is->is_rawip_mib, 5180 rawipOutErrors); 5181 freemsg(mp); 5182 return; 5183 } 5184 ip6i = (ip6i_t *)mp1->b_rptr; 5185 if (ip6i->ip6i_nxt == IPPROTO_RAW) 5186 ip6h = (ip6_t *)&ip6i[1]; 5187 else 5188 ip6h = (ip6_t *)ip6i; 5189 } 5190 /* Add payload length to checksum */ 5191 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 5192 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 5193 csum += htons(ip_len - ext_hdrs_len); 5194 5195 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 5196 csum = (csum & 0xFFFF) + (csum >> 16); 5197 *cksum_ptr = (uint16_t)csum; 5198 } 5199 5200 #ifdef _LITTLE_ENDIAN 5201 ip_len = htons(ip_len); 5202 #endif 5203 ip6h->ip6_plen = (uint16_t)ip_len; 5204 5205 freeb(mp); 5206 5207 /* We're done. 
Pass the packet to IP */ 5208 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 5209 ip_output_v6(icmp->icmp_connp, mp1, q, IP_WPUT); 5210 } 5211 5212 static void 5213 icmp_wput_other(queue_t *q, mblk_t *mp) 5214 { 5215 uchar_t *rptr = mp->b_rptr; 5216 struct iocblk *iocp; 5217 #define tudr ((struct T_unitdata_req *)rptr) 5218 conn_t *connp = Q_TO_CONN(q); 5219 icmp_t *icmp = connp->conn_icmp; 5220 icmp_stack_t *is = icmp->icmp_is; 5221 cred_t *cr; 5222 5223 cr = DB_CREDDEF(mp, connp->conn_cred); 5224 5225 switch (mp->b_datap->db_type) { 5226 case M_PROTO: 5227 case M_PCPROTO: 5228 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5229 /* 5230 * If the message does not contain a PRIM_type, 5231 * throw it away. 5232 */ 5233 freemsg(mp); 5234 return; 5235 } 5236 switch (((union T_primitives *)rptr)->type) { 5237 case T_ADDR_REQ: 5238 icmp_addr_req(q, mp); 5239 return; 5240 case O_T_BIND_REQ: 5241 case T_BIND_REQ: 5242 icmp_bind(q, mp); 5243 return; 5244 case T_CONN_REQ: 5245 icmp_connect(q, mp); 5246 return; 5247 case T_CAPABILITY_REQ: 5248 icmp_capability_req(q, mp); 5249 return; 5250 case T_INFO_REQ: 5251 icmp_info_req(q, mp); 5252 return; 5253 case T_UNITDATA_REQ: 5254 /* 5255 * If a T_UNITDATA_REQ gets here, the address must 5256 * be bad. Valid T_UNITDATA_REQs are found above 5257 * and break to below this switch. 5258 */ 5259 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5260 return; 5261 case T_UNBIND_REQ: 5262 icmp_unbind(q, mp); 5263 return; 5264 5265 case T_SVR4_OPTMGMT_REQ: 5266 if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get, 5267 cr)) { 5268 /* Only IP can return anything meaningful */ 5269 (void) svr4_optcom_req(q, mp, cr, 5270 &icmp_opt_obj, B_TRUE); 5271 } 5272 return; 5273 5274 case T_OPTMGMT_REQ: 5275 /* Only IP can return anything meaningful */ 5276 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE); 5277 return; 5278 5279 case T_DISCON_REQ: 5280 icmp_disconnect(q, mp); 5281 return; 5282 5283 /* The following TPI message is not supported by icmp. 
*/ 5284 case O_T_CONN_RES: 5285 case T_CONN_RES: 5286 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5287 return; 5288 5289 /* The following 3 TPI requests are illegal for icmp. */ 5290 case T_DATA_REQ: 5291 case T_EXDATA_REQ: 5292 case T_ORDREL_REQ: 5293 freemsg(mp); 5294 (void) putctl1(RD(q), M_ERROR, EPROTO); 5295 return; 5296 default: 5297 break; 5298 } 5299 break; 5300 case M_IOCTL: 5301 iocp = (struct iocblk *)mp->b_rptr; 5302 switch (iocp->ioc_cmd) { 5303 case TI_GETPEERNAME: 5304 if (icmp->icmp_state != TS_DATA_XFER) { 5305 /* 5306 * If a default destination address has not 5307 * been associated with the stream, then we 5308 * don't know the peer's name. 5309 */ 5310 iocp->ioc_error = ENOTCONN; 5311 err_ret:; 5312 iocp->ioc_count = 0; 5313 mp->b_datap->db_type = M_IOCACK; 5314 qreply(q, mp); 5315 return; 5316 } 5317 /* FALLTHRU */ 5318 case TI_GETMYNAME: 5319 /* 5320 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5321 * need to copyin the user's strbuf structure. 5322 * Processing will continue in the M_IOCDATA case 5323 * below. 5324 */ 5325 mi_copyin(q, mp, NULL, 5326 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5327 return; 5328 case ND_SET: 5329 /* nd_getset performs the necessary error checking */ 5330 case ND_GET: 5331 if (nd_getset(q, is->is_nd, mp)) { 5332 qreply(q, mp); 5333 return; 5334 } 5335 break; 5336 default: 5337 break; 5338 } 5339 break; 5340 case M_IOCDATA: 5341 icmp_wput_iocdata(q, mp); 5342 return; 5343 default: 5344 break; 5345 } 5346 ip_wput(q, mp); 5347 } 5348 5349 /* 5350 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5351 * messages. 5352 */ 5353 static void 5354 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5355 { 5356 mblk_t *mp1; 5357 STRUCT_HANDLE(strbuf, sb); 5358 icmp_t *icmp; 5359 in6_addr_t v6addr; 5360 ipaddr_t v4addr; 5361 uint32_t flowinfo = 0; 5362 int addrlen; 5363 5364 /* Make sure it is one of ours. 
*/ 5365 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5366 case TI_GETMYNAME: 5367 case TI_GETPEERNAME: 5368 break; 5369 default: 5370 icmp = Q_TO_ICMP(q); 5371 ip_output(icmp->icmp_connp, mp, q, IP_WPUT); 5372 return; 5373 } 5374 switch (mi_copy_state(q, mp, &mp1)) { 5375 case -1: 5376 return; 5377 case MI_COPY_CASE(MI_COPY_IN, 1): 5378 break; 5379 case MI_COPY_CASE(MI_COPY_OUT, 1): 5380 /* 5381 * The address has been copied out, so now 5382 * copyout the strbuf. 5383 */ 5384 mi_copyout(q, mp); 5385 return; 5386 case MI_COPY_CASE(MI_COPY_OUT, 2): 5387 /* 5388 * The address and strbuf have been copied out. 5389 * We're done, so just acknowledge the original 5390 * M_IOCTL. 5391 */ 5392 mi_copy_done(q, mp, 0); 5393 return; 5394 default: 5395 /* 5396 * Something strange has happened, so acknowledge 5397 * the original M_IOCTL with an EPROTO error. 5398 */ 5399 mi_copy_done(q, mp, EPROTO); 5400 return; 5401 } 5402 /* 5403 * Now we have the strbuf structure for TI_GETMYNAME 5404 * and TI_GETPEERNAME. Next we copyout the requested 5405 * address and then we'll copyout the strbuf. 5406 */ 5407 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5408 (void *)mp1->b_rptr); 5409 icmp = Q_TO_ICMP(q); 5410 if (icmp->icmp_family == AF_INET) 5411 addrlen = sizeof (sin_t); 5412 else 5413 addrlen = sizeof (sin6_t); 5414 5415 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5416 mi_copy_done(q, mp, EINVAL); 5417 return; 5418 } 5419 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5420 case TI_GETMYNAME: 5421 if (icmp->icmp_family == AF_INET) { 5422 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5423 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 5424 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5425 v4addr = V4_PART_OF_V6(icmp->icmp_v6src); 5426 } else { 5427 /* 5428 * INADDR_ANY 5429 * icmp_v6src is not set, we might be bound to 5430 * broadcast/multicast. 
Use icmp_bound_v6src as 5431 * local address instead (that could 5432 * also still be INADDR_ANY) 5433 */ 5434 v4addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 5435 } 5436 } else { 5437 /* icmp->icmp_family == AF_INET6 */ 5438 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5439 v6addr = icmp->icmp_v6src; 5440 } else { 5441 /* 5442 * UNSPECIFIED 5443 * icmp_v6src is not set, we might be bound to 5444 * broadcast/multicast. Use icmp_bound_v6src as 5445 * local address instead (that could 5446 * also still be UNSPECIFIED) 5447 */ 5448 v6addr = icmp->icmp_bound_v6src; 5449 } 5450 } 5451 break; 5452 case TI_GETPEERNAME: 5453 if (icmp->icmp_family == AF_INET) { 5454 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5455 v4addr = V4_PART_OF_V6(icmp->icmp_v6dst); 5456 } else { 5457 /* icmp->icmp_family == AF_INET6) */ 5458 v6addr = icmp->icmp_v6dst; 5459 flowinfo = icmp->icmp_flowinfo; 5460 } 5461 break; 5462 default: 5463 mi_copy_done(q, mp, EPROTO); 5464 return; 5465 } 5466 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5467 if (!mp1) 5468 return; 5469 5470 if (icmp->icmp_family == AF_INET) { 5471 sin_t *sin; 5472 5473 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 5474 sin = (sin_t *)mp1->b_rptr; 5475 mp1->b_wptr = (uchar_t *)&sin[1]; 5476 *sin = sin_null; 5477 sin->sin_family = AF_INET; 5478 sin->sin_addr.s_addr = v4addr; 5479 } else { 5480 /* icmp->icmp_family == AF_INET6 */ 5481 sin6_t *sin6; 5482 5483 ASSERT(icmp->icmp_family == AF_INET6); 5484 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 5485 sin6 = (sin6_t *)mp1->b_rptr; 5486 mp1->b_wptr = (uchar_t *)&sin6[1]; 5487 *sin6 = sin6_null; 5488 sin6->sin6_family = AF_INET6; 5489 sin6->sin6_flowinfo = flowinfo; 5490 sin6->sin6_addr = v6addr; 5491 } 5492 /* Copy out the address */ 5493 mi_copyout(q, mp); 5494 } 5495 5496 static int 5497 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5498 void *thisdg_attrs) 5499 { 5500 conn_t *connp = Q_TO_CONN(q); 5501 struct T_unitdata_req *udreqp; 5502 
int is_absreq_failure; 5503 cred_t *cr; 5504 5505 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5506 *errorp = 0; 5507 5508 cr = DB_CREDDEF(mp, connp->conn_cred); 5509 5510 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5511 udreqp->OPT_offset, cr, &icmp_opt_obj, 5512 thisdg_attrs, &is_absreq_failure); 5513 5514 if (*errorp != 0) { 5515 /* 5516 * Note: No special action needed in this 5517 * module for "is_absreq_failure" 5518 */ 5519 return (-1); /* failure */ 5520 } 5521 ASSERT(is_absreq_failure == 0); 5522 return (0); /* success */ 5523 } 5524 5525 void 5526 icmp_ddi_init(void) 5527 { 5528 icmp_max_optsize = 5529 optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5530 icmp_opt_obj.odb_opt_arr_cnt); 5531 5532 /* 5533 * We want to be informed each time a stack is created or 5534 * destroyed in the kernel, so we can maintain the 5535 * set of icmp_stack_t's. 5536 */ 5537 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); 5538 } 5539 5540 void 5541 icmp_ddi_destroy(void) 5542 { 5543 netstack_unregister(NS_ICMP); 5544 } 5545 5546 /* 5547 * Initialize the ICMP stack instance. 5548 */ 5549 static void * 5550 rawip_stack_init(netstackid_t stackid, netstack_t *ns) 5551 { 5552 icmp_stack_t *is; 5553 icmpparam_t *pa; 5554 5555 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); 5556 is->is_netstack = ns; 5557 5558 pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); 5559 is->is_param_arr = pa; 5560 bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); 5561 5562 (void) icmp_param_register(&is->is_nd, 5563 is->is_param_arr, A_CNT(icmp_param_arr)); 5564 is->is_ksp = rawip_kstat_init(stackid); 5565 return (is); 5566 } 5567 5568 /* 5569 * Free the ICMP stack instance. 
5570 */ 5571 static void 5572 rawip_stack_fini(netstackid_t stackid, void *arg) 5573 { 5574 icmp_stack_t *is = (icmp_stack_t *)arg; 5575 5576 nd_free(&is->is_nd); 5577 kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); 5578 is->is_param_arr = NULL; 5579 5580 rawip_kstat_fini(stackid, is->is_ksp); 5581 is->is_ksp = NULL; 5582 kmem_free(is, sizeof (*is)); 5583 } 5584 5585 static void * 5586 rawip_kstat_init(netstackid_t stackid) { 5587 kstat_t *ksp; 5588 5589 rawip_named_kstat_t template = { 5590 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5591 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5592 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5593 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5594 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5595 }; 5596 5597 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", 5598 KSTAT_TYPE_NAMED, 5599 NUM_OF_FIELDS(rawip_named_kstat_t), 5600 0, stackid); 5601 if (ksp == NULL || ksp->ks_data == NULL) 5602 return (NULL); 5603 5604 bcopy(&template, ksp->ks_data, sizeof (template)); 5605 ksp->ks_update = rawip_kstat_update; 5606 ksp->ks_private = (void *)(uintptr_t)stackid; 5607 5608 kstat_install(ksp); 5609 return (ksp); 5610 } 5611 5612 static void 5613 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) 5614 { 5615 if (ksp != NULL) { 5616 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 5617 kstat_delete_netstack(ksp, stackid); 5618 } 5619 } 5620 5621 static int 5622 rawip_kstat_update(kstat_t *ksp, int rw) 5623 { 5624 rawip_named_kstat_t *rawipkp; 5625 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; 5626 netstack_t *ns; 5627 icmp_stack_t *is; 5628 5629 if ((ksp == NULL) || (ksp->ks_data == NULL)) 5630 return (EIO); 5631 5632 if (rw == KSTAT_WRITE) 5633 return (EACCES); 5634 5635 rawipkp = (rawip_named_kstat_t *)ksp->ks_data; 5636 5637 ns = netstack_find_by_stackid(stackid); 5638 if (ns == NULL) 5639 return (-1); 5640 is = ns->netstack_icmp; 5641 if (is == NULL) { 5642 netstack_rele(ns); 5643 return (-1); 5644 } 5645 
rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; 5646 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; 5647 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; 5648 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; 5649 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; 5650 netstack_rele(ns); 5651 return (0); 5652 } 5653