1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/types.h> 31 #include <sys/stream.h> 32 #include <sys/stropts.h> 33 #include <sys/strlog.h> 34 #include <sys/strsun.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/priv.h> 46 #include <sys/zone.h> 47 #include <sys/time.h> 48 49 #include <sys/socket.h> 50 #include <sys/isa_defs.h> 51 #include <sys/suntpi.h> 52 #include <sys/xti_inet.h> 53 #include <sys/netstack.h> 54 55 #include <net/route.h> 56 #include <net/if.h> 57 58 #include <netinet/in.h> 59 #include <netinet/ip6.h> 60 #include <netinet/icmp6.h> 61 #include <inet/common.h> 62 #include <inet/ip.h> 63 #include <inet/ip6.h> 64 #include <inet/mi.h> 65 #include <inet/nd.h> 66 #include <inet/optcom.h> 67 #include <inet/snmpcom.h> 68 #include <inet/kstatcom.h> 69 #include <inet/rawip_impl.h> 70 71 #include <netinet/ip_mroute.h> 72 #include <inet/tcp.h> 73 #include <net/pfkeyv2.h> 74 #include <inet/ipsec_info.h> 75 #include <inet/ipclassifier.h> 76 77 #include <sys/tsol/label.h> 78 #include <sys/tsol/tnet.h> 79 80 #include <inet/ip_ire.h> 81 #include <inet/ip_if.h> 82 83 #include <inet/ip_impl.h> 84 85 /* 86 * Synchronization notes: 87 * 88 * RAWIP is MT and uses the usual kernel synchronization primitives. There is 89 * one lock, which is icmp_rwlock. We also use conn_lock when updating things 90 * which affect the IP classifier lookup. 91 * The lock order is icmp_rwlock -> conn_lock. 92 * 93 * The icmp_rwlock: 94 * This protects most of the other fields in the icmp_t. The exact list of 95 * fields which are protected by each of the above locks is documented in 96 * the icmp_t structure definition. 97 * 98 * Plumbing notes: 99 * ICMP is always a device driver.
For compatibility with mibopen() code
 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough
 * dummy module.
 */

/*
 * Forward declarations for routines used in this file.  All of them are
 * file-local except icmp_opt_set() and icmp_opt_get(), which are declared
 * without "static".
 */
static void	icmp_addr_req(queue_t *q, mblk_t *mp);
static void	icmp_bind(queue_t *q, mblk_t *mp);
static void	icmp_bind_proto(queue_t *q);
static void	icmp_bind_result(conn_t *, mblk_t *);
static void	icmp_bind_ack(conn_t *, mblk_t *mp);
static void	icmp_bind_error(conn_t *, mblk_t *mp);
static int	icmp_build_hdrs(icmp_t *icmp);
static void	icmp_capability_req(queue_t *q, mblk_t *mp);
static int	icmp_close(queue_t *q);
static void	icmp_connect(queue_t *q, mblk_t *mp);
static void	icmp_disconnect(queue_t *q, mblk_t *mp);
static void	icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
		    t_scalar_t t_error, int sys_error);
static void	icmp_icmp_error(queue_t *q, mblk_t *mp);
static void	icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
static void	icmp_info_req(queue_t *q, mblk_t *mp);
static void	icmp_input(void *, mblk_t *, void *);
static mblk_t	*icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim,
		    t_scalar_t addr_length, in_port_t);
static int	icmp_open(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp, boolean_t isv6);
static int	icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static void	icmp_output(queue_t *q, mblk_t *mp);
static int	icmp_unitdata_opt_process(queue_t *q, mblk_t *mp,
		    int *errorp, void *thisdg_attrs);
static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
int		icmp_opt_set(queue_t *q, uint_t optset_context,
		    int level, int name, uint_t inlen,
		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
		    void *thisdg_attrs, cred_t *cr, mblk_t *mblk);
int		icmp_opt_get(queue_t *q, int level, int name,
		    uchar_t *ptr);
static int	icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt);
static int	icmp_param_set(queue_t *q, mblk_t *mp, char *value,
		    caddr_t cp, cred_t *cr);
static int	icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
		    uchar_t *ptr, int len);
static int	icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static void	icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
static void	icmp_unbind(queue_t *q, mblk_t *mp);
static void	icmp_wput(queue_t *q, mblk_t *mp);
static void	icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6,
		    t_scalar_t tudr_optlen);
static void	icmp_wput_other(queue_t *q, mblk_t *mp);
static void	icmp_wput_iocdata(queue_t *q, mblk_t *mp);
static void	icmp_wput_restricted(queue_t *q, mblk_t *mp);

/* Per-netstack instance setup and teardown. */
static void	*rawip_stack_init(netstackid_t stackid, netstack_t *ns);
static void	rawip_stack_fini(netstackid_t stackid, void *arg);

/* Per-netstack kstat setup, teardown and update. */
static void	*rawip_kstat_init(netstackid_t stackid);
static void	rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp);
static int	rawip_kstat_update(kstat_t *kp, int rw);


/*
 * STREAMS module information shared by every qinit structure below.
 * NOTE(review): the initializer is positional; the meaning of each value
 * (id, name, packet size limits, water marks) follows the field order of
 * struct module_info -- confirm against module_info(9S).
 */
static struct module_info icmp_mod_info =  {
	5707, "icmp", 1, INFPSZ, 512, 128
};

/*
 * Entry points for ICMP as a device.
 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices.
 */
/* Read side for /dev/icmp: only the open and close routines are supplied. */
static struct qinit icmprinitv4 = {
	NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info
};

/* Read side for /dev/icmp6: identical except for the open routine. */
static struct qinit icmprinitv6 = {
	NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info
};

/* Write side, shared by both devices: icmp_wput plus IP's ip_wsrv. */
static struct qinit icmpwinit = {
	(pfi_t)icmp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &icmp_mod_info
};

/* For AF_INET aka /dev/icmp */
struct streamtab icmpinfov4 = {
	&icmprinitv4, &icmpwinit
};

/* For AF_INET6 aka /dev/icmp6 */
struct streamtab icmpinfov6 = {
	&icmprinitv6, &icmpwinit
};

/*
 * Never written in the code visible here; being static objects without
 * initializers they start out all-zero and serve as templates that are
 * structure-copied into freshly built addresses.
 */
static sin_t	sin_null;	/* Zero address for quick clears */
static sin6_t	sin6_null;	/* Zero address for quick clears */

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack icmp_g_t_info_ack = {
	T_INFO_ACK,
	IP_MAXPACKET,	 /* TSDU_size.  icmp allows maximum size messages. */
	T_INVALID,	/* ETSDU_size.  icmp does not support expedited data. */
	T_INVALID,	/* CDATA_size. icmp does not support connect data. */
	T_INVALID,	/* DDATA_size. icmp does not support disconnect data. */
	0,		/* ADDR_size - filled in later. */
	0,		/* OPT_size - not initialized here */
	IP_MAXPACKET,	/* TIDU_size.  icmp allows maximum size messages. */
	T_CLTS,		/* SERV_type.  icmp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from icmp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/*
 * Table of ND variables supported by icmp.  These are loaded into is_nd
 * when the stack instance is created.
 * All of these are alterable, within the min/max values given, at run time.
 *
 * The is_* accessor macros that follow the table select entries by array
 * index, so the order of the rows here must stay in step with those
 * indices: adding or reordering a row requires updating the macros.
 */
static icmpparam_t	icmp_param_arr[] = {
	/* min	max	value	name */
	{ 0,	128,	32,	"icmp_wroff_extra" },
	{ 1,	255,	255,	"icmp_ipv4_ttl" },
	{ 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS,	"icmp_ipv6_hoplimit"},
	{ 0,	1,	1,	"icmp_bsd_compat" },
	{ 4096,	65536,	8192,	"icmp_xmit_hiwat"},
	{ 0,	65536,	1024,	"icmp_xmit_lowat"},
	{ 4096,	65536,	8192,	"icmp_recv_hiwat"},
	{ 65536, 1024*1024*1024, 256*1024,	"icmp_max_buf"},
};
/* Current value of each tunable, addressed through a per-stack is_param_arr. */
#define	is_wroff_extra		is_param_arr[0].icmp_param_value
#define	is_ipv4_ttl		is_param_arr[1].icmp_param_value
#define	is_ipv6_hoplimit	is_param_arr[2].icmp_param_value
#define	is_bsd_compat		is_param_arr[3].icmp_param_value
#define	is_xmit_hiwat		is_param_arr[4].icmp_param_value
#define	is_xmit_lowat		is_param_arr[5].icmp_param_value
#define	is_recv_hiwat		is_param_arr[6].icmp_param_value
#define	is_max_buf		is_param_arr[7].icmp_param_value

/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to icmp_wput.
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP
 * protocol type placed in the message following the address. A T_BIND_ACK
 * message is returned by ip_bind_v4/v6.
245 */ 246 static void 247 icmp_bind(queue_t *q, mblk_t *mp) 248 { 249 sin_t *sin; 250 sin6_t *sin6; 251 mblk_t *mp1; 252 struct T_bind_req *tbr; 253 icmp_t *icmp; 254 conn_t *connp = Q_TO_CONN(q); 255 256 icmp = connp->conn_icmp; 257 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 258 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 259 "icmp_bind: bad req, len %u", 260 (uint_t)(mp->b_wptr - mp->b_rptr)); 261 icmp_err_ack(q, mp, TPROTO, 0); 262 return; 263 } 264 if (icmp->icmp_state != TS_UNBND) { 265 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 266 "icmp_bind: bad state, %d", icmp->icmp_state); 267 icmp_err_ack(q, mp, TOUTSTATE, 0); 268 return; 269 } 270 /* 271 * Reallocate the message to make sure we have enough room for an 272 * address and the protocol type. 273 */ 274 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 275 if (!mp1) { 276 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 277 return; 278 } 279 mp = mp1; 280 tbr = (struct T_bind_req *)mp->b_rptr; 281 switch (tbr->ADDR_length) { 282 case 0: /* Generic request */ 283 tbr->ADDR_offset = sizeof (struct T_bind_req); 284 if (icmp->icmp_family == AF_INET) { 285 tbr->ADDR_length = sizeof (sin_t); 286 sin = (sin_t *)&tbr[1]; 287 *sin = sin_null; 288 sin->sin_family = AF_INET; 289 mp->b_wptr = (uchar_t *)&sin[1]; 290 } else { 291 ASSERT(icmp->icmp_family == AF_INET6); 292 tbr->ADDR_length = sizeof (sin6_t); 293 sin6 = (sin6_t *)&tbr[1]; 294 *sin6 = sin6_null; 295 sin6->sin6_family = AF_INET6; 296 mp->b_wptr = (uchar_t *)&sin6[1]; 297 } 298 break; 299 case sizeof (sin_t): /* Complete IP address */ 300 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 301 sizeof (sin_t)); 302 if (sin == NULL || !OK_32PTR((char *)sin)) { 303 icmp_err_ack(q, mp, TSYSERR, EINVAL); 304 return; 305 } 306 if (icmp->icmp_family != AF_INET || 307 sin->sin_family != AF_INET) { 308 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 309 return; 310 } 311 break; 312 case sizeof (sin6_t): /* Complete IP address */ 313 sin6 = (sin6_t 
*)mi_offset_param(mp, tbr->ADDR_offset, 314 sizeof (sin6_t)); 315 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 316 icmp_err_ack(q, mp, TSYSERR, EINVAL); 317 return; 318 } 319 if (icmp->icmp_family != AF_INET6 || 320 sin6->sin6_family != AF_INET6) { 321 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 322 return; 323 } 324 /* No support for mapped addresses on raw sockets */ 325 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 326 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 327 return; 328 } 329 break; 330 default: 331 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 332 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 333 icmp_err_ack(q, mp, TBADADDR, 0); 334 return; 335 } 336 337 /* 338 * The state must be TS_UNBND. TPI mandates that users must send 339 * TPI primitives only 1 at a time and wait for the response before 340 * sending the next primitive. 341 */ 342 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 343 if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) { 344 rw_exit(&icmp->icmp_rwlock); 345 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 346 "icmp_bind: bad state, %d", icmp->icmp_state); 347 icmp_err_ack(q, mp, TOUTSTATE, 0); 348 return; 349 } 350 351 icmp->icmp_pending_op = tbr->PRIM_type; 352 353 /* 354 * Copy the source address into our icmp structure. This address 355 * may still be zero; if so, ip will fill in the correct address 356 * each time an outbound packet is passed to it. 357 * If we are binding to a broadcast or multicast address then 358 * icmp_bind_ack will clear the source address when it receives 359 * the T_BIND_ACK. 
360 */ 361 icmp->icmp_state = TS_IDLE; 362 363 if (icmp->icmp_family == AF_INET) { 364 ASSERT(sin != NULL); 365 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 366 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 367 &icmp->icmp_v6src); 368 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 369 icmp->icmp_ip_snd_options_len; 370 icmp->icmp_bound_v6src = icmp->icmp_v6src; 371 } else { 372 int error; 373 374 ASSERT(sin6 != NULL); 375 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 376 icmp->icmp_v6src = sin6->sin6_addr; 377 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 378 icmp->icmp_bound_v6src = icmp->icmp_v6src; 379 380 /* Rebuild the header template */ 381 error = icmp_build_hdrs(icmp); 382 if (error != 0) { 383 icmp->icmp_pending_op = -1; 384 rw_exit(&icmp->icmp_rwlock); 385 icmp_err_ack(q, mp, TSYSERR, error); 386 return; 387 } 388 } 389 /* 390 * Place protocol type in the O_T_BIND_REQ/T_BIND_REQ following 391 * the address. 392 */ 393 *mp->b_wptr++ = icmp->icmp_proto; 394 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 395 /* 396 * Append a request for an IRE if src not 0 (INADDR_ANY) 397 */ 398 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 399 if (!mp->b_cont) { 400 icmp->icmp_pending_op = -1; 401 rw_exit(&icmp->icmp_rwlock); 402 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 403 return; 404 } 405 mp->b_cont->b_wptr += sizeof (ire_t); 406 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 407 } 408 rw_exit(&icmp->icmp_rwlock); 409 410 /* Pass the O_T_BIND_REQ/T_BIND_REQ to ip. */ 411 if (icmp->icmp_family == AF_INET6) 412 mp = ip_bind_v6(q, mp, connp, NULL); 413 else 414 mp = ip_bind_v4(q, mp, connp); 415 416 /* The above return NULL if the bind needs to be deferred */ 417 if (mp != NULL) 418 icmp_bind_result(connp, mp); 419 else 420 CONN_INC_REF(connp); 421 } 422 423 /* 424 * Send message to IP to just bind to the protocol. 
425 */ 426 static void 427 icmp_bind_proto(queue_t *q) 428 { 429 mblk_t *mp; 430 struct T_bind_req *tbr; 431 icmp_t *icmp; 432 conn_t *connp = Q_TO_CONN(q); 433 434 icmp = connp->conn_icmp; 435 436 mp = allocb(sizeof (struct T_bind_req) + sizeof (sin6_t) + 1, 437 BPRI_MED); 438 if (!mp) { 439 return; 440 } 441 mp->b_datap->db_type = M_PROTO; 442 tbr = (struct T_bind_req *)mp->b_rptr; 443 tbr->PRIM_type = O_T_BIND_REQ; /* change to T_BIND_REQ ? */ 444 tbr->ADDR_offset = sizeof (struct T_bind_req); 445 446 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 447 if (icmp->icmp_ipversion == IPV4_VERSION) { 448 sin_t *sin; 449 450 tbr->ADDR_length = sizeof (sin_t); 451 sin = (sin_t *)&tbr[1]; 452 *sin = sin_null; 453 sin->sin_family = AF_INET; 454 mp->b_wptr = (uchar_t *)&sin[1]; 455 } else { 456 sin6_t *sin6; 457 458 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 459 tbr->ADDR_length = sizeof (sin6_t); 460 sin6 = (sin6_t *)&tbr[1]; 461 *sin6 = sin6_null; 462 sin6->sin6_family = AF_INET6; 463 mp->b_wptr = (uchar_t *)&sin6[1]; 464 } 465 466 /* Place protocol type in the O_T_BIND_REQ following the address. */ 467 *mp->b_wptr++ = icmp->icmp_proto; 468 rw_exit(&icmp->icmp_rwlock); 469 470 /* Pass the O_T_BIND_REQ to ip. */ 471 if (icmp->icmp_family == AF_INET6) 472 mp = ip_bind_v6(q, mp, connp, NULL); 473 else 474 mp = ip_bind_v4(q, mp, connp); 475 476 /* The above return NULL if the bind needs to be deferred */ 477 if (mp != NULL) 478 icmp_bind_result(connp, mp); 479 else 480 CONN_INC_REF(connp); 481 } 482 483 /* 484 * This is called from ip_wput_nondata to handle the results of a 485 * deferred RAWIP bind. It is called once the bind has been completed. 486 */ 487 void 488 rawip_resume_bind(conn_t *connp, mblk_t *mp) 489 { 490 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 491 492 icmp_bind_result(connp, mp); 493 494 CONN_OPER_PENDING_DONE(connp); 495 } 496 497 /* 498 * This routine handles each T_CONN_REQ message passed to icmp. 
It 499 * associates a default destination address with the stream. 500 * 501 * This routine sends down a T_BIND_REQ to IP with the following mblks: 502 * T_BIND_REQ - specifying local and remote address. 503 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 504 * T_OK_ACK - for the T_CONN_REQ 505 * T_CONN_CON - to keep the TPI user happy 506 * 507 * The connect completes in icmp_bind_result. 508 * When a T_BIND_ACK is received information is extracted from the IRE 509 * and the two appended messages are sent to the TPI user. 510 * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 511 * convert it to an error ack for the appropriate primitive. 512 */ 513 static void 514 icmp_connect(queue_t *q, mblk_t *mp) 515 { 516 sin_t *sin; 517 sin6_t *sin6; 518 mblk_t *mp1, *mp2; 519 struct T_conn_req *tcr; 520 icmp_t *icmp; 521 ipaddr_t v4dst; 522 in6_addr_t v6dst; 523 uint32_t flowinfo; 524 conn_t *connp = Q_TO_CONN(q); 525 526 icmp = connp->conn_icmp; 527 tcr = (struct T_conn_req *)mp->b_rptr; 528 /* Sanity checks */ 529 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 530 icmp_err_ack(q, mp, TPROTO, 0); 531 return; 532 } 533 534 if (tcr->OPT_length != 0) { 535 icmp_err_ack(q, mp, TBADOPT, 0); 536 return; 537 } 538 539 switch (tcr->DEST_length) { 540 default: 541 icmp_err_ack(q, mp, TBADADDR, 0); 542 return; 543 544 case sizeof (sin_t): 545 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 546 sizeof (sin_t)); 547 if (sin == NULL || !OK_32PTR((char *)sin)) { 548 icmp_err_ack(q, mp, TSYSERR, EINVAL); 549 return; 550 } 551 if (icmp->icmp_family != AF_INET || 552 sin->sin_family != AF_INET) { 553 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 554 return; 555 } 556 v4dst = sin->sin_addr.s_addr; 557 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 558 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 559 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 560 icmp->icmp_ip_snd_options_len; 561 break; 562 563 case sizeof (sin6_t): 564 sin6 = 
(sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 565 sizeof (sin6_t)); 566 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 567 icmp_err_ack(q, mp, TSYSERR, EINVAL); 568 return; 569 } 570 if (icmp->icmp_family != AF_INET6 || 571 sin6->sin6_family != AF_INET6) { 572 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 573 return; 574 } 575 /* No support for mapped addresses on raw sockets */ 576 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 577 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 578 return; 579 } 580 v6dst = sin6->sin6_addr; 581 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 582 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 583 flowinfo = sin6->sin6_flowinfo; 584 break; 585 } 586 if (icmp->icmp_ipversion == IPV4_VERSION) { 587 /* 588 * Interpret a zero destination to mean loopback. 589 * Update the T_CONN_REQ (sin/sin6) since it is used to 590 * generate the T_CONN_CON. 591 */ 592 if (v4dst == INADDR_ANY) { 593 v4dst = htonl(INADDR_LOOPBACK); 594 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 595 if (icmp->icmp_family == AF_INET) { 596 sin->sin_addr.s_addr = v4dst; 597 } else { 598 sin6->sin6_addr = v6dst; 599 } 600 } 601 icmp->icmp_v6dst = v6dst; 602 icmp->icmp_flowinfo = 0; 603 604 /* 605 * If the destination address is multicast and 606 * an outgoing multicast interface has been set, 607 * use the address of that interface as our 608 * source address if no source address has been set. 609 */ 610 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 611 CLASSD(v4dst) && 612 icmp->icmp_multicast_if_addr != INADDR_ANY) { 613 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 614 &icmp->icmp_v6src); 615 } 616 } else { 617 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 618 /* 619 * Interpret a zero destination to mean loopback. 620 * Update the T_CONN_REQ (sin/sin6) since it is used to 621 * generate the T_CONN_CON. 
622 */ 623 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 624 v6dst = ipv6_loopback; 625 sin6->sin6_addr = v6dst; 626 } 627 icmp->icmp_v6dst = v6dst; 628 icmp->icmp_flowinfo = flowinfo; 629 /* 630 * If the destination address is multicast and 631 * an outgoing multicast interface has been set, 632 * then the ip bind logic will pick the correct source 633 * address (i.e. matching the outgoing multicast interface). 634 */ 635 } 636 637 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 638 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 639 rw_exit(&icmp->icmp_rwlock); 640 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 641 "icmp_connect: bad state, %d", icmp->icmp_state); 642 icmp_err_ack(q, mp, TOUTSTATE, 0); 643 return; 644 } 645 icmp->icmp_pending_op = T_CONN_REQ; 646 647 if (icmp->icmp_state == TS_DATA_XFER) { 648 /* Already connected - clear out state */ 649 icmp->icmp_v6src = icmp->icmp_bound_v6src; 650 icmp->icmp_state = TS_IDLE; 651 } 652 653 /* 654 * Send down bind to IP to verify that there is a route 655 * and to determine the source address. 656 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 657 */ 658 if (icmp->icmp_family == AF_INET) { 659 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa_conn_t), 660 sin->sin_port); 661 } else { 662 ASSERT(icmp->icmp_family == AF_INET6); 663 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa6_conn_t), 664 sin6->sin6_port); 665 } 666 if (mp1 == NULL) { 667 icmp->icmp_pending_op = -1; 668 rw_exit(&icmp->icmp_rwlock); 669 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 670 return; 671 } 672 673 /* 674 * We also have to send a connection confirmation to 675 * keep TLI happy. Prepare it for icmp_bind_result. 
676 */ 677 if (icmp->icmp_family == AF_INET) { 678 mp2 = mi_tpi_conn_con(NULL, (char *)sin, sizeof (*sin), NULL, 679 0); 680 } else { 681 ASSERT(icmp->icmp_family == AF_INET6); 682 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, sizeof (*sin6), NULL, 683 0); 684 } 685 if (mp2 == NULL) { 686 freemsg(mp1); 687 icmp->icmp_pending_op = -1; 688 rw_exit(&icmp->icmp_rwlock); 689 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 690 return; 691 } 692 693 mp = mi_tpi_ok_ack_alloc(mp); 694 if (mp == NULL) { 695 /* Unable to reuse the T_CONN_REQ for the ack. */ 696 freemsg(mp2); 697 icmp->icmp_pending_op = -1; 698 rw_exit(&icmp->icmp_rwlock); 699 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 700 return; 701 } 702 703 icmp->icmp_state = TS_DATA_XFER; 704 rw_exit(&icmp->icmp_rwlock); 705 706 /* Hang onto the T_OK_ACK and T_CONN_CON for later. */ 707 linkb(mp1, mp); 708 linkb(mp1, mp2); 709 710 mblk_setcred(mp1, connp->conn_cred); 711 if (icmp->icmp_family == AF_INET) 712 mp1 = ip_bind_v4(q, mp1, connp); 713 else 714 mp1 = ip_bind_v6(q, mp1, connp, NULL); 715 716 /* The above return NULL if the bind needs to be deferred */ 717 if (mp1 != NULL) 718 icmp_bind_result(connp, mp1); 719 else 720 CONN_INC_REF(connp); 721 } 722 723 static void 724 icmp_close_free(conn_t *connp) 725 { 726 icmp_t *icmp = connp->conn_icmp; 727 728 /* If there are any options associated with the stream, free them. 
*/ 729 if (icmp->icmp_ip_snd_options != NULL) { 730 mi_free((char *)icmp->icmp_ip_snd_options); 731 icmp->icmp_ip_snd_options = NULL; 732 icmp->icmp_ip_snd_options_len = 0; 733 } 734 735 if (icmp->icmp_filter != NULL) { 736 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 737 icmp->icmp_filter = NULL; 738 } 739 /* Free memory associated with sticky options */ 740 if (icmp->icmp_sticky_hdrs_len != 0) { 741 kmem_free(icmp->icmp_sticky_hdrs, 742 icmp->icmp_sticky_hdrs_len); 743 icmp->icmp_sticky_hdrs = NULL; 744 icmp->icmp_sticky_hdrs_len = 0; 745 } 746 ip6_pkt_free(&icmp->icmp_sticky_ipp); 747 748 /* 749 * Clear any fields which the kmem_cache constructor clears. 750 * Only icmp_connp needs to be preserved. 751 * TBD: We should make this more efficient to avoid clearing 752 * everything. 753 */ 754 ASSERT(icmp->icmp_connp == connp); 755 bzero(icmp, sizeof (icmp_t)); 756 icmp->icmp_connp = connp; 757 } 758 759 static int 760 icmp_close(queue_t *q) 761 { 762 conn_t *connp = (conn_t *)q->q_ptr; 763 764 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 765 766 ip_quiesce_conn(connp); 767 768 qprocsoff(connp->conn_rq); 769 770 icmp_close_free(connp); 771 772 /* 773 * Now we are truly single threaded on this stream, and can 774 * delete the things hanging off the connp, and finally the connp. 775 * We removed this connp from the fanout list, it cannot be 776 * accessed thru the fanouts, and we already waited for the 777 * conn_ref to drop to 0. We are already in close, so 778 * there cannot be any other thread from the top. qprocsoff 779 * has completed, and service has completed or won't run in 780 * future. 781 */ 782 ASSERT(connp->conn_ref == 1); 783 784 inet_minor_free(ip_minor_arena, connp->conn_dev); 785 786 connp->conn_ref--; 787 ipcl_conn_destroy(connp); 788 789 q->q_ptr = WR(q)->q_ptr = NULL; 790 return (0); 791 } 792 793 /* 794 * This routine handles each T_DISCON_REQ message passed to icmp 795 * as an indicating that ICMP is no longer connected. 
This results 796 * in sending a T_BIND_REQ to IP to restore the binding to just 797 * the local address. 798 * 799 * This routine sends down a T_BIND_REQ to IP with the following mblks: 800 * T_BIND_REQ - specifying just the local address. 801 * T_OK_ACK - for the T_DISCON_REQ 802 * 803 * The disconnect completes in icmp_bind_result. 804 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 805 * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 806 * convert it to an error ack for the appropriate primitive. 807 */ 808 static void 809 icmp_disconnect(queue_t *q, mblk_t *mp) 810 { 811 icmp_t *icmp; 812 mblk_t *mp1; 813 conn_t *connp = Q_TO_CONN(q); 814 815 icmp = connp->conn_icmp; 816 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 817 if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) { 818 rw_exit(&icmp->icmp_rwlock); 819 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 820 "icmp_disconnect: bad state, %d", icmp->icmp_state); 821 icmp_err_ack(q, mp, TOUTSTATE, 0); 822 return; 823 } 824 icmp->icmp_pending_op = T_DISCON_REQ; 825 icmp->icmp_v6src = icmp->icmp_bound_v6src; 826 icmp->icmp_state = TS_IDLE; 827 828 /* 829 * Send down bind to IP to remove the full binding and revert 830 * to the local address binding. 831 */ 832 if (icmp->icmp_family == AF_INET) { 833 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin_t), 0); 834 } else { 835 ASSERT(icmp->icmp_family == AF_INET6); 836 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin6_t), 0); 837 } 838 if (mp1 == NULL) { 839 icmp->icmp_pending_op = -1; 840 rw_exit(&icmp->icmp_rwlock); 841 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 842 return; 843 } 844 mp = mi_tpi_ok_ack_alloc(mp); 845 if (mp == NULL) { 846 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 847 icmp->icmp_pending_op = -1; 848 rw_exit(&icmp->icmp_rwlock); 849 icmp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 850 return; 851 } 852 853 if (icmp->icmp_family == AF_INET6) { 854 int error; 855 856 /* Rebuild the header template */ 857 error = icmp_build_hdrs(icmp); 858 if (error != 0) { 859 icmp->icmp_pending_op = -1; 860 rw_exit(&icmp->icmp_rwlock); 861 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 862 freemsg(mp1); 863 return; 864 } 865 } 866 867 rw_exit(&icmp->icmp_rwlock); 868 /* Append the T_OK_ACK to the T_BIND_REQ for icmp_bind_result */ 869 linkb(mp1, mp); 870 871 if (icmp->icmp_family == AF_INET6) 872 mp1 = ip_bind_v6(q, mp1, connp, NULL); 873 else 874 mp1 = ip_bind_v4(q, mp1, connp); 875 876 /* The above return NULL if the bind needs to be deferred */ 877 if (mp1 != NULL) 878 icmp_bind_result(connp, mp1); 879 else 880 CONN_INC_REF(connp); 881 } 882 883 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 884 static void 885 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 886 { 887 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 888 qreply(q, mp); 889 } 890 891 /* Shorthand to generate and send TPI error acks to our client */ 892 static void 893 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 894 t_scalar_t t_error, int sys_error) 895 { 896 struct T_error_ack *teackp; 897 898 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 899 M_PCPROTO, T_ERROR_ACK)) != NULL) { 900 teackp = (struct T_error_ack *)mp->b_rptr; 901 teackp->ERROR_prim = primitive; 902 teackp->TLI_error = t_error; 903 teackp->UNIX_error = sys_error; 904 qreply(q, mp); 905 } 906 } 907 908 /* 909 * icmp_icmp_error is called by icmp_input to process ICMP 910 * messages passed up by IP. 911 * Generates the appropriate T_UDERROR_IND for permanent 912 * (non-transient) errors. 913 * Assumes that IP has pulled up everything up to and including 914 * the ICMP header. 
915 */ 916 static void 917 icmp_icmp_error(queue_t *q, mblk_t *mp) 918 { 919 icmph_t *icmph; 920 ipha_t *ipha; 921 int iph_hdr_length; 922 sin_t sin; 923 sin6_t sin6; 924 mblk_t *mp1; 925 int error = 0; 926 icmp_t *icmp = Q_TO_ICMP(q); 927 928 ipha = (ipha_t *)mp->b_rptr; 929 930 ASSERT(OK_32PTR(mp->b_rptr)); 931 932 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 933 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 934 icmp_icmp_error_ipv6(q, mp); 935 return; 936 } 937 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 938 939 /* Skip past the outer IP and ICMP headers */ 940 iph_hdr_length = IPH_HDR_LENGTH(ipha); 941 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 942 ipha = (ipha_t *)&icmph[1]; 943 iph_hdr_length = IPH_HDR_LENGTH(ipha); 944 945 switch (icmph->icmph_type) { 946 case ICMP_DEST_UNREACHABLE: 947 switch (icmph->icmph_code) { 948 case ICMP_FRAGMENTATION_NEEDED: 949 /* 950 * IP has already adjusted the path MTU. 951 */ 952 break; 953 case ICMP_PORT_UNREACHABLE: 954 case ICMP_PROTOCOL_UNREACHABLE: 955 error = ECONNREFUSED; 956 break; 957 default: 958 /* Transient errors */ 959 break; 960 } 961 break; 962 default: 963 /* Transient errors */ 964 break; 965 } 966 if (error == 0) { 967 freemsg(mp); 968 return; 969 } 970 971 /* 972 * Deliver T_UDERROR_IND when the application has asked for it. 973 * The socket layer enables this automatically when connected. 
974 */ 975 if (!icmp->icmp_dgram_errind) { 976 freemsg(mp); 977 return; 978 } 979 980 switch (icmp->icmp_family) { 981 case AF_INET: 982 sin = sin_null; 983 sin.sin_family = AF_INET; 984 sin.sin_addr.s_addr = ipha->ipha_dst; 985 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 986 error); 987 break; 988 case AF_INET6: 989 sin6 = sin6_null; 990 sin6.sin6_family = AF_INET6; 991 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 992 993 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 994 NULL, 0, error); 995 break; 996 } 997 if (mp1) 998 putnext(q, mp1); 999 freemsg(mp); 1000 } 1001 1002 /* 1003 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 1004 * for IPv6 packets. 1005 * Send permanent (non-transient) errors upstream. 1006 * Assumes that IP has pulled up all the extension headers as well 1007 * as the ICMPv6 header. 1008 */ 1009 static void 1010 icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 1011 { 1012 icmp6_t *icmp6; 1013 ip6_t *ip6h, *outer_ip6h; 1014 uint16_t iph_hdr_length; 1015 uint8_t *nexthdrp; 1016 sin6_t sin6; 1017 mblk_t *mp1; 1018 int error = 0; 1019 icmp_t *icmp = Q_TO_ICMP(q); 1020 1021 outer_ip6h = (ip6_t *)mp->b_rptr; 1022 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1023 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1024 else 1025 iph_hdr_length = IPV6_HDR_LEN; 1026 1027 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1028 ip6h = (ip6_t *)&icmp6[1]; 1029 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1030 freemsg(mp); 1031 return; 1032 } 1033 1034 switch (icmp6->icmp6_type) { 1035 case ICMP6_DST_UNREACH: 1036 switch (icmp6->icmp6_code) { 1037 case ICMP6_DST_UNREACH_NOPORT: 1038 error = ECONNREFUSED; 1039 break; 1040 case ICMP6_DST_UNREACH_ADMIN: 1041 case ICMP6_DST_UNREACH_NOROUTE: 1042 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1043 case ICMP6_DST_UNREACH_ADDR: 1044 /* Transient errors */ 1045 break; 1046 default: 1047 break; 1048 } 1049 break; 1050 case ICMP6_PACKET_TOO_BIG: { 
1051 struct T_unitdata_ind *tudi; 1052 struct T_opthdr *toh; 1053 size_t udi_size; 1054 mblk_t *newmp; 1055 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1056 sizeof (struct ip6_mtuinfo); 1057 sin6_t *sin6; 1058 struct ip6_mtuinfo *mtuinfo; 1059 1060 /* 1061 * If the application has requested to receive path mtu 1062 * information, send up an empty message containing an 1063 * IPV6_PATHMTU ancillary data item. 1064 */ 1065 if (!icmp->icmp_ipv6_recvpathmtu) 1066 break; 1067 1068 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1069 opt_length; 1070 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1071 BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors); 1072 break; 1073 } 1074 1075 /* 1076 * newmp->b_cont is left to NULL on purpose. This is an 1077 * empty message containing only ancillary data. 1078 */ 1079 newmp->b_datap->db_type = M_PROTO; 1080 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1081 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1082 tudi->PRIM_type = T_UNITDATA_IND; 1083 tudi->SRC_length = sizeof (sin6_t); 1084 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1085 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1086 tudi->OPT_length = opt_length; 1087 1088 sin6 = (sin6_t *)&tudi[1]; 1089 bzero(sin6, sizeof (sin6_t)); 1090 sin6->sin6_family = AF_INET6; 1091 sin6->sin6_addr = icmp->icmp_v6dst; 1092 1093 toh = (struct T_opthdr *)&sin6[1]; 1094 toh->level = IPPROTO_IPV6; 1095 toh->name = IPV6_PATHMTU; 1096 toh->len = opt_length; 1097 toh->status = 0; 1098 1099 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1100 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1101 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1102 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1103 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1104 /* 1105 * We've consumed everything we need from the original 1106 * message. Free it, then send our empty message. 
1107 */ 1108 freemsg(mp); 1109 putnext(q, newmp); 1110 return; 1111 } 1112 case ICMP6_TIME_EXCEEDED: 1113 /* Transient errors */ 1114 break; 1115 case ICMP6_PARAM_PROB: 1116 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1117 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1118 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1119 (uchar_t *)nexthdrp) { 1120 error = ECONNREFUSED; 1121 break; 1122 } 1123 break; 1124 } 1125 if (error == 0) { 1126 freemsg(mp); 1127 return; 1128 } 1129 1130 /* 1131 * Deliver T_UDERROR_IND when the application has asked for it. 1132 * The socket layer enables this automatically when connected. 1133 */ 1134 if (!icmp->icmp_dgram_errind) { 1135 freemsg(mp); 1136 return; 1137 } 1138 1139 sin6 = sin6_null; 1140 sin6.sin6_family = AF_INET6; 1141 sin6.sin6_addr = ip6h->ip6_dst; 1142 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1143 1144 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 1145 error); 1146 if (mp1) 1147 putnext(q, mp1); 1148 freemsg(mp); 1149 } 1150 1151 /* 1152 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1153 * The local address is filled in if endpoint is bound. The remote address 1154 * is filled in if remote address has been precified ("connected endpoint") 1155 * (The concept of connected CLTS sockets is alien to published TPI 1156 * but we support it anyway). 
1157 */ 1158 static void 1159 icmp_addr_req(queue_t *q, mblk_t *mp) 1160 { 1161 icmp_t *icmp = Q_TO_ICMP(q); 1162 mblk_t *ackmp; 1163 struct T_addr_ack *taa; 1164 1165 /* Make it large enough for worst case */ 1166 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1167 2 * sizeof (sin6_t), 1); 1168 if (ackmp == NULL) { 1169 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1170 return; 1171 } 1172 taa = (struct T_addr_ack *)ackmp->b_rptr; 1173 1174 bzero(taa, sizeof (struct T_addr_ack)); 1175 ackmp->b_wptr = (uchar_t *)&taa[1]; 1176 1177 taa->PRIM_type = T_ADDR_ACK; 1178 ackmp->b_datap->db_type = M_PCPROTO; 1179 rw_enter(&icmp->icmp_rwlock, RW_READER); 1180 /* 1181 * Note: Following code assumes 32 bit alignment of basic 1182 * data structures like sin_t and struct T_addr_ack. 1183 */ 1184 if (icmp->icmp_state != TS_UNBND) { 1185 /* 1186 * Fill in local address 1187 */ 1188 taa->LOCADDR_offset = sizeof (*taa); 1189 if (icmp->icmp_family == AF_INET) { 1190 sin_t *sin; 1191 1192 taa->LOCADDR_length = sizeof (sin_t); 1193 sin = (sin_t *)&taa[1]; 1194 /* Fill zeroes and then intialize non-zero fields */ 1195 *sin = sin_null; 1196 sin->sin_family = AF_INET; 1197 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1198 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1199 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1200 sin->sin_addr.s_addr); 1201 } else { 1202 /* 1203 * INADDR_ANY 1204 * icmp_v6src is not set, we might be bound to 1205 * broadcast/multicast. 
Use icmp_bound_v6src as 1206 * local address instead (that could 1207 * also still be INADDR_ANY) 1208 */ 1209 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1210 sin->sin_addr.s_addr); 1211 } 1212 ackmp->b_wptr = (uchar_t *)&sin[1]; 1213 } else { 1214 sin6_t *sin6; 1215 1216 ASSERT(icmp->icmp_family == AF_INET6); 1217 taa->LOCADDR_length = sizeof (sin6_t); 1218 sin6 = (sin6_t *)&taa[1]; 1219 /* Fill zeroes and then intialize non-zero fields */ 1220 *sin6 = sin6_null; 1221 sin6->sin6_family = AF_INET6; 1222 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1223 sin6->sin6_addr = icmp->icmp_v6src; 1224 } else { 1225 /* 1226 * UNSPECIFIED 1227 * icmp_v6src is not set, we might be bound to 1228 * broadcast/multicast. Use icmp_bound_v6src as 1229 * local address instead (that could 1230 * also still be UNSPECIFIED) 1231 */ 1232 sin6->sin6_addr = icmp->icmp_bound_v6src; 1233 } 1234 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1235 } 1236 } 1237 rw_exit(&icmp->icmp_rwlock); 1238 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1239 qreply(q, ackmp); 1240 } 1241 1242 static void 1243 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1244 { 1245 *tap = icmp_g_t_info_ack; 1246 1247 if (icmp->icmp_family == AF_INET6) 1248 tap->ADDR_size = sizeof (sin6_t); 1249 else 1250 tap->ADDR_size = sizeof (sin_t); 1251 tap->CURRENT_state = icmp->icmp_state; 1252 tap->OPT_size = icmp_max_optsize; 1253 } 1254 1255 /* 1256 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1257 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1258 * icmp_g_t_info_ack. The current state of the stream is copied from 1259 * icmp_state. 
1260 */ 1261 static void 1262 icmp_capability_req(queue_t *q, mblk_t *mp) 1263 { 1264 icmp_t *icmp = Q_TO_ICMP(q); 1265 t_uscalar_t cap_bits1; 1266 struct T_capability_ack *tcap; 1267 1268 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1269 1270 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1271 mp->b_datap->db_type, T_CAPABILITY_ACK); 1272 if (!mp) 1273 return; 1274 1275 tcap = (struct T_capability_ack *)mp->b_rptr; 1276 tcap->CAP_bits1 = 0; 1277 1278 if (cap_bits1 & TC1_INFO) { 1279 icmp_copy_info(&tcap->INFO_ack, icmp); 1280 tcap->CAP_bits1 |= TC1_INFO; 1281 } 1282 1283 qreply(q, mp); 1284 } 1285 1286 /* 1287 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1288 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1289 * The current state of the stream is copied from icmp_state. 1290 */ 1291 static void 1292 icmp_info_req(queue_t *q, mblk_t *mp) 1293 { 1294 icmp_t *icmp = Q_TO_ICMP(q); 1295 1296 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1297 T_INFO_ACK); 1298 if (!mp) 1299 return; 1300 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1301 qreply(q, mp); 1302 } 1303 1304 /* 1305 * IP recognizes seven kinds of bind requests: 1306 * 1307 * - A zero-length address binds only to the protocol number. 1308 * 1309 * - A 4-byte address is treated as a request to 1310 * validate that the address is a valid local IPv4 1311 * address, appropriate for an application to bind to. 1312 * IP does the verification, but does not make any note 1313 * of the address at this time. 1314 * 1315 * - A 16-byte address contains is treated as a request 1316 * to validate a local IPv6 address, as the 4-byte 1317 * address case above. 1318 * 1319 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1320 * use it for the inbound fanout of packets. 1321 * 1322 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1323 * use it for the inbound fanout of packets. 
1324 * 1325 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1326 * information consisting of local and remote addresses 1327 * and ports (unused for raw sockets). In this case, the addresses are both 1328 * validated as appropriate for this operation, and, if 1329 * so, the information is retained for use in the 1330 * inbound fanout. 1331 * 1332 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1333 * fanout information, like the 12-byte case above. 1334 * 1335 * IP will also fill in the IRE request mblk with information 1336 * regarding our peer. In all cases, we notify IP of our protocol 1337 * type by appending a single protocol byte to the bind request. 1338 */ 1339 static mblk_t * 1340 icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length, 1341 in_port_t fport) 1342 { 1343 char *cp; 1344 mblk_t *mp; 1345 struct T_bind_req *tbr; 1346 ipa_conn_t *ac; 1347 ipa6_conn_t *ac6; 1348 sin_t *sin; 1349 sin6_t *sin6; 1350 1351 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1352 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 1353 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1354 if (mp == NULL) 1355 return (NULL); 1356 mp->b_datap->db_type = M_PROTO; 1357 tbr = (struct T_bind_req *)mp->b_rptr; 1358 tbr->PRIM_type = bind_prim; 1359 tbr->ADDR_offset = sizeof (*tbr); 1360 tbr->CONIND_number = 0; 1361 tbr->ADDR_length = addr_length; 1362 cp = (char *)&tbr[1]; 1363 switch (addr_length) { 1364 case sizeof (ipa_conn_t): 1365 ASSERT(icmp->icmp_family == AF_INET); 1366 /* Append a request for an IRE */ 1367 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1368 if (mp->b_cont == NULL) { 1369 freemsg(mp); 1370 return (NULL); 1371 } 1372 mp->b_cont->b_wptr += sizeof (ire_t); 1373 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1374 1375 /* cp known to be 32 bit aligned */ 1376 ac = (ipa_conn_t *)cp; 1377 ac->ac_laddr = V4_PART_OF_V6(icmp->icmp_v6src); 1378 ac->ac_faddr = V4_PART_OF_V6(icmp->icmp_v6dst); 1379 
ac->ac_fport = fport; 1380 ac->ac_lport = 0; 1381 break; 1382 1383 case sizeof (ipa6_conn_t): 1384 ASSERT(icmp->icmp_family == AF_INET6); 1385 /* Append a request for an IRE */ 1386 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1387 if (mp->b_cont == NULL) { 1388 freemsg(mp); 1389 return (NULL); 1390 } 1391 mp->b_cont->b_wptr += sizeof (ire_t); 1392 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1393 1394 /* cp known to be 32 bit aligned */ 1395 ac6 = (ipa6_conn_t *)cp; 1396 ac6->ac6_laddr = icmp->icmp_v6src; 1397 ac6->ac6_faddr = icmp->icmp_v6dst; 1398 ac6->ac6_fport = fport; 1399 ac6->ac6_lport = 0; 1400 break; 1401 1402 case sizeof (sin_t): 1403 ASSERT(icmp->icmp_family == AF_INET); 1404 /* Append a request for an IRE */ 1405 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1406 if (!mp->b_cont) { 1407 freemsg(mp); 1408 return (NULL); 1409 } 1410 mp->b_cont->b_wptr += sizeof (ire_t); 1411 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1412 1413 sin = (sin_t *)cp; 1414 *sin = sin_null; 1415 sin->sin_family = AF_INET; 1416 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 1417 break; 1418 1419 case sizeof (sin6_t): 1420 ASSERT(icmp->icmp_family == AF_INET6); 1421 /* Append a request for an IRE */ 1422 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1423 if (!mp->b_cont) { 1424 freemsg(mp); 1425 return (NULL); 1426 } 1427 mp->b_cont->b_wptr += sizeof (ire_t); 1428 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1429 1430 sin6 = (sin6_t *)cp; 1431 *sin6 = sin6_null; 1432 sin6->sin6_family = AF_INET6; 1433 sin6->sin6_addr = icmp->icmp_bound_v6src; 1434 break; 1435 } 1436 /* Add protocol number to end */ 1437 cp[addr_length] = icmp->icmp_proto; 1438 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1439 return (mp); 1440 } 1441 1442 /* For /dev/icmp aka AF_INET open */ 1443 static int 1444 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1445 { 1446 return (icmp_open(q, devp, flag, sflag, credp, B_FALSE)); 1447 } 1448 1449 /* For 
/dev/icmp6 aka AF_INET6 open */ 1450 static int 1451 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1452 { 1453 return (icmp_open(q, devp, flag, sflag, credp, B_TRUE)); 1454 } 1455 1456 /* 1457 * This is the open routine for icmp. It allocates a icmp_t structure for 1458 * the stream and, on the first open of the module, creates an ND table. 1459 */ 1460 /*ARGSUSED2*/ 1461 static int 1462 icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1463 boolean_t isv6) 1464 { 1465 int err; 1466 icmp_t *icmp; 1467 conn_t *connp; 1468 dev_t conn_dev; 1469 zoneid_t zoneid; 1470 netstack_t *ns; 1471 icmp_stack_t *is; 1472 1473 /* If the stream is already open, return immediately. */ 1474 if (q->q_ptr != NULL) 1475 return (0); 1476 1477 if (sflag == MODOPEN) 1478 return (EINVAL); 1479 1480 ns = netstack_find_by_cred(credp); 1481 ASSERT(ns != NULL); 1482 is = ns->netstack_icmp; 1483 ASSERT(is != NULL); 1484 1485 /* 1486 * For exclusive stacks we set the zoneid to zero 1487 * to make ICMP operate as if in the global zone. 1488 */ 1489 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 1490 zoneid = GLOBAL_ZONEID; 1491 else 1492 zoneid = crgetzoneid(credp); 1493 1494 if ((conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) { 1495 netstack_rele(ns); 1496 return (EBUSY); 1497 } 1498 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1499 1500 connp = ipcl_conn_create(IPCL_RAWIPCONN, KM_SLEEP, ns); 1501 connp->conn_dev = conn_dev; 1502 icmp = connp->conn_icmp; 1503 1504 /* 1505 * ipcl_conn_create did a netstack_hold. Undo the hold that was 1506 * done by netstack_find_by_cred() 1507 */ 1508 netstack_rele(ns); 1509 1510 /* 1511 * Initialize the icmp_t structure for this stream. 
1512 */ 1513 q->q_ptr = connp; 1514 WR(q)->q_ptr = connp; 1515 connp->conn_rq = q; 1516 connp->conn_wq = WR(q); 1517 1518 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1519 ASSERT(connp->conn_ulp == IPPROTO_ICMP); 1520 ASSERT(connp->conn_icmp == icmp); 1521 ASSERT(icmp->icmp_connp == connp); 1522 1523 /* Set the initial state of the stream and the privilege status. */ 1524 icmp->icmp_state = TS_UNBND; 1525 if (isv6) { 1526 icmp->icmp_ipversion = IPV6_VERSION; 1527 icmp->icmp_family = AF_INET6; 1528 connp->conn_ulp = IPPROTO_ICMPV6; 1529 /* May be changed by a SO_PROTOTYPE socket option. */ 1530 icmp->icmp_proto = IPPROTO_ICMPV6; 1531 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1532 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1533 icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; 1534 connp->conn_af_isv6 = B_TRUE; 1535 connp->conn_flags |= IPCL_ISV6; 1536 } else { 1537 icmp->icmp_ipversion = IPV4_VERSION; 1538 icmp->icmp_family = AF_INET; 1539 /* May be changed by a SO_PROTOTYPE socket option. */ 1540 icmp->icmp_proto = IPPROTO_ICMP; 1541 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1542 icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; 1543 connp->conn_af_isv6 = B_FALSE; 1544 connp->conn_flags &= ~IPCL_ISV6; 1545 } 1546 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1547 icmp->icmp_pending_op = -1; 1548 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1549 connp->conn_zoneid = zoneid; 1550 1551 /* 1552 * If the caller has the process-wide flag set, then default to MAC 1553 * exempt mode. This allows read-down to unlabeled hosts. 
1554 */ 1555 if (getpflags(NET_MAC_AWARE, credp) != 0) 1556 icmp->icmp_mac_exempt = B_TRUE; 1557 1558 connp->conn_ulp_labeled = is_system_labeled(); 1559 1560 icmp->icmp_is = is; 1561 1562 q->q_hiwat = is->is_recv_hiwat; 1563 WR(q)->q_hiwat = is->is_xmit_hiwat; 1564 WR(q)->q_lowat = is->is_xmit_lowat; 1565 1566 connp->conn_recv = icmp_input; 1567 crhold(credp); 1568 connp->conn_cred = credp; 1569 1570 mutex_enter(&connp->conn_lock); 1571 connp->conn_state_flags &= ~CONN_INCIPIENT; 1572 mutex_exit(&connp->conn_lock); 1573 1574 qprocson(q); 1575 1576 if (icmp->icmp_family == AF_INET6) { 1577 /* Build initial header template for transmit */ 1578 if ((err = icmp_build_hdrs(icmp)) != 0) { 1579 rw_exit(&icmp->icmp_rwlock); 1580 qprocsoff(q); 1581 ipcl_conn_destroy(connp); 1582 return (err); 1583 } 1584 } 1585 rw_exit(&icmp->icmp_rwlock); 1586 1587 /* Set the Stream head write offset. */ 1588 (void) mi_set_sth_wroff(q, 1589 icmp->icmp_max_hdr_len + is->is_wroff_extra); 1590 (void) mi_set_sth_hiwat(q, q->q_hiwat); 1591 1592 return (0); 1593 } 1594 1595 /* 1596 * Which ICMP options OK to set through T_UNITDATA_REQ... 
1597 */ 1598 /* ARGSUSED */ 1599 static boolean_t 1600 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1601 { 1602 return (B_TRUE); 1603 } 1604 1605 /* 1606 * This routine gets default values of certain options whose default 1607 * values are maintained by protcol specific code 1608 */ 1609 /* ARGSUSED */ 1610 int 1611 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1612 { 1613 icmp_t *icmp = Q_TO_ICMP(q); 1614 icmp_stack_t *is = icmp->icmp_is; 1615 int *i1 = (int *)ptr; 1616 1617 switch (level) { 1618 case IPPROTO_IP: 1619 switch (name) { 1620 case IP_MULTICAST_TTL: 1621 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1622 return (sizeof (uchar_t)); 1623 case IP_MULTICAST_LOOP: 1624 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1625 return (sizeof (uchar_t)); 1626 } 1627 break; 1628 case IPPROTO_IPV6: 1629 switch (name) { 1630 case IPV6_MULTICAST_HOPS: 1631 *i1 = IP_DEFAULT_MULTICAST_TTL; 1632 return (sizeof (int)); 1633 case IPV6_MULTICAST_LOOP: 1634 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1635 return (sizeof (int)); 1636 case IPV6_UNICAST_HOPS: 1637 *i1 = is->is_ipv6_hoplimit; 1638 return (sizeof (int)); 1639 } 1640 break; 1641 case IPPROTO_ICMPV6: 1642 switch (name) { 1643 case ICMP6_FILTER: 1644 /* Make it look like "pass all" */ 1645 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1646 return (sizeof (icmp6_filter_t)); 1647 } 1648 break; 1649 } 1650 return (-1); 1651 } 1652 1653 /* 1654 * This routine retrieves the current status of socket options. 1655 * It returns the size of the option retrieved. 
1656 */ 1657 int 1658 icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr) 1659 { 1660 conn_t *connp = Q_TO_CONN(q); 1661 icmp_t *icmp = connp->conn_icmp; 1662 icmp_stack_t *is = icmp->icmp_is; 1663 int *i1 = (int *)ptr; 1664 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1665 1666 switch (level) { 1667 case SOL_SOCKET: 1668 switch (name) { 1669 case SO_DEBUG: 1670 *i1 = icmp->icmp_debug; 1671 break; 1672 case SO_TYPE: 1673 *i1 = SOCK_RAW; 1674 break; 1675 case SO_PROTOTYPE: 1676 *i1 = icmp->icmp_proto; 1677 break; 1678 case SO_REUSEADDR: 1679 *i1 = icmp->icmp_reuseaddr; 1680 break; 1681 1682 /* 1683 * The following three items are available here, 1684 * but are only meaningful to IP. 1685 */ 1686 case SO_DONTROUTE: 1687 *i1 = icmp->icmp_dontroute; 1688 break; 1689 case SO_USELOOPBACK: 1690 *i1 = icmp->icmp_useloopback; 1691 break; 1692 case SO_BROADCAST: 1693 *i1 = icmp->icmp_broadcast; 1694 break; 1695 1696 case SO_SNDBUF: 1697 ASSERT(q->q_hiwat <= INT_MAX); 1698 *i1 = (int)q->q_hiwat; 1699 break; 1700 case SO_RCVBUF: 1701 ASSERT(RD(q)->q_hiwat <= INT_MAX); 1702 *i1 = (int)RD(q)->q_hiwat; 1703 break; 1704 case SO_DGRAM_ERRIND: 1705 *i1 = icmp->icmp_dgram_errind; 1706 break; 1707 case SO_TIMESTAMP: 1708 *i1 = icmp->icmp_timestamp; 1709 break; 1710 case SO_MAC_EXEMPT: 1711 *i1 = icmp->icmp_mac_exempt; 1712 break; 1713 case SO_DOMAIN: 1714 *i1 = icmp->icmp_family; 1715 break; 1716 1717 /* 1718 * Following four not meaningful for icmp 1719 * Action is same as "default" to which we fallthrough 1720 * so we keep them in comments. 1721 * case SO_LINGER: 1722 * case SO_KEEPALIVE: 1723 * case SO_OOBINLINE: 1724 * case SO_ALLZONES: 1725 */ 1726 default: 1727 return (-1); 1728 } 1729 break; 1730 case IPPROTO_IP: 1731 /* 1732 * Only allow IPv4 option processing on IPv4 sockets. 
1733 */ 1734 if (icmp->icmp_family != AF_INET) 1735 return (-1); 1736 1737 switch (name) { 1738 case IP_OPTIONS: 1739 case T_IP_OPTIONS: 1740 /* Options are passed up with each packet */ 1741 return (0); 1742 case IP_HDRINCL: 1743 *i1 = (int)icmp->icmp_hdrincl; 1744 break; 1745 case IP_TOS: 1746 case T_IP_TOS: 1747 *i1 = (int)icmp->icmp_type_of_service; 1748 break; 1749 case IP_TTL: 1750 *i1 = (int)icmp->icmp_ttl; 1751 break; 1752 case IP_MULTICAST_IF: 1753 /* 0 address if not set */ 1754 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1755 return (sizeof (ipaddr_t)); 1756 case IP_MULTICAST_TTL: 1757 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1758 return (sizeof (uchar_t)); 1759 case IP_MULTICAST_LOOP: 1760 *ptr = connp->conn_multicast_loop; 1761 return (sizeof (uint8_t)); 1762 case IP_BOUND_IF: 1763 /* Zero if not set */ 1764 *i1 = icmp->icmp_bound_if; 1765 break; /* goto sizeof (int) option return */ 1766 case IP_UNSPEC_SRC: 1767 *ptr = icmp->icmp_unspec_source; 1768 break; /* goto sizeof (int) option return */ 1769 case IP_BROADCAST_TTL: 1770 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 1771 return (sizeof (uchar_t)); 1772 case IP_RECVIF: 1773 *ptr = icmp->icmp_recvif; 1774 break; /* goto sizeof (int) option return */ 1775 case IP_RECVPKTINFO: 1776 /* 1777 * This also handles IP_PKTINFO. 1778 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1779 * Differentiation is based on the size of the argument 1780 * passed in. 1781 * This option is handled in IP which will return an 1782 * error for IP_PKTINFO as it's not supported as a 1783 * sticky option. 1784 */ 1785 return (-EINVAL); 1786 /* 1787 * Cannot "get" the value of following options 1788 * at this level. Action is same as "default" to 1789 * which we fallthrough so we keep them in comments. 
1790 * 1791 * case IP_ADD_MEMBERSHIP: 1792 * case IP_DROP_MEMBERSHIP: 1793 * case IP_BLOCK_SOURCE: 1794 * case IP_UNBLOCK_SOURCE: 1795 * case IP_ADD_SOURCE_MEMBERSHIP: 1796 * case IP_DROP_SOURCE_MEMBERSHIP: 1797 * case MCAST_JOIN_GROUP: 1798 * case MCAST_LEAVE_GROUP: 1799 * case MCAST_BLOCK_SOURCE: 1800 * case MCAST_UNBLOCK_SOURCE: 1801 * case MCAST_JOIN_SOURCE_GROUP: 1802 * case MCAST_LEAVE_SOURCE_GROUP: 1803 * case MRT_INIT: 1804 * case MRT_DONE: 1805 * case MRT_ADD_VIF: 1806 * case MRT_DEL_VIF: 1807 * case MRT_ADD_MFC: 1808 * case MRT_DEL_MFC: 1809 * case MRT_VERSION: 1810 * case MRT_ASSERT: 1811 * case IP_SEC_OPT: 1812 * case IP_DONTFAILOVER_IF: 1813 * case IP_NEXTHOP: 1814 */ 1815 default: 1816 return (-1); 1817 } 1818 break; 1819 case IPPROTO_IPV6: 1820 /* 1821 * Only allow IPv6 option processing on native IPv6 sockets. 1822 */ 1823 if (icmp->icmp_family != AF_INET6) 1824 return (-1); 1825 switch (name) { 1826 case IPV6_UNICAST_HOPS: 1827 *i1 = (unsigned int)icmp->icmp_ttl; 1828 break; 1829 case IPV6_MULTICAST_IF: 1830 /* 0 index if not set */ 1831 *i1 = icmp->icmp_multicast_if_index; 1832 break; 1833 case IPV6_MULTICAST_HOPS: 1834 *i1 = icmp->icmp_multicast_ttl; 1835 break; 1836 case IPV6_MULTICAST_LOOP: 1837 *i1 = connp->conn_multicast_loop; 1838 break; 1839 case IPV6_BOUND_IF: 1840 /* Zero if not set */ 1841 *i1 = icmp->icmp_bound_if; 1842 break; 1843 case IPV6_UNSPEC_SRC: 1844 *i1 = icmp->icmp_unspec_source; 1845 break; 1846 case IPV6_CHECKSUM: 1847 /* 1848 * Return offset or -1 if no checksum offset. 
1849 * Does not apply to IPPROTO_ICMPV6 1850 */ 1851 if (icmp->icmp_proto == IPPROTO_ICMPV6) 1852 return (-1); 1853 1854 if (icmp->icmp_raw_checksum) { 1855 *i1 = icmp->icmp_checksum_off; 1856 } else { 1857 *i1 = -1; 1858 } 1859 break; 1860 case IPV6_JOIN_GROUP: 1861 case IPV6_LEAVE_GROUP: 1862 case MCAST_JOIN_GROUP: 1863 case MCAST_LEAVE_GROUP: 1864 case MCAST_BLOCK_SOURCE: 1865 case MCAST_UNBLOCK_SOURCE: 1866 case MCAST_JOIN_SOURCE_GROUP: 1867 case MCAST_LEAVE_SOURCE_GROUP: 1868 /* cannot "get" the value for these */ 1869 return (-1); 1870 case IPV6_RECVPKTINFO: 1871 *i1 = icmp->icmp_ip_recvpktinfo; 1872 break; 1873 case IPV6_RECVTCLASS: 1874 *i1 = icmp->icmp_ipv6_recvtclass; 1875 break; 1876 case IPV6_RECVPATHMTU: 1877 *i1 = icmp->icmp_ipv6_recvpathmtu; 1878 break; 1879 case IPV6_V6ONLY: 1880 *i1 = 1; 1881 break; 1882 case IPV6_RECVHOPLIMIT: 1883 *i1 = icmp->icmp_ipv6_recvhoplimit; 1884 break; 1885 case IPV6_RECVHOPOPTS: 1886 *i1 = icmp->icmp_ipv6_recvhopopts; 1887 break; 1888 case IPV6_RECVDSTOPTS: 1889 *i1 = icmp->icmp_ipv6_recvdstopts; 1890 break; 1891 case _OLD_IPV6_RECVDSTOPTS: 1892 *i1 = icmp->icmp_old_ipv6_recvdstopts; 1893 break; 1894 case IPV6_RECVRTHDRDSTOPTS: 1895 *i1 = icmp->icmp_ipv6_recvrtdstopts; 1896 break; 1897 case IPV6_RECVRTHDR: 1898 *i1 = icmp->icmp_ipv6_recvrthdr; 1899 break; 1900 case IPV6_PKTINFO: { 1901 /* XXX assumes that caller has room for max size! 
*/ 1902 struct in6_pktinfo *pkti; 1903 1904 pkti = (struct in6_pktinfo *)ptr; 1905 if (ipp->ipp_fields & IPPF_IFINDEX) 1906 pkti->ipi6_ifindex = ipp->ipp_ifindex; 1907 else 1908 pkti->ipi6_ifindex = 0; 1909 if (ipp->ipp_fields & IPPF_ADDR) 1910 pkti->ipi6_addr = ipp->ipp_addr; 1911 else 1912 pkti->ipi6_addr = ipv6_all_zeros; 1913 return (sizeof (struct in6_pktinfo)); 1914 } 1915 case IPV6_NEXTHOP: { 1916 sin6_t *sin6 = (sin6_t *)ptr; 1917 1918 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 1919 return (0); 1920 *sin6 = sin6_null; 1921 sin6->sin6_family = AF_INET6; 1922 sin6->sin6_addr = ipp->ipp_nexthop; 1923 return (sizeof (sin6_t)); 1924 } 1925 case IPV6_HOPOPTS: 1926 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 1927 return (0); 1928 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 1929 return (0); 1930 bcopy((char *)ipp->ipp_hopopts + 1931 icmp->icmp_label_len_v6, ptr, 1932 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1933 if (icmp->icmp_label_len_v6 > 0) { 1934 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 1935 ptr[1] = (ipp->ipp_hopoptslen - 1936 icmp->icmp_label_len_v6 + 7) / 8 - 1; 1937 } 1938 return (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1939 case IPV6_RTHDRDSTOPTS: 1940 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 1941 return (0); 1942 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 1943 return (ipp->ipp_rtdstoptslen); 1944 case IPV6_RTHDR: 1945 if (!(ipp->ipp_fields & IPPF_RTHDR)) 1946 return (0); 1947 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 1948 return (ipp->ipp_rthdrlen); 1949 case IPV6_DSTOPTS: 1950 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 1951 return (0); 1952 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 1953 return (ipp->ipp_dstoptslen); 1954 case IPV6_PATHMTU: 1955 if (!(ipp->ipp_fields & IPPF_PATHMTU)) 1956 return (0); 1957 1958 return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0, 1959 (struct ip6_mtuinfo *)ptr, is->is_netstack)); 1960 case IPV6_TCLASS: 1961 if (ipp->ipp_fields & IPPF_TCLASS) 1962 *i1 = ipp->ipp_tclass; 1963 else 1964 *i1 = 
IPV6_FLOW_TCLASS( 1965 IPV6_DEFAULT_VERS_AND_FLOW); 1966 break; 1967 default: 1968 return (-1); 1969 } 1970 break; 1971 case IPPROTO_ICMPV6: 1972 /* 1973 * Only allow IPv6 option processing on native IPv6 sockets. 1974 */ 1975 if (icmp->icmp_family != AF_INET6) 1976 return (-1); 1977 1978 if (icmp->icmp_proto != IPPROTO_ICMPV6) 1979 return (-1); 1980 1981 switch (name) { 1982 case ICMP6_FILTER: 1983 if (icmp->icmp_filter == NULL) { 1984 /* Make it look like "pass all" */ 1985 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1986 } else { 1987 (void) bcopy(icmp->icmp_filter, ptr, 1988 sizeof (icmp6_filter_t)); 1989 } 1990 return (sizeof (icmp6_filter_t)); 1991 default: 1992 return (-1); 1993 } 1994 default: 1995 return (-1); 1996 } 1997 return (sizeof (int)); 1998 } 1999 2000 /* 2001 * This routine retrieves the current status of socket options. 2002 * It returns the size of the option retrieved. 2003 */ 2004 int 2005 icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 2006 { 2007 icmp_t *icmp = Q_TO_ICMP(q); 2008 int err; 2009 2010 rw_enter(&icmp->icmp_rwlock, RW_READER); 2011 err = icmp_opt_get_locked(q, level, name, ptr); 2012 rw_exit(&icmp->icmp_rwlock); 2013 return (err); 2014 } 2015 2016 2017 /* This routine sets socket options. */ 2018 /* ARGSUSED */ 2019 int 2020 icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name, 2021 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2022 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2023 { 2024 conn_t *connp = Q_TO_CONN(q); 2025 icmp_t *icmp = connp->conn_icmp; 2026 icmp_stack_t *is = icmp->icmp_is; 2027 int *i1 = (int *)invalp; 2028 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2029 boolean_t checkonly; 2030 int error; 2031 2032 switch (optset_context) { 2033 case SETFN_OPTCOM_CHECKONLY: 2034 checkonly = B_TRUE; 2035 /* 2036 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 2037 * inlen != 0 implies value supplied and 2038 * we have to "pretend" to set it. 
2039 * inlen == 0 implies that there is no 2040 * value part in T_CHECK request and just validation 2041 * done elsewhere should be enough, we just return here. 2042 */ 2043 if (inlen == 0) { 2044 *outlenp = 0; 2045 return (0); 2046 } 2047 break; 2048 case SETFN_OPTCOM_NEGOTIATE: 2049 checkonly = B_FALSE; 2050 break; 2051 case SETFN_UD_NEGOTIATE: 2052 case SETFN_CONN_NEGOTIATE: 2053 checkonly = B_FALSE; 2054 /* 2055 * Negotiating local and "association-related" options 2056 * through T_UNITDATA_REQ. 2057 * 2058 * Following routine can filter out ones we do not 2059 * want to be "set" this way. 2060 */ 2061 if (!icmp_opt_allow_udr_set(level, name)) { 2062 *outlenp = 0; 2063 return (EINVAL); 2064 } 2065 break; 2066 default: 2067 /* 2068 * We should never get here 2069 */ 2070 *outlenp = 0; 2071 return (EINVAL); 2072 } 2073 2074 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 2075 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 2076 2077 /* 2078 * For fixed length options, no sanity check 2079 * of passed in length is done. It is assumed *_optcom_req() 2080 * routines do the right thing. 
2081 */ 2082 2083 switch (level) { 2084 case SOL_SOCKET: 2085 switch (name) { 2086 case SO_DEBUG: 2087 if (!checkonly) 2088 icmp->icmp_debug = onoff; 2089 break; 2090 case SO_PROTOTYPE: 2091 if ((*i1 & 0xFF) != IPPROTO_ICMP && 2092 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 2093 secpolicy_net_rawaccess(cr) != 0) { 2094 *outlenp = 0; 2095 return (EACCES); 2096 } 2097 /* Can't use IPPROTO_RAW with IPv6 */ 2098 if ((*i1 & 0xFF) == IPPROTO_RAW && 2099 icmp->icmp_family == AF_INET6) { 2100 *outlenp = 0; 2101 return (EPROTONOSUPPORT); 2102 } 2103 if (checkonly) { 2104 /* T_CHECK case */ 2105 *(int *)outvalp = (*i1 & 0xFF); 2106 break; 2107 } 2108 icmp->icmp_proto = *i1 & 0xFF; 2109 if ((icmp->icmp_proto == IPPROTO_RAW || 2110 icmp->icmp_proto == IPPROTO_IGMP) && 2111 icmp->icmp_family == AF_INET) 2112 icmp->icmp_hdrincl = 1; 2113 else 2114 icmp->icmp_hdrincl = 0; 2115 2116 if (icmp->icmp_family == AF_INET6 && 2117 icmp->icmp_proto == IPPROTO_ICMPV6) { 2118 /* Set offset for icmp6_cksum */ 2119 icmp->icmp_raw_checksum = 0; 2120 icmp->icmp_checksum_off = 2; 2121 } 2122 if (icmp->icmp_proto == IPPROTO_UDP || 2123 icmp->icmp_proto == IPPROTO_TCP || 2124 icmp->icmp_proto == IPPROTO_SCTP) { 2125 icmp->icmp_no_tp_cksum = 1; 2126 icmp->icmp_sticky_ipp.ipp_fields |= 2127 IPPF_NO_CKSUM; 2128 } else { 2129 icmp->icmp_no_tp_cksum = 0; 2130 icmp->icmp_sticky_ipp.ipp_fields &= 2131 ~IPPF_NO_CKSUM; 2132 } 2133 2134 if (icmp->icmp_filter != NULL && 2135 icmp->icmp_proto != IPPROTO_ICMPV6) { 2136 kmem_free(icmp->icmp_filter, 2137 sizeof (icmp6_filter_t)); 2138 icmp->icmp_filter = NULL; 2139 } 2140 2141 /* Rebuild the header template */ 2142 error = icmp_build_hdrs(icmp); 2143 if (error != 0) { 2144 *outlenp = 0; 2145 return (error); 2146 } 2147 2148 /* 2149 * For SCTP, we don't use icmp_bind_proto() for 2150 * raw socket binding. Note that we do not need 2151 * to set *outlenp. 2152 * FIXME: how does SCTP work? 
2153 */ 2154 if (icmp->icmp_proto == IPPROTO_SCTP) 2155 return (0); 2156 2157 *outlenp = sizeof (int); 2158 *(int *)outvalp = *i1 & 0xFF; 2159 2160 /* Drop lock across the bind operation */ 2161 rw_exit(&icmp->icmp_rwlock); 2162 icmp_bind_proto(q); 2163 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2164 return (0); 2165 case SO_REUSEADDR: 2166 if (!checkonly) 2167 icmp->icmp_reuseaddr = onoff; 2168 break; 2169 2170 /* 2171 * The following three items are available here, 2172 * but are only meaningful to IP. 2173 */ 2174 case SO_DONTROUTE: 2175 if (!checkonly) 2176 icmp->icmp_dontroute = onoff; 2177 break; 2178 case SO_USELOOPBACK: 2179 if (!checkonly) 2180 icmp->icmp_useloopback = onoff; 2181 break; 2182 case SO_BROADCAST: 2183 if (!checkonly) 2184 icmp->icmp_broadcast = onoff; 2185 break; 2186 2187 case SO_SNDBUF: 2188 if (*i1 > is->is_max_buf) { 2189 *outlenp = 0; 2190 return (ENOBUFS); 2191 } 2192 if (!checkonly) { 2193 q->q_hiwat = *i1; 2194 } 2195 break; 2196 case SO_RCVBUF: 2197 if (*i1 > is->is_max_buf) { 2198 *outlenp = 0; 2199 return (ENOBUFS); 2200 } 2201 if (!checkonly) { 2202 RD(q)->q_hiwat = *i1; 2203 rw_exit(&icmp->icmp_rwlock); 2204 (void) mi_set_sth_hiwat(RD(q), *i1); 2205 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2206 } 2207 break; 2208 case SO_DGRAM_ERRIND: 2209 if (!checkonly) 2210 icmp->icmp_dgram_errind = onoff; 2211 break; 2212 case SO_ALLZONES: 2213 /* 2214 * "soft" error (negative) 2215 * option not handled at this level 2216 * Note: Do not modify *outlenp 2217 */ 2218 return (-EINVAL); 2219 case SO_TIMESTAMP: 2220 if (!checkonly) { 2221 icmp->icmp_timestamp = onoff; 2222 } 2223 break; 2224 case SO_MAC_EXEMPT: 2225 if (secpolicy_net_mac_aware(cr) != 0 || 2226 icmp->icmp_state != TS_UNBND) 2227 return (EACCES); 2228 if (!checkonly) 2229 icmp->icmp_mac_exempt = onoff; 2230 break; 2231 /* 2232 * Following three not meaningful for icmp 2233 * Action is same as "default" so we keep them 2234 * in comments. 
2235 * case SO_LINGER: 2236 * case SO_KEEPALIVE: 2237 * case SO_OOBINLINE: 2238 */ 2239 default: 2240 *outlenp = 0; 2241 return (EINVAL); 2242 } 2243 break; 2244 case IPPROTO_IP: 2245 /* 2246 * Only allow IPv4 option processing on IPv4 sockets. 2247 */ 2248 if (icmp->icmp_family != AF_INET) { 2249 *outlenp = 0; 2250 return (ENOPROTOOPT); 2251 } 2252 switch (name) { 2253 case IP_OPTIONS: 2254 case T_IP_OPTIONS: 2255 /* Save options for use by IP. */ 2256 if ((inlen & 0x3) || 2257 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2258 *outlenp = 0; 2259 return (EINVAL); 2260 } 2261 if (checkonly) 2262 break; 2263 2264 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2265 &icmp->icmp_ip_snd_options_len, 2266 icmp->icmp_label_len, invalp, inlen)) { 2267 *outlenp = 0; 2268 return (ENOMEM); 2269 } 2270 2271 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2272 icmp->icmp_ip_snd_options_len; 2273 rw_exit(&icmp->icmp_rwlock); 2274 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2275 is->is_wroff_extra); 2276 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2277 break; 2278 case IP_HDRINCL: 2279 if (!checkonly) 2280 icmp->icmp_hdrincl = onoff; 2281 break; 2282 case IP_TOS: 2283 case T_IP_TOS: 2284 if (!checkonly) { 2285 icmp->icmp_type_of_service = (uint8_t)*i1; 2286 } 2287 break; 2288 case IP_TTL: 2289 if (!checkonly) { 2290 icmp->icmp_ttl = (uint8_t)*i1; 2291 } 2292 break; 2293 case IP_MULTICAST_IF: 2294 /* 2295 * TODO should check OPTMGMT reply and undo this if 2296 * there is an error. 2297 */ 2298 if (!checkonly) 2299 icmp->icmp_multicast_if_addr = *i1; 2300 break; 2301 case IP_MULTICAST_TTL: 2302 if (!checkonly) 2303 icmp->icmp_multicast_ttl = *invalp; 2304 break; 2305 case IP_MULTICAST_LOOP: 2306 if (!checkonly) { 2307 connp->conn_multicast_loop = 2308 (*invalp == 0) ? 
0 : 1; 2309 } 2310 break; 2311 case IP_BOUND_IF: 2312 if (!checkonly) 2313 icmp->icmp_bound_if = *i1; 2314 break; 2315 case IP_UNSPEC_SRC: 2316 if (!checkonly) 2317 icmp->icmp_unspec_source = onoff; 2318 break; 2319 case IP_BROADCAST_TTL: 2320 if (!checkonly) 2321 connp->conn_broadcast_ttl = *invalp; 2322 break; 2323 case IP_RECVIF: 2324 if (!checkonly) 2325 icmp->icmp_recvif = onoff; 2326 /* 2327 * pass to ip 2328 */ 2329 return (-EINVAL); 2330 case IP_PKTINFO: { 2331 /* 2332 * This also handles IP_RECVPKTINFO. 2333 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2334 * Differentiation is based on the size of the argument 2335 * passed in. 2336 */ 2337 struct in_pktinfo *pktinfop; 2338 ip4_pkt_t *attr_pktinfop; 2339 2340 if (checkonly) 2341 break; 2342 2343 if (inlen == sizeof (int)) { 2344 /* 2345 * This is IP_RECVPKTINFO option. 2346 * Keep a local copy of wether this option is 2347 * set or not and pass it down to IP for 2348 * processing. 2349 */ 2350 icmp->icmp_ip_recvpktinfo = onoff; 2351 return (-EINVAL); 2352 } 2353 2354 2355 if (inlen != sizeof (struct in_pktinfo)) 2356 return (EINVAL); 2357 2358 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2359 == NULL) { 2360 /* 2361 * sticky option is not supported 2362 */ 2363 return (EINVAL); 2364 } 2365 2366 pktinfop = (struct in_pktinfo *)invalp; 2367 2368 /* 2369 * Atleast one of the values should be specified 2370 */ 2371 if (pktinfop->ipi_ifindex == 0 && 2372 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2373 return (EINVAL); 2374 } 2375 2376 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2377 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2378 } 2379 break; 2380 case IP_ADD_MEMBERSHIP: 2381 case IP_DROP_MEMBERSHIP: 2382 case IP_BLOCK_SOURCE: 2383 case IP_UNBLOCK_SOURCE: 2384 case IP_ADD_SOURCE_MEMBERSHIP: 2385 case IP_DROP_SOURCE_MEMBERSHIP: 2386 case MCAST_JOIN_GROUP: 2387 case MCAST_LEAVE_GROUP: 2388 case MCAST_BLOCK_SOURCE: 2389 case MCAST_UNBLOCK_SOURCE: 2390 case 
MCAST_JOIN_SOURCE_GROUP: 2391 case MCAST_LEAVE_SOURCE_GROUP: 2392 case MRT_INIT: 2393 case MRT_DONE: 2394 case MRT_ADD_VIF: 2395 case MRT_DEL_VIF: 2396 case MRT_ADD_MFC: 2397 case MRT_DEL_MFC: 2398 case MRT_VERSION: 2399 case MRT_ASSERT: 2400 case IP_SEC_OPT: 2401 case IP_DONTFAILOVER_IF: 2402 case IP_NEXTHOP: 2403 /* 2404 * "soft" error (negative) 2405 * option not handled at this level 2406 * Note: Do not modify *outlenp 2407 */ 2408 return (-EINVAL); 2409 default: 2410 *outlenp = 0; 2411 return (EINVAL); 2412 } 2413 break; 2414 case IPPROTO_IPV6: { 2415 ip6_pkt_t *ipp; 2416 boolean_t sticky; 2417 2418 if (icmp->icmp_family != AF_INET6) { 2419 *outlenp = 0; 2420 return (ENOPROTOOPT); 2421 } 2422 /* 2423 * Deal with both sticky options and ancillary data 2424 */ 2425 if (thisdg_attrs == NULL) { 2426 /* sticky options, or none */ 2427 ipp = &icmp->icmp_sticky_ipp; 2428 sticky = B_TRUE; 2429 } else { 2430 /* ancillary data */ 2431 ipp = (ip6_pkt_t *)thisdg_attrs; 2432 sticky = B_FALSE; 2433 } 2434 2435 switch (name) { 2436 case IPV6_MULTICAST_IF: 2437 if (!checkonly) 2438 icmp->icmp_multicast_if_index = *i1; 2439 break; 2440 case IPV6_UNICAST_HOPS: 2441 /* -1 means use default */ 2442 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2443 *outlenp = 0; 2444 return (EINVAL); 2445 } 2446 if (!checkonly) { 2447 if (*i1 == -1) { 2448 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2449 is->is_ipv6_hoplimit; 2450 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2451 /* Pass modified value to IP. 
*/ 2452 *i1 = ipp->ipp_hoplimit; 2453 } else { 2454 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2455 (uint8_t)*i1; 2456 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2457 } 2458 /* Rebuild the header template */ 2459 error = icmp_build_hdrs(icmp); 2460 if (error != 0) { 2461 *outlenp = 0; 2462 return (error); 2463 } 2464 } 2465 break; 2466 case IPV6_MULTICAST_HOPS: 2467 /* -1 means use default */ 2468 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2469 *outlenp = 0; 2470 return (EINVAL); 2471 } 2472 if (!checkonly) { 2473 if (*i1 == -1) { 2474 icmp->icmp_multicast_ttl = 2475 ipp->ipp_multicast_hops = 2476 IP_DEFAULT_MULTICAST_TTL; 2477 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2478 /* Pass modified value to IP. */ 2479 *i1 = icmp->icmp_multicast_ttl; 2480 } else { 2481 icmp->icmp_multicast_ttl = 2482 ipp->ipp_multicast_hops = 2483 (uint8_t)*i1; 2484 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2485 } 2486 } 2487 break; 2488 case IPV6_MULTICAST_LOOP: 2489 if (*i1 != 0 && *i1 != 1) { 2490 *outlenp = 0; 2491 return (EINVAL); 2492 } 2493 if (!checkonly) 2494 connp->conn_multicast_loop = *i1; 2495 break; 2496 case IPV6_CHECKSUM: 2497 /* 2498 * Integer offset into the user data of where the 2499 * checksum is located. 2500 * Offset of -1 disables option. 2501 * Does not apply to IPPROTO_ICMPV6. 
2502 */ 2503 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2504 *outlenp = 0; 2505 return (EINVAL); 2506 } 2507 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2508 /* Negative or not 16 bit aligned offset */ 2509 *outlenp = 0; 2510 return (EINVAL); 2511 } 2512 if (checkonly) 2513 break; 2514 2515 if (*i1 == -1) { 2516 icmp->icmp_raw_checksum = 0; 2517 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2518 } else { 2519 icmp->icmp_raw_checksum = 1; 2520 icmp->icmp_checksum_off = *i1; 2521 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2522 } 2523 /* Rebuild the header template */ 2524 error = icmp_build_hdrs(icmp); 2525 if (error != 0) { 2526 *outlenp = 0; 2527 return (error); 2528 } 2529 break; 2530 case IPV6_JOIN_GROUP: 2531 case IPV6_LEAVE_GROUP: 2532 case MCAST_JOIN_GROUP: 2533 case MCAST_LEAVE_GROUP: 2534 case MCAST_BLOCK_SOURCE: 2535 case MCAST_UNBLOCK_SOURCE: 2536 case MCAST_JOIN_SOURCE_GROUP: 2537 case MCAST_LEAVE_SOURCE_GROUP: 2538 /* 2539 * "soft" error (negative) 2540 * option not handled at this level 2541 * Note: Do not modify *outlenp 2542 */ 2543 return (-EINVAL); 2544 case IPV6_BOUND_IF: 2545 if (!checkonly) 2546 icmp->icmp_bound_if = *i1; 2547 break; 2548 case IPV6_UNSPEC_SRC: 2549 if (!checkonly) 2550 icmp->icmp_unspec_source = onoff; 2551 break; 2552 case IPV6_RECVTCLASS: 2553 if (!checkonly) 2554 icmp->icmp_ipv6_recvtclass = onoff; 2555 break; 2556 /* 2557 * Set boolean switches for ancillary data delivery 2558 */ 2559 case IPV6_RECVPKTINFO: 2560 if (!checkonly) 2561 icmp->icmp_ip_recvpktinfo = onoff; 2562 break; 2563 case IPV6_RECVPATHMTU: 2564 if (!checkonly) 2565 icmp->icmp_ipv6_recvpathmtu = onoff; 2566 break; 2567 case IPV6_RECVHOPLIMIT: 2568 if (!checkonly) 2569 icmp->icmp_ipv6_recvhoplimit = onoff; 2570 break; 2571 case IPV6_RECVHOPOPTS: 2572 if (!checkonly) 2573 icmp->icmp_ipv6_recvhopopts = onoff; 2574 break; 2575 case IPV6_RECVDSTOPTS: 2576 if (!checkonly) 2577 icmp->icmp_ipv6_recvdstopts = onoff; 2578 break; 2579 case _OLD_IPV6_RECVDSTOPTS: 2580 
if (!checkonly) 2581 icmp->icmp_old_ipv6_recvdstopts = onoff; 2582 break; 2583 case IPV6_RECVRTHDRDSTOPTS: 2584 if (!checkonly) 2585 icmp->icmp_ipv6_recvrtdstopts = onoff; 2586 break; 2587 case IPV6_RECVRTHDR: 2588 if (!checkonly) 2589 icmp->icmp_ipv6_recvrthdr = onoff; 2590 break; 2591 /* 2592 * Set sticky options or ancillary data. 2593 * If sticky options, (re)build any extension headers 2594 * that might be needed as a result. 2595 */ 2596 case IPV6_PKTINFO: 2597 /* 2598 * The source address and ifindex are verified 2599 * in ip_opt_set(). For ancillary data the 2600 * source address is checked in ip_wput_v6. 2601 */ 2602 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2603 return (EINVAL); 2604 if (checkonly) 2605 break; 2606 2607 if (inlen == 0) { 2608 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2609 ipp->ipp_sticky_ignored |= 2610 (IPPF_IFINDEX|IPPF_ADDR); 2611 } else { 2612 struct in6_pktinfo *pkti; 2613 2614 pkti = (struct in6_pktinfo *)invalp; 2615 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2616 ipp->ipp_addr = pkti->ipi6_addr; 2617 if (ipp->ipp_ifindex != 0) 2618 ipp->ipp_fields |= IPPF_IFINDEX; 2619 else 2620 ipp->ipp_fields &= ~IPPF_IFINDEX; 2621 if (!IN6_IS_ADDR_UNSPECIFIED( 2622 &ipp->ipp_addr)) 2623 ipp->ipp_fields |= IPPF_ADDR; 2624 else 2625 ipp->ipp_fields &= ~IPPF_ADDR; 2626 } 2627 if (sticky) { 2628 error = icmp_build_hdrs(icmp); 2629 if (error != 0) 2630 return (error); 2631 } 2632 break; 2633 case IPV6_HOPLIMIT: 2634 /* This option can only be used as ancillary data. 
*/ 2635 if (sticky) 2636 return (EINVAL); 2637 if (inlen != 0 && inlen != sizeof (int)) 2638 return (EINVAL); 2639 if (checkonly) 2640 break; 2641 2642 if (inlen == 0) { 2643 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2644 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2645 } else { 2646 if (*i1 > 255 || *i1 < -1) 2647 return (EINVAL); 2648 if (*i1 == -1) 2649 ipp->ipp_hoplimit = 2650 is->is_ipv6_hoplimit; 2651 else 2652 ipp->ipp_hoplimit = *i1; 2653 ipp->ipp_fields |= IPPF_HOPLIMIT; 2654 } 2655 break; 2656 case IPV6_TCLASS: 2657 /* 2658 * IPV6_RECVTCLASS accepts -1 as use kernel default 2659 * and [0, 255] as the actualy traffic class. 2660 */ 2661 if (inlen != 0 && inlen != sizeof (int)) 2662 return (EINVAL); 2663 if (checkonly) 2664 break; 2665 2666 if (inlen == 0) { 2667 ipp->ipp_fields &= ~IPPF_TCLASS; 2668 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2669 } else { 2670 if (*i1 >= 256 || *i1 < -1) 2671 return (EINVAL); 2672 if (*i1 == -1) { 2673 ipp->ipp_tclass = 2674 IPV6_FLOW_TCLASS( 2675 IPV6_DEFAULT_VERS_AND_FLOW); 2676 } else { 2677 ipp->ipp_tclass = *i1; 2678 } 2679 ipp->ipp_fields |= IPPF_TCLASS; 2680 } 2681 if (sticky) { 2682 error = icmp_build_hdrs(icmp); 2683 if (error != 0) 2684 return (error); 2685 } 2686 break; 2687 case IPV6_NEXTHOP: 2688 /* 2689 * IP will verify that the nexthop is reachable 2690 * and fail for sticky options. 
2691 */ 2692 if (inlen != 0 && inlen != sizeof (sin6_t)) 2693 return (EINVAL); 2694 if (checkonly) 2695 break; 2696 2697 if (inlen == 0) { 2698 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2699 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2700 } else { 2701 sin6_t *sin6 = (sin6_t *)invalp; 2702 2703 if (sin6->sin6_family != AF_INET6) 2704 return (EAFNOSUPPORT); 2705 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 2706 return (EADDRNOTAVAIL); 2707 ipp->ipp_nexthop = sin6->sin6_addr; 2708 if (!IN6_IS_ADDR_UNSPECIFIED( 2709 &ipp->ipp_nexthop)) 2710 ipp->ipp_fields |= IPPF_NEXTHOP; 2711 else 2712 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2713 } 2714 if (sticky) { 2715 error = icmp_build_hdrs(icmp); 2716 if (error != 0) 2717 return (error); 2718 } 2719 break; 2720 case IPV6_HOPOPTS: { 2721 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2722 /* 2723 * Sanity checks - minimum size, size a multiple of 2724 * eight bytes, and matching size passed in. 2725 */ 2726 if (inlen != 0 && 2727 inlen != (8 * (hopts->ip6h_len + 1))) 2728 return (EINVAL); 2729 2730 if (checkonly) 2731 break; 2732 error = optcom_pkt_set(invalp, inlen, sticky, 2733 (uchar_t **)&ipp->ipp_hopopts, 2734 &ipp->ipp_hopoptslen, 2735 sticky ? icmp->icmp_label_len_v6 : 0); 2736 if (error != 0) 2737 return (error); 2738 if (ipp->ipp_hopoptslen == 0) { 2739 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2740 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2741 } else { 2742 ipp->ipp_fields |= IPPF_HOPOPTS; 2743 } 2744 if (sticky) { 2745 error = icmp_build_hdrs(icmp); 2746 if (error != 0) 2747 return (error); 2748 } 2749 break; 2750 } 2751 case IPV6_RTHDRDSTOPTS: { 2752 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2753 2754 /* 2755 * Sanity checks - minimum size, size a multiple of 2756 * eight bytes, and matching size passed in. 
2757 */ 2758 if (inlen != 0 && 2759 inlen != (8 * (dopts->ip6d_len + 1))) 2760 return (EINVAL); 2761 2762 if (checkonly) 2763 break; 2764 2765 if (inlen == 0) { 2766 if (sticky && 2767 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2768 kmem_free(ipp->ipp_rtdstopts, 2769 ipp->ipp_rtdstoptslen); 2770 ipp->ipp_rtdstopts = NULL; 2771 ipp->ipp_rtdstoptslen = 0; 2772 } 2773 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2774 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2775 } else { 2776 error = optcom_pkt_set(invalp, inlen, sticky, 2777 (uchar_t **)&ipp->ipp_rtdstopts, 2778 &ipp->ipp_rtdstoptslen, 0); 2779 if (error != 0) 2780 return (error); 2781 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2782 } 2783 if (sticky) { 2784 error = icmp_build_hdrs(icmp); 2785 if (error != 0) 2786 return (error); 2787 } 2788 break; 2789 } 2790 case IPV6_DSTOPTS: { 2791 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2792 2793 /* 2794 * Sanity checks - minimum size, size a multiple of 2795 * eight bytes, and matching size passed in. 2796 */ 2797 if (inlen != 0 && 2798 inlen != (8 * (dopts->ip6d_len + 1))) 2799 return (EINVAL); 2800 2801 if (checkonly) 2802 break; 2803 2804 if (inlen == 0) { 2805 if (sticky && 2806 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2807 kmem_free(ipp->ipp_dstopts, 2808 ipp->ipp_dstoptslen); 2809 ipp->ipp_dstopts = NULL; 2810 ipp->ipp_dstoptslen = 0; 2811 } 2812 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2813 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2814 } else { 2815 error = optcom_pkt_set(invalp, inlen, sticky, 2816 (uchar_t **)&ipp->ipp_dstopts, 2817 &ipp->ipp_dstoptslen, 0); 2818 if (error != 0) 2819 return (error); 2820 ipp->ipp_fields |= IPPF_DSTOPTS; 2821 } 2822 if (sticky) { 2823 error = icmp_build_hdrs(icmp); 2824 if (error != 0) 2825 return (error); 2826 } 2827 break; 2828 } 2829 case IPV6_RTHDR: { 2830 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2831 2832 /* 2833 * Sanity checks - minimum size, size a multiple of 2834 * eight bytes, and matching size passed in. 
2835 */ 2836 if (inlen != 0 && 2837 inlen != (8 * (rt->ip6r_len + 1))) 2838 return (EINVAL); 2839 2840 if (checkonly) 2841 break; 2842 2843 if (inlen == 0) { 2844 if (sticky && 2845 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2846 kmem_free(ipp->ipp_rthdr, 2847 ipp->ipp_rthdrlen); 2848 ipp->ipp_rthdr = NULL; 2849 ipp->ipp_rthdrlen = 0; 2850 } 2851 ipp->ipp_fields &= ~IPPF_RTHDR; 2852 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2853 } else { 2854 error = optcom_pkt_set(invalp, inlen, sticky, 2855 (uchar_t **)&ipp->ipp_rthdr, 2856 &ipp->ipp_rthdrlen, 0); 2857 if (error != 0) 2858 return (error); 2859 ipp->ipp_fields |= IPPF_RTHDR; 2860 } 2861 if (sticky) { 2862 error = icmp_build_hdrs(icmp); 2863 if (error != 0) 2864 return (error); 2865 } 2866 break; 2867 } 2868 2869 case IPV6_DONTFRAG: 2870 if (checkonly) 2871 break; 2872 2873 if (onoff) { 2874 ipp->ipp_fields |= IPPF_DONTFRAG; 2875 } else { 2876 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2877 } 2878 break; 2879 2880 case IPV6_USE_MIN_MTU: 2881 if (inlen != sizeof (int)) 2882 return (EINVAL); 2883 2884 if (*i1 < -1 || *i1 > 1) 2885 return (EINVAL); 2886 2887 if (checkonly) 2888 break; 2889 2890 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 2891 ipp->ipp_use_min_mtu = *i1; 2892 break; 2893 2894 /* 2895 * This option can't be set. Its only returned via 2896 * getsockopt() or ancillary data. 2897 */ 2898 case IPV6_PATHMTU: 2899 return (EINVAL); 2900 2901 case IPV6_BOUND_PIF: 2902 case IPV6_SEC_OPT: 2903 case IPV6_DONTFAILOVER_IF: 2904 case IPV6_SRC_PREFERENCES: 2905 case IPV6_V6ONLY: 2906 /* Handled at IP level */ 2907 return (-EINVAL); 2908 default: 2909 *outlenp = 0; 2910 return (EINVAL); 2911 } 2912 break; 2913 } /* end IPPROTO_IPV6 */ 2914 2915 case IPPROTO_ICMPV6: 2916 /* 2917 * Only allow IPv6 option processing on IPv6 sockets. 
2918 */ 2919 if (icmp->icmp_family != AF_INET6) { 2920 *outlenp = 0; 2921 return (ENOPROTOOPT); 2922 } 2923 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2924 *outlenp = 0; 2925 return (ENOPROTOOPT); 2926 } 2927 switch (name) { 2928 case ICMP6_FILTER: 2929 if (!checkonly) { 2930 if ((inlen != 0) && 2931 (inlen != sizeof (icmp6_filter_t))) 2932 return (EINVAL); 2933 2934 if (inlen == 0) { 2935 if (icmp->icmp_filter != NULL) { 2936 kmem_free(icmp->icmp_filter, 2937 sizeof (icmp6_filter_t)); 2938 icmp->icmp_filter = NULL; 2939 } 2940 } else { 2941 if (icmp->icmp_filter == NULL) { 2942 icmp->icmp_filter = kmem_alloc( 2943 sizeof (icmp6_filter_t), 2944 KM_NOSLEEP); 2945 if (icmp->icmp_filter == NULL) { 2946 *outlenp = 0; 2947 return (ENOBUFS); 2948 } 2949 } 2950 (void) bcopy(invalp, icmp->icmp_filter, 2951 inlen); 2952 } 2953 } 2954 break; 2955 2956 default: 2957 *outlenp = 0; 2958 return (EINVAL); 2959 } 2960 break; 2961 default: 2962 *outlenp = 0; 2963 return (EINVAL); 2964 } 2965 /* 2966 * Common case of OK return with outval same as inval. 2967 */ 2968 if (invalp != outvalp) { 2969 /* don't trust bcopy for identical src/dst */ 2970 (void) bcopy(invalp, outvalp, inlen); 2971 } 2972 *outlenp = inlen; 2973 return (0); 2974 } 2975 /* This routine sets socket options. */ 2976 /* ARGSUSED */ 2977 int 2978 icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, 2979 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2980 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2981 { 2982 icmp_t *icmp; 2983 int err; 2984 2985 icmp = Q_TO_ICMP(q); 2986 2987 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2988 err = icmp_opt_set_locked(q, optset_context, level, name, inlen, invalp, 2989 outlenp, outvalp, thisdg_attrs, cr, mblk); 2990 rw_exit(&icmp->icmp_rwlock); 2991 return (err); 2992 } 2993 2994 /* 2995 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 2996 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 
2997 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 2998 * headers. 2999 * Returns failure if can't allocate memory. 3000 */ 3001 static int 3002 icmp_build_hdrs(icmp_t *icmp) 3003 { 3004 icmp_stack_t *is = icmp->icmp_is; 3005 uchar_t *hdrs; 3006 uint_t hdrs_len; 3007 ip6_t *ip6h; 3008 ip6i_t *ip6i; 3009 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 3010 3011 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 3012 hdrs_len = ip_total_hdrs_len_v6(ipp); 3013 ASSERT(hdrs_len != 0); 3014 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 3015 /* Need to reallocate */ 3016 if (hdrs_len != 0) { 3017 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3018 if (hdrs == NULL) 3019 return (ENOMEM); 3020 } else { 3021 hdrs = NULL; 3022 } 3023 if (icmp->icmp_sticky_hdrs_len != 0) { 3024 kmem_free(icmp->icmp_sticky_hdrs, 3025 icmp->icmp_sticky_hdrs_len); 3026 } 3027 icmp->icmp_sticky_hdrs = hdrs; 3028 icmp->icmp_sticky_hdrs_len = hdrs_len; 3029 } 3030 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 3031 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 3032 3033 /* Set header fields not in ipp */ 3034 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3035 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 3036 ip6h = (ip6_t *)&ip6i[1]; 3037 3038 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 3039 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 3040 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 3041 } 3042 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 3043 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 3044 } 3045 } else { 3046 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 3047 } 3048 3049 if (!(ipp->ipp_fields & IPPF_ADDR)) 3050 ip6h->ip6_src = icmp->icmp_v6src; 3051 3052 /* Try to get everything in a single mblk */ 3053 if (hdrs_len > icmp->icmp_max_hdr_len) { 3054 icmp->icmp_max_hdr_len = hdrs_len; 3055 rw_exit(&icmp->icmp_rwlock); 3056 (void) mi_set_sth_wroff(icmp->icmp_connp->conn_rq, 3057 icmp->icmp_max_hdr_len + is->is_wroff_extra); 3058 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3059 } 3060 return (0); 3061 } 3062 3063 /* 3064 * 
This routine retrieves the value of an ND variable in a icmpparam_t 3065 * structure. It is called through nd_getset when a user reads the 3066 * variable. 3067 */ 3068 /* ARGSUSED */ 3069 static int 3070 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3071 { 3072 icmpparam_t *icmppa = (icmpparam_t *)cp; 3073 3074 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 3075 return (0); 3076 } 3077 3078 /* 3079 * Walk through the param array specified registering each element with the 3080 * named dispatch (ND) handler. 3081 */ 3082 static boolean_t 3083 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) 3084 { 3085 for (; cnt-- > 0; icmppa++) { 3086 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 3087 if (!nd_load(ndp, icmppa->icmp_param_name, 3088 icmp_param_get, icmp_param_set, 3089 (caddr_t)icmppa)) { 3090 nd_free(ndp); 3091 return (B_FALSE); 3092 } 3093 } 3094 } 3095 if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL, 3096 NULL)) { 3097 nd_free(ndp); 3098 return (B_FALSE); 3099 } 3100 return (B_TRUE); 3101 } 3102 3103 /* This routine sets an ND variable in a icmpparam_t structure. */ 3104 /* ARGSUSED */ 3105 static int 3106 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3107 { 3108 long new_value; 3109 icmpparam_t *icmppa = (icmpparam_t *)cp; 3110 3111 /* 3112 * Fail the request if the new value does not lie within the 3113 * required bounds. 
3114 */ 3115 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3116 new_value < icmppa->icmp_param_min || 3117 new_value > icmppa->icmp_param_max) { 3118 return (EINVAL); 3119 } 3120 /* Set the new value */ 3121 icmppa->icmp_param_value = new_value; 3122 return (0); 3123 } 3124 /*ARGSUSED2*/ 3125 static void 3126 icmp_input(void *arg1, mblk_t *mp, void *arg2) 3127 { 3128 conn_t *connp = (conn_t *)arg1; 3129 struct T_unitdata_ind *tudi; 3130 uchar_t *rptr; 3131 icmp_t *icmp; 3132 icmp_stack_t *is; 3133 sin_t *sin; 3134 sin6_t *sin6; 3135 ip6_t *ip6h; 3136 ip6i_t *ip6i; 3137 mblk_t *mp1; 3138 int hdr_len; 3139 ipha_t *ipha; 3140 int udi_size; /* Size of T_unitdata_ind */ 3141 uint_t ipvers; 3142 ip6_pkt_t ipp; 3143 uint8_t nexthdr; 3144 ip_pktinfo_t *pinfo = NULL; 3145 mblk_t *options_mp = NULL; 3146 uint_t icmp_opt = 0; 3147 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 3148 uint_t hopstrip; 3149 3150 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 3151 3152 icmp = connp->conn_icmp; 3153 is = icmp->icmp_is; 3154 rptr = mp->b_rptr; 3155 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3156 ASSERT(OK_32PTR(rptr)); 3157 3158 /* 3159 * IP should have prepended the options data in an M_CTL 3160 * Check M_CTL "type" to make sure are not here bcos of 3161 * a valid ICMP message 3162 */ 3163 if (DB_TYPE(mp) == M_CTL) { 3164 /* 3165 * FIXME: does IP still do this? 3166 * IP sends up the IPSEC_IN message for handling IPSEC 3167 * policy at the TCP level. We don't need it here. 3168 */ 3169 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 3170 mp1 = mp->b_cont; 3171 freeb(mp); 3172 mp = mp1; 3173 rptr = mp->b_rptr; 3174 } else if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3175 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3176 IN_PKTINFO) { 3177 /* 3178 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3179 * has been prepended to the packet by IP. 
We need to 3180 * extract the mblk and adjust the rptr 3181 */ 3182 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3183 options_mp = mp; 3184 mp = mp->b_cont; 3185 rptr = mp->b_rptr; 3186 } else { 3187 /* 3188 * ICMP messages. 3189 */ 3190 icmp_icmp_error(connp->conn_rq, mp); 3191 return; 3192 } 3193 } 3194 3195 /* 3196 * Discard message if it is misaligned or smaller than the IP header. 3197 */ 3198 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3199 freemsg(mp); 3200 if (options_mp != NULL) 3201 freeb(options_mp); 3202 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3203 return; 3204 } 3205 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3206 3207 /* Handle M_DATA messages containing IP packets messages */ 3208 if (ipvers == IPV4_VERSION) { 3209 /* 3210 * Special case where IP attaches 3211 * the IRE needs to be handled so that we don't send up 3212 * IRE to the user land. 3213 */ 3214 ipha = (ipha_t *)rptr; 3215 hdr_len = IPH_HDR_LENGTH(ipha); 3216 3217 if (ipha->ipha_protocol == IPPROTO_TCP) { 3218 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3219 3220 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3221 TH_SYN) && mp->b_cont != NULL) { 3222 mp1 = mp->b_cont; 3223 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3224 freeb(mp1); 3225 mp->b_cont = NULL; 3226 } 3227 } 3228 } 3229 if (is->is_bsd_compat) { 3230 ushort_t len; 3231 len = ntohs(ipha->ipha_length); 3232 3233 if (mp->b_datap->db_ref > 1) { 3234 /* 3235 * Allocate a new IP header so that we can 3236 * modify ipha_length. 
3237 */ 3238 mblk_t *mp1; 3239 3240 mp1 = allocb(hdr_len, BPRI_MED); 3241 if (!mp1) { 3242 freemsg(mp); 3243 if (options_mp != NULL) 3244 freeb(options_mp); 3245 BUMP_MIB(&is->is_rawip_mib, 3246 rawipInErrors); 3247 return; 3248 } 3249 bcopy(rptr, mp1->b_rptr, hdr_len); 3250 mp->b_rptr = rptr + hdr_len; 3251 rptr = mp1->b_rptr; 3252 ipha = (ipha_t *)rptr; 3253 mp1->b_cont = mp; 3254 mp1->b_wptr = rptr + hdr_len; 3255 mp = mp1; 3256 } 3257 len -= hdr_len; 3258 ipha->ipha_length = htons(len); 3259 } 3260 } 3261 3262 /* 3263 * This is the inbound data path. Packets are passed upstream as 3264 * T_UNITDATA_IND messages with full IP headers still attached. 3265 */ 3266 if (icmp->icmp_family == AF_INET) { 3267 ASSERT(ipvers == IPV4_VERSION); 3268 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3269 if (icmp->icmp_recvif && (pinfo != NULL) && 3270 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3271 udi_size += sizeof (struct T_opthdr) + 3272 sizeof (uint_t); 3273 } 3274 3275 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3276 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3277 udi_size += sizeof (struct T_opthdr) + 3278 sizeof (struct in_pktinfo); 3279 } 3280 3281 /* 3282 * If SO_TIMESTAMP is set allocate the appropriate sized 3283 * buffer. Since gethrestime() expects a pointer aligned 3284 * argument, we allocate space necessary for extra 3285 * alignment (even though it might not be used). 
3286 */ 3287 if (icmp->icmp_timestamp) { 3288 udi_size += sizeof (struct T_opthdr) + 3289 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3290 } 3291 mp1 = allocb(udi_size, BPRI_MED); 3292 if (mp1 == NULL) { 3293 freemsg(mp); 3294 if (options_mp != NULL) 3295 freeb(options_mp); 3296 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3297 return; 3298 } 3299 mp1->b_cont = mp; 3300 mp = mp1; 3301 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3302 mp->b_datap->db_type = M_PROTO; 3303 mp->b_wptr = (uchar_t *)tudi + udi_size; 3304 tudi->PRIM_type = T_UNITDATA_IND; 3305 tudi->SRC_length = sizeof (sin_t); 3306 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3307 sin = (sin_t *)&tudi[1]; 3308 *sin = sin_null; 3309 sin->sin_family = AF_INET; 3310 sin->sin_addr.s_addr = ipha->ipha_src; 3311 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3312 sizeof (sin_t); 3313 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3314 tudi->OPT_length = udi_size; 3315 3316 /* 3317 * Add options if IP_RECVIF is set 3318 */ 3319 if (udi_size != 0) { 3320 char *dstopt; 3321 3322 dstopt = (char *)&sin[1]; 3323 if (icmp->icmp_recvif && (pinfo != NULL) && 3324 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3325 3326 struct T_opthdr *toh; 3327 uint_t *dstptr; 3328 3329 toh = (struct T_opthdr *)dstopt; 3330 toh->level = IPPROTO_IP; 3331 toh->name = IP_RECVIF; 3332 toh->len = sizeof (struct T_opthdr) + 3333 sizeof (uint_t); 3334 toh->status = 0; 3335 dstopt += sizeof (struct T_opthdr); 3336 dstptr = (uint_t *)dstopt; 3337 *dstptr = pinfo->ip_pkt_ifindex; 3338 dstopt += sizeof (uint_t); 3339 udi_size -= toh->len; 3340 } 3341 if (icmp->icmp_timestamp) { 3342 struct T_opthdr *toh; 3343 3344 toh = (struct T_opthdr *)dstopt; 3345 toh->level = SOL_SOCKET; 3346 toh->name = SCM_TIMESTAMP; 3347 toh->len = sizeof (struct T_opthdr) + 3348 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3349 toh->status = 0; 3350 dstopt += sizeof (struct T_opthdr); 3351 /* Align for gethrestime() */ 3352 dstopt = (char 
*)P2ROUNDUP((intptr_t)dstopt, 3353 sizeof (intptr_t)); 3354 gethrestime((timestruc_t *)dstopt); 3355 dstopt = (char *)toh + toh->len; 3356 udi_size -= toh->len; 3357 } 3358 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3359 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3360 struct T_opthdr *toh; 3361 struct in_pktinfo *pktinfop; 3362 3363 toh = (struct T_opthdr *)dstopt; 3364 toh->level = IPPROTO_IP; 3365 toh->name = IP_PKTINFO; 3366 toh->len = sizeof (struct T_opthdr) + 3367 sizeof (in_pktinfo_t); 3368 toh->status = 0; 3369 dstopt += sizeof (struct T_opthdr); 3370 pktinfop = (struct in_pktinfo *)dstopt; 3371 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3372 pktinfop->ipi_spec_dst = 3373 pinfo->ip_pkt_match_addr; 3374 3375 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3376 3377 dstopt += sizeof (struct in_pktinfo); 3378 udi_size -= toh->len; 3379 } 3380 3381 /* Consumed all of allocated space */ 3382 ASSERT(udi_size == 0); 3383 } 3384 3385 if (options_mp != NULL) 3386 freeb(options_mp); 3387 3388 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3389 putnext(connp->conn_rq, mp); 3390 return; 3391 } 3392 3393 /* 3394 * We don't need options_mp in the IPv6 path. 3395 */ 3396 if (options_mp != NULL) { 3397 freeb(options_mp); 3398 options_mp = NULL; 3399 } 3400 3401 /* 3402 * Discard message if it is smaller than the IPv6 header 3403 * or if the header is malformed. 3404 */ 3405 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3406 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3407 icmp->icmp_family != AF_INET6) { 3408 freemsg(mp); 3409 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3410 return; 3411 } 3412 3413 /* Initialize */ 3414 ipp.ipp_fields = 0; 3415 hopstrip = 0; 3416 3417 ip6h = (ip6_t *)rptr; 3418 /* 3419 * Call on ip_find_hdr_v6 which gets the total hdr len 3420 * as well as individual lenghts of ext hdrs (and ptrs to 3421 * them). 
3422 */ 3423 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3424 /* Look for ifindex information */ 3425 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3426 ip6i = (ip6i_t *)ip6h; 3427 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3428 ASSERT(ip6i->ip6i_ifindex != 0); 3429 ipp.ipp_fields |= IPPF_IFINDEX; 3430 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3431 } 3432 rptr = (uchar_t *)&ip6i[1]; 3433 mp->b_rptr = rptr; 3434 if (rptr == mp->b_wptr) { 3435 mp1 = mp->b_cont; 3436 freeb(mp); 3437 mp = mp1; 3438 rptr = mp->b_rptr; 3439 } 3440 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3441 ip6h = (ip6_t *)rptr; 3442 } 3443 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3444 3445 /* 3446 * We need to lie a bit to the user because users inside 3447 * labeled compartments should not see their own labels. We 3448 * assume that in all other respects IP has checked the label, 3449 * and that the label is always first among the options. (If 3450 * it's not first, then this code won't see it, and the option 3451 * will be passed along to the user.) 3452 * 3453 * If we had multilevel ICMP sockets, then the following code 3454 * should be skipped for them to allow the user to see the 3455 * label. 3456 * 3457 * Alignment restrictions in the definition of IP options 3458 * (namely, the requirement that the 4-octet DOI goes on a 3459 * 4-octet boundary) mean that we know exactly where the option 3460 * should start, but we're lenient for other hosts. 3461 * 3462 * Note that there are no multilevel ICMP or raw IP sockets 3463 * yet, thus nobody ever sees the IP6OPT_LS option. 
3464 */ 3465 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3466 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3467 const uchar_t *ucp = 3468 (const uchar_t *)ipp.ipp_hopopts + 2; 3469 int remlen = ipp.ipp_hopoptslen - 2; 3470 3471 while (remlen > 0) { 3472 if (*ucp == IP6OPT_PAD1) { 3473 remlen--; 3474 ucp++; 3475 } else if (*ucp == IP6OPT_PADN) { 3476 remlen -= ucp[1] + 2; 3477 ucp += ucp[1] + 2; 3478 } else if (*ucp == ip6opt_ls) { 3479 hopstrip = (ucp - 3480 (const uchar_t *)ipp.ipp_hopopts) + 3481 ucp[1] + 2; 3482 hopstrip = (hopstrip + 7) & ~7; 3483 break; 3484 } else { 3485 /* label option must be first */ 3486 break; 3487 } 3488 } 3489 } 3490 } else { 3491 hdr_len = IPV6_HDR_LEN; 3492 ip6i = NULL; 3493 nexthdr = ip6h->ip6_nxt; 3494 } 3495 /* 3496 * One special case where IP attaches the IRE needs to 3497 * be handled so that we don't send up IRE to the user land. 3498 */ 3499 if (nexthdr == IPPROTO_TCP) { 3500 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3501 3502 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3503 mp->b_cont != NULL) { 3504 mp1 = mp->b_cont; 3505 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3506 freeb(mp1); 3507 mp->b_cont = NULL; 3508 } 3509 } 3510 } 3511 /* 3512 * Check a filter for ICMPv6 types if needed. 3513 * Verify raw checksums if needed. 
3514 */ 3515 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3516 if (icmp->icmp_filter != NULL) { 3517 int type; 3518 3519 /* Assumes that IP has done the pullupmsg */ 3520 type = mp->b_rptr[hdr_len]; 3521 3522 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3523 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3524 freemsg(mp); 3525 return; 3526 } 3527 } else { 3528 /* Checksum */ 3529 uint16_t *up; 3530 uint32_t sum; 3531 int remlen; 3532 3533 up = (uint16_t *)&ip6h->ip6_src; 3534 3535 remlen = msgdsize(mp) - hdr_len; 3536 sum = htons(icmp->icmp_proto + remlen) 3537 + up[0] + up[1] + up[2] + up[3] 3538 + up[4] + up[5] + up[6] + up[7] 3539 + up[8] + up[9] + up[10] + up[11] 3540 + up[12] + up[13] + up[14] + up[15]; 3541 sum = (sum & 0xffff) + (sum >> 16); 3542 sum = IP_CSUM(mp, hdr_len, sum); 3543 if (sum != 0) { 3544 /* IPv6 RAW checksum failed */ 3545 ip0dbg(("icmp_rput: RAW checksum " 3546 "failed %x\n", sum)); 3547 freemsg(mp); 3548 BUMP_MIB(&is->is_rawip_mib, 3549 rawipInCksumErrs); 3550 return; 3551 } 3552 } 3553 } 3554 /* Skip all the IPv6 headers per API */ 3555 mp->b_rptr += hdr_len; 3556 3557 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3558 3559 /* 3560 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3561 * maintain state information, instead of relying on icmp_t 3562 * structure, since there arent any locks protecting these members 3563 * and there is a window where there might be a race between a 3564 * thread setting options on the write side and a thread reading 3565 * these options on the read size. 
3566 */ 3567 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3568 IPPF_RTHDR|IPPF_IFINDEX)) { 3569 if (icmp->icmp_ipv6_recvhopopts && 3570 (ipp.ipp_fields & IPPF_HOPOPTS) && 3571 ipp.ipp_hopoptslen > hopstrip) { 3572 udi_size += sizeof (struct T_opthdr) + 3573 ipp.ipp_hopoptslen - hopstrip; 3574 icmp_opt |= IPPF_HOPOPTS; 3575 } 3576 if ((icmp->icmp_ipv6_recvdstopts || 3577 icmp->icmp_old_ipv6_recvdstopts) && 3578 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3579 udi_size += sizeof (struct T_opthdr) + 3580 ipp.ipp_dstoptslen; 3581 icmp_opt |= IPPF_DSTOPTS; 3582 } 3583 if (((icmp->icmp_ipv6_recvdstopts && 3584 icmp->icmp_ipv6_recvrthdr && 3585 (ipp.ipp_fields & IPPF_RTHDR)) || 3586 icmp->icmp_ipv6_recvrtdstopts) && 3587 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3588 udi_size += sizeof (struct T_opthdr) + 3589 ipp.ipp_rtdstoptslen; 3590 icmp_opt |= IPPF_RTDSTOPTS; 3591 } 3592 if (icmp->icmp_ipv6_recvrthdr && 3593 (ipp.ipp_fields & IPPF_RTHDR)) { 3594 udi_size += sizeof (struct T_opthdr) + 3595 ipp.ipp_rthdrlen; 3596 icmp_opt |= IPPF_RTHDR; 3597 } 3598 if (icmp->icmp_ip_recvpktinfo && 3599 (ipp.ipp_fields & IPPF_IFINDEX)) { 3600 udi_size += sizeof (struct T_opthdr) + 3601 sizeof (struct in6_pktinfo); 3602 icmp_opt |= IPPF_IFINDEX; 3603 } 3604 } 3605 if (icmp->icmp_ipv6_recvhoplimit) { 3606 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3607 icmp_ipv6_recvhoplimit = B_TRUE; 3608 } 3609 3610 if (icmp->icmp_ipv6_recvtclass) 3611 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3612 3613 /* 3614 * If SO_TIMESTAMP is set allocate the appropriate sized 3615 * buffer. Since gethrestime() expects a pointer aligned 3616 * argument, we allocate space necessary for extra 3617 * alignment (even though it might not be used). 
3618 */ 3619 if (icmp->icmp_timestamp) { 3620 udi_size += sizeof (struct T_opthdr) + 3621 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3622 } 3623 3624 mp1 = allocb(udi_size, BPRI_MED); 3625 if (mp1 == NULL) { 3626 freemsg(mp); 3627 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3628 return; 3629 } 3630 mp1->b_cont = mp; 3631 mp = mp1; 3632 mp->b_datap->db_type = M_PROTO; 3633 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3634 mp->b_wptr = (uchar_t *)tudi + udi_size; 3635 tudi->PRIM_type = T_UNITDATA_IND; 3636 tudi->SRC_length = sizeof (sin6_t); 3637 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3638 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3639 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3640 tudi->OPT_length = udi_size; 3641 sin6 = (sin6_t *)&tudi[1]; 3642 sin6->sin6_port = 0; 3643 sin6->sin6_family = AF_INET6; 3644 3645 sin6->sin6_addr = ip6h->ip6_src; 3646 /* No sin6_flowinfo per API */ 3647 sin6->sin6_flowinfo = 0; 3648 /* For link-scope source pass up scope id */ 3649 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3650 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3651 sin6->sin6_scope_id = ipp.ipp_ifindex; 3652 else 3653 sin6->sin6_scope_id = 0; 3654 3655 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3656 icmp->icmp_zoneid, is->is_netstack); 3657 3658 if (udi_size != 0) { 3659 uchar_t *dstopt; 3660 3661 dstopt = (uchar_t *)&sin6[1]; 3662 if (icmp_opt & IPPF_IFINDEX) { 3663 struct T_opthdr *toh; 3664 struct in6_pktinfo *pkti; 3665 3666 toh = (struct T_opthdr *)dstopt; 3667 toh->level = IPPROTO_IPV6; 3668 toh->name = IPV6_PKTINFO; 3669 toh->len = sizeof (struct T_opthdr) + 3670 sizeof (*pkti); 3671 toh->status = 0; 3672 dstopt += sizeof (struct T_opthdr); 3673 pkti = (struct in6_pktinfo *)dstopt; 3674 pkti->ipi6_addr = ip6h->ip6_dst; 3675 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3676 dstopt += sizeof (*pkti); 3677 udi_size -= toh->len; 3678 } 3679 if (icmp_ipv6_recvhoplimit) { 3680 struct T_opthdr *toh; 3681 3682 toh = 
(struct T_opthdr *)dstopt; 3683 toh->level = IPPROTO_IPV6; 3684 toh->name = IPV6_HOPLIMIT; 3685 toh->len = sizeof (struct T_opthdr) + 3686 sizeof (uint_t); 3687 toh->status = 0; 3688 dstopt += sizeof (struct T_opthdr); 3689 *(uint_t *)dstopt = ip6h->ip6_hops; 3690 dstopt += sizeof (uint_t); 3691 udi_size -= toh->len; 3692 } 3693 if (icmp->icmp_ipv6_recvtclass) { 3694 struct T_opthdr *toh; 3695 3696 toh = (struct T_opthdr *)dstopt; 3697 toh->level = IPPROTO_IPV6; 3698 toh->name = IPV6_TCLASS; 3699 toh->len = sizeof (struct T_opthdr) + 3700 sizeof (uint_t); 3701 toh->status = 0; 3702 dstopt += sizeof (struct T_opthdr); 3703 *(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3704 dstopt += sizeof (uint_t); 3705 udi_size -= toh->len; 3706 } 3707 if (icmp->icmp_timestamp) { 3708 struct T_opthdr *toh; 3709 3710 toh = (struct T_opthdr *)dstopt; 3711 toh->level = SOL_SOCKET; 3712 toh->name = SCM_TIMESTAMP; 3713 toh->len = sizeof (struct T_opthdr) + 3714 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3715 toh->status = 0; 3716 dstopt += sizeof (struct T_opthdr); 3717 /* Align for gethrestime() */ 3718 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 3719 sizeof (intptr_t)); 3720 gethrestime((timestruc_t *)dstopt); 3721 dstopt = (uchar_t *)toh + toh->len; 3722 udi_size -= toh->len; 3723 } 3724 if (icmp_opt & IPPF_HOPOPTS) { 3725 struct T_opthdr *toh; 3726 3727 toh = (struct T_opthdr *)dstopt; 3728 toh->level = IPPROTO_IPV6; 3729 toh->name = IPV6_HOPOPTS; 3730 toh->len = sizeof (struct T_opthdr) + 3731 ipp.ipp_hopoptslen - hopstrip; 3732 toh->status = 0; 3733 dstopt += sizeof (struct T_opthdr); 3734 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 3735 ipp.ipp_hopoptslen - hopstrip); 3736 if (hopstrip > 0) { 3737 /* copy next header value and fake length */ 3738 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 3739 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 3740 hopstrip / 8; 3741 } 3742 dstopt += ipp.ipp_hopoptslen - hopstrip; 3743 udi_size -= toh->len; 3744 } 3745 if 
(icmp_opt & IPPF_RTDSTOPTS) { 3746 struct T_opthdr *toh; 3747 3748 toh = (struct T_opthdr *)dstopt; 3749 toh->level = IPPROTO_IPV6; 3750 toh->name = IPV6_DSTOPTS; 3751 toh->len = sizeof (struct T_opthdr) + 3752 ipp.ipp_rtdstoptslen; 3753 toh->status = 0; 3754 dstopt += sizeof (struct T_opthdr); 3755 bcopy(ipp.ipp_rtdstopts, dstopt, 3756 ipp.ipp_rtdstoptslen); 3757 dstopt += ipp.ipp_rtdstoptslen; 3758 udi_size -= toh->len; 3759 } 3760 if (icmp_opt & IPPF_RTHDR) { 3761 struct T_opthdr *toh; 3762 3763 toh = (struct T_opthdr *)dstopt; 3764 toh->level = IPPROTO_IPV6; 3765 toh->name = IPV6_RTHDR; 3766 toh->len = sizeof (struct T_opthdr) + 3767 ipp.ipp_rthdrlen; 3768 toh->status = 0; 3769 dstopt += sizeof (struct T_opthdr); 3770 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 3771 dstopt += ipp.ipp_rthdrlen; 3772 udi_size -= toh->len; 3773 } 3774 if (icmp_opt & IPPF_DSTOPTS) { 3775 struct T_opthdr *toh; 3776 3777 toh = (struct T_opthdr *)dstopt; 3778 toh->level = IPPROTO_IPV6; 3779 toh->name = IPV6_DSTOPTS; 3780 toh->len = sizeof (struct T_opthdr) + 3781 ipp.ipp_dstoptslen; 3782 toh->status = 0; 3783 dstopt += sizeof (struct T_opthdr); 3784 bcopy(ipp.ipp_dstopts, dstopt, 3785 ipp.ipp_dstoptslen); 3786 dstopt += ipp.ipp_dstoptslen; 3787 udi_size -= toh->len; 3788 } 3789 /* Consumed all of allocated space */ 3790 ASSERT(udi_size == 0); 3791 } 3792 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3793 putnext(connp->conn_rq, mp); 3794 } 3795 3796 /* 3797 * Handle the results of a T_BIND_REQ whether deferred by IP or handled 3798 * immediately. 3799 */ 3800 static void 3801 icmp_bind_result(conn_t *connp, mblk_t *mp) 3802 { 3803 struct T_error_ack *tea; 3804 3805 switch (mp->b_datap->db_type) { 3806 case M_PROTO: 3807 case M_PCPROTO: 3808 /* M_PROTO messages contain some type of TPI message. 
*/ 3809 if ((mp->b_wptr - mp->b_rptr) < sizeof (t_scalar_t)) { 3810 freemsg(mp); 3811 return; 3812 } 3813 tea = (struct T_error_ack *)mp->b_rptr; 3814 3815 switch (tea->PRIM_type) { 3816 case T_ERROR_ACK: 3817 switch (tea->ERROR_prim) { 3818 case O_T_BIND_REQ: 3819 case T_BIND_REQ: 3820 icmp_bind_error(connp, mp); 3821 return; 3822 default: 3823 break; 3824 } 3825 ASSERT(0); 3826 freemsg(mp); 3827 return; 3828 3829 case T_BIND_ACK: 3830 icmp_bind_ack(connp, mp); 3831 return; 3832 3833 default: 3834 break; 3835 } 3836 freemsg(mp); 3837 return; 3838 default: 3839 /* FIXME: other cases? */ 3840 ASSERT(0); 3841 freemsg(mp); 3842 return; 3843 } 3844 } 3845 3846 /* 3847 * Process a T_BIND_ACK 3848 */ 3849 static void 3850 icmp_bind_ack(conn_t *connp, mblk_t *mp) 3851 { 3852 icmp_t *icmp = connp->conn_icmp; 3853 mblk_t *mp1; 3854 ire_t *ire; 3855 struct T_bind_ack *tba; 3856 uchar_t *addrp; 3857 ipa_conn_t *ac; 3858 ipa6_conn_t *ac6; 3859 3860 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3861 /* 3862 * We know if headers are included or not so we can 3863 * safely do this. 3864 */ 3865 if (icmp->icmp_state == TS_UNBND) { 3866 /* 3867 * TPI has not yet bound - bind sent by 3868 * icmp_bind_proto. 3869 */ 3870 freemsg(mp); 3871 rw_exit(&icmp->icmp_rwlock); 3872 return; 3873 } 3874 ASSERT(icmp->icmp_pending_op != -1); 3875 3876 /* 3877 * If a broadcast/multicast address was bound set 3878 * the source address to 0. 3879 * This ensures no datagrams with broadcast address 3880 * as source address are emitted (which would violate 3881 * RFC1122 - Hosts requirements) 3882 * 3883 * Note that when connecting the returned IRE is 3884 * for the destination address and we only perform 3885 * the broadcast check for the source address (it 3886 * is OK to connect to a broadcast/multicast address.) 
3887 */ 3888 mp1 = mp->b_cont; 3889 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 3890 ire = (ire_t *)mp1->b_rptr; 3891 3892 /* 3893 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 3894 * local address. 3895 */ 3896 if (ire->ire_type == IRE_BROADCAST && 3897 icmp->icmp_state != TS_DATA_XFER) { 3898 ASSERT(icmp->icmp_pending_op == T_BIND_REQ || 3899 icmp->icmp_pending_op == O_T_BIND_REQ); 3900 /* This was just a local bind to a MC/broadcast addr */ 3901 V6_SET_ZERO(icmp->icmp_v6src); 3902 if (icmp->icmp_family == AF_INET6) 3903 (void) icmp_build_hdrs(icmp); 3904 } else if (V6_OR_V4_INADDR_ANY(icmp->icmp_v6src)) { 3905 /* 3906 * Local address not yet set - pick it from the 3907 * T_bind_ack 3908 */ 3909 tba = (struct T_bind_ack *)mp->b_rptr; 3910 addrp = &mp->b_rptr[tba->ADDR_offset]; 3911 switch (icmp->icmp_family) { 3912 case AF_INET: 3913 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 3914 ac = (ipa_conn_t *)addrp; 3915 } else { 3916 ASSERT(tba->ADDR_length == 3917 sizeof (ipa_conn_x_t)); 3918 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 3919 } 3920 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 3921 &icmp->icmp_v6src); 3922 break; 3923 case AF_INET6: 3924 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 3925 ac6 = (ipa6_conn_t *)addrp; 3926 } else { 3927 ASSERT(tba->ADDR_length == 3928 sizeof (ipa6_conn_x_t)); 3929 ac6 = &((ipa6_conn_x_t *) 3930 addrp)->ac6x_conn; 3931 } 3932 icmp->icmp_v6src = ac6->ac6_laddr; 3933 (void) icmp_build_hdrs(icmp); 3934 } 3935 } 3936 mp1 = mp1->b_cont; 3937 } 3938 icmp->icmp_pending_op = -1; 3939 rw_exit(&icmp->icmp_rwlock); 3940 /* 3941 * Look for one or more appended ACK message added by 3942 * icmp_connect or icmp_disconnect. 3943 * If none found just send up the T_BIND_ACK. 3944 * icmp_connect has appended a T_OK_ACK and a 3945 * T_CONN_CON. 3946 * icmp_disconnect has appended a T_OK_ACK. 
3947 */ 3948 if (mp1 != NULL) { 3949 if (mp->b_cont == mp1) 3950 mp->b_cont = NULL; 3951 else { 3952 ASSERT(mp->b_cont->b_cont == mp1); 3953 mp->b_cont->b_cont = NULL; 3954 } 3955 freemsg(mp); 3956 mp = mp1; 3957 while (mp != NULL) { 3958 mp1 = mp->b_cont; 3959 mp->b_cont = NULL; 3960 putnext(connp->conn_rq, mp); 3961 mp = mp1; 3962 } 3963 return; 3964 } 3965 freemsg(mp->b_cont); 3966 mp->b_cont = NULL; 3967 putnext(connp->conn_rq, mp); 3968 } 3969 3970 static void 3971 icmp_bind_error(conn_t *connp, mblk_t *mp) 3972 { 3973 icmp_t *icmp = connp->conn_icmp; 3974 struct T_error_ack *tea; 3975 3976 tea = (struct T_error_ack *)mp->b_rptr; 3977 /* 3978 * If our O_T_BIND_REQ/T_BIND_REQ fails, 3979 * clear out the source address before 3980 * passing the message upstream. 3981 * If this was caused by a T_CONN_REQ 3982 * revert back to bound state. 3983 */ 3984 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3985 if (icmp->icmp_state == TS_UNBND) { 3986 /* 3987 * TPI has not yet bound - bind sent by icmp_bind_proto. 
3988 */ 3989 freemsg(mp); 3990 rw_exit(&icmp->icmp_rwlock); 3991 return; 3992 } 3993 ASSERT(icmp->icmp_pending_op != -1); 3994 tea->ERROR_prim = icmp->icmp_pending_op; 3995 icmp->icmp_pending_op = -1; 3996 3997 switch (tea->ERROR_prim) { 3998 case T_CONN_REQ: 3999 ASSERT(icmp->icmp_state == TS_DATA_XFER); 4000 /* Connect failed */ 4001 /* Revert back to the bound source */ 4002 icmp->icmp_v6src = icmp->icmp_bound_v6src; 4003 icmp->icmp_state = TS_IDLE; 4004 if (icmp->icmp_family == AF_INET6) 4005 (void) icmp_build_hdrs(icmp); 4006 break; 4007 4008 case T_DISCON_REQ: 4009 case T_BIND_REQ: 4010 case O_T_BIND_REQ: 4011 V6_SET_ZERO(icmp->icmp_v6src); 4012 V6_SET_ZERO(icmp->icmp_bound_v6src); 4013 icmp->icmp_state = TS_UNBND; 4014 if (icmp->icmp_family == AF_INET6) 4015 (void) icmp_build_hdrs(icmp); 4016 break; 4017 default: 4018 break; 4019 } 4020 rw_exit(&icmp->icmp_rwlock); 4021 putnext(connp->conn_rq, mp); 4022 } 4023 4024 /* 4025 * return SNMP stuff in buffer in mpdata 4026 */ 4027 mblk_t * 4028 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 4029 { 4030 mblk_t *mpdata; 4031 struct opthdr *optp; 4032 conn_t *connp = Q_TO_CONN(q); 4033 icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 4034 mblk_t *mp2ctl; 4035 4036 /* 4037 * make a copy of the original message 4038 */ 4039 mp2ctl = copymsg(mpctl); 4040 4041 if (mpctl == NULL || 4042 (mpdata = mpctl->b_cont) == NULL) { 4043 freemsg(mpctl); 4044 freemsg(mp2ctl); 4045 return (0); 4046 } 4047 4048 /* fixed length structure for IPv4 and IPv6 counters */ 4049 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4050 optp->level = EXPER_RAWIP; 4051 optp->name = 0; 4052 (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, 4053 sizeof (is->is_rawip_mib)); 4054 optp->len = msgdsize(mpdata); 4055 qreply(q, mpctl); 4056 4057 return (mp2ctl); 4058 } 4059 4060 /* 4061 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 
4062 * TODO: If this ever actually tries to set anything, it needs to be 4063 * to do the appropriate locking. 4064 */ 4065 /* ARGSUSED */ 4066 int 4067 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4068 uchar_t *ptr, int len) 4069 { 4070 switch (level) { 4071 case EXPER_RAWIP: 4072 return (0); 4073 default: 4074 return (1); 4075 } 4076 } 4077 4078 /* Report for ndd "icmp_status" */ 4079 /* ARGSUSED */ 4080 static int 4081 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4082 { 4083 conn_t *connp; 4084 ip_stack_t *ipst; 4085 char laddrbuf[INET6_ADDRSTRLEN]; 4086 char faddrbuf[INET6_ADDRSTRLEN]; 4087 int i; 4088 4089 (void) mi_mpprintf(mp, 4090 "RAWIP " MI_COL_HDRPAD_STR 4091 /* 01234567[89ABCDEF] */ 4092 " src addr dest addr state"); 4093 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 4094 4095 connp = Q_TO_CONN(q); 4096 ipst = connp->conn_netstack->netstack_ip; 4097 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4098 connf_t *connfp; 4099 char *state; 4100 4101 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4102 connp = NULL; 4103 4104 while ((connp = ipcl_get_next_conn(connfp, connp, 4105 IPCL_RAWIPCONN)) != NULL) { 4106 icmp_t *icmp; 4107 4108 mutex_enter(&(connp)->conn_lock); 4109 icmp = connp->conn_icmp; 4110 4111 if (icmp->icmp_state == TS_UNBND) 4112 state = "UNBOUND"; 4113 else if (icmp->icmp_state == TS_IDLE) 4114 state = "IDLE"; 4115 else if (icmp->icmp_state == TS_DATA_XFER) 4116 state = "CONNECTED"; 4117 else 4118 state = "UnkState"; 4119 4120 (void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s", 4121 (void *)icmp, 4122 inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf, 4123 sizeof (faddrbuf)), 4124 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 4125 sizeof (laddrbuf)), 4126 state); 4127 mutex_exit(&(connp)->conn_lock); 4128 } 4129 } 4130 return (0); 4131 } 4132 4133 /* 4134 * This routine creates a T_UDERROR_IND message and passes it upstream. 
4135 * The address and options are copied from the T_UNITDATA_REQ message 4136 * passed in mp. This message is freed. 4137 */ 4138 static void 4139 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 4140 { 4141 mblk_t *mp1; 4142 uchar_t *rptr = mp->b_rptr; 4143 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 4144 4145 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 4146 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 4147 tudr->OPT_length, err); 4148 if (mp1) 4149 qreply(q, mp1); 4150 freemsg(mp); 4151 } 4152 4153 /* 4154 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 4155 * After some error checking, the message is passed downstream to ip. 4156 */ 4157 static void 4158 icmp_unbind(queue_t *q, mblk_t *mp) 4159 { 4160 icmp_t *icmp = Q_TO_ICMP(q); 4161 4162 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4163 /* If a bind has not been done, we can't unbind. */ 4164 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 4165 rw_exit(&icmp->icmp_rwlock); 4166 icmp_err_ack(q, mp, TOUTSTATE, 0); 4167 return; 4168 } 4169 icmp->icmp_pending_op = T_UNBIND_REQ; 4170 rw_exit(&icmp->icmp_rwlock); 4171 4172 /* 4173 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 4174 * and therefore ip_unbind must never return NULL. 4175 */ 4176 mp = ip_unbind(q, mp); 4177 ASSERT(mp != NULL); 4178 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4179 4180 /* 4181 * Once we're unbound from IP, the pending operation may be cleared 4182 * here. 4183 */ 4184 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4185 V6_SET_ZERO(icmp->icmp_v6src); 4186 V6_SET_ZERO(icmp->icmp_bound_v6src); 4187 icmp->icmp_pending_op = -1; 4188 icmp->icmp_state = TS_UNBND; 4189 if (icmp->icmp_family == AF_INET6) 4190 (void) icmp_build_hdrs(icmp); 4191 rw_exit(&icmp->icmp_rwlock); 4192 4193 qreply(q, mp); 4194 } 4195 4196 /* 4197 * Process IPv4 packets that already include an IP header. 
4198 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 4199 * IPPROTO_IGMP). 4200 */ 4201 static void 4202 icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop) 4203 { 4204 icmp_stack_t *is = icmp->icmp_is; 4205 ipha_t *ipha; 4206 int ip_hdr_length; 4207 int tp_hdr_len; 4208 mblk_t *mp1; 4209 uint_t pkt_len; 4210 ip_opt_info_t optinfo; 4211 conn_t *connp = icmp->icmp_connp; 4212 4213 optinfo.ip_opt_flags = 0; 4214 optinfo.ip_opt_ill_index = 0; 4215 ipha = (ipha_t *)mp->b_rptr; 4216 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4217 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 4218 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4219 ASSERT(icmp != NULL); 4220 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4221 freemsg(mp); 4222 return; 4223 } 4224 ipha = (ipha_t *)mp->b_rptr; 4225 } 4226 ipha->ipha_version_and_hdr_length = 4227 (IP_VERSION<<4) | (ip_hdr_length>>2); 4228 4229 /* 4230 * For the socket of SOCK_RAW type, the checksum is provided in the 4231 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4232 * tell IP that the application has sent a complete IP header and not 4233 * to compute the transport checksum nor change the DF flag. 4234 */ 4235 ipha->ipha_ident = IP_HDR_INCLUDED; 4236 ipha->ipha_hdr_checksum = 0; 4237 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 4238 /* Insert options if any */ 4239 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4240 /* 4241 * Put the IP header plus any transport header that is 4242 * checksumed by ip_wput into the first mblk. (ip_wput assumes 4243 * that at least the checksum field is in the first mblk.) 4244 */ 4245 switch (ipha->ipha_protocol) { 4246 case IPPROTO_UDP: 4247 tp_hdr_len = 8; 4248 break; 4249 case IPPROTO_TCP: 4250 tp_hdr_len = 20; 4251 break; 4252 default: 4253 tp_hdr_len = 0; 4254 break; 4255 } 4256 /* 4257 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 4258 * tp_hdr_len bytes will be in a single mblk. 
4259 */ 4260 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 4261 tp_hdr_len)) { 4262 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 4263 tp_hdr_len)) { 4264 BUMP_MIB(&is->is_rawip_mib, 4265 rawipOutErrors); 4266 freemsg(mp); 4267 return; 4268 } 4269 ipha = (ipha_t *)mp->b_rptr; 4270 } 4271 4272 /* 4273 * if the length is larger then the max allowed IP packet, 4274 * then send an error and abort the processing. 4275 */ 4276 pkt_len = ntohs(ipha->ipha_length) 4277 + icmp->icmp_ip_snd_options_len; 4278 if (pkt_len > IP_MAXPACKET) { 4279 icmp_ud_err(q, mp, EMSGSIZE); 4280 return; 4281 } 4282 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra + 4283 tp_hdr_len, BPRI_LO))) { 4284 icmp_ud_err(q, mp, ENOMEM); 4285 return; 4286 } 4287 mp1->b_rptr += is->is_wroff_extra; 4288 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4289 4290 ipha->ipha_length = htons((uint16_t)pkt_len); 4291 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4292 4293 /* Copy transport header if any */ 4294 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4295 mp1->b_wptr += tp_hdr_len; 4296 4297 /* Add options */ 4298 ipha = (ipha_t *)mp1->b_rptr; 4299 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 4300 icmp->icmp_ip_snd_options_len); 4301 4302 /* Drop IP header and transport header from original */ 4303 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4304 4305 mp1->b_cont = mp; 4306 mp = mp1; 4307 /* 4308 * Massage source route putting first source 4309 * route in ipha_dst. 
4310 */ 4311 (void) ip_massage_options(ipha, is->is_netstack); 4312 } 4313 4314 if (pktinfop != NULL) { 4315 /* 4316 * Over write the source address provided in the header 4317 */ 4318 if (pktinfop->ip4_addr != INADDR_ANY) { 4319 ipha->ipha_src = pktinfop->ip4_addr; 4320 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4321 } 4322 4323 if (pktinfop->ip4_ill_index != 0) { 4324 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4325 } 4326 } 4327 4328 mblk_setcred(mp, connp->conn_cred); 4329 ip_output_options(connp, mp, q, IP_WPUT, 4330 &optinfo); 4331 } 4332 4333 static boolean_t 4334 icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4335 { 4336 int err; 4337 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4338 icmp_stack_t *is = icmp->icmp_is; 4339 conn_t *connp = icmp->icmp_connp; 4340 4341 err = tsol_compute_label(DB_CREDDEF(mp, connp->conn_cred), dst, 4342 opt_storage, icmp->icmp_mac_exempt, 4343 is->is_netstack->netstack_ip); 4344 if (err == 0) { 4345 err = tsol_update_options(&icmp->icmp_ip_snd_options, 4346 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4347 opt_storage); 4348 } 4349 if (err != 0) { 4350 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4351 DTRACE_PROBE4( 4352 tx__ip__log__drop__updatelabel__icmp, 4353 char *, "queue(1) failed to update options(2) on mp(3)", 4354 queue_t *, q, char *, opt_storage, mblk_t *, mp); 4355 icmp_ud_err(q, mp, err); 4356 return (B_FALSE); 4357 } 4358 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4359 return (B_TRUE); 4360 } 4361 4362 /* 4363 * This routine handles all messages passed downstream. It either 4364 * consumes the message or passes it downstream; it never queues a 4365 * a message. 
4366 */ 4367 static void 4368 icmp_wput(queue_t *q, mblk_t *mp) 4369 { 4370 uchar_t *rptr = mp->b_rptr; 4371 ipha_t *ipha; 4372 mblk_t *mp1; 4373 int ip_hdr_length; 4374 #define tudr ((struct T_unitdata_req *)rptr) 4375 size_t ip_len; 4376 conn_t *connp = Q_TO_CONN(q); 4377 icmp_t *icmp = connp->conn_icmp; 4378 icmp_stack_t *is = icmp->icmp_is; 4379 sin6_t *sin6; 4380 sin_t *sin; 4381 ipaddr_t v4dst; 4382 ip4_pkt_t pktinfo; 4383 ip4_pkt_t *pktinfop = &pktinfo; 4384 ip_opt_info_t optinfo; 4385 4386 switch (mp->b_datap->db_type) { 4387 case M_DATA: 4388 if (icmp->icmp_hdrincl) { 4389 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4390 ipha = (ipha_t *)mp->b_rptr; 4391 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4392 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4393 BUMP_MIB(&is->is_rawip_mib, 4394 rawipOutErrors); 4395 freemsg(mp); 4396 return; 4397 } 4398 ipha = (ipha_t *)mp->b_rptr; 4399 } 4400 /* 4401 * If this connection was used for v6 (inconceivable!) 4402 * or if we have a new destination, then it's time to 4403 * figure a new label. 4404 */ 4405 if (is_system_labeled() && 4406 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4407 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4408 ipha->ipha_dst) && 4409 !icmp_update_label(q, icmp, mp, ipha->ipha_dst)) { 4410 return; 4411 } 4412 icmp_wput_hdrincl(q, mp, icmp, NULL); 4413 return; 4414 } 4415 freemsg(mp); 4416 return; 4417 case M_PROTO: 4418 case M_PCPROTO: 4419 ip_len = mp->b_wptr - rptr; 4420 if (ip_len >= sizeof (struct T_unitdata_req)) { 4421 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4422 if (((union T_primitives *)rptr)->type 4423 == T_UNITDATA_REQ) 4424 break; 4425 } 4426 /* FALLTHRU */ 4427 default: 4428 icmp_wput_other(q, mp); 4429 return; 4430 } 4431 4432 /* Handle T_UNITDATA_REQ messages here. */ 4433 4434 4435 4436 if (icmp->icmp_state == TS_UNBND) { 4437 /* If a port has not been bound to the stream, fail. 
*/ 4438 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4439 icmp_ud_err(q, mp, EPROTO); 4440 return; 4441 } 4442 mp1 = mp->b_cont; 4443 if (mp1 == NULL) { 4444 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4445 icmp_ud_err(q, mp, EPROTO); 4446 return; 4447 } 4448 4449 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4450 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4451 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4452 return; 4453 } 4454 4455 switch (icmp->icmp_family) { 4456 case AF_INET6: 4457 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4458 if (!OK_32PTR((char *)sin6) || 4459 tudr->DEST_length != sizeof (sin6_t) || 4460 sin6->sin6_family != AF_INET6) { 4461 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4462 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4463 return; 4464 } 4465 4466 /* No support for mapped addresses on raw sockets */ 4467 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4468 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4469 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4470 return; 4471 } 4472 4473 /* 4474 * Destination is a native IPv6 address. 4475 * Send out an IPv6 format packet. 
4476 */ 4477 icmp_wput_ipv6(q, mp, sin6, tudr->OPT_length); 4478 return; 4479 4480 case AF_INET: 4481 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4482 if (!OK_32PTR((char *)sin) || 4483 tudr->DEST_length != sizeof (sin_t) || 4484 sin->sin_family != AF_INET) { 4485 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4486 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4487 return; 4488 } 4489 /* Extract and ipaddr */ 4490 v4dst = sin->sin_addr.s_addr; 4491 break; 4492 4493 default: 4494 ASSERT(0); 4495 } 4496 4497 pktinfop->ip4_ill_index = 0; 4498 pktinfop->ip4_addr = INADDR_ANY; 4499 optinfo.ip_opt_flags = 0; 4500 optinfo.ip_opt_ill_index = 0; 4501 4502 4503 /* 4504 * If options passed in, feed it for verification and handling 4505 */ 4506 if (tudr->OPT_length != 0) { 4507 int error; 4508 4509 error = 0; 4510 if (icmp_unitdata_opt_process(q, mp, &error, 4511 (void *)pktinfop) < 0) { 4512 /* failure */ 4513 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4514 icmp_ud_err(q, mp, error); 4515 return; 4516 } 4517 ASSERT(error == 0); 4518 /* 4519 * Note: Success in processing options. 
4520 * mp option buffer represented by 4521 * OPT_length/offset now potentially modified 4522 * and contain option setting results 4523 */ 4524 4525 } 4526 4527 if (v4dst == INADDR_ANY) 4528 v4dst = htonl(INADDR_LOOPBACK); 4529 4530 /* Check if our saved options are valid; update if not */ 4531 if (is_system_labeled() && 4532 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4533 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst) && 4534 !icmp_update_label(q, icmp, mp, v4dst)) { 4535 return; 4536 } 4537 4538 /* Protocol 255 contains full IP headers */ 4539 if (icmp->icmp_hdrincl) { 4540 freeb(mp); 4541 icmp_wput_hdrincl(q, mp1, icmp, pktinfop); 4542 return; 4543 } 4544 4545 4546 /* Add an IP header */ 4547 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4548 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 4549 if ((uchar_t *)ipha < mp1->b_datap->db_base || 4550 mp1->b_datap->db_ref != 1 || 4551 !OK_32PTR(ipha)) { 4552 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, 4553 BPRI_LO))) { 4554 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4555 icmp_ud_err(q, mp, ENOMEM); 4556 return; 4557 } 4558 mp1->b_cont = mp->b_cont; 4559 ipha = (ipha_t *)mp1->b_datap->db_lim; 4560 mp1->b_wptr = (uchar_t *)ipha; 4561 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4562 } 4563 #ifdef _BIG_ENDIAN 4564 /* Set version, header length, and tos */ 4565 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4566 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4567 icmp->icmp_type_of_service); 4568 /* Set ttl and protocol */ 4569 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4570 #else 4571 /* Set version, header length, and tos */ 4572 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4573 ((icmp->icmp_type_of_service << 8) | 4574 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4575 /* Set ttl and protocol */ 4576 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4577 #endif 4578 if (pktinfop->ip4_addr != INADDR_ANY) { 4579 
ipha->ipha_src = pktinfop->ip4_addr; 4580 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4581 } else { 4582 4583 /* 4584 * Copy our address into the packet. If this is zero, 4585 * ip will fill in the real source address. 4586 */ 4587 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4588 } 4589 4590 ipha->ipha_fragment_offset_and_flags = 0; 4591 4592 if (pktinfop->ip4_ill_index != 0) { 4593 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4594 } 4595 4596 4597 /* 4598 * For the socket of SOCK_RAW type, the checksum is provided in the 4599 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4600 * tell IP that the application has sent a complete IP header and not 4601 * to compute the transport checksum nor change the DF flag. 4602 */ 4603 ipha->ipha_ident = IP_HDR_INCLUDED; 4604 4605 /* Finish common formatting of the packet. */ 4606 mp1->b_rptr = (uchar_t *)ipha; 4607 4608 ip_len = mp1->b_wptr - (uchar_t *)ipha; 4609 if (mp1->b_cont != NULL) 4610 ip_len += msgdsize(mp1->b_cont); 4611 4612 /* 4613 * Set the length into the IP header. 4614 * If the length is greater than the maximum allowed by IP, 4615 * then free the message and return. Do not try and send it 4616 * as this can cause problems in layers below. 4617 */ 4618 if (ip_len > IP_MAXPACKET) { 4619 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4620 icmp_ud_err(q, mp, EMSGSIZE); 4621 return; 4622 } 4623 ipha->ipha_length = htons((uint16_t)ip_len); 4624 /* 4625 * Copy in the destination address from the T_UNITDATA 4626 * request 4627 */ 4628 ipha->ipha_dst = v4dst; 4629 4630 /* 4631 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4632 */ 4633 if (CLASSD(v4dst)) 4634 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4635 4636 /* Copy in options if any */ 4637 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4638 bcopy(icmp->icmp_ip_snd_options, 4639 &ipha[1], icmp->icmp_ip_snd_options_len); 4640 /* 4641 * Massage source route putting first source route in ipha_dst. 
4642 * Ignore the destination in the T_unitdata_req. 4643 */ 4644 (void) ip_massage_options(ipha, is->is_netstack); 4645 } 4646 4647 freeb(mp); 4648 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 4649 mblk_setcred(mp1, connp->conn_cred); 4650 ip_output_options(Q_TO_CONN(q), mp1, q, IP_WPUT, &optinfo); 4651 #undef ipha 4652 #undef tudr 4653 } 4654 4655 static boolean_t 4656 icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4657 { 4658 int err; 4659 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4660 icmp_stack_t *is = icmp->icmp_is; 4661 conn_t *connp = icmp->icmp_connp; 4662 4663 err = tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), dst, 4664 opt_storage, icmp->icmp_mac_exempt, 4665 is->is_netstack->netstack_ip); 4666 if (err == 0) { 4667 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4668 &icmp->icmp_label_len_v6, opt_storage); 4669 } 4670 if (err != 0) { 4671 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4672 DTRACE_PROBE4( 4673 tx__ip__log__drop__updatelabel__icmp6, 4674 char *, "queue(1) failed to update options(2) on mp(3)", 4675 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4676 icmp_ud_err(wq, mp, err); 4677 return (B_FALSE); 4678 } 4679 4680 icmp->icmp_v6lastdst = *dst; 4681 return (B_TRUE); 4682 } 4683 4684 /* 4685 * icmp_wput_ipv6(): 4686 * Assumes that icmp_wput did some sanity checking on the destination 4687 * address, but that the label may not yet be correct. 
4688 */ 4689 void 4690 icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) 4691 { 4692 ip6_t *ip6h; 4693 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 4694 mblk_t *mp1; 4695 int ip_hdr_len = IPV6_HDR_LEN; 4696 size_t ip_len; 4697 icmp_t *icmp = Q_TO_ICMP(q); 4698 icmp_stack_t *is = icmp->icmp_is; 4699 ip6_pkt_t ipp_s; /* For ancillary data options */ 4700 ip6_pkt_t *ipp = &ipp_s; 4701 ip6_pkt_t *tipp; 4702 uint32_t csum = 0; 4703 uint_t ignore = 0; 4704 uint_t option_exists = 0, is_sticky = 0; 4705 uint8_t *cp; 4706 uint8_t *nxthdr_ptr; 4707 in6_addr_t ip6_dst; 4708 4709 /* 4710 * If the local address is a mapped address return 4711 * an error. 4712 * It would be possible to send an IPv6 packet but the 4713 * response would never make it back to the application 4714 * since it is bound to a mapped address. 4715 */ 4716 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4717 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4718 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4719 return; 4720 } 4721 4722 ipp->ipp_fields = 0; 4723 ipp->ipp_sticky_ignored = 0; 4724 4725 /* 4726 * If TPI options passed in, feed it for verification and handling 4727 */ 4728 if (tudr_optlen != 0) { 4729 int error; 4730 4731 if (icmp_unitdata_opt_process(q, mp, &error, 4732 (void *)ipp) < 0) { 4733 /* failure */ 4734 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4735 icmp_ud_err(q, mp, error); 4736 return; 4737 } 4738 ignore = ipp->ipp_sticky_ignored; 4739 ASSERT(error == 0); 4740 } 4741 4742 if (sin6->sin6_scope_id != 0 && 4743 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4744 /* 4745 * IPPF_SCOPE_ID is special. It's neither a sticky 4746 * option nor ancillary data. It needs to be 4747 * explicitly set in options_exists. 
4748 */ 4749 option_exists |= IPPF_SCOPE_ID; 4750 } 4751 4752 /* 4753 * Compute the destination address 4754 */ 4755 ip6_dst = sin6->sin6_addr; 4756 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4757 ip6_dst = ipv6_loopback; 4758 4759 /* 4760 * If we're not going to the same destination as last time, then 4761 * recompute the label required. This is done in a separate routine to 4762 * avoid blowing up our stack here. 4763 */ 4764 if (is_system_labeled() && 4765 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst) && 4766 !icmp_update_label_v6(q, icmp, mp, &ip6_dst)) { 4767 return; 4768 } 4769 4770 /* 4771 * If there's a security label here, then we ignore any options the 4772 * user may try to set. We keep the peer's label as a hidden sticky 4773 * option. 4774 */ 4775 if (icmp->icmp_label_len_v6 > 0) { 4776 ignore &= ~IPPF_HOPOPTS; 4777 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4778 } 4779 4780 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4781 (ipp->ipp_fields == 0)) { 4782 /* No sticky options nor ancillary data. */ 4783 goto no_options; 4784 } 4785 4786 /* 4787 * Go through the options figuring out where each is going to 4788 * come from and build two masks. The first mask indicates if 4789 * the option exists at all. The second mask indicates if the 4790 * option is sticky or ancillary. 
4791 */ 4792 if (!(ignore & IPPF_HOPOPTS)) { 4793 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4794 option_exists |= IPPF_HOPOPTS; 4795 ip_hdr_len += ipp->ipp_hopoptslen; 4796 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4797 option_exists |= IPPF_HOPOPTS; 4798 is_sticky |= IPPF_HOPOPTS; 4799 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4800 } 4801 } 4802 4803 if (!(ignore & IPPF_RTHDR)) { 4804 if (ipp->ipp_fields & IPPF_RTHDR) { 4805 option_exists |= IPPF_RTHDR; 4806 ip_hdr_len += ipp->ipp_rthdrlen; 4807 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4808 option_exists |= IPPF_RTHDR; 4809 is_sticky |= IPPF_RTHDR; 4810 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4811 } 4812 } 4813 4814 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4815 /* 4816 * Need to have a router header to use these. 4817 */ 4818 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4819 option_exists |= IPPF_RTDSTOPTS; 4820 ip_hdr_len += ipp->ipp_rtdstoptslen; 4821 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4822 option_exists |= IPPF_RTDSTOPTS; 4823 is_sticky |= IPPF_RTDSTOPTS; 4824 ip_hdr_len += 4825 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4826 } 4827 } 4828 4829 if (!(ignore & IPPF_DSTOPTS)) { 4830 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4831 option_exists |= IPPF_DSTOPTS; 4832 ip_hdr_len += ipp->ipp_dstoptslen; 4833 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4834 option_exists |= IPPF_DSTOPTS; 4835 is_sticky |= IPPF_DSTOPTS; 4836 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4837 } 4838 } 4839 4840 if (!(ignore & IPPF_IFINDEX)) { 4841 if (ipp->ipp_fields & IPPF_IFINDEX) { 4842 option_exists |= IPPF_IFINDEX; 4843 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4844 option_exists |= IPPF_IFINDEX; 4845 is_sticky |= IPPF_IFINDEX; 4846 } 4847 } 4848 4849 if (!(ignore & IPPF_ADDR)) { 4850 if (ipp->ipp_fields & IPPF_ADDR) { 4851 option_exists |= IPPF_ADDR; 4852 } else if 
(icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4853 option_exists |= IPPF_ADDR; 4854 is_sticky |= IPPF_ADDR; 4855 } 4856 } 4857 4858 if (!(ignore & IPPF_DONTFRAG)) { 4859 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4860 option_exists |= IPPF_DONTFRAG; 4861 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4862 option_exists |= IPPF_DONTFRAG; 4863 is_sticky |= IPPF_DONTFRAG; 4864 } 4865 } 4866 4867 if (!(ignore & IPPF_USE_MIN_MTU)) { 4868 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4869 option_exists |= IPPF_USE_MIN_MTU; 4870 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4871 IPPF_USE_MIN_MTU) { 4872 option_exists |= IPPF_USE_MIN_MTU; 4873 is_sticky |= IPPF_USE_MIN_MTU; 4874 } 4875 } 4876 4877 if (!(ignore & IPPF_NEXTHOP)) { 4878 if (ipp->ipp_fields & IPPF_NEXTHOP) { 4879 option_exists |= IPPF_NEXTHOP; 4880 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) { 4881 option_exists |= IPPF_NEXTHOP; 4882 is_sticky |= IPPF_NEXTHOP; 4883 } 4884 } 4885 4886 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 4887 option_exists |= IPPF_HOPLIMIT; 4888 /* IPV6_HOPLIMIT can never be sticky */ 4889 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 4890 4891 if (!(ignore & IPPF_UNICAST_HOPS) && 4892 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 4893 option_exists |= IPPF_UNICAST_HOPS; 4894 is_sticky |= IPPF_UNICAST_HOPS; 4895 } 4896 4897 if (!(ignore & IPPF_MULTICAST_HOPS) && 4898 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 4899 option_exists |= IPPF_MULTICAST_HOPS; 4900 is_sticky |= IPPF_MULTICAST_HOPS; 4901 } 4902 4903 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 4904 /* This is a sticky socket option only */ 4905 option_exists |= IPPF_NO_CKSUM; 4906 is_sticky |= IPPF_NO_CKSUM; 4907 } 4908 4909 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 4910 /* This is a sticky socket option only */ 4911 option_exists |= IPPF_RAW_CKSUM; 4912 is_sticky |= IPPF_RAW_CKSUM; 4913 } 4914 4915 if (!(ignore 
& IPPF_TCLASS)) { 4916 if (ipp->ipp_fields & IPPF_TCLASS) { 4917 option_exists |= IPPF_TCLASS; 4918 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 4919 option_exists |= IPPF_TCLASS; 4920 is_sticky |= IPPF_TCLASS; 4921 } 4922 } 4923 4924 no_options: 4925 4926 /* 4927 * If any options carried in the ip6i_t were specified, we 4928 * need to account for the ip6i_t in the data we'll be sending 4929 * down. 4930 */ 4931 if (option_exists & IPPF_HAS_IP6I) 4932 ip_hdr_len += sizeof (ip6i_t); 4933 4934 /* check/fix buffer config, setup pointers into it */ 4935 mp1 = mp->b_cont; 4936 ip6h = (ip6_t *)&mp1->b_rptr[-ip_hdr_len]; 4937 if ((mp1->b_datap->db_ref != 1) || 4938 ((unsigned char *)ip6h < mp1->b_datap->db_base) || 4939 !OK_32PTR(ip6h)) { 4940 /* Try to get everything in a single mblk next time */ 4941 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 4942 icmp->icmp_max_hdr_len = ip_hdr_len; 4943 (void) mi_set_sth_wroff(RD(q), 4944 icmp->icmp_max_hdr_len + is->is_wroff_extra); 4945 } 4946 mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); 4947 if (!mp1) { 4948 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4949 icmp_ud_err(q, mp, ENOMEM); 4950 return; 4951 } 4952 mp1->b_cont = mp->b_cont; 4953 mp1->b_wptr = mp1->b_datap->db_lim; 4954 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 4955 } 4956 mp1->b_rptr = (unsigned char *)ip6h; 4957 ip6i = (ip6i_t *)ip6h; 4958 4959 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 4960 if (option_exists & IPPF_HAS_IP6I) { 4961 ip6h = (ip6_t *)&ip6i[1]; 4962 ip6i->ip6i_flags = 0; 4963 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4964 4965 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 4966 if (option_exists & IPPF_SCOPE_ID) { 4967 ip6i->ip6i_flags |= IP6I_IFINDEX; 4968 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 4969 } else if (option_exists & IPPF_IFINDEX) { 4970 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 4971 ASSERT(tipp->ipp_ifindex != 0); 4972 ip6i->ip6i_flags |= IP6I_IFINDEX; 4973 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 4974 } 4975 4976 if (option_exists & IPPF_RAW_CKSUM) { 4977 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 4978 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 4979 } 4980 4981 if (option_exists & IPPF_NO_CKSUM) { 4982 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 4983 } 4984 4985 if (option_exists & IPPF_ADDR) { 4986 /* 4987 * Enable per-packet source address verification if 4988 * IPV6_PKTINFO specified the source address. 4989 * ip6_src is set in the transport's _wput function. 4990 */ 4991 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 4992 } 4993 4994 if (option_exists & IPPF_DONTFRAG) { 4995 ip6i->ip6i_flags |= IP6I_DONTFRAG; 4996 } 4997 4998 if (option_exists & IPPF_USE_MIN_MTU) { 4999 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 5000 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 5001 } 5002 5003 if (option_exists & IPPF_NEXTHOP) { 5004 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 5005 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 5006 ip6i->ip6i_flags |= IP6I_NEXTHOP; 5007 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 5008 } 5009 5010 /* 5011 * tell IP this is an ip6i_t private header 5012 */ 5013 ip6i->ip6i_nxt = IPPROTO_RAW; 5014 } 5015 5016 /* Initialize IPv6 header */ 5017 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5018 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 5019 5020 /* Set the hoplimit of the outgoing packet. 
*/ 5021 if (option_exists & IPPF_HOPLIMIT) { 5022 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 5023 ip6h->ip6_hops = ipp->ipp_hoplimit; 5024 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5025 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 5026 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 5027 if (option_exists & IPPF_MULTICAST_HOPS) 5028 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5029 } else { 5030 ip6h->ip6_hops = icmp->icmp_ttl; 5031 if (option_exists & IPPF_UNICAST_HOPS) 5032 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5033 } 5034 5035 if (option_exists & IPPF_ADDR) { 5036 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 5037 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 5038 ip6h->ip6_src = tipp->ipp_addr; 5039 } else { 5040 /* 5041 * The source address was not set using IPV6_PKTINFO. 5042 * First look at the bound source. 5043 * If unspecified fallback to __sin6_src_id. 5044 */ 5045 ip6h->ip6_src = icmp->icmp_v6src; 5046 if (sin6->__sin6_src_id != 0 && 5047 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 5048 ip_srcid_find_id(sin6->__sin6_src_id, 5049 &ip6h->ip6_src, icmp->icmp_zoneid, 5050 is->is_netstack); 5051 } 5052 } 5053 5054 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 5055 cp = (uint8_t *)&ip6h[1]; 5056 5057 /* 5058 * Here's where we have to start stringing together 5059 * any extension headers in the right order: 5060 * Hop-by-hop, destination, routing, and final destination opts. 
5061 */ 5062 if (option_exists & IPPF_HOPOPTS) { 5063 /* Hop-by-hop options */ 5064 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 5065 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 5066 5067 *nxthdr_ptr = IPPROTO_HOPOPTS; 5068 nxthdr_ptr = &hbh->ip6h_nxt; 5069 5070 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 5071 cp += tipp->ipp_hopoptslen; 5072 } 5073 /* 5074 * En-route destination options 5075 * Only do them if there's a routing header as well 5076 */ 5077 if (option_exists & IPPF_RTDSTOPTS) { 5078 ip6_dest_t *dst = (ip6_dest_t *)cp; 5079 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 5080 5081 *nxthdr_ptr = IPPROTO_DSTOPTS; 5082 nxthdr_ptr = &dst->ip6d_nxt; 5083 5084 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 5085 cp += tipp->ipp_rtdstoptslen; 5086 } 5087 /* 5088 * Routing header next 5089 */ 5090 if (option_exists & IPPF_RTHDR) { 5091 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 5092 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 5093 5094 *nxthdr_ptr = IPPROTO_ROUTING; 5095 nxthdr_ptr = &rt->ip6r_nxt; 5096 5097 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 5098 cp += tipp->ipp_rthdrlen; 5099 } 5100 /* 5101 * Do ultimate destination options 5102 */ 5103 if (option_exists & IPPF_DSTOPTS) { 5104 ip6_dest_t *dest = (ip6_dest_t *)cp; 5105 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 5106 5107 *nxthdr_ptr = IPPROTO_DSTOPTS; 5108 nxthdr_ptr = &dest->ip6d_nxt; 5109 5110 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 5111 cp += tipp->ipp_dstoptslen; 5112 } 5113 5114 /* 5115 * Now set the last header pointer to the proto passed in 5116 */ 5117 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 5118 *nxthdr_ptr = icmp->icmp_proto; 5119 5120 /* 5121 * Copy in the destination address 5122 */ 5123 ip6h->ip6_dst = ip6_dst; 5124 5125 ip6h->ip6_vcf = 5126 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 5127 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 5128 5129 if (option_exists & IPPF_TCLASS) { 5130 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 5131 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 5132 tipp->ipp_tclass); 5133 } 5134 if (option_exists & IPPF_RTHDR) { 5135 ip6_rthdr_t *rth; 5136 5137 /* 5138 * Perform any processing needed for source routing. 5139 * We know that all extension headers will be in the same mblk 5140 * as the IPv6 header. 5141 */ 5142 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 5143 if (rth != NULL && rth->ip6r_segleft != 0) { 5144 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 5145 /* 5146 * Drop packet - only support Type 0 routing. 5147 * Notify the application as well. 5148 */ 5149 icmp_ud_err(q, mp, EPROTO); 5150 BUMP_MIB(&is->is_rawip_mib, 5151 rawipOutErrors); 5152 return; 5153 } 5154 /* 5155 * rth->ip6r_len is twice the number of 5156 * addresses in the header 5157 */ 5158 if (rth->ip6r_len & 0x1) { 5159 icmp_ud_err(q, mp, EPROTO); 5160 BUMP_MIB(&is->is_rawip_mib, 5161 rawipOutErrors); 5162 return; 5163 } 5164 /* 5165 * Shuffle the routing header and ip6_dst 5166 * addresses, and get the checksum difference 5167 * between the first hop (in ip6_dst) and 5168 * the destination (in the last routing hdr entry). 5169 */ 5170 csum = ip_massage_options_v6(ip6h, rth, 5171 is->is_netstack); 5172 /* 5173 * Verify that the first hop isn't a mapped address. 5174 * Routers along the path need to do this verification 5175 * for subsequent hops. 5176 */ 5177 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 5178 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5179 BUMP_MIB(&is->is_rawip_mib, 5180 rawipOutErrors); 5181 return; 5182 } 5183 } 5184 } 5185 5186 ip_len = mp1->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 5187 if (mp1->b_cont != NULL) 5188 ip_len += msgdsize(mp1->b_cont); 5189 5190 /* 5191 * Set the length into the IP header. 5192 * If the length is greater than the maximum allowed by IP, 5193 * then free the message and return. Do not try and send it 5194 * as this can cause problems in layers below. 
5195 */ 5196 if (ip_len > IP_MAXPACKET) { 5197 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5198 icmp_ud_err(q, mp, EMSGSIZE); 5199 return; 5200 } 5201 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 5202 uint_t cksum_off; /* From ip6i == mp1->b_rptr */ 5203 uint16_t *cksum_ptr; 5204 uint_t ext_hdrs_len; 5205 5206 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 5207 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 5208 icmp->icmp_checksum_off == 2); 5209 5210 /* 5211 * We make it easy for IP to include our pseudo header 5212 * by putting our length in uh_checksum, modified (if 5213 * we have a routing header) by the checksum difference 5214 * between the ultimate destination and first hop addresses. 5215 * Note: ICMPv6 must always checksum the packet. 5216 */ 5217 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 5218 if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) { 5219 if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) { 5220 BUMP_MIB(&is->is_rawip_mib, 5221 rawipOutErrors); 5222 freemsg(mp); 5223 return; 5224 } 5225 ip6i = (ip6i_t *)mp1->b_rptr; 5226 if (ip6i->ip6i_nxt == IPPROTO_RAW) 5227 ip6h = (ip6_t *)&ip6i[1]; 5228 else 5229 ip6h = (ip6_t *)ip6i; 5230 } 5231 /* Add payload length to checksum */ 5232 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 5233 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 5234 csum += htons(ip_len - ext_hdrs_len); 5235 5236 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 5237 csum = (csum & 0xFFFF) + (csum >> 16); 5238 *cksum_ptr = (uint16_t)csum; 5239 } 5240 5241 #ifdef _LITTLE_ENDIAN 5242 ip_len = htons(ip_len); 5243 #endif 5244 ip6h->ip6_plen = (uint16_t)ip_len; 5245 5246 freeb(mp); 5247 5248 /* We're done. 
Pass the packet to IP */ 5249 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 5250 ip_output_v6(icmp->icmp_connp, mp1, q, IP_WPUT); 5251 } 5252 5253 static void 5254 icmp_wput_other(queue_t *q, mblk_t *mp) 5255 { 5256 uchar_t *rptr = mp->b_rptr; 5257 struct iocblk *iocp; 5258 #define tudr ((struct T_unitdata_req *)rptr) 5259 conn_t *connp = Q_TO_CONN(q); 5260 icmp_t *icmp = connp->conn_icmp; 5261 icmp_stack_t *is = icmp->icmp_is; 5262 cred_t *cr; 5263 5264 cr = DB_CREDDEF(mp, connp->conn_cred); 5265 5266 switch (mp->b_datap->db_type) { 5267 case M_PROTO: 5268 case M_PCPROTO: 5269 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5270 /* 5271 * If the message does not contain a PRIM_type, 5272 * throw it away. 5273 */ 5274 freemsg(mp); 5275 return; 5276 } 5277 switch (((union T_primitives *)rptr)->type) { 5278 case T_ADDR_REQ: 5279 icmp_addr_req(q, mp); 5280 return; 5281 case O_T_BIND_REQ: 5282 case T_BIND_REQ: 5283 icmp_bind(q, mp); 5284 return; 5285 case T_CONN_REQ: 5286 icmp_connect(q, mp); 5287 return; 5288 case T_CAPABILITY_REQ: 5289 icmp_capability_req(q, mp); 5290 return; 5291 case T_INFO_REQ: 5292 icmp_info_req(q, mp); 5293 return; 5294 case T_UNITDATA_REQ: 5295 /* 5296 * If a T_UNITDATA_REQ gets here, the address must 5297 * be bad. Valid T_UNITDATA_REQs are found above 5298 * and break to below this switch. 5299 */ 5300 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5301 return; 5302 case T_UNBIND_REQ: 5303 icmp_unbind(q, mp); 5304 return; 5305 5306 case T_SVR4_OPTMGMT_REQ: 5307 if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get, 5308 cr)) { 5309 /* Only IP can return anything meaningful */ 5310 (void) svr4_optcom_req(q, mp, cr, 5311 &icmp_opt_obj, B_TRUE); 5312 } 5313 return; 5314 5315 case T_OPTMGMT_REQ: 5316 /* Only IP can return anything meaningful */ 5317 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE); 5318 return; 5319 5320 case T_DISCON_REQ: 5321 icmp_disconnect(q, mp); 5322 return; 5323 5324 /* The following TPI message is not supported by icmp. 
*/ 5325 case O_T_CONN_RES: 5326 case T_CONN_RES: 5327 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5328 return; 5329 5330 /* The following 3 TPI requests are illegal for icmp. */ 5331 case T_DATA_REQ: 5332 case T_EXDATA_REQ: 5333 case T_ORDREL_REQ: 5334 freemsg(mp); 5335 (void) putctl1(RD(q), M_ERROR, EPROTO); 5336 return; 5337 default: 5338 break; 5339 } 5340 break; 5341 case M_IOCTL: 5342 iocp = (struct iocblk *)mp->b_rptr; 5343 switch (iocp->ioc_cmd) { 5344 case TI_GETPEERNAME: 5345 if (icmp->icmp_state != TS_DATA_XFER) { 5346 /* 5347 * If a default destination address has not 5348 * been associated with the stream, then we 5349 * don't know the peer's name. 5350 */ 5351 iocp->ioc_error = ENOTCONN; 5352 err_ret:; 5353 iocp->ioc_count = 0; 5354 mp->b_datap->db_type = M_IOCACK; 5355 qreply(q, mp); 5356 return; 5357 } 5358 /* FALLTHRU */ 5359 case TI_GETMYNAME: 5360 /* 5361 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5362 * need to copyin the user's strbuf structure. 5363 * Processing will continue in the M_IOCDATA case 5364 * below. 5365 */ 5366 mi_copyin(q, mp, NULL, 5367 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5368 return; 5369 case ND_SET: 5370 /* nd_getset performs the necessary error checking */ 5371 case ND_GET: 5372 if (nd_getset(q, is->is_nd, mp)) { 5373 qreply(q, mp); 5374 return; 5375 } 5376 break; 5377 default: 5378 break; 5379 } 5380 break; 5381 case M_IOCDATA: 5382 icmp_wput_iocdata(q, mp); 5383 return; 5384 default: 5385 break; 5386 } 5387 ip_wput(q, mp); 5388 } 5389 5390 /* 5391 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5392 * messages. 5393 */ 5394 static void 5395 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5396 { 5397 mblk_t *mp1; 5398 STRUCT_HANDLE(strbuf, sb); 5399 icmp_t *icmp; 5400 in6_addr_t v6addr; 5401 ipaddr_t v4addr; 5402 uint32_t flowinfo = 0; 5403 int addrlen; 5404 5405 /* Make sure it is one of ours. 
*/ 5406 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5407 case TI_GETMYNAME: 5408 case TI_GETPEERNAME: 5409 break; 5410 default: 5411 icmp = Q_TO_ICMP(q); 5412 ip_output(icmp->icmp_connp, mp, q, IP_WPUT); 5413 return; 5414 } 5415 switch (mi_copy_state(q, mp, &mp1)) { 5416 case -1: 5417 return; 5418 case MI_COPY_CASE(MI_COPY_IN, 1): 5419 break; 5420 case MI_COPY_CASE(MI_COPY_OUT, 1): 5421 /* 5422 * The address has been copied out, so now 5423 * copyout the strbuf. 5424 */ 5425 mi_copyout(q, mp); 5426 return; 5427 case MI_COPY_CASE(MI_COPY_OUT, 2): 5428 /* 5429 * The address and strbuf have been copied out. 5430 * We're done, so just acknowledge the original 5431 * M_IOCTL. 5432 */ 5433 mi_copy_done(q, mp, 0); 5434 return; 5435 default: 5436 /* 5437 * Something strange has happened, so acknowledge 5438 * the original M_IOCTL with an EPROTO error. 5439 */ 5440 mi_copy_done(q, mp, EPROTO); 5441 return; 5442 } 5443 /* 5444 * Now we have the strbuf structure for TI_GETMYNAME 5445 * and TI_GETPEERNAME. Next we copyout the requested 5446 * address and then we'll copyout the strbuf. 5447 */ 5448 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5449 (void *)mp1->b_rptr); 5450 icmp = Q_TO_ICMP(q); 5451 if (icmp->icmp_family == AF_INET) 5452 addrlen = sizeof (sin_t); 5453 else 5454 addrlen = sizeof (sin6_t); 5455 5456 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5457 mi_copy_done(q, mp, EINVAL); 5458 return; 5459 } 5460 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5461 case TI_GETMYNAME: 5462 if (icmp->icmp_family == AF_INET) { 5463 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5464 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 5465 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5466 v4addr = V4_PART_OF_V6(icmp->icmp_v6src); 5467 } else { 5468 /* 5469 * INADDR_ANY 5470 * icmp_v6src is not set, we might be bound to 5471 * broadcast/multicast. 
Use icmp_bound_v6src as 5472 * local address instead (that could 5473 * also still be INADDR_ANY) 5474 */ 5475 v4addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 5476 } 5477 } else { 5478 /* icmp->icmp_family == AF_INET6 */ 5479 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5480 v6addr = icmp->icmp_v6src; 5481 } else { 5482 /* 5483 * UNSPECIFIED 5484 * icmp_v6src is not set, we might be bound to 5485 * broadcast/multicast. Use icmp_bound_v6src as 5486 * local address instead (that could 5487 * also still be UNSPECIFIED) 5488 */ 5489 v6addr = icmp->icmp_bound_v6src; 5490 } 5491 } 5492 break; 5493 case TI_GETPEERNAME: 5494 if (icmp->icmp_family == AF_INET) { 5495 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5496 v4addr = V4_PART_OF_V6(icmp->icmp_v6dst); 5497 } else { 5498 /* icmp->icmp_family == AF_INET6) */ 5499 v6addr = icmp->icmp_v6dst; 5500 flowinfo = icmp->icmp_flowinfo; 5501 } 5502 break; 5503 default: 5504 mi_copy_done(q, mp, EPROTO); 5505 return; 5506 } 5507 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5508 if (!mp1) 5509 return; 5510 5511 if (icmp->icmp_family == AF_INET) { 5512 sin_t *sin; 5513 5514 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 5515 sin = (sin_t *)mp1->b_rptr; 5516 mp1->b_wptr = (uchar_t *)&sin[1]; 5517 *sin = sin_null; 5518 sin->sin_family = AF_INET; 5519 sin->sin_addr.s_addr = v4addr; 5520 } else { 5521 /* icmp->icmp_family == AF_INET6 */ 5522 sin6_t *sin6; 5523 5524 ASSERT(icmp->icmp_family == AF_INET6); 5525 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 5526 sin6 = (sin6_t *)mp1->b_rptr; 5527 mp1->b_wptr = (uchar_t *)&sin6[1]; 5528 *sin6 = sin6_null; 5529 sin6->sin6_family = AF_INET6; 5530 sin6->sin6_flowinfo = flowinfo; 5531 sin6->sin6_addr = v6addr; 5532 } 5533 /* Copy out the address */ 5534 mi_copyout(q, mp); 5535 } 5536 5537 static int 5538 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5539 void *thisdg_attrs) 5540 { 5541 conn_t *connp = Q_TO_CONN(q); 5542 struct T_unitdata_req *udreqp; 5543 
int is_absreq_failure; 5544 cred_t *cr; 5545 5546 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5547 *errorp = 0; 5548 5549 cr = DB_CREDDEF(mp, connp->conn_cred); 5550 5551 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5552 udreqp->OPT_offset, cr, &icmp_opt_obj, 5553 thisdg_attrs, &is_absreq_failure); 5554 5555 if (*errorp != 0) { 5556 /* 5557 * Note: No special action needed in this 5558 * module for "is_absreq_failure" 5559 */ 5560 return (-1); /* failure */ 5561 } 5562 ASSERT(is_absreq_failure == 0); 5563 return (0); /* success */ 5564 } 5565 5566 void 5567 icmp_ddi_init(void) 5568 { 5569 icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5570 icmp_opt_obj.odb_opt_arr_cnt); 5571 5572 /* 5573 * We want to be informed each time a stack is created or 5574 * destroyed in the kernel, so we can maintain the 5575 * set of icmp_stack_t's. 5576 */ 5577 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); 5578 } 5579 5580 void 5581 icmp_ddi_destroy(void) 5582 { 5583 netstack_unregister(NS_ICMP); 5584 } 5585 5586 /* 5587 * Initialize the ICMP stack instance. 5588 */ 5589 static void * 5590 rawip_stack_init(netstackid_t stackid, netstack_t *ns) 5591 { 5592 icmp_stack_t *is; 5593 icmpparam_t *pa; 5594 5595 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); 5596 is->is_netstack = ns; 5597 5598 pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); 5599 is->is_param_arr = pa; 5600 bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); 5601 5602 (void) icmp_param_register(&is->is_nd, 5603 is->is_param_arr, A_CNT(icmp_param_arr)); 5604 is->is_ksp = rawip_kstat_init(stackid); 5605 return (is); 5606 } 5607 5608 /* 5609 * Free the ICMP stack instance. 
5610 */ 5611 static void 5612 rawip_stack_fini(netstackid_t stackid, void *arg) 5613 { 5614 icmp_stack_t *is = (icmp_stack_t *)arg; 5615 5616 nd_free(&is->is_nd); 5617 kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); 5618 is->is_param_arr = NULL; 5619 5620 rawip_kstat_fini(stackid, is->is_ksp); 5621 is->is_ksp = NULL; 5622 kmem_free(is, sizeof (*is)); 5623 } 5624 5625 static void * 5626 rawip_kstat_init(netstackid_t stackid) { 5627 kstat_t *ksp; 5628 5629 rawip_named_kstat_t template = { 5630 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5631 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5632 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5633 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5634 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5635 }; 5636 5637 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", 5638 KSTAT_TYPE_NAMED, 5639 NUM_OF_FIELDS(rawip_named_kstat_t), 5640 0, stackid); 5641 if (ksp == NULL || ksp->ks_data == NULL) 5642 return (NULL); 5643 5644 bcopy(&template, ksp->ks_data, sizeof (template)); 5645 ksp->ks_update = rawip_kstat_update; 5646 ksp->ks_private = (void *)(uintptr_t)stackid; 5647 5648 kstat_install(ksp); 5649 return (ksp); 5650 } 5651 5652 static void 5653 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) 5654 { 5655 if (ksp != NULL) { 5656 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 5657 kstat_delete_netstack(ksp, stackid); 5658 } 5659 } 5660 5661 static int 5662 rawip_kstat_update(kstat_t *ksp, int rw) 5663 { 5664 rawip_named_kstat_t *rawipkp; 5665 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; 5666 netstack_t *ns; 5667 icmp_stack_t *is; 5668 5669 if ((ksp == NULL) || (ksp->ks_data == NULL)) 5670 return (EIO); 5671 5672 if (rw == KSTAT_WRITE) 5673 return (EACCES); 5674 5675 rawipkp = (rawip_named_kstat_t *)ksp->ks_data; 5676 5677 ns = netstack_find_by_stackid(stackid); 5678 if (ns == NULL) 5679 return (-1); 5680 is = ns->netstack_icmp; 5681 if (is == NULL) { 5682 netstack_rele(ns); 5683 return (-1); 5684 } 5685 
rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; 5686 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; 5687 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; 5688 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; 5689 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; 5690 netstack_rele(ns); 5691 return (0); 5692 } 5693