1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/types.h> 31 #include <sys/stream.h> 32 #include <sys/stropts.h> 33 #include <sys/strlog.h> 34 #include <sys/strsun.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/priv.h> 46 #include <sys/zone.h> 47 #include <sys/time.h> 48 49 #include <sys/socket.h> 50 #include <sys/isa_defs.h> 51 #include <sys/suntpi.h> 52 #include <sys/xti_inet.h> 53 #include <sys/netstack.h> 54 55 #include <net/route.h> 56 #include <net/if.h> 57 58 #include <netinet/in.h> 59 #include <netinet/ip6.h> 60 #include <netinet/icmp6.h> 61 #include <inet/common.h> 62 #include <inet/ip.h> 63 #include <inet/ip6.h> 64 #include <inet/mi.h> 65 #include <inet/nd.h> 66 #include <inet/optcom.h> 67 #include <inet/snmpcom.h> 68 #include <inet/kstatcom.h> 69 #include <inet/rawip_impl.h> 70 71 #include <netinet/ip_mroute.h> 72 #include <inet/tcp.h> 73 #include <net/pfkeyv2.h> 74 #include <inet/ipsec_info.h> 75 #include <inet/ipclassifier.h> 76 77 #include <sys/tsol/label.h> 78 #include <sys/tsol/tnet.h> 79 80 #include <inet/ip_ire.h> 81 #include <inet/ip_if.h> 82 83 #include <inet/ip_impl.h> 84 85 /* 86 * Synchronization notes: 87 * 88 * RAWIP is MT and uses the usual kernel synchronization primitives. There is 89 * one lock, which is icmp_rwlock. We also use conn_lock when updating things 90 * which affect the IP classifier lookup. 91 * The lock order is icmp_rwlock -> conn_lock. 92 * 93 * The icmp_rwlock: 94 * This protects most of the other fields in the icmp_t. The exact list of 95 * fields which are protected by each of the above locks is documented in 96 * the icmp_t structure definition. 97 * 98 * Plumbing notes: 99 * ICMP is always a device driver.
For compatibility with mibopen() code 100 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough 101 * dummy module. 102 */ 103 104 static void icmp_addr_req(queue_t *q, mblk_t *mp); 105 static void icmp_bind(queue_t *q, mblk_t *mp); 106 static void icmp_bind_proto(queue_t *q); 107 static void icmp_bind_result(conn_t *, mblk_t *); 108 static void icmp_bind_ack(conn_t *, mblk_t *mp); 109 static void icmp_bind_error(conn_t *, mblk_t *mp); 110 static int icmp_build_hdrs(icmp_t *icmp); 111 static void icmp_capability_req(queue_t *q, mblk_t *mp); 112 static int icmp_close(queue_t *q); 113 static void icmp_connect(queue_t *q, mblk_t *mp); 114 static void icmp_disconnect(queue_t *q, mblk_t *mp); 115 static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 116 int sys_error); 117 static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 118 t_scalar_t t_error, int sys_error); 119 static void icmp_icmp_error(queue_t *q, mblk_t *mp); 120 static void icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 121 static void icmp_info_req(queue_t *q, mblk_t *mp); 122 static void icmp_input(void *, mblk_t *, void *); 123 static mblk_t *icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, 124 t_scalar_t addr_length, in_port_t); 125 static int icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, 126 cred_t *credp, boolean_t isv6); 127 static int icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 128 cred_t *credp); 129 static int icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 130 cred_t *credp); 131 static void icmp_output(queue_t *q, mblk_t *mp); 132 static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, 133 int *errorp, void *thisdg_attrs); 134 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 135 int icmp_opt_set(queue_t *q, uint_t optset_context, 136 int level, int name, uint_t inlen, 137 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 138 void *thisdg_attrs, cred_t *cr, mblk_t 
*mblk); 139 int icmp_opt_get(queue_t *q, int level, int name, 140 uchar_t *ptr); 141 static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 142 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt); 143 static int icmp_param_set(queue_t *q, mblk_t *mp, char *value, 144 caddr_t cp, cred_t *cr); 145 static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 146 uchar_t *ptr, int len); 147 static int icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 148 cred_t *cr); 149 static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 150 static void icmp_unbind(queue_t *q, mblk_t *mp); 151 static void icmp_wput(queue_t *q, mblk_t *mp); 152 static void icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, 153 t_scalar_t tudr_optlen); 154 static void icmp_wput_other(queue_t *q, mblk_t *mp); 155 static void icmp_wput_iocdata(queue_t *q, mblk_t *mp); 156 static void icmp_wput_restricted(queue_t *q, mblk_t *mp); 157 158 static void *rawip_stack_init(netstackid_t stackid, netstack_t *ns); 159 static void rawip_stack_fini(netstackid_t stackid, void *arg); 160 161 static void *rawip_kstat_init(netstackid_t stackid); 162 static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp); 163 static int rawip_kstat_update(kstat_t *kp, int rw); 164 165 166 static struct module_info icmp_mod_info = { 167 5707, "icmp", 1, INFPSZ, 512, 128 168 }; 169 170 /* 171 * Entry points for ICMP as a device. 172 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices. 
173 */ 174 static struct qinit icmprinitv4 = { 175 NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info 176 }; 177 178 static struct qinit icmprinitv6 = { 179 NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info 180 }; 181 182 static struct qinit icmpwinit = { 183 (pfi_t)icmp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &icmp_mod_info 184 }; 185 186 /* For AF_INET aka /dev/icmp */ 187 struct streamtab icmpinfov4 = { 188 &icmprinitv4, &icmpwinit 189 }; 190 191 /* For AF_INET6 aka /dev/icmp6 */ 192 struct streamtab icmpinfov6 = { 193 &icmprinitv6, &icmpwinit 194 }; 195 196 static sin_t sin_null; /* Zero address for quick clears */ 197 static sin6_t sin6_null; /* Zero address for quick clears */ 198 199 /* Default structure copied into T_INFO_ACK messages */ 200 static struct T_info_ack icmp_g_t_info_ack = { 201 T_INFO_ACK, 202 IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */ 203 T_INVALID, /* ETSDU_size. icmp does not support expedited data. */ 204 T_INVALID, /* CDATA_size. icmp does not support connect data. */ 205 T_INVALID, /* DDATA_size. icmp does not support disconnect data. */ 206 0, /* ADDR_size - filled in later. */ 207 0, /* OPT_size - not initialized here */ 208 IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */ 209 T_CLTS, /* SERV_type. icmp supports connection-less. */ 210 TS_UNBND, /* CURRENT_state. This is set from icmp_state. */ 211 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 212 }; 213 214 /* 215 * Table of ND variables supported by icmp. These are loaded into is_nd 216 * when the stack instance is created. 217 * All of these are alterable, within the min/max values given, at run time. 
218 */ 219 static icmpparam_t icmp_param_arr[] = { 220 /* min max value name */ 221 { 0, 128, 32, "icmp_wroff_extra" }, 222 { 1, 255, 255, "icmp_ipv4_ttl" }, 223 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"}, 224 { 0, 1, 1, "icmp_bsd_compat" }, 225 { 4096, 65536, 8192, "icmp_xmit_hiwat"}, 226 { 0, 65536, 1024, "icmp_xmit_lowat"}, 227 { 4096, 65536, 8192, "icmp_recv_hiwat"}, 228 { 65536, 1024*1024*1024, 256*1024, "icmp_max_buf"}, 229 }; 230 #define is_wroff_extra is_param_arr[0].icmp_param_value 231 #define is_ipv4_ttl is_param_arr[1].icmp_param_value 232 #define is_ipv6_hoplimit is_param_arr[2].icmp_param_value 233 #define is_bsd_compat is_param_arr[3].icmp_param_value 234 #define is_xmit_hiwat is_param_arr[4].icmp_param_value 235 #define is_xmit_lowat is_param_arr[5].icmp_param_value 236 #define is_recv_hiwat is_param_arr[6].icmp_param_value 237 #define is_max_buf is_param_arr[7].icmp_param_value 238 239 /* 240 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 241 * passed to icmp_wput. 242 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP 243 * protocol type placed in the message following the address. A T_BIND_ACK 244 * message is returned by ip_bind_v4/v6. 
245 */ 246 static void 247 icmp_bind(queue_t *q, mblk_t *mp) 248 { 249 sin_t *sin; 250 sin6_t *sin6; 251 mblk_t *mp1; 252 struct T_bind_req *tbr; 253 icmp_t *icmp; 254 conn_t *connp = Q_TO_CONN(q); 255 256 icmp = connp->conn_icmp; 257 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 258 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 259 "icmp_bind: bad req, len %u", 260 (uint_t)(mp->b_wptr - mp->b_rptr)); 261 icmp_err_ack(q, mp, TPROTO, 0); 262 return; 263 } 264 if (icmp->icmp_state != TS_UNBND) { 265 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 266 "icmp_bind: bad state, %d", icmp->icmp_state); 267 icmp_err_ack(q, mp, TOUTSTATE, 0); 268 return; 269 } 270 /* 271 * Reallocate the message to make sure we have enough room for an 272 * address and the protocol type. 273 */ 274 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 275 if (!mp1) { 276 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 277 return; 278 } 279 mp = mp1; 280 tbr = (struct T_bind_req *)mp->b_rptr; 281 switch (tbr->ADDR_length) { 282 case 0: /* Generic request */ 283 tbr->ADDR_offset = sizeof (struct T_bind_req); 284 if (icmp->icmp_family == AF_INET) { 285 tbr->ADDR_length = sizeof (sin_t); 286 sin = (sin_t *)&tbr[1]; 287 *sin = sin_null; 288 sin->sin_family = AF_INET; 289 mp->b_wptr = (uchar_t *)&sin[1]; 290 } else { 291 ASSERT(icmp->icmp_family == AF_INET6); 292 tbr->ADDR_length = sizeof (sin6_t); 293 sin6 = (sin6_t *)&tbr[1]; 294 *sin6 = sin6_null; 295 sin6->sin6_family = AF_INET6; 296 mp->b_wptr = (uchar_t *)&sin6[1]; 297 } 298 break; 299 case sizeof (sin_t): /* Complete IP address */ 300 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 301 sizeof (sin_t)); 302 if (sin == NULL || !OK_32PTR((char *)sin)) { 303 icmp_err_ack(q, mp, TSYSERR, EINVAL); 304 return; 305 } 306 if (icmp->icmp_family != AF_INET || 307 sin->sin_family != AF_INET) { 308 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 309 return; 310 } 311 break; 312 case sizeof (sin6_t): /* Complete IP address */ 313 sin6 = (sin6_t 
*)mi_offset_param(mp, tbr->ADDR_offset, 314 sizeof (sin6_t)); 315 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 316 icmp_err_ack(q, mp, TSYSERR, EINVAL); 317 return; 318 } 319 if (icmp->icmp_family != AF_INET6 || 320 sin6->sin6_family != AF_INET6) { 321 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 322 return; 323 } 324 /* No support for mapped addresses on raw sockets */ 325 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 326 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 327 return; 328 } 329 break; 330 default: 331 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 332 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 333 icmp_err_ack(q, mp, TBADADDR, 0); 334 return; 335 } 336 337 /* 338 * The state must be TS_UNBND. TPI mandates that users must send 339 * TPI primitives only 1 at a time and wait for the response before 340 * sending the next primitive. 341 */ 342 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 343 if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) { 344 rw_exit(&icmp->icmp_rwlock); 345 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 346 "icmp_bind: bad state, %d", icmp->icmp_state); 347 icmp_err_ack(q, mp, TOUTSTATE, 0); 348 return; 349 } 350 351 icmp->icmp_pending_op = tbr->PRIM_type; 352 353 /* 354 * Copy the source address into our icmp structure. This address 355 * may still be zero; if so, ip will fill in the correct address 356 * each time an outbound packet is passed to it. 357 * If we are binding to a broadcast or multicast address then 358 * icmp_bind_ack will clear the source address when it receives 359 * the T_BIND_ACK. 
360 */ 361 icmp->icmp_state = TS_IDLE; 362 363 if (icmp->icmp_family == AF_INET) { 364 ASSERT(sin != NULL); 365 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 366 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 367 &icmp->icmp_v6src); 368 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 369 icmp->icmp_ip_snd_options_len; 370 icmp->icmp_bound_v6src = icmp->icmp_v6src; 371 } else { 372 int error; 373 374 ASSERT(sin6 != NULL); 375 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 376 icmp->icmp_v6src = sin6->sin6_addr; 377 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 378 icmp->icmp_bound_v6src = icmp->icmp_v6src; 379 380 /* Rebuild the header template */ 381 error = icmp_build_hdrs(icmp); 382 if (error != 0) { 383 icmp->icmp_pending_op = -1; 384 rw_exit(&icmp->icmp_rwlock); 385 icmp_err_ack(q, mp, TSYSERR, error); 386 return; 387 } 388 } 389 /* 390 * Place protocol type in the O_T_BIND_REQ/T_BIND_REQ following 391 * the address. 392 */ 393 *mp->b_wptr++ = icmp->icmp_proto; 394 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 395 /* 396 * Append a request for an IRE if src not 0 (INADDR_ANY) 397 */ 398 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 399 if (!mp->b_cont) { 400 icmp->icmp_pending_op = -1; 401 rw_exit(&icmp->icmp_rwlock); 402 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 403 return; 404 } 405 mp->b_cont->b_wptr += sizeof (ire_t); 406 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 407 } 408 rw_exit(&icmp->icmp_rwlock); 409 410 /* Pass the O_T_BIND_REQ/T_BIND_REQ to ip. */ 411 if (icmp->icmp_family == AF_INET6) 412 mp = ip_bind_v6(q, mp, connp, NULL); 413 else 414 mp = ip_bind_v4(q, mp, connp); 415 416 /* The above return NULL if the bind needs to be deferred */ 417 if (mp != NULL) 418 icmp_bind_result(connp, mp); 419 else 420 CONN_INC_REF(connp); 421 } 422 423 /* 424 * Send message to IP to just bind to the protocol. 
425 */ 426 static void 427 icmp_bind_proto(queue_t *q) 428 { 429 mblk_t *mp; 430 struct T_bind_req *tbr; 431 icmp_t *icmp; 432 conn_t *connp = Q_TO_CONN(q); 433 434 icmp = connp->conn_icmp; 435 436 mp = allocb(sizeof (struct T_bind_req) + sizeof (sin6_t) + 1, 437 BPRI_MED); 438 if (!mp) { 439 return; 440 } 441 mp->b_datap->db_type = M_PROTO; 442 tbr = (struct T_bind_req *)mp->b_rptr; 443 tbr->PRIM_type = O_T_BIND_REQ; /* change to T_BIND_REQ ? */ 444 tbr->ADDR_offset = sizeof (struct T_bind_req); 445 446 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 447 if (icmp->icmp_ipversion == IPV4_VERSION) { 448 sin_t *sin; 449 450 tbr->ADDR_length = sizeof (sin_t); 451 sin = (sin_t *)&tbr[1]; 452 *sin = sin_null; 453 sin->sin_family = AF_INET; 454 mp->b_wptr = (uchar_t *)&sin[1]; 455 } else { 456 sin6_t *sin6; 457 458 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 459 tbr->ADDR_length = sizeof (sin6_t); 460 sin6 = (sin6_t *)&tbr[1]; 461 *sin6 = sin6_null; 462 sin6->sin6_family = AF_INET6; 463 mp->b_wptr = (uchar_t *)&sin6[1]; 464 } 465 466 /* Place protocol type in the O_T_BIND_REQ following the address. */ 467 *mp->b_wptr++ = icmp->icmp_proto; 468 rw_exit(&icmp->icmp_rwlock); 469 470 /* Pass the O_T_BIND_REQ to ip. */ 471 if (icmp->icmp_family == AF_INET6) 472 mp = ip_bind_v6(q, mp, connp, NULL); 473 else 474 mp = ip_bind_v4(q, mp, connp); 475 476 /* The above return NULL if the bind needs to be deferred */ 477 if (mp != NULL) 478 icmp_bind_result(connp, mp); 479 else 480 CONN_INC_REF(connp); 481 } 482 483 /* 484 * This is called from ip_wput_nondata to handle the results of a 485 * deferred RAWIP bind. It is called once the bind has been completed. 486 */ 487 void 488 rawip_resume_bind(conn_t *connp, mblk_t *mp) 489 { 490 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 491 492 icmp_bind_result(connp, mp); 493 494 CONN_OPER_PENDING_DONE(connp); 495 } 496 497 /* 498 * This routine handles each T_CONN_REQ message passed to icmp. 
It 499 * associates a default destination address with the stream. 500 * 501 * This routine sends down a T_BIND_REQ to IP with the following mblks: 502 * T_BIND_REQ - specifying local and remote address. 503 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 504 * T_OK_ACK - for the T_CONN_REQ 505 * T_CONN_CON - to keep the TPI user happy 506 * 507 * The connect completes in icmp_bind_result. 508 * When a T_BIND_ACK is received information is extracted from the IRE 509 * and the two appended messages are sent to the TPI user. 510 * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 511 * convert it to an error ack for the appropriate primitive. 512 */ 513 static void 514 icmp_connect(queue_t *q, mblk_t *mp) 515 { 516 sin_t *sin; 517 sin6_t *sin6; 518 mblk_t *mp1, *mp2; 519 struct T_conn_req *tcr; 520 icmp_t *icmp; 521 ipaddr_t v4dst; 522 in6_addr_t v6dst; 523 uint32_t flowinfo; 524 conn_t *connp = Q_TO_CONN(q); 525 526 icmp = connp->conn_icmp; 527 tcr = (struct T_conn_req *)mp->b_rptr; 528 /* Sanity checks */ 529 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 530 icmp_err_ack(q, mp, TPROTO, 0); 531 return; 532 } 533 534 if (tcr->OPT_length != 0) { 535 icmp_err_ack(q, mp, TBADOPT, 0); 536 return; 537 } 538 539 switch (tcr->DEST_length) { 540 default: 541 icmp_err_ack(q, mp, TBADADDR, 0); 542 return; 543 544 case sizeof (sin_t): 545 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 546 sizeof (sin_t)); 547 if (sin == NULL || !OK_32PTR((char *)sin)) { 548 icmp_err_ack(q, mp, TSYSERR, EINVAL); 549 return; 550 } 551 if (icmp->icmp_family != AF_INET || 552 sin->sin_family != AF_INET) { 553 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 554 return; 555 } 556 v4dst = sin->sin_addr.s_addr; 557 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 558 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 559 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 560 icmp->icmp_ip_snd_options_len; 561 break; 562 563 case sizeof (sin6_t): 564 sin6 = 
(sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 565 sizeof (sin6_t)); 566 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 567 icmp_err_ack(q, mp, TSYSERR, EINVAL); 568 return; 569 } 570 if (icmp->icmp_family != AF_INET6 || 571 sin6->sin6_family != AF_INET6) { 572 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 573 return; 574 } 575 /* No support for mapped addresses on raw sockets */ 576 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 577 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 578 return; 579 } 580 v6dst = sin6->sin6_addr; 581 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 582 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 583 flowinfo = sin6->sin6_flowinfo; 584 break; 585 } 586 if (icmp->icmp_ipversion == IPV4_VERSION) { 587 /* 588 * Interpret a zero destination to mean loopback. 589 * Update the T_CONN_REQ (sin/sin6) since it is used to 590 * generate the T_CONN_CON. 591 */ 592 if (v4dst == INADDR_ANY) { 593 v4dst = htonl(INADDR_LOOPBACK); 594 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 595 if (icmp->icmp_family == AF_INET) { 596 sin->sin_addr.s_addr = v4dst; 597 } else { 598 sin6->sin6_addr = v6dst; 599 } 600 } 601 icmp->icmp_v6dst = v6dst; 602 icmp->icmp_flowinfo = 0; 603 604 /* 605 * If the destination address is multicast and 606 * an outgoing multicast interface has been set, 607 * use the address of that interface as our 608 * source address if no source address has been set. 609 */ 610 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 611 CLASSD(v4dst) && 612 icmp->icmp_multicast_if_addr != INADDR_ANY) { 613 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 614 &icmp->icmp_v6src); 615 } 616 } else { 617 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 618 /* 619 * Interpret a zero destination to mean loopback. 620 * Update the T_CONN_REQ (sin/sin6) since it is used to 621 * generate the T_CONN_CON. 
622 */ 623 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 624 v6dst = ipv6_loopback; 625 sin6->sin6_addr = v6dst; 626 } 627 icmp->icmp_v6dst = v6dst; 628 icmp->icmp_flowinfo = flowinfo; 629 /* 630 * If the destination address is multicast and 631 * an outgoing multicast interface has been set, 632 * then the ip bind logic will pick the correct source 633 * address (i.e. matching the outgoing multicast interface). 634 */ 635 } 636 637 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 638 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 639 rw_exit(&icmp->icmp_rwlock); 640 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 641 "icmp_connect: bad state, %d", icmp->icmp_state); 642 icmp_err_ack(q, mp, TOUTSTATE, 0); 643 return; 644 } 645 icmp->icmp_pending_op = T_CONN_REQ; 646 647 if (icmp->icmp_state == TS_DATA_XFER) { 648 /* Already connected - clear out state */ 649 icmp->icmp_v6src = icmp->icmp_bound_v6src; 650 icmp->icmp_state = TS_IDLE; 651 } 652 653 /* 654 * Send down bind to IP to verify that there is a route 655 * and to determine the source address. 656 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 657 */ 658 if (icmp->icmp_family == AF_INET) { 659 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa_conn_t), 660 sin->sin_port); 661 } else { 662 ASSERT(icmp->icmp_family == AF_INET6); 663 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa6_conn_t), 664 sin6->sin6_port); 665 } 666 if (mp1 == NULL) { 667 icmp->icmp_pending_op = -1; 668 rw_exit(&icmp->icmp_rwlock); 669 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 670 return; 671 } 672 673 /* 674 * We also have to send a connection confirmation to 675 * keep TLI happy. Prepare it for icmp_bind_result. 
676 */ 677 if (icmp->icmp_family == AF_INET) { 678 mp2 = mi_tpi_conn_con(NULL, (char *)sin, sizeof (*sin), NULL, 679 0); 680 } else { 681 ASSERT(icmp->icmp_family == AF_INET6); 682 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, sizeof (*sin6), NULL, 683 0); 684 } 685 if (mp2 == NULL) { 686 freemsg(mp1); 687 icmp->icmp_pending_op = -1; 688 rw_exit(&icmp->icmp_rwlock); 689 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 690 return; 691 } 692 693 mp = mi_tpi_ok_ack_alloc(mp); 694 if (mp == NULL) { 695 /* Unable to reuse the T_CONN_REQ for the ack. */ 696 freemsg(mp2); 697 icmp->icmp_pending_op = -1; 698 rw_exit(&icmp->icmp_rwlock); 699 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 700 return; 701 } 702 703 icmp->icmp_state = TS_DATA_XFER; 704 rw_exit(&icmp->icmp_rwlock); 705 706 /* Hang onto the T_OK_ACK and T_CONN_CON for later. */ 707 linkb(mp1, mp); 708 linkb(mp1, mp2); 709 710 mblk_setcred(mp1, connp->conn_cred); 711 if (icmp->icmp_family == AF_INET) 712 mp1 = ip_bind_v4(q, mp1, connp); 713 else 714 mp1 = ip_bind_v6(q, mp1, connp, NULL); 715 716 /* The above return NULL if the bind needs to be deferred */ 717 if (mp1 != NULL) 718 icmp_bind_result(connp, mp1); 719 else 720 CONN_INC_REF(connp); 721 } 722 723 static void 724 icmp_close_free(conn_t *connp) 725 { 726 icmp_t *icmp = connp->conn_icmp; 727 728 /* If there are any options associated with the stream, free them. 
*/ 729 if (icmp->icmp_ip_snd_options != NULL) { 730 mi_free((char *)icmp->icmp_ip_snd_options); 731 icmp->icmp_ip_snd_options = NULL; 732 icmp->icmp_ip_snd_options_len = 0; 733 } 734 735 if (icmp->icmp_filter != NULL) { 736 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 737 icmp->icmp_filter = NULL; 738 } 739 /* Free memory associated with sticky options */ 740 if (icmp->icmp_sticky_hdrs_len != 0) { 741 kmem_free(icmp->icmp_sticky_hdrs, 742 icmp->icmp_sticky_hdrs_len); 743 icmp->icmp_sticky_hdrs = NULL; 744 icmp->icmp_sticky_hdrs_len = 0; 745 } 746 ip6_pkt_free(&icmp->icmp_sticky_ipp); 747 748 /* 749 * Clear any fields which the kmem_cache constructor clears. 750 * Only icmp_connp needs to be preserved. 751 * TBD: We should make this more efficient to avoid clearing 752 * everything. 753 */ 754 ASSERT(icmp->icmp_connp == connp); 755 bzero(icmp, sizeof (icmp_t)); 756 icmp->icmp_connp = connp; 757 } 758 759 static int 760 icmp_close(queue_t *q) 761 { 762 conn_t *connp = (conn_t *)q->q_ptr; 763 764 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 765 766 ip_quiesce_conn(connp); 767 768 qprocsoff(connp->conn_rq); 769 770 icmp_close_free(connp); 771 772 /* 773 * Now we are truly single threaded on this stream, and can 774 * delete the things hanging off the connp, and finally the connp. 775 * We removed this connp from the fanout list, it cannot be 776 * accessed thru the fanouts, and we already waited for the 777 * conn_ref to drop to 0. We are already in close, so 778 * there cannot be any other thread from the top. qprocsoff 779 * has completed, and service has completed or won't run in 780 * future. 781 */ 782 ASSERT(connp->conn_ref == 1); 783 784 inet_minor_free(ip_minor_arena, connp->conn_dev); 785 786 connp->conn_ref--; 787 ipcl_conn_destroy(connp); 788 789 q->q_ptr = WR(q)->q_ptr = NULL; 790 return (0); 791 } 792 793 /* 794 * This routine handles each T_DISCON_REQ message passed to icmp 795 * as an indicating that ICMP is no longer connected. 
This results 796 * in sending a T_BIND_REQ to IP to restore the binding to just 797 * the local address. 798 * 799 * This routine sends down a T_BIND_REQ to IP with the following mblks: 800 * T_BIND_REQ - specifying just the local address. 801 * T_OK_ACK - for the T_DISCON_REQ 802 * 803 * The disconnect completes in icmp_bind_result. 804 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 805 * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 806 * convert it to an error ack for the appropriate primitive. 807 */ 808 static void 809 icmp_disconnect(queue_t *q, mblk_t *mp) 810 { 811 icmp_t *icmp; 812 mblk_t *mp1; 813 conn_t *connp = Q_TO_CONN(q); 814 815 icmp = connp->conn_icmp; 816 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 817 if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) { 818 rw_exit(&icmp->icmp_rwlock); 819 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 820 "icmp_disconnect: bad state, %d", icmp->icmp_state); 821 icmp_err_ack(q, mp, TOUTSTATE, 0); 822 return; 823 } 824 icmp->icmp_pending_op = T_DISCON_REQ; 825 icmp->icmp_v6src = icmp->icmp_bound_v6src; 826 icmp->icmp_state = TS_IDLE; 827 828 /* 829 * Send down bind to IP to remove the full binding and revert 830 * to the local address binding. 831 */ 832 if (icmp->icmp_family == AF_INET) { 833 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin_t), 0); 834 } else { 835 ASSERT(icmp->icmp_family == AF_INET6); 836 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin6_t), 0); 837 } 838 if (mp1 == NULL) { 839 icmp->icmp_pending_op = -1; 840 rw_exit(&icmp->icmp_rwlock); 841 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 842 return; 843 } 844 mp = mi_tpi_ok_ack_alloc(mp); 845 if (mp == NULL) { 846 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 847 icmp->icmp_pending_op = -1; 848 rw_exit(&icmp->icmp_rwlock); 849 icmp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 850 return; 851 } 852 853 if (icmp->icmp_family == AF_INET6) { 854 int error; 855 856 /* Rebuild the header template */ 857 error = icmp_build_hdrs(icmp); 858 if (error != 0) { 859 icmp->icmp_pending_op = -1; 860 rw_exit(&icmp->icmp_rwlock); 861 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 862 freemsg(mp1); 863 return; 864 } 865 } 866 867 rw_exit(&icmp->icmp_rwlock); 868 /* Append the T_OK_ACK to the T_BIND_REQ for icmp_bind_result */ 869 linkb(mp1, mp); 870 871 if (icmp->icmp_family == AF_INET6) 872 mp1 = ip_bind_v6(q, mp1, connp, NULL); 873 else 874 mp1 = ip_bind_v4(q, mp1, connp); 875 876 /* The above return NULL if the bind needs to be deferred */ 877 if (mp1 != NULL) 878 icmp_bind_result(connp, mp1); 879 else 880 CONN_INC_REF(connp); 881 } 882 883 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 884 static void 885 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 886 { 887 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 888 qreply(q, mp); 889 } 890 891 /* Shorthand to generate and send TPI error acks to our client */ 892 static void 893 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 894 t_scalar_t t_error, int sys_error) 895 { 896 struct T_error_ack *teackp; 897 898 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 899 M_PCPROTO, T_ERROR_ACK)) != NULL) { 900 teackp = (struct T_error_ack *)mp->b_rptr; 901 teackp->ERROR_prim = primitive; 902 teackp->TLI_error = t_error; 903 teackp->UNIX_error = sys_error; 904 qreply(q, mp); 905 } 906 } 907 908 /* 909 * icmp_icmp_error is called by icmp_input to process ICMP 910 * messages passed up by IP. 911 * Generates the appropriate T_UDERROR_IND for permanent 912 * (non-transient) errors. 913 * Assumes that IP has pulled up everything up to and including 914 * the ICMP header. 
915 */ 916 static void 917 icmp_icmp_error(queue_t *q, mblk_t *mp) 918 { 919 icmph_t *icmph; 920 ipha_t *ipha; 921 int iph_hdr_length; 922 sin_t sin; 923 sin6_t sin6; 924 mblk_t *mp1; 925 int error = 0; 926 icmp_t *icmp = Q_TO_ICMP(q); 927 928 ipha = (ipha_t *)mp->b_rptr; 929 930 ASSERT(OK_32PTR(mp->b_rptr)); 931 932 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 933 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 934 icmp_icmp_error_ipv6(q, mp); 935 return; 936 } 937 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 938 939 /* Skip past the outer IP and ICMP headers */ 940 iph_hdr_length = IPH_HDR_LENGTH(ipha); 941 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 942 ipha = (ipha_t *)&icmph[1]; 943 iph_hdr_length = IPH_HDR_LENGTH(ipha); 944 945 switch (icmph->icmph_type) { 946 case ICMP_DEST_UNREACHABLE: 947 switch (icmph->icmph_code) { 948 case ICMP_FRAGMENTATION_NEEDED: 949 /* 950 * IP has already adjusted the path MTU. 951 */ 952 break; 953 case ICMP_PORT_UNREACHABLE: 954 case ICMP_PROTOCOL_UNREACHABLE: 955 error = ECONNREFUSED; 956 break; 957 default: 958 /* Transient errors */ 959 break; 960 } 961 break; 962 default: 963 /* Transient errors */ 964 break; 965 } 966 if (error == 0) { 967 freemsg(mp); 968 return; 969 } 970 971 /* 972 * Deliver T_UDERROR_IND when the application has asked for it. 973 * The socket layer enables this automatically when connected. 
*/
	if (!icmp->icmp_dgram_errind) {
		freemsg(mp);
		return;
	}

	switch (icmp->icmp_family) {
	case AF_INET:
		sin = sin_null;
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = ipha->ipha_dst;
		mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0,
		    error);
		break;
	case AF_INET6:
		sin6 = sin6_null;
		sin6.sin6_family = AF_INET6;
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);

		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
		    NULL, 0, error);
		break;
	}
	if (mp1)
		putnext(q, mp1);
	freemsg(mp);
}

/*
 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6
 * for IPv6 packets.
 * Send permanent (non-transient) errors upstream.
 * Assumes that IP has pulled up all the extension headers as well
 * as the ICMPv6 header.
 *
 * q  - queue of the endpoint the error is delivered on.
 * mp - the received ICMPv6 error; it is consumed (freed) on every path.
 *
 * Three outcomes are possible:
 *  - the error is dropped (transient errors, parse failure, or the
 *    endpoint did not ask for error indications);
 *  - for ICMP6_PACKET_TOO_BIG, a data-less T_UNITDATA_IND carrying an
 *    IPV6_PATHMTU ancillary item is sent up if icmp_ipv6_recvpathmtu is set;
 *  - otherwise a T_UDERROR_IND carrying the mapped errno is sent up if
 *    icmp_dgram_errind is set.
 */
static void
icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp)
{
	icmp6_t		*icmp6;
	ip6_t		*ip6h, *outer_ip6h;
	uint16_t	iph_hdr_length;
	uint8_t		*nexthdrp;
	sin6_t		sin6;
	mblk_t		*mp1;
	int		error = 0;
	icmp_t		*icmp = Q_TO_ICMP(q);

	/*
	 * Locate the ICMPv6 header: it follows the outer IPv6 header and
	 * any extension headers that precede it.
	 */
	outer_ip6h = (ip6_t *)mp->b_rptr;
	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
	else
		iph_hdr_length = IPV6_HDR_LEN;

	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
	/* The packet that triggered the error follows the ICMPv6 header. */
	ip6h = (ip6_t *)&icmp6[1];
	/* Drop the message if the embedded packet's headers cannot be walked. */
	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
		freemsg(mp);
		return;
	}

	/* Map the ICMPv6 type/code to an errno; 0 means "do not report". */
	switch (icmp6->icmp6_type) {
	case ICMP6_DST_UNREACH:
		switch (icmp6->icmp6_code) {
		case ICMP6_DST_UNREACH_NOPORT:
			error = ECONNREFUSED;
			break;
		case ICMP6_DST_UNREACH_ADMIN:
		case ICMP6_DST_UNREACH_NOROUTE:
		case ICMP6_DST_UNREACH_BEYONDSCOPE:
		case ICMP6_DST_UNREACH_ADDR:
			/* Transient errors */
			break;
		default:
			break;
		}
		break;
	case ICMP6_PACKET_TOO_BIG: {
		struct T_unitdata_ind	*tudi;
		struct T_opthdr		*toh;
		size_t			udi_size;
		mblk_t			*newmp;
		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
		    sizeof (struct ip6_mtuinfo);
		/* Note: this pointer shadows the function-level sin6. */
		sin6_t			*sin6;
		struct ip6_mtuinfo	*mtuinfo;

		/*
		 * If the application has requested to receive path mtu
		 * information, send up an empty message containing an
		 * IPV6_PATHMTU ancillary data item.
		 */
		if (!icmp->icmp_ipv6_recvpathmtu)
			break;

		/* Layout: T_unitdata_ind | sin6_t | T_opthdr | ip6_mtuinfo */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
		    opt_length;
		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
			/* Count the failure; mp is freed by the common path. */
			BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors);
			break;
		}

		/*
		 * newmp->b_cont is left to NULL on purpose.  This is an
		 * empty message containing only ancillary data.
		 */
		newmp->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
		newmp->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
		tudi->OPT_length = opt_length;

		/* The source address reported is the endpoint's icmp_v6dst. */
		sin6 = (sin6_t *)&tudi[1];
		bzero(sin6, sizeof (sin6_t));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_addr = icmp->icmp_v6dst;

		toh = (struct T_opthdr *)&sin6[1];
		toh->level = IPPROTO_IPV6;
		toh->name = IPV6_PATHMTU;
		toh->len = opt_length;
		toh->status = 0;

		/*
		 * The destination comes from the embedded (offending) packet
		 * and the MTU from the ICMPv6 header.
		 * NOTE(review): icmp6_mtu is copied without any byte-order
		 * conversion here -- confirm what consumers expect.
		 */
		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
		/*
		 * We've consumed everything we need from the original
		 * message.  Free it, then send our empty message.
		 */
		freemsg(mp);
		putnext(q, newmp);
		return;
	}
	case ICMP6_TIME_EXCEEDED:
		/* Transient errors */
		break;
	case ICMP6_PARAM_PROB:
		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
		    (uchar_t *)nexthdrp) {
			error = ECONNREFUSED;
			break;
		}
		break;
	}
	/* Nothing worth reporting: drop the message. */
	if (error == 0) {
		freemsg(mp);
		return;
	}

	/*
	 * Deliver T_UDERROR_IND when the application has asked for it.
	 * The socket layer enables this automatically when connected.
	 */
	if (!icmp->icmp_dgram_errind) {
		freemsg(mp);
		return;
	}

	/* Report the destination of the packet that triggered the error. */
	sin6 = sin6_null;
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = ip6h->ip6_dst;
	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;

	mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0,
	    error);
	/* An allocation failure in mi_tpi_uderror_ind is silently ignored. */
	if (mp1)
		putnext(q, mp1);
	freemsg(mp);
}

/*
 * This routine responds to T_ADDR_REQ messages.  It is called by icmp_wput.
 * The local address is filled in if endpoint is bound. The remote address
 * is filled in if remote address has been specified ("connected endpoint")
 * (The concept of connected CLTS sockets is alien to published TPI
 * but we support it anyway).
1157 */ 1158 static void 1159 icmp_addr_req(queue_t *q, mblk_t *mp) 1160 { 1161 icmp_t *icmp = Q_TO_ICMP(q); 1162 mblk_t *ackmp; 1163 struct T_addr_ack *taa; 1164 1165 /* Make it large enough for worst case */ 1166 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1167 2 * sizeof (sin6_t), 1); 1168 if (ackmp == NULL) { 1169 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1170 return; 1171 } 1172 taa = (struct T_addr_ack *)ackmp->b_rptr; 1173 1174 bzero(taa, sizeof (struct T_addr_ack)); 1175 ackmp->b_wptr = (uchar_t *)&taa[1]; 1176 1177 taa->PRIM_type = T_ADDR_ACK; 1178 ackmp->b_datap->db_type = M_PCPROTO; 1179 rw_enter(&icmp->icmp_rwlock, RW_READER); 1180 /* 1181 * Note: Following code assumes 32 bit alignment of basic 1182 * data structures like sin_t and struct T_addr_ack. 1183 */ 1184 if (icmp->icmp_state != TS_UNBND) { 1185 /* 1186 * Fill in local address 1187 */ 1188 taa->LOCADDR_offset = sizeof (*taa); 1189 if (icmp->icmp_family == AF_INET) { 1190 sin_t *sin; 1191 1192 taa->LOCADDR_length = sizeof (sin_t); 1193 sin = (sin_t *)&taa[1]; 1194 /* Fill zeroes and then intialize non-zero fields */ 1195 *sin = sin_null; 1196 sin->sin_family = AF_INET; 1197 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1198 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1199 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1200 sin->sin_addr.s_addr); 1201 } else { 1202 /* 1203 * INADDR_ANY 1204 * icmp_v6src is not set, we might be bound to 1205 * broadcast/multicast. 
Use icmp_bound_v6src as 1206 * local address instead (that could 1207 * also still be INADDR_ANY) 1208 */ 1209 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1210 sin->sin_addr.s_addr); 1211 } 1212 ackmp->b_wptr = (uchar_t *)&sin[1]; 1213 } else { 1214 sin6_t *sin6; 1215 1216 ASSERT(icmp->icmp_family == AF_INET6); 1217 taa->LOCADDR_length = sizeof (sin6_t); 1218 sin6 = (sin6_t *)&taa[1]; 1219 /* Fill zeroes and then intialize non-zero fields */ 1220 *sin6 = sin6_null; 1221 sin6->sin6_family = AF_INET6; 1222 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1223 sin6->sin6_addr = icmp->icmp_v6src; 1224 } else { 1225 /* 1226 * UNSPECIFIED 1227 * icmp_v6src is not set, we might be bound to 1228 * broadcast/multicast. Use icmp_bound_v6src as 1229 * local address instead (that could 1230 * also still be UNSPECIFIED) 1231 */ 1232 sin6->sin6_addr = icmp->icmp_bound_v6src; 1233 } 1234 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1235 } 1236 } 1237 rw_exit(&icmp->icmp_rwlock); 1238 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1239 qreply(q, ackmp); 1240 } 1241 1242 static void 1243 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1244 { 1245 *tap = icmp_g_t_info_ack; 1246 1247 if (icmp->icmp_family == AF_INET6) 1248 tap->ADDR_size = sizeof (sin6_t); 1249 else 1250 tap->ADDR_size = sizeof (sin_t); 1251 tap->CURRENT_state = icmp->icmp_state; 1252 tap->OPT_size = icmp_max_optsize; 1253 } 1254 1255 /* 1256 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1257 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1258 * icmp_g_t_info_ack. The current state of the stream is copied from 1259 * icmp_state. 
1260 */ 1261 static void 1262 icmp_capability_req(queue_t *q, mblk_t *mp) 1263 { 1264 icmp_t *icmp = Q_TO_ICMP(q); 1265 t_uscalar_t cap_bits1; 1266 struct T_capability_ack *tcap; 1267 1268 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1269 1270 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1271 mp->b_datap->db_type, T_CAPABILITY_ACK); 1272 if (!mp) 1273 return; 1274 1275 tcap = (struct T_capability_ack *)mp->b_rptr; 1276 tcap->CAP_bits1 = 0; 1277 1278 if (cap_bits1 & TC1_INFO) { 1279 icmp_copy_info(&tcap->INFO_ack, icmp); 1280 tcap->CAP_bits1 |= TC1_INFO; 1281 } 1282 1283 qreply(q, mp); 1284 } 1285 1286 /* 1287 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1288 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1289 * The current state of the stream is copied from icmp_state. 1290 */ 1291 static void 1292 icmp_info_req(queue_t *q, mblk_t *mp) 1293 { 1294 icmp_t *icmp = Q_TO_ICMP(q); 1295 1296 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1297 T_INFO_ACK); 1298 if (!mp) 1299 return; 1300 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1301 qreply(q, mp); 1302 } 1303 1304 /* 1305 * IP recognizes seven kinds of bind requests: 1306 * 1307 * - A zero-length address binds only to the protocol number. 1308 * 1309 * - A 4-byte address is treated as a request to 1310 * validate that the address is a valid local IPv4 1311 * address, appropriate for an application to bind to. 1312 * IP does the verification, but does not make any note 1313 * of the address at this time. 1314 * 1315 * - A 16-byte address contains is treated as a request 1316 * to validate a local IPv6 address, as the 4-byte 1317 * address case above. 1318 * 1319 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1320 * use it for the inbound fanout of packets. 1321 * 1322 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1323 * use it for the inbound fanout of packets. 
1324 * 1325 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1326 * information consisting of local and remote addresses 1327 * and ports (unused for raw sockets). In this case, the addresses are both 1328 * validated as appropriate for this operation, and, if 1329 * so, the information is retained for use in the 1330 * inbound fanout. 1331 * 1332 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1333 * fanout information, like the 12-byte case above. 1334 * 1335 * IP will also fill in the IRE request mblk with information 1336 * regarding our peer. In all cases, we notify IP of our protocol 1337 * type by appending a single protocol byte to the bind request. 1338 */ 1339 static mblk_t * 1340 icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length, 1341 in_port_t fport) 1342 { 1343 char *cp; 1344 mblk_t *mp; 1345 struct T_bind_req *tbr; 1346 ipa_conn_t *ac; 1347 ipa6_conn_t *ac6; 1348 sin_t *sin; 1349 sin6_t *sin6; 1350 1351 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1352 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 1353 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1354 if (mp == NULL) 1355 return (NULL); 1356 mp->b_datap->db_type = M_PROTO; 1357 tbr = (struct T_bind_req *)mp->b_rptr; 1358 tbr->PRIM_type = bind_prim; 1359 tbr->ADDR_offset = sizeof (*tbr); 1360 tbr->CONIND_number = 0; 1361 tbr->ADDR_length = addr_length; 1362 cp = (char *)&tbr[1]; 1363 switch (addr_length) { 1364 case sizeof (ipa_conn_t): 1365 ASSERT(icmp->icmp_family == AF_INET); 1366 /* Append a request for an IRE */ 1367 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1368 if (mp->b_cont == NULL) { 1369 freemsg(mp); 1370 return (NULL); 1371 } 1372 mp->b_cont->b_wptr += sizeof (ire_t); 1373 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1374 1375 /* cp known to be 32 bit aligned */ 1376 ac = (ipa_conn_t *)cp; 1377 ac->ac_laddr = V4_PART_OF_V6(icmp->icmp_v6src); 1378 ac->ac_faddr = V4_PART_OF_V6(icmp->icmp_v6dst); 1379 
ac->ac_fport = fport; 1380 ac->ac_lport = 0; 1381 break; 1382 1383 case sizeof (ipa6_conn_t): 1384 ASSERT(icmp->icmp_family == AF_INET6); 1385 /* Append a request for an IRE */ 1386 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1387 if (mp->b_cont == NULL) { 1388 freemsg(mp); 1389 return (NULL); 1390 } 1391 mp->b_cont->b_wptr += sizeof (ire_t); 1392 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1393 1394 /* cp known to be 32 bit aligned */ 1395 ac6 = (ipa6_conn_t *)cp; 1396 ac6->ac6_laddr = icmp->icmp_v6src; 1397 ac6->ac6_faddr = icmp->icmp_v6dst; 1398 ac6->ac6_fport = fport; 1399 ac6->ac6_lport = 0; 1400 break; 1401 1402 case sizeof (sin_t): 1403 ASSERT(icmp->icmp_family == AF_INET); 1404 /* Append a request for an IRE */ 1405 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1406 if (!mp->b_cont) { 1407 freemsg(mp); 1408 return (NULL); 1409 } 1410 mp->b_cont->b_wptr += sizeof (ire_t); 1411 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1412 1413 sin = (sin_t *)cp; 1414 *sin = sin_null; 1415 sin->sin_family = AF_INET; 1416 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 1417 break; 1418 1419 case sizeof (sin6_t): 1420 ASSERT(icmp->icmp_family == AF_INET6); 1421 /* Append a request for an IRE */ 1422 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1423 if (!mp->b_cont) { 1424 freemsg(mp); 1425 return (NULL); 1426 } 1427 mp->b_cont->b_wptr += sizeof (ire_t); 1428 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1429 1430 sin6 = (sin6_t *)cp; 1431 *sin6 = sin6_null; 1432 sin6->sin6_family = AF_INET6; 1433 sin6->sin6_addr = icmp->icmp_bound_v6src; 1434 break; 1435 } 1436 /* Add protocol number to end */ 1437 cp[addr_length] = icmp->icmp_proto; 1438 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1439 return (mp); 1440 } 1441 1442 /* For /dev/icmp aka AF_INET open */ 1443 static int 1444 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1445 { 1446 return (icmp_open(q, devp, flag, sflag, credp, B_FALSE)); 1447 } 1448 1449 /* For 
/dev/icmp6 aka AF_INET6 open */ 1450 static int 1451 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1452 { 1453 return (icmp_open(q, devp, flag, sflag, credp, B_TRUE)); 1454 } 1455 1456 /* 1457 * This is the open routine for icmp. It allocates a icmp_t structure for 1458 * the stream and, on the first open of the module, creates an ND table. 1459 */ 1460 /*ARGSUSED2*/ 1461 static int 1462 icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1463 boolean_t isv6) 1464 { 1465 int err; 1466 icmp_t *icmp; 1467 conn_t *connp; 1468 dev_t conn_dev; 1469 zoneid_t zoneid; 1470 netstack_t *ns; 1471 icmp_stack_t *is; 1472 1473 /* If the stream is already open, return immediately. */ 1474 if (q->q_ptr != NULL) 1475 return (0); 1476 1477 if (sflag == MODOPEN) 1478 return (EINVAL); 1479 1480 ns = netstack_find_by_cred(credp); 1481 ASSERT(ns != NULL); 1482 is = ns->netstack_icmp; 1483 ASSERT(is != NULL); 1484 1485 /* 1486 * For exclusive stacks we set the zoneid to zero 1487 * to make ICMP operate as if in the global zone. 1488 */ 1489 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 1490 zoneid = GLOBAL_ZONEID; 1491 else 1492 zoneid = crgetzoneid(credp); 1493 1494 if ((conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) { 1495 netstack_rele(ns); 1496 return (EBUSY); 1497 } 1498 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1499 1500 connp = ipcl_conn_create(IPCL_RAWIPCONN, KM_SLEEP, ns); 1501 connp->conn_dev = conn_dev; 1502 icmp = connp->conn_icmp; 1503 1504 /* 1505 * ipcl_conn_create did a netstack_hold. Undo the hold that was 1506 * done by netstack_find_by_cred() 1507 */ 1508 netstack_rele(ns); 1509 1510 /* 1511 * Initialize the icmp_t structure for this stream. 
1512 */ 1513 q->q_ptr = connp; 1514 WR(q)->q_ptr = connp; 1515 connp->conn_rq = q; 1516 connp->conn_wq = WR(q); 1517 1518 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1519 ASSERT(connp->conn_ulp == IPPROTO_ICMP); 1520 ASSERT(connp->conn_icmp == icmp); 1521 ASSERT(icmp->icmp_connp == connp); 1522 1523 /* Set the initial state of the stream and the privilege status. */ 1524 icmp->icmp_state = TS_UNBND; 1525 if (isv6) { 1526 icmp->icmp_ipversion = IPV6_VERSION; 1527 icmp->icmp_family = AF_INET6; 1528 connp->conn_ulp = IPPROTO_ICMPV6; 1529 /* May be changed by a SO_PROTOTYPE socket option. */ 1530 icmp->icmp_proto = IPPROTO_ICMPV6; 1531 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1532 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1533 icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; 1534 connp->conn_af_isv6 = B_TRUE; 1535 connp->conn_flags |= IPCL_ISV6; 1536 } else { 1537 icmp->icmp_ipversion = IPV4_VERSION; 1538 icmp->icmp_family = AF_INET; 1539 /* May be changed by a SO_PROTOTYPE socket option. */ 1540 icmp->icmp_proto = IPPROTO_ICMP; 1541 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1542 icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; 1543 connp->conn_af_isv6 = B_FALSE; 1544 connp->conn_flags &= ~IPCL_ISV6; 1545 } 1546 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1547 icmp->icmp_pending_op = -1; 1548 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1549 connp->conn_zoneid = zoneid; 1550 1551 /* 1552 * If the caller has the process-wide flag set, then default to MAC 1553 * exempt mode. This allows read-down to unlabeled hosts. 
1554 */ 1555 if (getpflags(NET_MAC_AWARE, credp) != 0) 1556 icmp->icmp_mac_exempt = B_TRUE; 1557 1558 connp->conn_ulp_labeled = is_system_labeled(); 1559 1560 icmp->icmp_is = is; 1561 1562 q->q_hiwat = is->is_recv_hiwat; 1563 WR(q)->q_hiwat = is->is_xmit_hiwat; 1564 WR(q)->q_lowat = is->is_xmit_lowat; 1565 1566 connp->conn_recv = icmp_input; 1567 crhold(credp); 1568 connp->conn_cred = credp; 1569 1570 mutex_enter(&connp->conn_lock); 1571 connp->conn_state_flags &= ~CONN_INCIPIENT; 1572 mutex_exit(&connp->conn_lock); 1573 1574 qprocson(q); 1575 1576 if (icmp->icmp_family == AF_INET6) { 1577 /* Build initial header template for transmit */ 1578 if ((err = icmp_build_hdrs(icmp)) != 0) { 1579 rw_exit(&icmp->icmp_rwlock); 1580 qprocsoff(q); 1581 ipcl_conn_destroy(connp); 1582 return (err); 1583 } 1584 } 1585 rw_exit(&icmp->icmp_rwlock); 1586 1587 /* Set the Stream head write offset. */ 1588 (void) mi_set_sth_wroff(q, 1589 icmp->icmp_max_hdr_len + is->is_wroff_extra); 1590 (void) mi_set_sth_hiwat(q, q->q_hiwat); 1591 1592 return (0); 1593 } 1594 1595 /* 1596 * Which ICMP options OK to set through T_UNITDATA_REQ... 
1597 */ 1598 /* ARGSUSED */ 1599 static boolean_t 1600 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1601 { 1602 return (B_TRUE); 1603 } 1604 1605 /* 1606 * This routine gets default values of certain options whose default 1607 * values are maintained by protcol specific code 1608 */ 1609 /* ARGSUSED */ 1610 int 1611 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1612 { 1613 icmp_t *icmp = Q_TO_ICMP(q); 1614 icmp_stack_t *is = icmp->icmp_is; 1615 int *i1 = (int *)ptr; 1616 1617 switch (level) { 1618 case IPPROTO_IP: 1619 switch (name) { 1620 case IP_MULTICAST_TTL: 1621 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1622 return (sizeof (uchar_t)); 1623 case IP_MULTICAST_LOOP: 1624 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1625 return (sizeof (uchar_t)); 1626 } 1627 break; 1628 case IPPROTO_IPV6: 1629 switch (name) { 1630 case IPV6_MULTICAST_HOPS: 1631 *i1 = IP_DEFAULT_MULTICAST_TTL; 1632 return (sizeof (int)); 1633 case IPV6_MULTICAST_LOOP: 1634 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1635 return (sizeof (int)); 1636 case IPV6_UNICAST_HOPS: 1637 *i1 = is->is_ipv6_hoplimit; 1638 return (sizeof (int)); 1639 } 1640 break; 1641 case IPPROTO_ICMPV6: 1642 switch (name) { 1643 case ICMP6_FILTER: 1644 /* Make it look like "pass all" */ 1645 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1646 return (sizeof (icmp6_filter_t)); 1647 } 1648 break; 1649 } 1650 return (-1); 1651 } 1652 1653 /* 1654 * This routine retrieves the current status of socket options. 1655 * It returns the size of the option retrieved. 
1656 */ 1657 int 1658 icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr) 1659 { 1660 conn_t *connp = Q_TO_CONN(q); 1661 icmp_t *icmp = connp->conn_icmp; 1662 icmp_stack_t *is = icmp->icmp_is; 1663 int *i1 = (int *)ptr; 1664 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1665 1666 switch (level) { 1667 case SOL_SOCKET: 1668 switch (name) { 1669 case SO_DEBUG: 1670 *i1 = icmp->icmp_debug; 1671 break; 1672 case SO_TYPE: 1673 *i1 = SOCK_RAW; 1674 break; 1675 case SO_PROTOTYPE: 1676 *i1 = icmp->icmp_proto; 1677 break; 1678 case SO_REUSEADDR: 1679 *i1 = icmp->icmp_reuseaddr; 1680 break; 1681 1682 /* 1683 * The following three items are available here, 1684 * but are only meaningful to IP. 1685 */ 1686 case SO_DONTROUTE: 1687 *i1 = icmp->icmp_dontroute; 1688 break; 1689 case SO_USELOOPBACK: 1690 *i1 = icmp->icmp_useloopback; 1691 break; 1692 case SO_BROADCAST: 1693 *i1 = icmp->icmp_broadcast; 1694 break; 1695 1696 case SO_SNDBUF: 1697 ASSERT(q->q_hiwat <= INT_MAX); 1698 *i1 = (int)q->q_hiwat; 1699 break; 1700 case SO_RCVBUF: 1701 ASSERT(RD(q)->q_hiwat <= INT_MAX); 1702 *i1 = (int)RD(q)->q_hiwat; 1703 break; 1704 case SO_DGRAM_ERRIND: 1705 *i1 = icmp->icmp_dgram_errind; 1706 break; 1707 case SO_TIMESTAMP: 1708 *i1 = icmp->icmp_timestamp; 1709 break; 1710 case SO_MAC_EXEMPT: 1711 *i1 = icmp->icmp_mac_exempt; 1712 break; 1713 case SO_DOMAIN: 1714 *i1 = icmp->icmp_family; 1715 break; 1716 1717 /* 1718 * Following four not meaningful for icmp 1719 * Action is same as "default" to which we fallthrough 1720 * so we keep them in comments. 1721 * case SO_LINGER: 1722 * case SO_KEEPALIVE: 1723 * case SO_OOBINLINE: 1724 * case SO_ALLZONES: 1725 */ 1726 default: 1727 return (-1); 1728 } 1729 break; 1730 case IPPROTO_IP: 1731 /* 1732 * Only allow IPv4 option processing on IPv4 sockets. 
1733 */ 1734 if (icmp->icmp_family != AF_INET) 1735 return (-1); 1736 1737 switch (name) { 1738 case IP_OPTIONS: 1739 case T_IP_OPTIONS: 1740 /* Options are passed up with each packet */ 1741 return (0); 1742 case IP_HDRINCL: 1743 *i1 = (int)icmp->icmp_hdrincl; 1744 break; 1745 case IP_TOS: 1746 case T_IP_TOS: 1747 *i1 = (int)icmp->icmp_type_of_service; 1748 break; 1749 case IP_TTL: 1750 *i1 = (int)icmp->icmp_ttl; 1751 break; 1752 case IP_MULTICAST_IF: 1753 /* 0 address if not set */ 1754 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1755 return (sizeof (ipaddr_t)); 1756 case IP_MULTICAST_TTL: 1757 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1758 return (sizeof (uchar_t)); 1759 case IP_MULTICAST_LOOP: 1760 *ptr = connp->conn_multicast_loop; 1761 return (sizeof (uint8_t)); 1762 case IP_BOUND_IF: 1763 /* Zero if not set */ 1764 *i1 = icmp->icmp_bound_if; 1765 break; /* goto sizeof (int) option return */ 1766 case IP_UNSPEC_SRC: 1767 *ptr = icmp->icmp_unspec_source; 1768 break; /* goto sizeof (int) option return */ 1769 case IP_RECVIF: 1770 *ptr = icmp->icmp_recvif; 1771 break; /* goto sizeof (int) option return */ 1772 case IP_RECVPKTINFO: 1773 /* 1774 * This also handles IP_PKTINFO. 1775 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1776 * Differentiation is based on the size of the argument 1777 * passed in. 1778 * This option is handled in IP which will return an 1779 * error for IP_PKTINFO as it's not supported as a 1780 * sticky option. 1781 */ 1782 return (-EINVAL); 1783 /* 1784 * Cannot "get" the value of following options 1785 * at this level. Action is same as "default" to 1786 * which we fallthrough so we keep them in comments. 
1787 * 1788 * case IP_ADD_MEMBERSHIP: 1789 * case IP_DROP_MEMBERSHIP: 1790 * case IP_BLOCK_SOURCE: 1791 * case IP_UNBLOCK_SOURCE: 1792 * case IP_ADD_SOURCE_MEMBERSHIP: 1793 * case IP_DROP_SOURCE_MEMBERSHIP: 1794 * case MCAST_JOIN_GROUP: 1795 * case MCAST_LEAVE_GROUP: 1796 * case MCAST_BLOCK_SOURCE: 1797 * case MCAST_UNBLOCK_SOURCE: 1798 * case MCAST_JOIN_SOURCE_GROUP: 1799 * case MCAST_LEAVE_SOURCE_GROUP: 1800 * case MRT_INIT: 1801 * case MRT_DONE: 1802 * case MRT_ADD_VIF: 1803 * case MRT_DEL_VIF: 1804 * case MRT_ADD_MFC: 1805 * case MRT_DEL_MFC: 1806 * case MRT_VERSION: 1807 * case MRT_ASSERT: 1808 * case IP_SEC_OPT: 1809 * case IP_DONTFAILOVER_IF: 1810 * case IP_NEXTHOP: 1811 */ 1812 default: 1813 return (-1); 1814 } 1815 break; 1816 case IPPROTO_IPV6: 1817 /* 1818 * Only allow IPv6 option processing on native IPv6 sockets. 1819 */ 1820 if (icmp->icmp_family != AF_INET6) 1821 return (-1); 1822 switch (name) { 1823 case IPV6_UNICAST_HOPS: 1824 *i1 = (unsigned int)icmp->icmp_ttl; 1825 break; 1826 case IPV6_MULTICAST_IF: 1827 /* 0 index if not set */ 1828 *i1 = icmp->icmp_multicast_if_index; 1829 break; 1830 case IPV6_MULTICAST_HOPS: 1831 *i1 = icmp->icmp_multicast_ttl; 1832 break; 1833 case IPV6_MULTICAST_LOOP: 1834 *i1 = connp->conn_multicast_loop; 1835 break; 1836 case IPV6_BOUND_IF: 1837 /* Zero if not set */ 1838 *i1 = icmp->icmp_bound_if; 1839 break; 1840 case IPV6_UNSPEC_SRC: 1841 *i1 = icmp->icmp_unspec_source; 1842 break; 1843 case IPV6_CHECKSUM: 1844 /* 1845 * Return offset or -1 if no checksum offset. 
1846 * Does not apply to IPPROTO_ICMPV6 1847 */ 1848 if (icmp->icmp_proto == IPPROTO_ICMPV6) 1849 return (-1); 1850 1851 if (icmp->icmp_raw_checksum) { 1852 *i1 = icmp->icmp_checksum_off; 1853 } else { 1854 *i1 = -1; 1855 } 1856 break; 1857 case IPV6_JOIN_GROUP: 1858 case IPV6_LEAVE_GROUP: 1859 case MCAST_JOIN_GROUP: 1860 case MCAST_LEAVE_GROUP: 1861 case MCAST_BLOCK_SOURCE: 1862 case MCAST_UNBLOCK_SOURCE: 1863 case MCAST_JOIN_SOURCE_GROUP: 1864 case MCAST_LEAVE_SOURCE_GROUP: 1865 /* cannot "get" the value for these */ 1866 return (-1); 1867 case IPV6_RECVPKTINFO: 1868 *i1 = icmp->icmp_ip_recvpktinfo; 1869 break; 1870 case IPV6_RECVTCLASS: 1871 *i1 = icmp->icmp_ipv6_recvtclass; 1872 break; 1873 case IPV6_RECVPATHMTU: 1874 *i1 = icmp->icmp_ipv6_recvpathmtu; 1875 break; 1876 case IPV6_V6ONLY: 1877 *i1 = 1; 1878 break; 1879 case IPV6_RECVHOPLIMIT: 1880 *i1 = icmp->icmp_ipv6_recvhoplimit; 1881 break; 1882 case IPV6_RECVHOPOPTS: 1883 *i1 = icmp->icmp_ipv6_recvhopopts; 1884 break; 1885 case IPV6_RECVDSTOPTS: 1886 *i1 = icmp->icmp_ipv6_recvdstopts; 1887 break; 1888 case _OLD_IPV6_RECVDSTOPTS: 1889 *i1 = icmp->icmp_old_ipv6_recvdstopts; 1890 break; 1891 case IPV6_RECVRTHDRDSTOPTS: 1892 *i1 = icmp->icmp_ipv6_recvrtdstopts; 1893 break; 1894 case IPV6_RECVRTHDR: 1895 *i1 = icmp->icmp_ipv6_recvrthdr; 1896 break; 1897 case IPV6_PKTINFO: { 1898 /* XXX assumes that caller has room for max size! 
*/ 1899 struct in6_pktinfo *pkti; 1900 1901 pkti = (struct in6_pktinfo *)ptr; 1902 if (ipp->ipp_fields & IPPF_IFINDEX) 1903 pkti->ipi6_ifindex = ipp->ipp_ifindex; 1904 else 1905 pkti->ipi6_ifindex = 0; 1906 if (ipp->ipp_fields & IPPF_ADDR) 1907 pkti->ipi6_addr = ipp->ipp_addr; 1908 else 1909 pkti->ipi6_addr = ipv6_all_zeros; 1910 return (sizeof (struct in6_pktinfo)); 1911 } 1912 case IPV6_NEXTHOP: { 1913 sin6_t *sin6 = (sin6_t *)ptr; 1914 1915 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 1916 return (0); 1917 *sin6 = sin6_null; 1918 sin6->sin6_family = AF_INET6; 1919 sin6->sin6_addr = ipp->ipp_nexthop; 1920 return (sizeof (sin6_t)); 1921 } 1922 case IPV6_HOPOPTS: 1923 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 1924 return (0); 1925 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 1926 return (0); 1927 bcopy((char *)ipp->ipp_hopopts + 1928 icmp->icmp_label_len_v6, ptr, 1929 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1930 if (icmp->icmp_label_len_v6 > 0) { 1931 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 1932 ptr[1] = (ipp->ipp_hopoptslen - 1933 icmp->icmp_label_len_v6 + 7) / 8 - 1; 1934 } 1935 return (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1936 case IPV6_RTHDRDSTOPTS: 1937 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 1938 return (0); 1939 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 1940 return (ipp->ipp_rtdstoptslen); 1941 case IPV6_RTHDR: 1942 if (!(ipp->ipp_fields & IPPF_RTHDR)) 1943 return (0); 1944 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 1945 return (ipp->ipp_rthdrlen); 1946 case IPV6_DSTOPTS: 1947 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 1948 return (0); 1949 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 1950 return (ipp->ipp_dstoptslen); 1951 case IPV6_PATHMTU: 1952 if (!(ipp->ipp_fields & IPPF_PATHMTU)) 1953 return (0); 1954 1955 return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0, 1956 (struct ip6_mtuinfo *)ptr, is->is_netstack)); 1957 case IPV6_TCLASS: 1958 if (ipp->ipp_fields & IPPF_TCLASS) 1959 *i1 = ipp->ipp_tclass; 1960 else 1961 *i1 = 
IPV6_FLOW_TCLASS( 1962 IPV6_DEFAULT_VERS_AND_FLOW); 1963 break; 1964 default: 1965 return (-1); 1966 } 1967 break; 1968 case IPPROTO_ICMPV6: 1969 /* 1970 * Only allow IPv6 option processing on native IPv6 sockets. 1971 */ 1972 if (icmp->icmp_family != AF_INET6) 1973 return (-1); 1974 1975 if (icmp->icmp_proto != IPPROTO_ICMPV6) 1976 return (-1); 1977 1978 switch (name) { 1979 case ICMP6_FILTER: 1980 if (icmp->icmp_filter == NULL) { 1981 /* Make it look like "pass all" */ 1982 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1983 } else { 1984 (void) bcopy(icmp->icmp_filter, ptr, 1985 sizeof (icmp6_filter_t)); 1986 } 1987 return (sizeof (icmp6_filter_t)); 1988 default: 1989 return (-1); 1990 } 1991 default: 1992 return (-1); 1993 } 1994 return (sizeof (int)); 1995 } 1996 1997 /* 1998 * This routine retrieves the current status of socket options. 1999 * It returns the size of the option retrieved. 2000 */ 2001 int 2002 icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 2003 { 2004 icmp_t *icmp = Q_TO_ICMP(q); 2005 int err; 2006 2007 rw_enter(&icmp->icmp_rwlock, RW_READER); 2008 err = icmp_opt_get_locked(q, level, name, ptr); 2009 rw_exit(&icmp->icmp_rwlock); 2010 return (err); 2011 } 2012 2013 2014 /* This routine sets socket options. */ 2015 /* ARGSUSED */ 2016 int 2017 icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name, 2018 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2019 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2020 { 2021 conn_t *connp = Q_TO_CONN(q); 2022 icmp_t *icmp = connp->conn_icmp; 2023 icmp_stack_t *is = icmp->icmp_is; 2024 int *i1 = (int *)invalp; 2025 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2026 boolean_t checkonly; 2027 int error; 2028 2029 switch (optset_context) { 2030 case SETFN_OPTCOM_CHECKONLY: 2031 checkonly = B_TRUE; 2032 /* 2033 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 2034 * inlen != 0 implies value supplied and 2035 * we have to "pretend" to set it. 
2036 * inlen == 0 implies that there is no 2037 * value part in T_CHECK request and just validation 2038 * done elsewhere should be enough, we just return here. 2039 */ 2040 if (inlen == 0) { 2041 *outlenp = 0; 2042 return (0); 2043 } 2044 break; 2045 case SETFN_OPTCOM_NEGOTIATE: 2046 checkonly = B_FALSE; 2047 break; 2048 case SETFN_UD_NEGOTIATE: 2049 case SETFN_CONN_NEGOTIATE: 2050 checkonly = B_FALSE; 2051 /* 2052 * Negotiating local and "association-related" options 2053 * through T_UNITDATA_REQ. 2054 * 2055 * Following routine can filter out ones we do not 2056 * want to be "set" this way. 2057 */ 2058 if (!icmp_opt_allow_udr_set(level, name)) { 2059 *outlenp = 0; 2060 return (EINVAL); 2061 } 2062 break; 2063 default: 2064 /* 2065 * We should never get here 2066 */ 2067 *outlenp = 0; 2068 return (EINVAL); 2069 } 2070 2071 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 2072 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 2073 2074 /* 2075 * For fixed length options, no sanity check 2076 * of passed in length is done. It is assumed *_optcom_req() 2077 * routines do the right thing. 
2078 */ 2079 2080 switch (level) { 2081 case SOL_SOCKET: 2082 switch (name) { 2083 case SO_DEBUG: 2084 if (!checkonly) 2085 icmp->icmp_debug = onoff; 2086 break; 2087 case SO_PROTOTYPE: 2088 if ((*i1 & 0xFF) != IPPROTO_ICMP && 2089 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 2090 secpolicy_net_rawaccess(cr) != 0) { 2091 *outlenp = 0; 2092 return (EACCES); 2093 } 2094 /* Can't use IPPROTO_RAW with IPv6 */ 2095 if ((*i1 & 0xFF) == IPPROTO_RAW && 2096 icmp->icmp_family == AF_INET6) { 2097 *outlenp = 0; 2098 return (EPROTONOSUPPORT); 2099 } 2100 if (checkonly) { 2101 /* T_CHECK case */ 2102 *(int *)outvalp = (*i1 & 0xFF); 2103 break; 2104 } 2105 icmp->icmp_proto = *i1 & 0xFF; 2106 if ((icmp->icmp_proto == IPPROTO_RAW || 2107 icmp->icmp_proto == IPPROTO_IGMP) && 2108 icmp->icmp_family == AF_INET) 2109 icmp->icmp_hdrincl = 1; 2110 else 2111 icmp->icmp_hdrincl = 0; 2112 2113 if (icmp->icmp_family == AF_INET6 && 2114 icmp->icmp_proto == IPPROTO_ICMPV6) { 2115 /* Set offset for icmp6_cksum */ 2116 icmp->icmp_raw_checksum = 0; 2117 icmp->icmp_checksum_off = 2; 2118 } 2119 if (icmp->icmp_proto == IPPROTO_UDP || 2120 icmp->icmp_proto == IPPROTO_TCP || 2121 icmp->icmp_proto == IPPROTO_SCTP) { 2122 icmp->icmp_no_tp_cksum = 1; 2123 icmp->icmp_sticky_ipp.ipp_fields |= 2124 IPPF_NO_CKSUM; 2125 } else { 2126 icmp->icmp_no_tp_cksum = 0; 2127 icmp->icmp_sticky_ipp.ipp_fields &= 2128 ~IPPF_NO_CKSUM; 2129 } 2130 2131 if (icmp->icmp_filter != NULL && 2132 icmp->icmp_proto != IPPROTO_ICMPV6) { 2133 kmem_free(icmp->icmp_filter, 2134 sizeof (icmp6_filter_t)); 2135 icmp->icmp_filter = NULL; 2136 } 2137 2138 /* Rebuild the header template */ 2139 error = icmp_build_hdrs(icmp); 2140 if (error != 0) { 2141 *outlenp = 0; 2142 return (error); 2143 } 2144 2145 /* 2146 * For SCTP, we don't use icmp_bind_proto() for 2147 * raw socket binding. Note that we do not need 2148 * to set *outlenp. 2149 * FIXME: how does SCTP work? 
2150 */ 2151 if (icmp->icmp_proto == IPPROTO_SCTP) 2152 return (0); 2153 2154 *outlenp = sizeof (int); 2155 *(int *)outvalp = *i1 & 0xFF; 2156 2157 /* Drop lock across the bind operation */ 2158 rw_exit(&icmp->icmp_rwlock); 2159 icmp_bind_proto(q); 2160 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2161 return (0); 2162 case SO_REUSEADDR: 2163 if (!checkonly) 2164 icmp->icmp_reuseaddr = onoff; 2165 break; 2166 2167 /* 2168 * The following three items are available here, 2169 * but are only meaningful to IP. 2170 */ 2171 case SO_DONTROUTE: 2172 if (!checkonly) 2173 icmp->icmp_dontroute = onoff; 2174 break; 2175 case SO_USELOOPBACK: 2176 if (!checkonly) 2177 icmp->icmp_useloopback = onoff; 2178 break; 2179 case SO_BROADCAST: 2180 if (!checkonly) 2181 icmp->icmp_broadcast = onoff; 2182 break; 2183 2184 case SO_SNDBUF: 2185 if (*i1 > is->is_max_buf) { 2186 *outlenp = 0; 2187 return (ENOBUFS); 2188 } 2189 if (!checkonly) { 2190 q->q_hiwat = *i1; 2191 } 2192 break; 2193 case SO_RCVBUF: 2194 if (*i1 > is->is_max_buf) { 2195 *outlenp = 0; 2196 return (ENOBUFS); 2197 } 2198 if (!checkonly) { 2199 RD(q)->q_hiwat = *i1; 2200 rw_exit(&icmp->icmp_rwlock); 2201 (void) mi_set_sth_hiwat(RD(q), *i1); 2202 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2203 } 2204 break; 2205 case SO_DGRAM_ERRIND: 2206 if (!checkonly) 2207 icmp->icmp_dgram_errind = onoff; 2208 break; 2209 case SO_ALLZONES: 2210 /* 2211 * "soft" error (negative) 2212 * option not handled at this level 2213 * Note: Do not modify *outlenp 2214 */ 2215 return (-EINVAL); 2216 case SO_TIMESTAMP: 2217 if (!checkonly) { 2218 icmp->icmp_timestamp = onoff; 2219 } 2220 break; 2221 case SO_MAC_EXEMPT: 2222 if (secpolicy_net_mac_aware(cr) != 0 || 2223 icmp->icmp_state != TS_UNBND) 2224 return (EACCES); 2225 if (!checkonly) 2226 icmp->icmp_mac_exempt = onoff; 2227 break; 2228 /* 2229 * Following three not meaningful for icmp 2230 * Action is same as "default" so we keep them 2231 * in comments. 
2232 * case SO_LINGER: 2233 * case SO_KEEPALIVE: 2234 * case SO_OOBINLINE: 2235 */ 2236 default: 2237 *outlenp = 0; 2238 return (EINVAL); 2239 } 2240 break; 2241 case IPPROTO_IP: 2242 /* 2243 * Only allow IPv4 option processing on IPv4 sockets. 2244 */ 2245 if (icmp->icmp_family != AF_INET) { 2246 *outlenp = 0; 2247 return (ENOPROTOOPT); 2248 } 2249 switch (name) { 2250 case IP_OPTIONS: 2251 case T_IP_OPTIONS: 2252 /* Save options for use by IP. */ 2253 if ((inlen & 0x3) || 2254 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2255 *outlenp = 0; 2256 return (EINVAL); 2257 } 2258 if (checkonly) 2259 break; 2260 2261 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2262 &icmp->icmp_ip_snd_options_len, 2263 icmp->icmp_label_len, invalp, inlen)) { 2264 *outlenp = 0; 2265 return (ENOMEM); 2266 } 2267 2268 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2269 icmp->icmp_ip_snd_options_len; 2270 rw_exit(&icmp->icmp_rwlock); 2271 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2272 is->is_wroff_extra); 2273 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2274 break; 2275 case IP_HDRINCL: 2276 if (!checkonly) 2277 icmp->icmp_hdrincl = onoff; 2278 break; 2279 case IP_TOS: 2280 case T_IP_TOS: 2281 if (!checkonly) { 2282 icmp->icmp_type_of_service = (uint8_t)*i1; 2283 } 2284 break; 2285 case IP_TTL: 2286 if (!checkonly) { 2287 icmp->icmp_ttl = (uint8_t)*i1; 2288 } 2289 break; 2290 case IP_MULTICAST_IF: 2291 /* 2292 * TODO should check OPTMGMT reply and undo this if 2293 * there is an error. 2294 */ 2295 if (!checkonly) 2296 icmp->icmp_multicast_if_addr = *i1; 2297 break; 2298 case IP_MULTICAST_TTL: 2299 if (!checkonly) 2300 icmp->icmp_multicast_ttl = *invalp; 2301 break; 2302 case IP_MULTICAST_LOOP: 2303 if (!checkonly) { 2304 connp->conn_multicast_loop = 2305 (*invalp == 0) ? 
0 : 1; 2306 } 2307 break; 2308 case IP_BOUND_IF: 2309 if (!checkonly) 2310 icmp->icmp_bound_if = *i1; 2311 break; 2312 case IP_UNSPEC_SRC: 2313 if (!checkonly) 2314 icmp->icmp_unspec_source = onoff; 2315 break; 2316 case IP_RECVIF: 2317 if (!checkonly) 2318 icmp->icmp_recvif = onoff; 2319 /* 2320 * pass to ip 2321 */ 2322 return (-EINVAL); 2323 case IP_PKTINFO: { 2324 /* 2325 * This also handles IP_RECVPKTINFO. 2326 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2327 * Differentiation is based on the size of the argument 2328 * passed in. 2329 */ 2330 struct in_pktinfo *pktinfop; 2331 ip4_pkt_t *attr_pktinfop; 2332 2333 if (checkonly) 2334 break; 2335 2336 if (inlen == sizeof (int)) { 2337 /* 2338 * This is IP_RECVPKTINFO option. 2339 * Keep a local copy of wether this option is 2340 * set or not and pass it down to IP for 2341 * processing. 2342 */ 2343 icmp->icmp_ip_recvpktinfo = onoff; 2344 return (-EINVAL); 2345 } 2346 2347 2348 if (inlen != sizeof (struct in_pktinfo)) 2349 return (EINVAL); 2350 2351 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2352 == NULL) { 2353 /* 2354 * sticky option is not supported 2355 */ 2356 return (EINVAL); 2357 } 2358 2359 pktinfop = (struct in_pktinfo *)invalp; 2360 2361 /* 2362 * Atleast one of the values should be specified 2363 */ 2364 if (pktinfop->ipi_ifindex == 0 && 2365 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2366 return (EINVAL); 2367 } 2368 2369 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2370 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2371 } 2372 break; 2373 case IP_ADD_MEMBERSHIP: 2374 case IP_DROP_MEMBERSHIP: 2375 case IP_BLOCK_SOURCE: 2376 case IP_UNBLOCK_SOURCE: 2377 case IP_ADD_SOURCE_MEMBERSHIP: 2378 case IP_DROP_SOURCE_MEMBERSHIP: 2379 case MCAST_JOIN_GROUP: 2380 case MCAST_LEAVE_GROUP: 2381 case MCAST_BLOCK_SOURCE: 2382 case MCAST_UNBLOCK_SOURCE: 2383 case MCAST_JOIN_SOURCE_GROUP: 2384 case MCAST_LEAVE_SOURCE_GROUP: 2385 case MRT_INIT: 2386 case MRT_DONE: 2387 case 
MRT_ADD_VIF: 2388 case MRT_DEL_VIF: 2389 case MRT_ADD_MFC: 2390 case MRT_DEL_MFC: 2391 case MRT_VERSION: 2392 case MRT_ASSERT: 2393 case IP_SEC_OPT: 2394 case IP_DONTFAILOVER_IF: 2395 case IP_NEXTHOP: 2396 /* 2397 * "soft" error (negative) 2398 * option not handled at this level 2399 * Note: Do not modify *outlenp 2400 */ 2401 return (-EINVAL); 2402 default: 2403 *outlenp = 0; 2404 return (EINVAL); 2405 } 2406 break; 2407 case IPPROTO_IPV6: { 2408 ip6_pkt_t *ipp; 2409 boolean_t sticky; 2410 2411 if (icmp->icmp_family != AF_INET6) { 2412 *outlenp = 0; 2413 return (ENOPROTOOPT); 2414 } 2415 /* 2416 * Deal with both sticky options and ancillary data 2417 */ 2418 if (thisdg_attrs == NULL) { 2419 /* sticky options, or none */ 2420 ipp = &icmp->icmp_sticky_ipp; 2421 sticky = B_TRUE; 2422 } else { 2423 /* ancillary data */ 2424 ipp = (ip6_pkt_t *)thisdg_attrs; 2425 sticky = B_FALSE; 2426 } 2427 2428 switch (name) { 2429 case IPV6_MULTICAST_IF: 2430 if (!checkonly) 2431 icmp->icmp_multicast_if_index = *i1; 2432 break; 2433 case IPV6_UNICAST_HOPS: 2434 /* -1 means use default */ 2435 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2436 *outlenp = 0; 2437 return (EINVAL); 2438 } 2439 if (!checkonly) { 2440 if (*i1 == -1) { 2441 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2442 is->is_ipv6_hoplimit; 2443 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2444 /* Pass modified value to IP. 
*/ 2445 *i1 = ipp->ipp_hoplimit; 2446 } else { 2447 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2448 (uint8_t)*i1; 2449 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2450 } 2451 /* Rebuild the header template */ 2452 error = icmp_build_hdrs(icmp); 2453 if (error != 0) { 2454 *outlenp = 0; 2455 return (error); 2456 } 2457 } 2458 break; 2459 case IPV6_MULTICAST_HOPS: 2460 /* -1 means use default */ 2461 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2462 *outlenp = 0; 2463 return (EINVAL); 2464 } 2465 if (!checkonly) { 2466 if (*i1 == -1) { 2467 icmp->icmp_multicast_ttl = 2468 ipp->ipp_multicast_hops = 2469 IP_DEFAULT_MULTICAST_TTL; 2470 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2471 /* Pass modified value to IP. */ 2472 *i1 = icmp->icmp_multicast_ttl; 2473 } else { 2474 icmp->icmp_multicast_ttl = 2475 ipp->ipp_multicast_hops = 2476 (uint8_t)*i1; 2477 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2478 } 2479 } 2480 break; 2481 case IPV6_MULTICAST_LOOP: 2482 if (*i1 != 0 && *i1 != 1) { 2483 *outlenp = 0; 2484 return (EINVAL); 2485 } 2486 if (!checkonly) 2487 connp->conn_multicast_loop = *i1; 2488 break; 2489 case IPV6_CHECKSUM: 2490 /* 2491 * Integer offset into the user data of where the 2492 * checksum is located. 2493 * Offset of -1 disables option. 2494 * Does not apply to IPPROTO_ICMPV6. 
2495 */ 2496 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2497 *outlenp = 0; 2498 return (EINVAL); 2499 } 2500 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2501 /* Negative or not 16 bit aligned offset */ 2502 *outlenp = 0; 2503 return (EINVAL); 2504 } 2505 if (checkonly) 2506 break; 2507 2508 if (*i1 == -1) { 2509 icmp->icmp_raw_checksum = 0; 2510 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2511 } else { 2512 icmp->icmp_raw_checksum = 1; 2513 icmp->icmp_checksum_off = *i1; 2514 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2515 } 2516 /* Rebuild the header template */ 2517 error = icmp_build_hdrs(icmp); 2518 if (error != 0) { 2519 *outlenp = 0; 2520 return (error); 2521 } 2522 break; 2523 case IPV6_JOIN_GROUP: 2524 case IPV6_LEAVE_GROUP: 2525 case MCAST_JOIN_GROUP: 2526 case MCAST_LEAVE_GROUP: 2527 case MCAST_BLOCK_SOURCE: 2528 case MCAST_UNBLOCK_SOURCE: 2529 case MCAST_JOIN_SOURCE_GROUP: 2530 case MCAST_LEAVE_SOURCE_GROUP: 2531 /* 2532 * "soft" error (negative) 2533 * option not handled at this level 2534 * Note: Do not modify *outlenp 2535 */ 2536 return (-EINVAL); 2537 case IPV6_BOUND_IF: 2538 if (!checkonly) 2539 icmp->icmp_bound_if = *i1; 2540 break; 2541 case IPV6_UNSPEC_SRC: 2542 if (!checkonly) 2543 icmp->icmp_unspec_source = onoff; 2544 break; 2545 case IPV6_RECVTCLASS: 2546 if (!checkonly) 2547 icmp->icmp_ipv6_recvtclass = onoff; 2548 break; 2549 /* 2550 * Set boolean switches for ancillary data delivery 2551 */ 2552 case IPV6_RECVPKTINFO: 2553 if (!checkonly) 2554 icmp->icmp_ip_recvpktinfo = onoff; 2555 break; 2556 case IPV6_RECVPATHMTU: 2557 if (!checkonly) 2558 icmp->icmp_ipv6_recvpathmtu = onoff; 2559 break; 2560 case IPV6_RECVHOPLIMIT: 2561 if (!checkonly) 2562 icmp->icmp_ipv6_recvhoplimit = onoff; 2563 break; 2564 case IPV6_RECVHOPOPTS: 2565 if (!checkonly) 2566 icmp->icmp_ipv6_recvhopopts = onoff; 2567 break; 2568 case IPV6_RECVDSTOPTS: 2569 if (!checkonly) 2570 icmp->icmp_ipv6_recvdstopts = onoff; 2571 break; 2572 case _OLD_IPV6_RECVDSTOPTS: 2573 
if (!checkonly) 2574 icmp->icmp_old_ipv6_recvdstopts = onoff; 2575 break; 2576 case IPV6_RECVRTHDRDSTOPTS: 2577 if (!checkonly) 2578 icmp->icmp_ipv6_recvrtdstopts = onoff; 2579 break; 2580 case IPV6_RECVRTHDR: 2581 if (!checkonly) 2582 icmp->icmp_ipv6_recvrthdr = onoff; 2583 break; 2584 /* 2585 * Set sticky options or ancillary data. 2586 * If sticky options, (re)build any extension headers 2587 * that might be needed as a result. 2588 */ 2589 case IPV6_PKTINFO: 2590 /* 2591 * The source address and ifindex are verified 2592 * in ip_opt_set(). For ancillary data the 2593 * source address is checked in ip_wput_v6. 2594 */ 2595 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2596 return (EINVAL); 2597 if (checkonly) 2598 break; 2599 2600 if (inlen == 0) { 2601 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2602 ipp->ipp_sticky_ignored |= 2603 (IPPF_IFINDEX|IPPF_ADDR); 2604 } else { 2605 struct in6_pktinfo *pkti; 2606 2607 pkti = (struct in6_pktinfo *)invalp; 2608 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2609 ipp->ipp_addr = pkti->ipi6_addr; 2610 if (ipp->ipp_ifindex != 0) 2611 ipp->ipp_fields |= IPPF_IFINDEX; 2612 else 2613 ipp->ipp_fields &= ~IPPF_IFINDEX; 2614 if (!IN6_IS_ADDR_UNSPECIFIED( 2615 &ipp->ipp_addr)) 2616 ipp->ipp_fields |= IPPF_ADDR; 2617 else 2618 ipp->ipp_fields &= ~IPPF_ADDR; 2619 } 2620 if (sticky) { 2621 error = icmp_build_hdrs(icmp); 2622 if (error != 0) 2623 return (error); 2624 } 2625 break; 2626 case IPV6_HOPLIMIT: 2627 /* This option can only be used as ancillary data. 
*/ 2628 if (sticky) 2629 return (EINVAL); 2630 if (inlen != 0 && inlen != sizeof (int)) 2631 return (EINVAL); 2632 if (checkonly) 2633 break; 2634 2635 if (inlen == 0) { 2636 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2637 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2638 } else { 2639 if (*i1 > 255 || *i1 < -1) 2640 return (EINVAL); 2641 if (*i1 == -1) 2642 ipp->ipp_hoplimit = 2643 is->is_ipv6_hoplimit; 2644 else 2645 ipp->ipp_hoplimit = *i1; 2646 ipp->ipp_fields |= IPPF_HOPLIMIT; 2647 } 2648 break; 2649 case IPV6_TCLASS: 2650 /* 2651 * IPV6_RECVTCLASS accepts -1 as use kernel default 2652 * and [0, 255] as the actualy traffic class. 2653 */ 2654 if (inlen != 0 && inlen != sizeof (int)) 2655 return (EINVAL); 2656 if (checkonly) 2657 break; 2658 2659 if (inlen == 0) { 2660 ipp->ipp_fields &= ~IPPF_TCLASS; 2661 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2662 } else { 2663 if (*i1 >= 256 || *i1 < -1) 2664 return (EINVAL); 2665 if (*i1 == -1) { 2666 ipp->ipp_tclass = 2667 IPV6_FLOW_TCLASS( 2668 IPV6_DEFAULT_VERS_AND_FLOW); 2669 } else { 2670 ipp->ipp_tclass = *i1; 2671 } 2672 ipp->ipp_fields |= IPPF_TCLASS; 2673 } 2674 if (sticky) { 2675 error = icmp_build_hdrs(icmp); 2676 if (error != 0) 2677 return (error); 2678 } 2679 break; 2680 case IPV6_NEXTHOP: 2681 /* 2682 * IP will verify that the nexthop is reachable 2683 * and fail for sticky options. 
2684 */ 2685 if (inlen != 0 && inlen != sizeof (sin6_t)) 2686 return (EINVAL); 2687 if (checkonly) 2688 break; 2689 2690 if (inlen == 0) { 2691 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2692 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2693 } else { 2694 sin6_t *sin6 = (sin6_t *)invalp; 2695 2696 if (sin6->sin6_family != AF_INET6) 2697 return (EAFNOSUPPORT); 2698 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 2699 return (EADDRNOTAVAIL); 2700 ipp->ipp_nexthop = sin6->sin6_addr; 2701 if (!IN6_IS_ADDR_UNSPECIFIED( 2702 &ipp->ipp_nexthop)) 2703 ipp->ipp_fields |= IPPF_NEXTHOP; 2704 else 2705 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2706 } 2707 if (sticky) { 2708 error = icmp_build_hdrs(icmp); 2709 if (error != 0) 2710 return (error); 2711 } 2712 break; 2713 case IPV6_HOPOPTS: { 2714 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2715 /* 2716 * Sanity checks - minimum size, size a multiple of 2717 * eight bytes, and matching size passed in. 2718 */ 2719 if (inlen != 0 && 2720 inlen != (8 * (hopts->ip6h_len + 1))) 2721 return (EINVAL); 2722 2723 if (checkonly) 2724 break; 2725 error = optcom_pkt_set(invalp, inlen, sticky, 2726 (uchar_t **)&ipp->ipp_hopopts, 2727 &ipp->ipp_hopoptslen, 2728 sticky ? icmp->icmp_label_len_v6 : 0); 2729 if (error != 0) 2730 return (error); 2731 if (ipp->ipp_hopoptslen == 0) { 2732 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2733 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2734 } else { 2735 ipp->ipp_fields |= IPPF_HOPOPTS; 2736 } 2737 if (sticky) { 2738 error = icmp_build_hdrs(icmp); 2739 if (error != 0) 2740 return (error); 2741 } 2742 break; 2743 } 2744 case IPV6_RTHDRDSTOPTS: { 2745 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2746 2747 /* 2748 * Sanity checks - minimum size, size a multiple of 2749 * eight bytes, and matching size passed in. 
2750 */ 2751 if (inlen != 0 && 2752 inlen != (8 * (dopts->ip6d_len + 1))) 2753 return (EINVAL); 2754 2755 if (checkonly) 2756 break; 2757 2758 if (inlen == 0) { 2759 if (sticky && 2760 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2761 kmem_free(ipp->ipp_rtdstopts, 2762 ipp->ipp_rtdstoptslen); 2763 ipp->ipp_rtdstopts = NULL; 2764 ipp->ipp_rtdstoptslen = 0; 2765 } 2766 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2767 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2768 } else { 2769 error = optcom_pkt_set(invalp, inlen, sticky, 2770 (uchar_t **)&ipp->ipp_rtdstopts, 2771 &ipp->ipp_rtdstoptslen, 0); 2772 if (error != 0) 2773 return (error); 2774 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2775 } 2776 if (sticky) { 2777 error = icmp_build_hdrs(icmp); 2778 if (error != 0) 2779 return (error); 2780 } 2781 break; 2782 } 2783 case IPV6_DSTOPTS: { 2784 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2785 2786 /* 2787 * Sanity checks - minimum size, size a multiple of 2788 * eight bytes, and matching size passed in. 2789 */ 2790 if (inlen != 0 && 2791 inlen != (8 * (dopts->ip6d_len + 1))) 2792 return (EINVAL); 2793 2794 if (checkonly) 2795 break; 2796 2797 if (inlen == 0) { 2798 if (sticky && 2799 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2800 kmem_free(ipp->ipp_dstopts, 2801 ipp->ipp_dstoptslen); 2802 ipp->ipp_dstopts = NULL; 2803 ipp->ipp_dstoptslen = 0; 2804 } 2805 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2806 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2807 } else { 2808 error = optcom_pkt_set(invalp, inlen, sticky, 2809 (uchar_t **)&ipp->ipp_dstopts, 2810 &ipp->ipp_dstoptslen, 0); 2811 if (error != 0) 2812 return (error); 2813 ipp->ipp_fields |= IPPF_DSTOPTS; 2814 } 2815 if (sticky) { 2816 error = icmp_build_hdrs(icmp); 2817 if (error != 0) 2818 return (error); 2819 } 2820 break; 2821 } 2822 case IPV6_RTHDR: { 2823 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2824 2825 /* 2826 * Sanity checks - minimum size, size a multiple of 2827 * eight bytes, and matching size passed in. 
2828 */ 2829 if (inlen != 0 && 2830 inlen != (8 * (rt->ip6r_len + 1))) 2831 return (EINVAL); 2832 2833 if (checkonly) 2834 break; 2835 2836 if (inlen == 0) { 2837 if (sticky && 2838 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2839 kmem_free(ipp->ipp_rthdr, 2840 ipp->ipp_rthdrlen); 2841 ipp->ipp_rthdr = NULL; 2842 ipp->ipp_rthdrlen = 0; 2843 } 2844 ipp->ipp_fields &= ~IPPF_RTHDR; 2845 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2846 } else { 2847 error = optcom_pkt_set(invalp, inlen, sticky, 2848 (uchar_t **)&ipp->ipp_rthdr, 2849 &ipp->ipp_rthdrlen, 0); 2850 if (error != 0) 2851 return (error); 2852 ipp->ipp_fields |= IPPF_RTHDR; 2853 } 2854 if (sticky) { 2855 error = icmp_build_hdrs(icmp); 2856 if (error != 0) 2857 return (error); 2858 } 2859 break; 2860 } 2861 2862 case IPV6_DONTFRAG: 2863 if (checkonly) 2864 break; 2865 2866 if (onoff) { 2867 ipp->ipp_fields |= IPPF_DONTFRAG; 2868 } else { 2869 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2870 } 2871 break; 2872 2873 case IPV6_USE_MIN_MTU: 2874 if (inlen != sizeof (int)) 2875 return (EINVAL); 2876 2877 if (*i1 < -1 || *i1 > 1) 2878 return (EINVAL); 2879 2880 if (checkonly) 2881 break; 2882 2883 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 2884 ipp->ipp_use_min_mtu = *i1; 2885 break; 2886 2887 /* 2888 * This option can't be set. Its only returned via 2889 * getsockopt() or ancillary data. 2890 */ 2891 case IPV6_PATHMTU: 2892 return (EINVAL); 2893 2894 case IPV6_BOUND_PIF: 2895 case IPV6_SEC_OPT: 2896 case IPV6_DONTFAILOVER_IF: 2897 case IPV6_SRC_PREFERENCES: 2898 case IPV6_V6ONLY: 2899 /* Handled at IP level */ 2900 return (-EINVAL); 2901 default: 2902 *outlenp = 0; 2903 return (EINVAL); 2904 } 2905 break; 2906 } /* end IPPROTO_IPV6 */ 2907 2908 case IPPROTO_ICMPV6: 2909 /* 2910 * Only allow IPv6 option processing on IPv6 sockets. 
2911 */ 2912 if (icmp->icmp_family != AF_INET6) { 2913 *outlenp = 0; 2914 return (ENOPROTOOPT); 2915 } 2916 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2917 *outlenp = 0; 2918 return (ENOPROTOOPT); 2919 } 2920 switch (name) { 2921 case ICMP6_FILTER: 2922 if (!checkonly) { 2923 if ((inlen != 0) && 2924 (inlen != sizeof (icmp6_filter_t))) 2925 return (EINVAL); 2926 2927 if (inlen == 0) { 2928 if (icmp->icmp_filter != NULL) { 2929 kmem_free(icmp->icmp_filter, 2930 sizeof (icmp6_filter_t)); 2931 icmp->icmp_filter = NULL; 2932 } 2933 } else { 2934 if (icmp->icmp_filter == NULL) { 2935 icmp->icmp_filter = kmem_alloc( 2936 sizeof (icmp6_filter_t), 2937 KM_NOSLEEP); 2938 if (icmp->icmp_filter == NULL) { 2939 *outlenp = 0; 2940 return (ENOBUFS); 2941 } 2942 } 2943 (void) bcopy(invalp, icmp->icmp_filter, 2944 inlen); 2945 } 2946 } 2947 break; 2948 2949 default: 2950 *outlenp = 0; 2951 return (EINVAL); 2952 } 2953 break; 2954 default: 2955 *outlenp = 0; 2956 return (EINVAL); 2957 } 2958 /* 2959 * Common case of OK return with outval same as inval. 2960 */ 2961 if (invalp != outvalp) { 2962 /* don't trust bcopy for identical src/dst */ 2963 (void) bcopy(invalp, outvalp, inlen); 2964 } 2965 *outlenp = inlen; 2966 return (0); 2967 } 2968 /* This routine sets socket options. */ 2969 /* ARGSUSED */ 2970 int 2971 icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, 2972 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2973 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2974 { 2975 icmp_t *icmp; 2976 int err; 2977 2978 icmp = Q_TO_ICMP(q); 2979 2980 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2981 err = icmp_opt_set_locked(q, optset_context, level, name, inlen, invalp, 2982 outlenp, outvalp, thisdg_attrs, cr, mblk); 2983 rw_exit(&icmp->icmp_rwlock); 2984 return (err); 2985 } 2986 2987 /* 2988 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 2989 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 
2990 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 2991 * headers. 2992 * Returns failure if can't allocate memory. 2993 */ 2994 static int 2995 icmp_build_hdrs(icmp_t *icmp) 2996 { 2997 icmp_stack_t *is = icmp->icmp_is; 2998 uchar_t *hdrs; 2999 uint_t hdrs_len; 3000 ip6_t *ip6h; 3001 ip6i_t *ip6i; 3002 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 3003 3004 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 3005 hdrs_len = ip_total_hdrs_len_v6(ipp); 3006 ASSERT(hdrs_len != 0); 3007 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 3008 /* Need to reallocate */ 3009 if (hdrs_len != 0) { 3010 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3011 if (hdrs == NULL) 3012 return (ENOMEM); 3013 } else { 3014 hdrs = NULL; 3015 } 3016 if (icmp->icmp_sticky_hdrs_len != 0) { 3017 kmem_free(icmp->icmp_sticky_hdrs, 3018 icmp->icmp_sticky_hdrs_len); 3019 } 3020 icmp->icmp_sticky_hdrs = hdrs; 3021 icmp->icmp_sticky_hdrs_len = hdrs_len; 3022 } 3023 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 3024 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 3025 3026 /* Set header fields not in ipp */ 3027 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3028 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 3029 ip6h = (ip6_t *)&ip6i[1]; 3030 3031 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 3032 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 3033 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 3034 } 3035 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 3036 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 3037 } 3038 } else { 3039 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 3040 } 3041 3042 if (!(ipp->ipp_fields & IPPF_ADDR)) 3043 ip6h->ip6_src = icmp->icmp_v6src; 3044 3045 /* Try to get everything in a single mblk */ 3046 if (hdrs_len > icmp->icmp_max_hdr_len) { 3047 icmp->icmp_max_hdr_len = hdrs_len; 3048 rw_exit(&icmp->icmp_rwlock); 3049 (void) mi_set_sth_wroff(icmp->icmp_connp->conn_rq, 3050 icmp->icmp_max_hdr_len + is->is_wroff_extra); 3051 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3052 } 3053 return (0); 3054 } 3055 3056 /* 3057 * 
This routine retrieves the value of an ND variable in a icmpparam_t 3058 * structure. It is called through nd_getset when a user reads the 3059 * variable. 3060 */ 3061 /* ARGSUSED */ 3062 static int 3063 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3064 { 3065 icmpparam_t *icmppa = (icmpparam_t *)cp; 3066 3067 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 3068 return (0); 3069 } 3070 3071 /* 3072 * Walk through the param array specified registering each element with the 3073 * named dispatch (ND) handler. 3074 */ 3075 static boolean_t 3076 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) 3077 { 3078 for (; cnt-- > 0; icmppa++) { 3079 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 3080 if (!nd_load(ndp, icmppa->icmp_param_name, 3081 icmp_param_get, icmp_param_set, 3082 (caddr_t)icmppa)) { 3083 nd_free(ndp); 3084 return (B_FALSE); 3085 } 3086 } 3087 } 3088 if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL, 3089 NULL)) { 3090 nd_free(ndp); 3091 return (B_FALSE); 3092 } 3093 return (B_TRUE); 3094 } 3095 3096 /* This routine sets an ND variable in a icmpparam_t structure. */ 3097 /* ARGSUSED */ 3098 static int 3099 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3100 { 3101 long new_value; 3102 icmpparam_t *icmppa = (icmpparam_t *)cp; 3103 3104 /* 3105 * Fail the request if the new value does not lie within the 3106 * required bounds. 
3107 */ 3108 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3109 new_value < icmppa->icmp_param_min || 3110 new_value > icmppa->icmp_param_max) { 3111 return (EINVAL); 3112 } 3113 /* Set the new value */ 3114 icmppa->icmp_param_value = new_value; 3115 return (0); 3116 } 3117 /*ARGSUSED2*/ 3118 static void 3119 icmp_input(void *arg1, mblk_t *mp, void *arg2) 3120 { 3121 conn_t *connp = (conn_t *)arg1; 3122 struct T_unitdata_ind *tudi; 3123 uchar_t *rptr; 3124 icmp_t *icmp; 3125 icmp_stack_t *is; 3126 sin_t *sin; 3127 sin6_t *sin6; 3128 ip6_t *ip6h; 3129 ip6i_t *ip6i; 3130 mblk_t *mp1; 3131 int hdr_len; 3132 ipha_t *ipha; 3133 int udi_size; /* Size of T_unitdata_ind */ 3134 uint_t ipvers; 3135 ip6_pkt_t ipp; 3136 uint8_t nexthdr; 3137 ip_pktinfo_t *pinfo = NULL; 3138 mblk_t *options_mp = NULL; 3139 uint_t icmp_opt = 0; 3140 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 3141 uint_t hopstrip; 3142 3143 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 3144 3145 icmp = connp->conn_icmp; 3146 is = icmp->icmp_is; 3147 rptr = mp->b_rptr; 3148 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3149 ASSERT(OK_32PTR(rptr)); 3150 3151 /* 3152 * IP should have prepended the options data in an M_CTL 3153 * Check M_CTL "type" to make sure are not here bcos of 3154 * a valid ICMP message 3155 */ 3156 if (DB_TYPE(mp) == M_CTL) { 3157 /* 3158 * FIXME: does IP still do this? 3159 * IP sends up the IPSEC_IN message for handling IPSEC 3160 * policy at the TCP level. We don't need it here. 3161 */ 3162 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 3163 mp1 = mp->b_cont; 3164 freeb(mp); 3165 mp = mp1; 3166 rptr = mp->b_rptr; 3167 } else if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3168 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3169 IN_PKTINFO) { 3170 /* 3171 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3172 * has been prepended to the packet by IP. 
We need to 3173 * extract the mblk and adjust the rptr 3174 */ 3175 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3176 options_mp = mp; 3177 mp = mp->b_cont; 3178 rptr = mp->b_rptr; 3179 } else { 3180 /* 3181 * ICMP messages. 3182 */ 3183 icmp_icmp_error(connp->conn_rq, mp); 3184 return; 3185 } 3186 } 3187 3188 /* 3189 * Discard message if it is misaligned or smaller than the IP header. 3190 */ 3191 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3192 freemsg(mp); 3193 if (options_mp != NULL) 3194 freeb(options_mp); 3195 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3196 return; 3197 } 3198 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3199 3200 /* Handle M_DATA messages containing IP packets messages */ 3201 if (ipvers == IPV4_VERSION) { 3202 /* 3203 * Special case where IP attaches 3204 * the IRE needs to be handled so that we don't send up 3205 * IRE to the user land. 3206 */ 3207 ipha = (ipha_t *)rptr; 3208 hdr_len = IPH_HDR_LENGTH(ipha); 3209 3210 if (ipha->ipha_protocol == IPPROTO_TCP) { 3211 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3212 3213 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3214 TH_SYN) && mp->b_cont != NULL) { 3215 mp1 = mp->b_cont; 3216 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3217 freeb(mp1); 3218 mp->b_cont = NULL; 3219 } 3220 } 3221 } 3222 if (is->is_bsd_compat) { 3223 ushort_t len; 3224 len = ntohs(ipha->ipha_length); 3225 3226 if (mp->b_datap->db_ref > 1) { 3227 /* 3228 * Allocate a new IP header so that we can 3229 * modify ipha_length. 
3230 */ 3231 mblk_t *mp1; 3232 3233 mp1 = allocb(hdr_len, BPRI_MED); 3234 if (!mp1) { 3235 freemsg(mp); 3236 if (options_mp != NULL) 3237 freeb(options_mp); 3238 BUMP_MIB(&is->is_rawip_mib, 3239 rawipInErrors); 3240 return; 3241 } 3242 bcopy(rptr, mp1->b_rptr, hdr_len); 3243 mp->b_rptr = rptr + hdr_len; 3244 rptr = mp1->b_rptr; 3245 ipha = (ipha_t *)rptr; 3246 mp1->b_cont = mp; 3247 mp1->b_wptr = rptr + hdr_len; 3248 mp = mp1; 3249 } 3250 len -= hdr_len; 3251 ipha->ipha_length = htons(len); 3252 } 3253 } 3254 3255 /* 3256 * This is the inbound data path. Packets are passed upstream as 3257 * T_UNITDATA_IND messages with full IP headers still attached. 3258 */ 3259 if (icmp->icmp_family == AF_INET) { 3260 ASSERT(ipvers == IPV4_VERSION); 3261 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3262 if (icmp->icmp_recvif && (pinfo != NULL) && 3263 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3264 udi_size += sizeof (struct T_opthdr) + 3265 sizeof (uint_t); 3266 } 3267 3268 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3269 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3270 udi_size += sizeof (struct T_opthdr) + 3271 sizeof (struct in_pktinfo); 3272 } 3273 3274 /* 3275 * If SO_TIMESTAMP is set allocate the appropriate sized 3276 * buffer. Since gethrestime() expects a pointer aligned 3277 * argument, we allocate space necessary for extra 3278 * alignment (even though it might not be used). 
3279 */ 3280 if (icmp->icmp_timestamp) { 3281 udi_size += sizeof (struct T_opthdr) + 3282 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3283 } 3284 mp1 = allocb(udi_size, BPRI_MED); 3285 if (mp1 == NULL) { 3286 freemsg(mp); 3287 if (options_mp != NULL) 3288 freeb(options_mp); 3289 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3290 return; 3291 } 3292 mp1->b_cont = mp; 3293 mp = mp1; 3294 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3295 mp->b_datap->db_type = M_PROTO; 3296 mp->b_wptr = (uchar_t *)tudi + udi_size; 3297 tudi->PRIM_type = T_UNITDATA_IND; 3298 tudi->SRC_length = sizeof (sin_t); 3299 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3300 sin = (sin_t *)&tudi[1]; 3301 *sin = sin_null; 3302 sin->sin_family = AF_INET; 3303 sin->sin_addr.s_addr = ipha->ipha_src; 3304 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3305 sizeof (sin_t); 3306 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3307 tudi->OPT_length = udi_size; 3308 3309 /* 3310 * Add options if IP_RECVIF is set 3311 */ 3312 if (udi_size != 0) { 3313 char *dstopt; 3314 3315 dstopt = (char *)&sin[1]; 3316 if (icmp->icmp_recvif && (pinfo != NULL) && 3317 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3318 3319 struct T_opthdr *toh; 3320 uint_t *dstptr; 3321 3322 toh = (struct T_opthdr *)dstopt; 3323 toh->level = IPPROTO_IP; 3324 toh->name = IP_RECVIF; 3325 toh->len = sizeof (struct T_opthdr) + 3326 sizeof (uint_t); 3327 toh->status = 0; 3328 dstopt += sizeof (struct T_opthdr); 3329 dstptr = (uint_t *)dstopt; 3330 *dstptr = pinfo->ip_pkt_ifindex; 3331 dstopt += sizeof (uint_t); 3332 udi_size -= toh->len; 3333 } 3334 if (icmp->icmp_timestamp) { 3335 struct T_opthdr *toh; 3336 3337 toh = (struct T_opthdr *)dstopt; 3338 toh->level = SOL_SOCKET; 3339 toh->name = SCM_TIMESTAMP; 3340 toh->len = sizeof (struct T_opthdr) + 3341 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3342 toh->status = 0; 3343 dstopt += sizeof (struct T_opthdr); 3344 /* Align for gethrestime() */ 3345 dstopt = (char 
*)P2ROUNDUP((intptr_t)dstopt, 3346 sizeof (intptr_t)); 3347 gethrestime((timestruc_t *)dstopt); 3348 dstopt = (char *)toh + toh->len; 3349 udi_size -= toh->len; 3350 } 3351 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3352 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3353 struct T_opthdr *toh; 3354 struct in_pktinfo *pktinfop; 3355 3356 toh = (struct T_opthdr *)dstopt; 3357 toh->level = IPPROTO_IP; 3358 toh->name = IP_PKTINFO; 3359 toh->len = sizeof (struct T_opthdr) + 3360 sizeof (in_pktinfo_t); 3361 toh->status = 0; 3362 dstopt += sizeof (struct T_opthdr); 3363 pktinfop = (struct in_pktinfo *)dstopt; 3364 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3365 pktinfop->ipi_spec_dst = 3366 pinfo->ip_pkt_match_addr; 3367 3368 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3369 3370 dstopt += sizeof (struct in_pktinfo); 3371 udi_size -= toh->len; 3372 } 3373 3374 /* Consumed all of allocated space */ 3375 ASSERT(udi_size == 0); 3376 } 3377 3378 if (options_mp != NULL) 3379 freeb(options_mp); 3380 3381 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3382 putnext(connp->conn_rq, mp); 3383 return; 3384 } 3385 3386 /* 3387 * We don't need options_mp in the IPv6 path. 3388 */ 3389 if (options_mp != NULL) { 3390 freeb(options_mp); 3391 options_mp = NULL; 3392 } 3393 3394 /* 3395 * Discard message if it is smaller than the IPv6 header 3396 * or if the header is malformed. 3397 */ 3398 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3399 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3400 icmp->icmp_family != AF_INET6) { 3401 freemsg(mp); 3402 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3403 return; 3404 } 3405 3406 /* Initialize */ 3407 ipp.ipp_fields = 0; 3408 hopstrip = 0; 3409 3410 ip6h = (ip6_t *)rptr; 3411 /* 3412 * Call on ip_find_hdr_v6 which gets the total hdr len 3413 * as well as individual lenghts of ext hdrs (and ptrs to 3414 * them). 
3415 */ 3416 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3417 /* Look for ifindex information */ 3418 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3419 ip6i = (ip6i_t *)ip6h; 3420 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3421 ASSERT(ip6i->ip6i_ifindex != 0); 3422 ipp.ipp_fields |= IPPF_IFINDEX; 3423 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3424 } 3425 rptr = (uchar_t *)&ip6i[1]; 3426 mp->b_rptr = rptr; 3427 if (rptr == mp->b_wptr) { 3428 mp1 = mp->b_cont; 3429 freeb(mp); 3430 mp = mp1; 3431 rptr = mp->b_rptr; 3432 } 3433 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3434 ip6h = (ip6_t *)rptr; 3435 } 3436 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3437 3438 /* 3439 * We need to lie a bit to the user because users inside 3440 * labeled compartments should not see their own labels. We 3441 * assume that in all other respects IP has checked the label, 3442 * and that the label is always first among the options. (If 3443 * it's not first, then this code won't see it, and the option 3444 * will be passed along to the user.) 3445 * 3446 * If we had multilevel ICMP sockets, then the following code 3447 * should be skipped for them to allow the user to see the 3448 * label. 3449 * 3450 * Alignment restrictions in the definition of IP options 3451 * (namely, the requirement that the 4-octet DOI goes on a 3452 * 4-octet boundary) mean that we know exactly where the option 3453 * should start, but we're lenient for other hosts. 3454 * 3455 * Note that there are no multilevel ICMP or raw IP sockets 3456 * yet, thus nobody ever sees the IP6OPT_LS option. 
3457 */ 3458 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3459 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3460 const uchar_t *ucp = 3461 (const uchar_t *)ipp.ipp_hopopts + 2; 3462 int remlen = ipp.ipp_hopoptslen - 2; 3463 3464 while (remlen > 0) { 3465 if (*ucp == IP6OPT_PAD1) { 3466 remlen--; 3467 ucp++; 3468 } else if (*ucp == IP6OPT_PADN) { 3469 remlen -= ucp[1] + 2; 3470 ucp += ucp[1] + 2; 3471 } else if (*ucp == ip6opt_ls) { 3472 hopstrip = (ucp - 3473 (const uchar_t *)ipp.ipp_hopopts) + 3474 ucp[1] + 2; 3475 hopstrip = (hopstrip + 7) & ~7; 3476 break; 3477 } else { 3478 /* label option must be first */ 3479 break; 3480 } 3481 } 3482 } 3483 } else { 3484 hdr_len = IPV6_HDR_LEN; 3485 ip6i = NULL; 3486 nexthdr = ip6h->ip6_nxt; 3487 } 3488 /* 3489 * One special case where IP attaches the IRE needs to 3490 * be handled so that we don't send up IRE to the user land. 3491 */ 3492 if (nexthdr == IPPROTO_TCP) { 3493 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3494 3495 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3496 mp->b_cont != NULL) { 3497 mp1 = mp->b_cont; 3498 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3499 freeb(mp1); 3500 mp->b_cont = NULL; 3501 } 3502 } 3503 } 3504 /* 3505 * Check a filter for ICMPv6 types if needed. 3506 * Verify raw checksums if needed. 
3507 */ 3508 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3509 if (icmp->icmp_filter != NULL) { 3510 int type; 3511 3512 /* Assumes that IP has done the pullupmsg */ 3513 type = mp->b_rptr[hdr_len]; 3514 3515 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3516 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3517 freemsg(mp); 3518 return; 3519 } 3520 } else { 3521 /* Checksum */ 3522 uint16_t *up; 3523 uint32_t sum; 3524 int remlen; 3525 3526 up = (uint16_t *)&ip6h->ip6_src; 3527 3528 remlen = msgdsize(mp) - hdr_len; 3529 sum = htons(icmp->icmp_proto + remlen) 3530 + up[0] + up[1] + up[2] + up[3] 3531 + up[4] + up[5] + up[6] + up[7] 3532 + up[8] + up[9] + up[10] + up[11] 3533 + up[12] + up[13] + up[14] + up[15]; 3534 sum = (sum & 0xffff) + (sum >> 16); 3535 sum = IP_CSUM(mp, hdr_len, sum); 3536 if (sum != 0) { 3537 /* IPv6 RAW checksum failed */ 3538 ip0dbg(("icmp_rput: RAW checksum " 3539 "failed %x\n", sum)); 3540 freemsg(mp); 3541 BUMP_MIB(&is->is_rawip_mib, 3542 rawipInCksumErrs); 3543 return; 3544 } 3545 } 3546 } 3547 /* Skip all the IPv6 headers per API */ 3548 mp->b_rptr += hdr_len; 3549 3550 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3551 3552 /* 3553 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3554 * maintain state information, instead of relying on icmp_t 3555 * structure, since there arent any locks protecting these members 3556 * and there is a window where there might be a race between a 3557 * thread setting options on the write side and a thread reading 3558 * these options on the read size. 
3559 */ 3560 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3561 IPPF_RTHDR|IPPF_IFINDEX)) { 3562 if (icmp->icmp_ipv6_recvhopopts && 3563 (ipp.ipp_fields & IPPF_HOPOPTS) && 3564 ipp.ipp_hopoptslen > hopstrip) { 3565 udi_size += sizeof (struct T_opthdr) + 3566 ipp.ipp_hopoptslen - hopstrip; 3567 icmp_opt |= IPPF_HOPOPTS; 3568 } 3569 if ((icmp->icmp_ipv6_recvdstopts || 3570 icmp->icmp_old_ipv6_recvdstopts) && 3571 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3572 udi_size += sizeof (struct T_opthdr) + 3573 ipp.ipp_dstoptslen; 3574 icmp_opt |= IPPF_DSTOPTS; 3575 } 3576 if (((icmp->icmp_ipv6_recvdstopts && 3577 icmp->icmp_ipv6_recvrthdr && 3578 (ipp.ipp_fields & IPPF_RTHDR)) || 3579 icmp->icmp_ipv6_recvrtdstopts) && 3580 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3581 udi_size += sizeof (struct T_opthdr) + 3582 ipp.ipp_rtdstoptslen; 3583 icmp_opt |= IPPF_RTDSTOPTS; 3584 } 3585 if (icmp->icmp_ipv6_recvrthdr && 3586 (ipp.ipp_fields & IPPF_RTHDR)) { 3587 udi_size += sizeof (struct T_opthdr) + 3588 ipp.ipp_rthdrlen; 3589 icmp_opt |= IPPF_RTHDR; 3590 } 3591 if (icmp->icmp_ip_recvpktinfo && 3592 (ipp.ipp_fields & IPPF_IFINDEX)) { 3593 udi_size += sizeof (struct T_opthdr) + 3594 sizeof (struct in6_pktinfo); 3595 icmp_opt |= IPPF_IFINDEX; 3596 } 3597 } 3598 if (icmp->icmp_ipv6_recvhoplimit) { 3599 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3600 icmp_ipv6_recvhoplimit = B_TRUE; 3601 } 3602 3603 if (icmp->icmp_ipv6_recvtclass) 3604 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3605 3606 mp1 = allocb(udi_size, BPRI_MED); 3607 if (mp1 == NULL) { 3608 freemsg(mp); 3609 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3610 return; 3611 } 3612 mp1->b_cont = mp; 3613 mp = mp1; 3614 mp->b_datap->db_type = M_PROTO; 3615 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3616 mp->b_wptr = (uchar_t *)tudi + udi_size; 3617 tudi->PRIM_type = T_UNITDATA_IND; 3618 tudi->SRC_length = sizeof (sin6_t); 3619 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3620 tudi->OPT_offset = sizeof 
(struct T_unitdata_ind) + sizeof (sin6_t); 3621 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3622 tudi->OPT_length = udi_size; 3623 sin6 = (sin6_t *)&tudi[1]; 3624 sin6->sin6_port = 0; 3625 sin6->sin6_family = AF_INET6; 3626 3627 sin6->sin6_addr = ip6h->ip6_src; 3628 /* No sin6_flowinfo per API */ 3629 sin6->sin6_flowinfo = 0; 3630 /* For link-scope source pass up scope id */ 3631 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3632 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3633 sin6->sin6_scope_id = ipp.ipp_ifindex; 3634 else 3635 sin6->sin6_scope_id = 0; 3636 3637 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3638 icmp->icmp_zoneid, is->is_netstack); 3639 3640 if (udi_size != 0) { 3641 uchar_t *dstopt; 3642 3643 dstopt = (uchar_t *)&sin6[1]; 3644 if (icmp_opt & IPPF_IFINDEX) { 3645 struct T_opthdr *toh; 3646 struct in6_pktinfo *pkti; 3647 3648 toh = (struct T_opthdr *)dstopt; 3649 toh->level = IPPROTO_IPV6; 3650 toh->name = IPV6_PKTINFO; 3651 toh->len = sizeof (struct T_opthdr) + 3652 sizeof (*pkti); 3653 toh->status = 0; 3654 dstopt += sizeof (struct T_opthdr); 3655 pkti = (struct in6_pktinfo *)dstopt; 3656 pkti->ipi6_addr = ip6h->ip6_dst; 3657 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3658 dstopt += sizeof (*pkti); 3659 udi_size -= toh->len; 3660 } 3661 if (icmp_ipv6_recvhoplimit) { 3662 struct T_opthdr *toh; 3663 3664 toh = (struct T_opthdr *)dstopt; 3665 toh->level = IPPROTO_IPV6; 3666 toh->name = IPV6_HOPLIMIT; 3667 toh->len = sizeof (struct T_opthdr) + 3668 sizeof (uint_t); 3669 toh->status = 0; 3670 dstopt += sizeof (struct T_opthdr); 3671 *(uint_t *)dstopt = ip6h->ip6_hops; 3672 dstopt += sizeof (uint_t); 3673 udi_size -= toh->len; 3674 } 3675 if (icmp->icmp_ipv6_recvtclass) { 3676 struct T_opthdr *toh; 3677 3678 toh = (struct T_opthdr *)dstopt; 3679 toh->level = IPPROTO_IPV6; 3680 toh->name = IPV6_TCLASS; 3681 toh->len = sizeof (struct T_opthdr) + 3682 sizeof (uint_t); 3683 toh->status = 0; 3684 dstopt += sizeof (struct T_opthdr); 3685 
*(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3686 dstopt += sizeof (uint_t); 3687 udi_size -= toh->len; 3688 } 3689 if (icmp_opt & IPPF_HOPOPTS) { 3690 struct T_opthdr *toh; 3691 3692 toh = (struct T_opthdr *)dstopt; 3693 toh->level = IPPROTO_IPV6; 3694 toh->name = IPV6_HOPOPTS; 3695 toh->len = sizeof (struct T_opthdr) + 3696 ipp.ipp_hopoptslen - hopstrip; 3697 toh->status = 0; 3698 dstopt += sizeof (struct T_opthdr); 3699 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 3700 ipp.ipp_hopoptslen - hopstrip); 3701 if (hopstrip > 0) { 3702 /* copy next header value and fake length */ 3703 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 3704 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 3705 hopstrip / 8; 3706 } 3707 dstopt += ipp.ipp_hopoptslen - hopstrip; 3708 udi_size -= toh->len; 3709 } 3710 if (icmp_opt & IPPF_RTDSTOPTS) { 3711 struct T_opthdr *toh; 3712 3713 toh = (struct T_opthdr *)dstopt; 3714 toh->level = IPPROTO_IPV6; 3715 toh->name = IPV6_DSTOPTS; 3716 toh->len = sizeof (struct T_opthdr) + 3717 ipp.ipp_rtdstoptslen; 3718 toh->status = 0; 3719 dstopt += sizeof (struct T_opthdr); 3720 bcopy(ipp.ipp_rtdstopts, dstopt, 3721 ipp.ipp_rtdstoptslen); 3722 dstopt += ipp.ipp_rtdstoptslen; 3723 udi_size -= toh->len; 3724 } 3725 if (icmp_opt & IPPF_RTHDR) { 3726 struct T_opthdr *toh; 3727 3728 toh = (struct T_opthdr *)dstopt; 3729 toh->level = IPPROTO_IPV6; 3730 toh->name = IPV6_RTHDR; 3731 toh->len = sizeof (struct T_opthdr) + 3732 ipp.ipp_rthdrlen; 3733 toh->status = 0; 3734 dstopt += sizeof (struct T_opthdr); 3735 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 3736 dstopt += ipp.ipp_rthdrlen; 3737 udi_size -= toh->len; 3738 } 3739 if (icmp_opt & IPPF_DSTOPTS) { 3740 struct T_opthdr *toh; 3741 3742 toh = (struct T_opthdr *)dstopt; 3743 toh->level = IPPROTO_IPV6; 3744 toh->name = IPV6_DSTOPTS; 3745 toh->len = sizeof (struct T_opthdr) + 3746 ipp.ipp_dstoptslen; 3747 toh->status = 0; 3748 dstopt += sizeof (struct T_opthdr); 3749 bcopy(ipp.ipp_dstopts, dstopt, 
3750 ipp.ipp_dstoptslen); 3751 dstopt += ipp.ipp_dstoptslen; 3752 udi_size -= toh->len; 3753 } 3754 /* Consumed all of allocated space */ 3755 ASSERT(udi_size == 0); 3756 } 3757 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3758 putnext(connp->conn_rq, mp); 3759 } 3760 3761 /* 3762 * Handle the results of a T_BIND_REQ whether deferred by IP or handled 3763 * immediately. 3764 */ 3765 static void 3766 icmp_bind_result(conn_t *connp, mblk_t *mp) 3767 { 3768 struct T_error_ack *tea; 3769 3770 switch (mp->b_datap->db_type) { 3771 case M_PROTO: 3772 case M_PCPROTO: 3773 /* M_PROTO messages contain some type of TPI message. */ 3774 if ((mp->b_wptr - mp->b_rptr) < sizeof (t_scalar_t)) { 3775 freemsg(mp); 3776 return; 3777 } 3778 tea = (struct T_error_ack *)mp->b_rptr; 3779 3780 switch (tea->PRIM_type) { 3781 case T_ERROR_ACK: 3782 switch (tea->ERROR_prim) { 3783 case O_T_BIND_REQ: 3784 case T_BIND_REQ: 3785 icmp_bind_error(connp, mp); 3786 return; 3787 default: 3788 break; 3789 } 3790 ASSERT(0); 3791 freemsg(mp); 3792 return; 3793 3794 case T_BIND_ACK: 3795 icmp_bind_ack(connp, mp); 3796 return; 3797 3798 default: 3799 break; 3800 } 3801 freemsg(mp); 3802 return; 3803 default: 3804 /* FIXME: other cases? */ 3805 ASSERT(0); 3806 freemsg(mp); 3807 return; 3808 } 3809 } 3810 3811 /* 3812 * Process a T_BIND_ACK 3813 */ 3814 static void 3815 icmp_bind_ack(conn_t *connp, mblk_t *mp) 3816 { 3817 icmp_t *icmp = connp->conn_icmp; 3818 mblk_t *mp1; 3819 ire_t *ire; 3820 struct T_bind_ack *tba; 3821 uchar_t *addrp; 3822 ipa_conn_t *ac; 3823 ipa6_conn_t *ac6; 3824 3825 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3826 /* 3827 * We know if headers are included or not so we can 3828 * safely do this. 3829 */ 3830 if (icmp->icmp_state == TS_UNBND) { 3831 /* 3832 * TPI has not yet bound - bind sent by 3833 * icmp_bind_proto. 
3834 */ 3835 freemsg(mp); 3836 rw_exit(&icmp->icmp_rwlock); 3837 return; 3838 } 3839 ASSERT(icmp->icmp_pending_op != -1); 3840 3841 /* 3842 * If a broadcast/multicast address was bound set 3843 * the source address to 0. 3844 * This ensures no datagrams with broadcast address 3845 * as source address are emitted (which would violate 3846 * RFC1122 - Hosts requirements) 3847 * 3848 * Note that when connecting the returned IRE is 3849 * for the destination address and we only perform 3850 * the broadcast check for the source address (it 3851 * is OK to connect to a broadcast/multicast address.) 3852 */ 3853 mp1 = mp->b_cont; 3854 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 3855 ire = (ire_t *)mp1->b_rptr; 3856 3857 /* 3858 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 3859 * local address. 3860 */ 3861 if (ire->ire_type == IRE_BROADCAST && 3862 icmp->icmp_state != TS_DATA_XFER) { 3863 ASSERT(icmp->icmp_pending_op == T_BIND_REQ || 3864 icmp->icmp_pending_op == O_T_BIND_REQ); 3865 /* This was just a local bind to a MC/broadcast addr */ 3866 V6_SET_ZERO(icmp->icmp_v6src); 3867 if (icmp->icmp_family == AF_INET6) 3868 (void) icmp_build_hdrs(icmp); 3869 } else if (V6_OR_V4_INADDR_ANY(icmp->icmp_v6src)) { 3870 /* 3871 * Local address not yet set - pick it from the 3872 * T_bind_ack 3873 */ 3874 tba = (struct T_bind_ack *)mp->b_rptr; 3875 addrp = &mp->b_rptr[tba->ADDR_offset]; 3876 switch (icmp->icmp_family) { 3877 case AF_INET: 3878 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 3879 ac = (ipa_conn_t *)addrp; 3880 } else { 3881 ASSERT(tba->ADDR_length == 3882 sizeof (ipa_conn_x_t)); 3883 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 3884 } 3885 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 3886 &icmp->icmp_v6src); 3887 break; 3888 case AF_INET6: 3889 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 3890 ac6 = (ipa6_conn_t *)addrp; 3891 } else { 3892 ASSERT(tba->ADDR_length == 3893 sizeof (ipa6_conn_x_t)); 3894 ac6 = &((ipa6_conn_x_t *) 3895 addrp)->ac6x_conn; 
3896 } 3897 icmp->icmp_v6src = ac6->ac6_laddr; 3898 (void) icmp_build_hdrs(icmp); 3899 } 3900 } 3901 mp1 = mp1->b_cont; 3902 } 3903 icmp->icmp_pending_op = -1; 3904 rw_exit(&icmp->icmp_rwlock); 3905 /* 3906 * Look for one or more appended ACK message added by 3907 * icmp_connect or icmp_disconnect. 3908 * If none found just send up the T_BIND_ACK. 3909 * icmp_connect has appended a T_OK_ACK and a 3910 * T_CONN_CON. 3911 * icmp_disconnect has appended a T_OK_ACK. 3912 */ 3913 if (mp1 != NULL) { 3914 if (mp->b_cont == mp1) 3915 mp->b_cont = NULL; 3916 else { 3917 ASSERT(mp->b_cont->b_cont == mp1); 3918 mp->b_cont->b_cont = NULL; 3919 } 3920 freemsg(mp); 3921 mp = mp1; 3922 while (mp != NULL) { 3923 mp1 = mp->b_cont; 3924 mp->b_cont = NULL; 3925 putnext(connp->conn_rq, mp); 3926 mp = mp1; 3927 } 3928 return; 3929 } 3930 freemsg(mp->b_cont); 3931 mp->b_cont = NULL; 3932 putnext(connp->conn_rq, mp); 3933 } 3934 3935 static void 3936 icmp_bind_error(conn_t *connp, mblk_t *mp) 3937 { 3938 icmp_t *icmp = connp->conn_icmp; 3939 struct T_error_ack *tea; 3940 3941 tea = (struct T_error_ack *)mp->b_rptr; 3942 /* 3943 * If our O_T_BIND_REQ/T_BIND_REQ fails, 3944 * clear out the source address before 3945 * passing the message upstream. 3946 * If this was caused by a T_CONN_REQ 3947 * revert back to bound state. 3948 */ 3949 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3950 if (icmp->icmp_state == TS_UNBND) { 3951 /* 3952 * TPI has not yet bound - bind sent by icmp_bind_proto. 
3953 */ 3954 freemsg(mp); 3955 rw_exit(&icmp->icmp_rwlock); 3956 return; 3957 } 3958 ASSERT(icmp->icmp_pending_op != -1); 3959 tea->ERROR_prim = icmp->icmp_pending_op; 3960 icmp->icmp_pending_op = -1; 3961 3962 switch (tea->ERROR_prim) { 3963 case T_CONN_REQ: 3964 ASSERT(icmp->icmp_state == TS_DATA_XFER); 3965 /* Connect failed */ 3966 /* Revert back to the bound source */ 3967 icmp->icmp_v6src = icmp->icmp_bound_v6src; 3968 icmp->icmp_state = TS_IDLE; 3969 if (icmp->icmp_family == AF_INET6) 3970 (void) icmp_build_hdrs(icmp); 3971 break; 3972 3973 case T_DISCON_REQ: 3974 case T_BIND_REQ: 3975 case O_T_BIND_REQ: 3976 V6_SET_ZERO(icmp->icmp_v6src); 3977 V6_SET_ZERO(icmp->icmp_bound_v6src); 3978 icmp->icmp_state = TS_UNBND; 3979 if (icmp->icmp_family == AF_INET6) 3980 (void) icmp_build_hdrs(icmp); 3981 break; 3982 default: 3983 break; 3984 } 3985 rw_exit(&icmp->icmp_rwlock); 3986 putnext(connp->conn_rq, mp); 3987 } 3988 3989 /* 3990 * return SNMP stuff in buffer in mpdata 3991 */ 3992 mblk_t * 3993 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 3994 { 3995 mblk_t *mpdata; 3996 struct opthdr *optp; 3997 conn_t *connp = Q_TO_CONN(q); 3998 icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 3999 mblk_t *mp2ctl; 4000 4001 /* 4002 * make a copy of the original message 4003 */ 4004 mp2ctl = copymsg(mpctl); 4005 4006 if (mpctl == NULL || 4007 (mpdata = mpctl->b_cont) == NULL) { 4008 freemsg(mpctl); 4009 freemsg(mp2ctl); 4010 return (0); 4011 } 4012 4013 /* fixed length structure for IPv4 and IPv6 counters */ 4014 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4015 optp->level = EXPER_RAWIP; 4016 optp->name = 0; 4017 (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, 4018 sizeof (is->is_rawip_mib)); 4019 optp->len = msgdsize(mpdata); 4020 qreply(q, mpctl); 4021 4022 return (mp2ctl); 4023 } 4024 4025 /* 4026 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 
4027 * TODO: If this ever actually tries to set anything, it needs to be 4028 * to do the appropriate locking. 4029 */ 4030 /* ARGSUSED */ 4031 int 4032 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4033 uchar_t *ptr, int len) 4034 { 4035 switch (level) { 4036 case EXPER_RAWIP: 4037 return (0); 4038 default: 4039 return (1); 4040 } 4041 } 4042 4043 /* Report for ndd "icmp_status" */ 4044 /* ARGSUSED */ 4045 static int 4046 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4047 { 4048 conn_t *connp; 4049 ip_stack_t *ipst; 4050 char laddrbuf[INET6_ADDRSTRLEN]; 4051 char faddrbuf[INET6_ADDRSTRLEN]; 4052 int i; 4053 4054 (void) mi_mpprintf(mp, 4055 "RAWIP " MI_COL_HDRPAD_STR 4056 /* 01234567[89ABCDEF] */ 4057 " src addr dest addr state"); 4058 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 4059 4060 connp = Q_TO_CONN(q); 4061 ipst = connp->conn_netstack->netstack_ip; 4062 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4063 connf_t *connfp; 4064 char *state; 4065 4066 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4067 connp = NULL; 4068 4069 while ((connp = ipcl_get_next_conn(connfp, connp, 4070 IPCL_RAWIPCONN)) != NULL) { 4071 icmp_t *icmp; 4072 4073 mutex_enter(&(connp)->conn_lock); 4074 icmp = connp->conn_icmp; 4075 4076 if (icmp->icmp_state == TS_UNBND) 4077 state = "UNBOUND"; 4078 else if (icmp->icmp_state == TS_IDLE) 4079 state = "IDLE"; 4080 else if (icmp->icmp_state == TS_DATA_XFER) 4081 state = "CONNECTED"; 4082 else 4083 state = "UnkState"; 4084 4085 (void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s", 4086 (void *)icmp, 4087 inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf, 4088 sizeof (faddrbuf)), 4089 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 4090 sizeof (laddrbuf)), 4091 state); 4092 mutex_exit(&(connp)->conn_lock); 4093 } 4094 } 4095 return (0); 4096 } 4097 4098 /* 4099 * This routine creates a T_UDERROR_IND message and passes it upstream. 
4100 * The address and options are copied from the T_UNITDATA_REQ message 4101 * passed in mp. This message is freed. 4102 */ 4103 static void 4104 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 4105 { 4106 mblk_t *mp1; 4107 uchar_t *rptr = mp->b_rptr; 4108 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 4109 4110 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 4111 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 4112 tudr->OPT_length, err); 4113 if (mp1) 4114 qreply(q, mp1); 4115 freemsg(mp); 4116 } 4117 4118 /* 4119 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 4120 * After some error checking, the message is passed downstream to ip. 4121 */ 4122 static void 4123 icmp_unbind(queue_t *q, mblk_t *mp) 4124 { 4125 icmp_t *icmp = Q_TO_ICMP(q); 4126 4127 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4128 /* If a bind has not been done, we can't unbind. */ 4129 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 4130 rw_exit(&icmp->icmp_rwlock); 4131 icmp_err_ack(q, mp, TOUTSTATE, 0); 4132 return; 4133 } 4134 icmp->icmp_pending_op = T_UNBIND_REQ; 4135 rw_exit(&icmp->icmp_rwlock); 4136 4137 /* 4138 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 4139 * and therefore ip_unbind must never return NULL. 4140 */ 4141 mp = ip_unbind(q, mp); 4142 ASSERT(mp != NULL); 4143 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4144 4145 /* 4146 * Once we're unbound from IP, the pending operation may be cleared 4147 * here. 4148 */ 4149 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4150 V6_SET_ZERO(icmp->icmp_v6src); 4151 V6_SET_ZERO(icmp->icmp_bound_v6src); 4152 icmp->icmp_pending_op = -1; 4153 icmp->icmp_state = TS_UNBND; 4154 if (icmp->icmp_family == AF_INET6) 4155 (void) icmp_build_hdrs(icmp); 4156 rw_exit(&icmp->icmp_rwlock); 4157 4158 qreply(q, mp); 4159 } 4160 4161 /* 4162 * Process IPv4 packets that already include an IP header. 
4163 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 4164 * IPPROTO_IGMP). 4165 */ 4166 static void 4167 icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop) 4168 { 4169 icmp_stack_t *is = icmp->icmp_is; 4170 ipha_t *ipha; 4171 int ip_hdr_length; 4172 int tp_hdr_len; 4173 mblk_t *mp1; 4174 uint_t pkt_len; 4175 ip_opt_info_t optinfo; 4176 conn_t *connp = icmp->icmp_connp; 4177 4178 optinfo.ip_opt_flags = 0; 4179 optinfo.ip_opt_ill_index = 0; 4180 ipha = (ipha_t *)mp->b_rptr; 4181 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4182 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 4183 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4184 ASSERT(icmp != NULL); 4185 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4186 freemsg(mp); 4187 return; 4188 } 4189 ipha = (ipha_t *)mp->b_rptr; 4190 } 4191 ipha->ipha_version_and_hdr_length = 4192 (IP_VERSION<<4) | (ip_hdr_length>>2); 4193 4194 /* 4195 * For the socket of SOCK_RAW type, the checksum is provided in the 4196 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4197 * tell IP that the application has sent a complete IP header and not 4198 * to compute the transport checksum nor change the DF flag. 4199 */ 4200 ipha->ipha_ident = IP_HDR_INCLUDED; 4201 ipha->ipha_hdr_checksum = 0; 4202 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 4203 /* Insert options if any */ 4204 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4205 /* 4206 * Put the IP header plus any transport header that is 4207 * checksumed by ip_wput into the first mblk. (ip_wput assumes 4208 * that at least the checksum field is in the first mblk.) 4209 */ 4210 switch (ipha->ipha_protocol) { 4211 case IPPROTO_UDP: 4212 tp_hdr_len = 8; 4213 break; 4214 case IPPROTO_TCP: 4215 tp_hdr_len = 20; 4216 break; 4217 default: 4218 tp_hdr_len = 0; 4219 break; 4220 } 4221 /* 4222 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 4223 * tp_hdr_len bytes will be in a single mblk. 
4224 */ 4225 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 4226 tp_hdr_len)) { 4227 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 4228 tp_hdr_len)) { 4229 BUMP_MIB(&is->is_rawip_mib, 4230 rawipOutErrors); 4231 freemsg(mp); 4232 return; 4233 } 4234 ipha = (ipha_t *)mp->b_rptr; 4235 } 4236 4237 /* 4238 * if the length is larger then the max allowed IP packet, 4239 * then send an error and abort the processing. 4240 */ 4241 pkt_len = ntohs(ipha->ipha_length) 4242 + icmp->icmp_ip_snd_options_len; 4243 if (pkt_len > IP_MAXPACKET) { 4244 icmp_ud_err(q, mp, EMSGSIZE); 4245 return; 4246 } 4247 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra + 4248 tp_hdr_len, BPRI_LO))) { 4249 icmp_ud_err(q, mp, ENOMEM); 4250 return; 4251 } 4252 mp1->b_rptr += is->is_wroff_extra; 4253 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4254 4255 ipha->ipha_length = htons((uint16_t)pkt_len); 4256 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4257 4258 /* Copy transport header if any */ 4259 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4260 mp1->b_wptr += tp_hdr_len; 4261 4262 /* Add options */ 4263 ipha = (ipha_t *)mp1->b_rptr; 4264 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 4265 icmp->icmp_ip_snd_options_len); 4266 4267 /* Drop IP header and transport header from original */ 4268 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4269 4270 mp1->b_cont = mp; 4271 mp = mp1; 4272 /* 4273 * Massage source route putting first source 4274 * route in ipha_dst. 
4275 */ 4276 (void) ip_massage_options(ipha, is->is_netstack); 4277 } 4278 4279 if (pktinfop != NULL) { 4280 /* 4281 * Over write the source address provided in the header 4282 */ 4283 if (pktinfop->ip4_addr != INADDR_ANY) { 4284 ipha->ipha_src = pktinfop->ip4_addr; 4285 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4286 } 4287 4288 if (pktinfop->ip4_ill_index != 0) { 4289 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4290 } 4291 } 4292 4293 mblk_setcred(mp, connp->conn_cred); 4294 ip_output_options(connp, mp, q, IP_WPUT, 4295 &optinfo); 4296 } 4297 4298 static boolean_t 4299 icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4300 { 4301 int err; 4302 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4303 icmp_stack_t *is = icmp->icmp_is; 4304 conn_t *connp = icmp->icmp_connp; 4305 4306 err = tsol_compute_label(DB_CREDDEF(mp, connp->conn_cred), dst, 4307 opt_storage, icmp->icmp_mac_exempt, 4308 is->is_netstack->netstack_ip); 4309 if (err == 0) { 4310 err = tsol_update_options(&icmp->icmp_ip_snd_options, 4311 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4312 opt_storage); 4313 } 4314 if (err != 0) { 4315 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4316 DTRACE_PROBE4( 4317 tx__ip__log__drop__updatelabel__icmp, 4318 char *, "queue(1) failed to update options(2) on mp(3)", 4319 queue_t *, q, char *, opt_storage, mblk_t *, mp); 4320 icmp_ud_err(q, mp, err); 4321 return (B_FALSE); 4322 } 4323 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4324 return (B_TRUE); 4325 } 4326 4327 /* 4328 * This routine handles all messages passed downstream. It either 4329 * consumes the message or passes it downstream; it never queues a 4330 * a message. 
4331 */ 4332 static void 4333 icmp_wput(queue_t *q, mblk_t *mp) 4334 { 4335 uchar_t *rptr = mp->b_rptr; 4336 ipha_t *ipha; 4337 mblk_t *mp1; 4338 int ip_hdr_length; 4339 #define tudr ((struct T_unitdata_req *)rptr) 4340 size_t ip_len; 4341 conn_t *connp = Q_TO_CONN(q); 4342 icmp_t *icmp = connp->conn_icmp; 4343 icmp_stack_t *is = icmp->icmp_is; 4344 sin6_t *sin6; 4345 sin_t *sin; 4346 ipaddr_t v4dst; 4347 ip4_pkt_t pktinfo; 4348 ip4_pkt_t *pktinfop = &pktinfo; 4349 ip_opt_info_t optinfo; 4350 4351 switch (mp->b_datap->db_type) { 4352 case M_DATA: 4353 if (icmp->icmp_hdrincl) { 4354 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4355 ipha = (ipha_t *)mp->b_rptr; 4356 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4357 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4358 BUMP_MIB(&is->is_rawip_mib, 4359 rawipOutErrors); 4360 freemsg(mp); 4361 return; 4362 } 4363 ipha = (ipha_t *)mp->b_rptr; 4364 } 4365 /* 4366 * If this connection was used for v6 (inconceivable!) 4367 * or if we have a new destination, then it's time to 4368 * figure a new label. 4369 */ 4370 if (is_system_labeled() && 4371 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4372 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4373 ipha->ipha_dst) && 4374 !icmp_update_label(q, icmp, mp, ipha->ipha_dst)) { 4375 return; 4376 } 4377 icmp_wput_hdrincl(q, mp, icmp, NULL); 4378 return; 4379 } 4380 freemsg(mp); 4381 return; 4382 case M_PROTO: 4383 case M_PCPROTO: 4384 ip_len = mp->b_wptr - rptr; 4385 if (ip_len >= sizeof (struct T_unitdata_req)) { 4386 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4387 if (((union T_primitives *)rptr)->type 4388 == T_UNITDATA_REQ) 4389 break; 4390 } 4391 /* FALLTHRU */ 4392 default: 4393 icmp_wput_other(q, mp); 4394 return; 4395 } 4396 4397 /* Handle T_UNITDATA_REQ messages here. */ 4398 4399 4400 4401 if (icmp->icmp_state == TS_UNBND) { 4402 /* If a port has not been bound to the stream, fail. 
*/ 4403 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4404 icmp_ud_err(q, mp, EPROTO); 4405 return; 4406 } 4407 mp1 = mp->b_cont; 4408 if (mp1 == NULL) { 4409 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4410 icmp_ud_err(q, mp, EPROTO); 4411 return; 4412 } 4413 4414 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4415 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4416 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4417 return; 4418 } 4419 4420 switch (icmp->icmp_family) { 4421 case AF_INET6: 4422 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4423 if (!OK_32PTR((char *)sin6) || 4424 tudr->DEST_length != sizeof (sin6_t) || 4425 sin6->sin6_family != AF_INET6) { 4426 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4427 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4428 return; 4429 } 4430 4431 /* No support for mapped addresses on raw sockets */ 4432 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4433 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4434 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4435 return; 4436 } 4437 4438 /* 4439 * Destination is a native IPv6 address. 4440 * Send out an IPv6 format packet. 
4441 */ 4442 icmp_wput_ipv6(q, mp, sin6, tudr->OPT_length); 4443 return; 4444 4445 case AF_INET: 4446 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4447 if (!OK_32PTR((char *)sin) || 4448 tudr->DEST_length != sizeof (sin_t) || 4449 sin->sin_family != AF_INET) { 4450 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4451 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4452 return; 4453 } 4454 /* Extract and ipaddr */ 4455 v4dst = sin->sin_addr.s_addr; 4456 break; 4457 4458 default: 4459 ASSERT(0); 4460 } 4461 4462 pktinfop->ip4_ill_index = 0; 4463 pktinfop->ip4_addr = INADDR_ANY; 4464 optinfo.ip_opt_flags = 0; 4465 optinfo.ip_opt_ill_index = 0; 4466 4467 4468 /* 4469 * If options passed in, feed it for verification and handling 4470 */ 4471 if (tudr->OPT_length != 0) { 4472 int error; 4473 4474 error = 0; 4475 if (icmp_unitdata_opt_process(q, mp, &error, 4476 (void *)pktinfop) < 0) { 4477 /* failure */ 4478 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4479 icmp_ud_err(q, mp, error); 4480 return; 4481 } 4482 ASSERT(error == 0); 4483 /* 4484 * Note: Success in processing options. 
4485 * mp option buffer represented by 4486 * OPT_length/offset now potentially modified 4487 * and contain option setting results 4488 */ 4489 4490 } 4491 4492 if (v4dst == INADDR_ANY) 4493 v4dst = htonl(INADDR_LOOPBACK); 4494 4495 /* Check if our saved options are valid; update if not */ 4496 if (is_system_labeled() && 4497 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4498 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst) && 4499 !icmp_update_label(q, icmp, mp, v4dst)) { 4500 return; 4501 } 4502 4503 /* Protocol 255 contains full IP headers */ 4504 if (icmp->icmp_hdrincl) { 4505 freeb(mp); 4506 icmp_wput_hdrincl(q, mp1, icmp, pktinfop); 4507 return; 4508 } 4509 4510 4511 /* Add an IP header */ 4512 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4513 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 4514 if ((uchar_t *)ipha < mp1->b_datap->db_base || 4515 mp1->b_datap->db_ref != 1 || 4516 !OK_32PTR(ipha)) { 4517 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, 4518 BPRI_LO))) { 4519 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4520 icmp_ud_err(q, mp, ENOMEM); 4521 return; 4522 } 4523 mp1->b_cont = mp->b_cont; 4524 ipha = (ipha_t *)mp1->b_datap->db_lim; 4525 mp1->b_wptr = (uchar_t *)ipha; 4526 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4527 } 4528 #ifdef _BIG_ENDIAN 4529 /* Set version, header length, and tos */ 4530 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4531 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4532 icmp->icmp_type_of_service); 4533 /* Set ttl and protocol */ 4534 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4535 #else 4536 /* Set version, header length, and tos */ 4537 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4538 ((icmp->icmp_type_of_service << 8) | 4539 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4540 /* Set ttl and protocol */ 4541 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4542 #endif 4543 if (pktinfop->ip4_addr != INADDR_ANY) { 4544 
ipha->ipha_src = pktinfop->ip4_addr; 4545 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4546 } else { 4547 4548 /* 4549 * Copy our address into the packet. If this is zero, 4550 * ip will fill in the real source address. 4551 */ 4552 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4553 } 4554 4555 ipha->ipha_fragment_offset_and_flags = 0; 4556 4557 if (pktinfop->ip4_ill_index != 0) { 4558 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4559 } 4560 4561 4562 /* 4563 * For the socket of SOCK_RAW type, the checksum is provided in the 4564 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4565 * tell IP that the application has sent a complete IP header and not 4566 * to compute the transport checksum nor change the DF flag. 4567 */ 4568 ipha->ipha_ident = IP_HDR_INCLUDED; 4569 4570 /* Finish common formatting of the packet. */ 4571 mp1->b_rptr = (uchar_t *)ipha; 4572 4573 ip_len = mp1->b_wptr - (uchar_t *)ipha; 4574 if (mp1->b_cont != NULL) 4575 ip_len += msgdsize(mp1->b_cont); 4576 4577 /* 4578 * Set the length into the IP header. 4579 * If the length is greater than the maximum allowed by IP, 4580 * then free the message and return. Do not try and send it 4581 * as this can cause problems in layers below. 4582 */ 4583 if (ip_len > IP_MAXPACKET) { 4584 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4585 icmp_ud_err(q, mp, EMSGSIZE); 4586 return; 4587 } 4588 ipha->ipha_length = htons((uint16_t)ip_len); 4589 /* 4590 * Copy in the destination address from the T_UNITDATA 4591 * request 4592 */ 4593 ipha->ipha_dst = v4dst; 4594 4595 /* 4596 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4597 */ 4598 if (CLASSD(v4dst)) 4599 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4600 4601 /* Copy in options if any */ 4602 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4603 bcopy(icmp->icmp_ip_snd_options, 4604 &ipha[1], icmp->icmp_ip_snd_options_len); 4605 /* 4606 * Massage source route putting first source route in ipha_dst. 
4607 * Ignore the destination in the T_unitdata_req. 4608 */ 4609 (void) ip_massage_options(ipha, is->is_netstack); 4610 } 4611 4612 freeb(mp); 4613 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 4614 mblk_setcred(mp1, connp->conn_cred); 4615 ip_output_options(Q_TO_CONN(q), mp1, q, IP_WPUT, &optinfo); 4616 #undef ipha 4617 #undef tudr 4618 } 4619 4620 static boolean_t 4621 icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4622 { 4623 int err; 4624 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4625 icmp_stack_t *is = icmp->icmp_is; 4626 conn_t *connp = icmp->icmp_connp; 4627 4628 err = tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), dst, 4629 opt_storage, icmp->icmp_mac_exempt, 4630 is->is_netstack->netstack_ip); 4631 if (err == 0) { 4632 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4633 &icmp->icmp_label_len_v6, opt_storage); 4634 } 4635 if (err != 0) { 4636 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4637 DTRACE_PROBE4( 4638 tx__ip__log__drop__updatelabel__icmp6, 4639 char *, "queue(1) failed to update options(2) on mp(3)", 4640 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4641 icmp_ud_err(wq, mp, err); 4642 return (B_FALSE); 4643 } 4644 4645 icmp->icmp_v6lastdst = *dst; 4646 return (B_TRUE); 4647 } 4648 4649 /* 4650 * icmp_wput_ipv6(): 4651 * Assumes that icmp_wput did some sanity checking on the destination 4652 * address, but that the label may not yet be correct. 
4653 */ 4654 void 4655 icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) 4656 { 4657 ip6_t *ip6h; 4658 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 4659 mblk_t *mp1; 4660 int ip_hdr_len = IPV6_HDR_LEN; 4661 size_t ip_len; 4662 icmp_t *icmp = Q_TO_ICMP(q); 4663 icmp_stack_t *is = icmp->icmp_is; 4664 ip6_pkt_t ipp_s; /* For ancillary data options */ 4665 ip6_pkt_t *ipp = &ipp_s; 4666 ip6_pkt_t *tipp; 4667 uint32_t csum = 0; 4668 uint_t ignore = 0; 4669 uint_t option_exists = 0, is_sticky = 0; 4670 uint8_t *cp; 4671 uint8_t *nxthdr_ptr; 4672 in6_addr_t ip6_dst; 4673 4674 /* 4675 * If the local address is a mapped address return 4676 * an error. 4677 * It would be possible to send an IPv6 packet but the 4678 * response would never make it back to the application 4679 * since it is bound to a mapped address. 4680 */ 4681 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4682 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4683 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4684 return; 4685 } 4686 4687 ipp->ipp_fields = 0; 4688 ipp->ipp_sticky_ignored = 0; 4689 4690 /* 4691 * If TPI options passed in, feed it for verification and handling 4692 */ 4693 if (tudr_optlen != 0) { 4694 int error; 4695 4696 if (icmp_unitdata_opt_process(q, mp, &error, 4697 (void *)ipp) < 0) { 4698 /* failure */ 4699 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4700 icmp_ud_err(q, mp, error); 4701 return; 4702 } 4703 ignore = ipp->ipp_sticky_ignored; 4704 ASSERT(error == 0); 4705 } 4706 4707 if (sin6->sin6_scope_id != 0 && 4708 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4709 /* 4710 * IPPF_SCOPE_ID is special. It's neither a sticky 4711 * option nor ancillary data. It needs to be 4712 * explicitly set in options_exists. 
4713 */ 4714 option_exists |= IPPF_SCOPE_ID; 4715 } 4716 4717 /* 4718 * Compute the destination address 4719 */ 4720 ip6_dst = sin6->sin6_addr; 4721 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4722 ip6_dst = ipv6_loopback; 4723 4724 /* 4725 * If we're not going to the same destination as last time, then 4726 * recompute the label required. This is done in a separate routine to 4727 * avoid blowing up our stack here. 4728 */ 4729 if (is_system_labeled() && 4730 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst) && 4731 !icmp_update_label_v6(q, icmp, mp, &ip6_dst)) { 4732 return; 4733 } 4734 4735 /* 4736 * If there's a security label here, then we ignore any options the 4737 * user may try to set. We keep the peer's label as a hidden sticky 4738 * option. 4739 */ 4740 if (icmp->icmp_label_len_v6 > 0) { 4741 ignore &= ~IPPF_HOPOPTS; 4742 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4743 } 4744 4745 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4746 (ipp->ipp_fields == 0)) { 4747 /* No sticky options nor ancillary data. */ 4748 goto no_options; 4749 } 4750 4751 /* 4752 * Go through the options figuring out where each is going to 4753 * come from and build two masks. The first mask indicates if 4754 * the option exists at all. The second mask indicates if the 4755 * option is sticky or ancillary. 
4756 */ 4757 if (!(ignore & IPPF_HOPOPTS)) { 4758 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4759 option_exists |= IPPF_HOPOPTS; 4760 ip_hdr_len += ipp->ipp_hopoptslen; 4761 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4762 option_exists |= IPPF_HOPOPTS; 4763 is_sticky |= IPPF_HOPOPTS; 4764 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4765 } 4766 } 4767 4768 if (!(ignore & IPPF_RTHDR)) { 4769 if (ipp->ipp_fields & IPPF_RTHDR) { 4770 option_exists |= IPPF_RTHDR; 4771 ip_hdr_len += ipp->ipp_rthdrlen; 4772 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4773 option_exists |= IPPF_RTHDR; 4774 is_sticky |= IPPF_RTHDR; 4775 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4776 } 4777 } 4778 4779 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4780 /* 4781 * Need to have a router header to use these. 4782 */ 4783 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4784 option_exists |= IPPF_RTDSTOPTS; 4785 ip_hdr_len += ipp->ipp_rtdstoptslen; 4786 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4787 option_exists |= IPPF_RTDSTOPTS; 4788 is_sticky |= IPPF_RTDSTOPTS; 4789 ip_hdr_len += 4790 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4791 } 4792 } 4793 4794 if (!(ignore & IPPF_DSTOPTS)) { 4795 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4796 option_exists |= IPPF_DSTOPTS; 4797 ip_hdr_len += ipp->ipp_dstoptslen; 4798 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4799 option_exists |= IPPF_DSTOPTS; 4800 is_sticky |= IPPF_DSTOPTS; 4801 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4802 } 4803 } 4804 4805 if (!(ignore & IPPF_IFINDEX)) { 4806 if (ipp->ipp_fields & IPPF_IFINDEX) { 4807 option_exists |= IPPF_IFINDEX; 4808 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4809 option_exists |= IPPF_IFINDEX; 4810 is_sticky |= IPPF_IFINDEX; 4811 } 4812 } 4813 4814 if (!(ignore & IPPF_ADDR)) { 4815 if (ipp->ipp_fields & IPPF_ADDR) { 4816 option_exists |= IPPF_ADDR; 4817 } else if 
(icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4818 option_exists |= IPPF_ADDR; 4819 is_sticky |= IPPF_ADDR; 4820 } 4821 } 4822 4823 if (!(ignore & IPPF_DONTFRAG)) { 4824 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4825 option_exists |= IPPF_DONTFRAG; 4826 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4827 option_exists |= IPPF_DONTFRAG; 4828 is_sticky |= IPPF_DONTFRAG; 4829 } 4830 } 4831 4832 if (!(ignore & IPPF_USE_MIN_MTU)) { 4833 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4834 option_exists |= IPPF_USE_MIN_MTU; 4835 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4836 IPPF_USE_MIN_MTU) { 4837 option_exists |= IPPF_USE_MIN_MTU; 4838 is_sticky |= IPPF_USE_MIN_MTU; 4839 } 4840 } 4841 4842 if (!(ignore & IPPF_NEXTHOP)) { 4843 if (ipp->ipp_fields & IPPF_NEXTHOP) { 4844 option_exists |= IPPF_NEXTHOP; 4845 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) { 4846 option_exists |= IPPF_NEXTHOP; 4847 is_sticky |= IPPF_NEXTHOP; 4848 } 4849 } 4850 4851 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 4852 option_exists |= IPPF_HOPLIMIT; 4853 /* IPV6_HOPLIMIT can never be sticky */ 4854 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 4855 4856 if (!(ignore & IPPF_UNICAST_HOPS) && 4857 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 4858 option_exists |= IPPF_UNICAST_HOPS; 4859 is_sticky |= IPPF_UNICAST_HOPS; 4860 } 4861 4862 if (!(ignore & IPPF_MULTICAST_HOPS) && 4863 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 4864 option_exists |= IPPF_MULTICAST_HOPS; 4865 is_sticky |= IPPF_MULTICAST_HOPS; 4866 } 4867 4868 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 4869 /* This is a sticky socket option only */ 4870 option_exists |= IPPF_NO_CKSUM; 4871 is_sticky |= IPPF_NO_CKSUM; 4872 } 4873 4874 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 4875 /* This is a sticky socket option only */ 4876 option_exists |= IPPF_RAW_CKSUM; 4877 is_sticky |= IPPF_RAW_CKSUM; 4878 } 4879 4880 if (!(ignore 
& IPPF_TCLASS)) { 4881 if (ipp->ipp_fields & IPPF_TCLASS) { 4882 option_exists |= IPPF_TCLASS; 4883 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 4884 option_exists |= IPPF_TCLASS; 4885 is_sticky |= IPPF_TCLASS; 4886 } 4887 } 4888 4889 no_options: 4890 4891 /* 4892 * If any options carried in the ip6i_t were specified, we 4893 * need to account for the ip6i_t in the data we'll be sending 4894 * down. 4895 */ 4896 if (option_exists & IPPF_HAS_IP6I) 4897 ip_hdr_len += sizeof (ip6i_t); 4898 4899 /* check/fix buffer config, setup pointers into it */ 4900 mp1 = mp->b_cont; 4901 ip6h = (ip6_t *)&mp1->b_rptr[-ip_hdr_len]; 4902 if ((mp1->b_datap->db_ref != 1) || 4903 ((unsigned char *)ip6h < mp1->b_datap->db_base) || 4904 !OK_32PTR(ip6h)) { 4905 /* Try to get everything in a single mblk next time */ 4906 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 4907 icmp->icmp_max_hdr_len = ip_hdr_len; 4908 (void) mi_set_sth_wroff(RD(q), 4909 icmp->icmp_max_hdr_len + is->is_wroff_extra); 4910 } 4911 mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); 4912 if (!mp1) { 4913 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4914 icmp_ud_err(q, mp, ENOMEM); 4915 return; 4916 } 4917 mp1->b_cont = mp->b_cont; 4918 mp1->b_wptr = mp1->b_datap->db_lim; 4919 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 4920 } 4921 mp1->b_rptr = (unsigned char *)ip6h; 4922 ip6i = (ip6i_t *)ip6h; 4923 4924 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 4925 if (option_exists & IPPF_HAS_IP6I) { 4926 ip6h = (ip6_t *)&ip6i[1]; 4927 ip6i->ip6i_flags = 0; 4928 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4929 4930 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 4931 if (option_exists & IPPF_SCOPE_ID) { 4932 ip6i->ip6i_flags |= IP6I_IFINDEX; 4933 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 4934 } else if (option_exists & IPPF_IFINDEX) { 4935 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 4936 ASSERT(tipp->ipp_ifindex != 0); 4937 ip6i->ip6i_flags |= IP6I_IFINDEX; 4938 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 4939 } 4940 4941 if (option_exists & IPPF_RAW_CKSUM) { 4942 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 4943 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 4944 } 4945 4946 if (option_exists & IPPF_NO_CKSUM) { 4947 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 4948 } 4949 4950 if (option_exists & IPPF_ADDR) { 4951 /* 4952 * Enable per-packet source address verification if 4953 * IPV6_PKTINFO specified the source address. 4954 * ip6_src is set in the transport's _wput function. 4955 */ 4956 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 4957 } 4958 4959 if (option_exists & IPPF_DONTFRAG) { 4960 ip6i->ip6i_flags |= IP6I_DONTFRAG; 4961 } 4962 4963 if (option_exists & IPPF_USE_MIN_MTU) { 4964 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 4965 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 4966 } 4967 4968 if (option_exists & IPPF_NEXTHOP) { 4969 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 4970 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 4971 ip6i->ip6i_flags |= IP6I_NEXTHOP; 4972 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 4973 } 4974 4975 /* 4976 * tell IP this is an ip6i_t private header 4977 */ 4978 ip6i->ip6i_nxt = IPPROTO_RAW; 4979 } 4980 4981 /* Initialize IPv6 header */ 4982 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4983 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 4984 4985 /* Set the hoplimit of the outgoing packet. 
*/ 4986 if (option_exists & IPPF_HOPLIMIT) { 4987 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 4988 ip6h->ip6_hops = ipp->ipp_hoplimit; 4989 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4990 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 4991 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 4992 if (option_exists & IPPF_MULTICAST_HOPS) 4993 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4994 } else { 4995 ip6h->ip6_hops = icmp->icmp_ttl; 4996 if (option_exists & IPPF_UNICAST_HOPS) 4997 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4998 } 4999 5000 if (option_exists & IPPF_ADDR) { 5001 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 5002 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 5003 ip6h->ip6_src = tipp->ipp_addr; 5004 } else { 5005 /* 5006 * The source address was not set using IPV6_PKTINFO. 5007 * First look at the bound source. 5008 * If unspecified fallback to __sin6_src_id. 5009 */ 5010 ip6h->ip6_src = icmp->icmp_v6src; 5011 if (sin6->__sin6_src_id != 0 && 5012 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 5013 ip_srcid_find_id(sin6->__sin6_src_id, 5014 &ip6h->ip6_src, icmp->icmp_zoneid, 5015 is->is_netstack); 5016 } 5017 } 5018 5019 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 5020 cp = (uint8_t *)&ip6h[1]; 5021 5022 /* 5023 * Here's where we have to start stringing together 5024 * any extension headers in the right order: 5025 * Hop-by-hop, destination, routing, and final destination opts. 
5026 */ 5027 if (option_exists & IPPF_HOPOPTS) { 5028 /* Hop-by-hop options */ 5029 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 5030 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 5031 5032 *nxthdr_ptr = IPPROTO_HOPOPTS; 5033 nxthdr_ptr = &hbh->ip6h_nxt; 5034 5035 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 5036 cp += tipp->ipp_hopoptslen; 5037 } 5038 /* 5039 * En-route destination options 5040 * Only do them if there's a routing header as well 5041 */ 5042 if (option_exists & IPPF_RTDSTOPTS) { 5043 ip6_dest_t *dst = (ip6_dest_t *)cp; 5044 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 5045 5046 *nxthdr_ptr = IPPROTO_DSTOPTS; 5047 nxthdr_ptr = &dst->ip6d_nxt; 5048 5049 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 5050 cp += tipp->ipp_rtdstoptslen; 5051 } 5052 /* 5053 * Routing header next 5054 */ 5055 if (option_exists & IPPF_RTHDR) { 5056 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 5057 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 5058 5059 *nxthdr_ptr = IPPROTO_ROUTING; 5060 nxthdr_ptr = &rt->ip6r_nxt; 5061 5062 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 5063 cp += tipp->ipp_rthdrlen; 5064 } 5065 /* 5066 * Do ultimate destination options 5067 */ 5068 if (option_exists & IPPF_DSTOPTS) { 5069 ip6_dest_t *dest = (ip6_dest_t *)cp; 5070 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 5071 5072 *nxthdr_ptr = IPPROTO_DSTOPTS; 5073 nxthdr_ptr = &dest->ip6d_nxt; 5074 5075 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 5076 cp += tipp->ipp_dstoptslen; 5077 } 5078 5079 /* 5080 * Now set the last header pointer to the proto passed in 5081 */ 5082 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 5083 *nxthdr_ptr = icmp->icmp_proto; 5084 5085 /* 5086 * Copy in the destination address 5087 */ 5088 ip6h->ip6_dst = ip6_dst; 5089 5090 ip6h->ip6_vcf = 5091 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 5092 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 5093 5094 if (option_exists & IPPF_TCLASS) { 5095 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 5096 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 5097 tipp->ipp_tclass); 5098 } 5099 if (option_exists & IPPF_RTHDR) { 5100 ip6_rthdr_t *rth; 5101 5102 /* 5103 * Perform any processing needed for source routing. 5104 * We know that all extension headers will be in the same mblk 5105 * as the IPv6 header. 5106 */ 5107 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 5108 if (rth != NULL && rth->ip6r_segleft != 0) { 5109 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 5110 /* 5111 * Drop packet - only support Type 0 routing. 5112 * Notify the application as well. 5113 */ 5114 icmp_ud_err(q, mp, EPROTO); 5115 BUMP_MIB(&is->is_rawip_mib, 5116 rawipOutErrors); 5117 return; 5118 } 5119 /* 5120 * rth->ip6r_len is twice the number of 5121 * addresses in the header 5122 */ 5123 if (rth->ip6r_len & 0x1) { 5124 icmp_ud_err(q, mp, EPROTO); 5125 BUMP_MIB(&is->is_rawip_mib, 5126 rawipOutErrors); 5127 return; 5128 } 5129 /* 5130 * Shuffle the routing header and ip6_dst 5131 * addresses, and get the checksum difference 5132 * between the first hop (in ip6_dst) and 5133 * the destination (in the last routing hdr entry). 5134 */ 5135 csum = ip_massage_options_v6(ip6h, rth, 5136 is->is_netstack); 5137 /* 5138 * Verify that the first hop isn't a mapped address. 5139 * Routers along the path need to do this verification 5140 * for subsequent hops. 5141 */ 5142 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 5143 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5144 BUMP_MIB(&is->is_rawip_mib, 5145 rawipOutErrors); 5146 return; 5147 } 5148 } 5149 } 5150 5151 ip_len = mp1->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 5152 if (mp1->b_cont != NULL) 5153 ip_len += msgdsize(mp1->b_cont); 5154 5155 /* 5156 * Set the length into the IP header. 5157 * If the length is greater than the maximum allowed by IP, 5158 * then free the message and return. Do not try and send it 5159 * as this can cause problems in layers below. 
5160 */ 5161 if (ip_len > IP_MAXPACKET) { 5162 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5163 icmp_ud_err(q, mp, EMSGSIZE); 5164 return; 5165 } 5166 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 5167 uint_t cksum_off; /* From ip6i == mp1->b_rptr */ 5168 uint16_t *cksum_ptr; 5169 uint_t ext_hdrs_len; 5170 5171 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 5172 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 5173 icmp->icmp_checksum_off == 2); 5174 5175 /* 5176 * We make it easy for IP to include our pseudo header 5177 * by putting our length in uh_checksum, modified (if 5178 * we have a routing header) by the checksum difference 5179 * between the ultimate destination and first hop addresses. 5180 * Note: ICMPv6 must always checksum the packet. 5181 */ 5182 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 5183 if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) { 5184 if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) { 5185 BUMP_MIB(&is->is_rawip_mib, 5186 rawipOutErrors); 5187 freemsg(mp); 5188 return; 5189 } 5190 ip6i = (ip6i_t *)mp1->b_rptr; 5191 if (ip6i->ip6i_nxt == IPPROTO_RAW) 5192 ip6h = (ip6_t *)&ip6i[1]; 5193 else 5194 ip6h = (ip6_t *)ip6i; 5195 } 5196 /* Add payload length to checksum */ 5197 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 5198 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 5199 csum += htons(ip_len - ext_hdrs_len); 5200 5201 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 5202 csum = (csum & 0xFFFF) + (csum >> 16); 5203 *cksum_ptr = (uint16_t)csum; 5204 } 5205 5206 #ifdef _LITTLE_ENDIAN 5207 ip_len = htons(ip_len); 5208 #endif 5209 ip6h->ip6_plen = (uint16_t)ip_len; 5210 5211 freeb(mp); 5212 5213 /* We're done. 
Pass the packet to IP */ 5214 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 5215 ip_output_v6(icmp->icmp_connp, mp1, q, IP_WPUT); 5216 } 5217 5218 static void 5219 icmp_wput_other(queue_t *q, mblk_t *mp) 5220 { 5221 uchar_t *rptr = mp->b_rptr; 5222 struct iocblk *iocp; 5223 #define tudr ((struct T_unitdata_req *)rptr) 5224 conn_t *connp = Q_TO_CONN(q); 5225 icmp_t *icmp = connp->conn_icmp; 5226 icmp_stack_t *is = icmp->icmp_is; 5227 cred_t *cr; 5228 5229 cr = DB_CREDDEF(mp, connp->conn_cred); 5230 5231 switch (mp->b_datap->db_type) { 5232 case M_PROTO: 5233 case M_PCPROTO: 5234 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5235 /* 5236 * If the message does not contain a PRIM_type, 5237 * throw it away. 5238 */ 5239 freemsg(mp); 5240 return; 5241 } 5242 switch (((union T_primitives *)rptr)->type) { 5243 case T_ADDR_REQ: 5244 icmp_addr_req(q, mp); 5245 return; 5246 case O_T_BIND_REQ: 5247 case T_BIND_REQ: 5248 icmp_bind(q, mp); 5249 return; 5250 case T_CONN_REQ: 5251 icmp_connect(q, mp); 5252 return; 5253 case T_CAPABILITY_REQ: 5254 icmp_capability_req(q, mp); 5255 return; 5256 case T_INFO_REQ: 5257 icmp_info_req(q, mp); 5258 return; 5259 case T_UNITDATA_REQ: 5260 /* 5261 * If a T_UNITDATA_REQ gets here, the address must 5262 * be bad. Valid T_UNITDATA_REQs are found above 5263 * and break to below this switch. 5264 */ 5265 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5266 return; 5267 case T_UNBIND_REQ: 5268 icmp_unbind(q, mp); 5269 return; 5270 5271 case T_SVR4_OPTMGMT_REQ: 5272 if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get, 5273 cr)) { 5274 /* Only IP can return anything meaningful */ 5275 (void) svr4_optcom_req(q, mp, cr, 5276 &icmp_opt_obj, B_TRUE); 5277 } 5278 return; 5279 5280 case T_OPTMGMT_REQ: 5281 /* Only IP can return anything meaningful */ 5282 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE); 5283 return; 5284 5285 case T_DISCON_REQ: 5286 icmp_disconnect(q, mp); 5287 return; 5288 5289 /* The following TPI message is not supported by icmp. 
*/ 5290 case O_T_CONN_RES: 5291 case T_CONN_RES: 5292 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5293 return; 5294 5295 /* The following 3 TPI requests are illegal for icmp. */ 5296 case T_DATA_REQ: 5297 case T_EXDATA_REQ: 5298 case T_ORDREL_REQ: 5299 freemsg(mp); 5300 (void) putctl1(RD(q), M_ERROR, EPROTO); 5301 return; 5302 default: 5303 break; 5304 } 5305 break; 5306 case M_IOCTL: 5307 iocp = (struct iocblk *)mp->b_rptr; 5308 switch (iocp->ioc_cmd) { 5309 case TI_GETPEERNAME: 5310 if (icmp->icmp_state != TS_DATA_XFER) { 5311 /* 5312 * If a default destination address has not 5313 * been associated with the stream, then we 5314 * don't know the peer's name. 5315 */ 5316 iocp->ioc_error = ENOTCONN; 5317 err_ret:; 5318 iocp->ioc_count = 0; 5319 mp->b_datap->db_type = M_IOCACK; 5320 qreply(q, mp); 5321 return; 5322 } 5323 /* FALLTHRU */ 5324 case TI_GETMYNAME: 5325 /* 5326 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5327 * need to copyin the user's strbuf structure. 5328 * Processing will continue in the M_IOCDATA case 5329 * below. 5330 */ 5331 mi_copyin(q, mp, NULL, 5332 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5333 return; 5334 case ND_SET: 5335 /* nd_getset performs the necessary error checking */ 5336 case ND_GET: 5337 if (nd_getset(q, is->is_nd, mp)) { 5338 qreply(q, mp); 5339 return; 5340 } 5341 break; 5342 default: 5343 break; 5344 } 5345 break; 5346 case M_IOCDATA: 5347 icmp_wput_iocdata(q, mp); 5348 return; 5349 default: 5350 break; 5351 } 5352 ip_wput(q, mp); 5353 } 5354 5355 /* 5356 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5357 * messages. 5358 */ 5359 static void 5360 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5361 { 5362 mblk_t *mp1; 5363 STRUCT_HANDLE(strbuf, sb); 5364 icmp_t *icmp; 5365 in6_addr_t v6addr; 5366 ipaddr_t v4addr; 5367 uint32_t flowinfo = 0; 5368 int addrlen; 5369 5370 /* Make sure it is one of ours. 
*/ 5371 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5372 case TI_GETMYNAME: 5373 case TI_GETPEERNAME: 5374 break; 5375 default: 5376 icmp = Q_TO_ICMP(q); 5377 ip_output(icmp->icmp_connp, mp, q, IP_WPUT); 5378 return; 5379 } 5380 switch (mi_copy_state(q, mp, &mp1)) { 5381 case -1: 5382 return; 5383 case MI_COPY_CASE(MI_COPY_IN, 1): 5384 break; 5385 case MI_COPY_CASE(MI_COPY_OUT, 1): 5386 /* 5387 * The address has been copied out, so now 5388 * copyout the strbuf. 5389 */ 5390 mi_copyout(q, mp); 5391 return; 5392 case MI_COPY_CASE(MI_COPY_OUT, 2): 5393 /* 5394 * The address and strbuf have been copied out. 5395 * We're done, so just acknowledge the original 5396 * M_IOCTL. 5397 */ 5398 mi_copy_done(q, mp, 0); 5399 return; 5400 default: 5401 /* 5402 * Something strange has happened, so acknowledge 5403 * the original M_IOCTL with an EPROTO error. 5404 */ 5405 mi_copy_done(q, mp, EPROTO); 5406 return; 5407 } 5408 /* 5409 * Now we have the strbuf structure for TI_GETMYNAME 5410 * and TI_GETPEERNAME. Next we copyout the requested 5411 * address and then we'll copyout the strbuf. 5412 */ 5413 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5414 (void *)mp1->b_rptr); 5415 icmp = Q_TO_ICMP(q); 5416 if (icmp->icmp_family == AF_INET) 5417 addrlen = sizeof (sin_t); 5418 else 5419 addrlen = sizeof (sin6_t); 5420 5421 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5422 mi_copy_done(q, mp, EINVAL); 5423 return; 5424 } 5425 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5426 case TI_GETMYNAME: 5427 if (icmp->icmp_family == AF_INET) { 5428 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5429 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 5430 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5431 v4addr = V4_PART_OF_V6(icmp->icmp_v6src); 5432 } else { 5433 /* 5434 * INADDR_ANY 5435 * icmp_v6src is not set, we might be bound to 5436 * broadcast/multicast. 
Use icmp_bound_v6src as 5437 * local address instead (that could 5438 * also still be INADDR_ANY) 5439 */ 5440 v4addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 5441 } 5442 } else { 5443 /* icmp->icmp_family == AF_INET6 */ 5444 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5445 v6addr = icmp->icmp_v6src; 5446 } else { 5447 /* 5448 * UNSPECIFIED 5449 * icmp_v6src is not set, we might be bound to 5450 * broadcast/multicast. Use icmp_bound_v6src as 5451 * local address instead (that could 5452 * also still be UNSPECIFIED) 5453 */ 5454 v6addr = icmp->icmp_bound_v6src; 5455 } 5456 } 5457 break; 5458 case TI_GETPEERNAME: 5459 if (icmp->icmp_family == AF_INET) { 5460 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5461 v4addr = V4_PART_OF_V6(icmp->icmp_v6dst); 5462 } else { 5463 /* icmp->icmp_family == AF_INET6) */ 5464 v6addr = icmp->icmp_v6dst; 5465 flowinfo = icmp->icmp_flowinfo; 5466 } 5467 break; 5468 default: 5469 mi_copy_done(q, mp, EPROTO); 5470 return; 5471 } 5472 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5473 if (!mp1) 5474 return; 5475 5476 if (icmp->icmp_family == AF_INET) { 5477 sin_t *sin; 5478 5479 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 5480 sin = (sin_t *)mp1->b_rptr; 5481 mp1->b_wptr = (uchar_t *)&sin[1]; 5482 *sin = sin_null; 5483 sin->sin_family = AF_INET; 5484 sin->sin_addr.s_addr = v4addr; 5485 } else { 5486 /* icmp->icmp_family == AF_INET6 */ 5487 sin6_t *sin6; 5488 5489 ASSERT(icmp->icmp_family == AF_INET6); 5490 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 5491 sin6 = (sin6_t *)mp1->b_rptr; 5492 mp1->b_wptr = (uchar_t *)&sin6[1]; 5493 *sin6 = sin6_null; 5494 sin6->sin6_family = AF_INET6; 5495 sin6->sin6_flowinfo = flowinfo; 5496 sin6->sin6_addr = v6addr; 5497 } 5498 /* Copy out the address */ 5499 mi_copyout(q, mp); 5500 } 5501 5502 static int 5503 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5504 void *thisdg_attrs) 5505 { 5506 conn_t *connp = Q_TO_CONN(q); 5507 struct T_unitdata_req *udreqp; 5508 
int is_absreq_failure; 5509 cred_t *cr; 5510 5511 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5512 *errorp = 0; 5513 5514 cr = DB_CREDDEF(mp, connp->conn_cred); 5515 5516 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5517 udreqp->OPT_offset, cr, &icmp_opt_obj, 5518 thisdg_attrs, &is_absreq_failure); 5519 5520 if (*errorp != 0) { 5521 /* 5522 * Note: No special action needed in this 5523 * module for "is_absreq_failure" 5524 */ 5525 return (-1); /* failure */ 5526 } 5527 ASSERT(is_absreq_failure == 0); 5528 return (0); /* success */ 5529 } 5530 5531 void 5532 icmp_ddi_init(void) 5533 { 5534 icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5535 icmp_opt_obj.odb_opt_arr_cnt); 5536 5537 /* 5538 * We want to be informed each time a stack is created or 5539 * destroyed in the kernel, so we can maintain the 5540 * set of icmp_stack_t's. 5541 */ 5542 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); 5543 } 5544 5545 void 5546 icmp_ddi_destroy(void) 5547 { 5548 netstack_unregister(NS_ICMP); 5549 } 5550 5551 /* 5552 * Initialize the ICMP stack instance. 5553 */ 5554 static void * 5555 rawip_stack_init(netstackid_t stackid, netstack_t *ns) 5556 { 5557 icmp_stack_t *is; 5558 icmpparam_t *pa; 5559 5560 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); 5561 is->is_netstack = ns; 5562 5563 pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); 5564 is->is_param_arr = pa; 5565 bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); 5566 5567 (void) icmp_param_register(&is->is_nd, 5568 is->is_param_arr, A_CNT(icmp_param_arr)); 5569 is->is_ksp = rawip_kstat_init(stackid); 5570 return (is); 5571 } 5572 5573 /* 5574 * Free the ICMP stack instance. 
5575 */ 5576 static void 5577 rawip_stack_fini(netstackid_t stackid, void *arg) 5578 { 5579 icmp_stack_t *is = (icmp_stack_t *)arg; 5580 5581 nd_free(&is->is_nd); 5582 kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); 5583 is->is_param_arr = NULL; 5584 5585 rawip_kstat_fini(stackid, is->is_ksp); 5586 is->is_ksp = NULL; 5587 kmem_free(is, sizeof (*is)); 5588 } 5589 5590 static void * 5591 rawip_kstat_init(netstackid_t stackid) { 5592 kstat_t *ksp; 5593 5594 rawip_named_kstat_t template = { 5595 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5596 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5597 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5598 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5599 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5600 }; 5601 5602 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", 5603 KSTAT_TYPE_NAMED, 5604 NUM_OF_FIELDS(rawip_named_kstat_t), 5605 0, stackid); 5606 if (ksp == NULL || ksp->ks_data == NULL) 5607 return (NULL); 5608 5609 bcopy(&template, ksp->ks_data, sizeof (template)); 5610 ksp->ks_update = rawip_kstat_update; 5611 ksp->ks_private = (void *)(uintptr_t)stackid; 5612 5613 kstat_install(ksp); 5614 return (ksp); 5615 } 5616 5617 static void 5618 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) 5619 { 5620 if (ksp != NULL) { 5621 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 5622 kstat_delete_netstack(ksp, stackid); 5623 } 5624 } 5625 5626 static int 5627 rawip_kstat_update(kstat_t *ksp, int rw) 5628 { 5629 rawip_named_kstat_t *rawipkp; 5630 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; 5631 netstack_t *ns; 5632 icmp_stack_t *is; 5633 5634 if ((ksp == NULL) || (ksp->ks_data == NULL)) 5635 return (EIO); 5636 5637 if (rw == KSTAT_WRITE) 5638 return (EACCES); 5639 5640 rawipkp = (rawip_named_kstat_t *)ksp->ks_data; 5641 5642 ns = netstack_find_by_stackid(stackid); 5643 if (ns == NULL) 5644 return (-1); 5645 is = ns->netstack_icmp; 5646 if (is == NULL) { 5647 netstack_rele(ns); 5648 return (-1); 5649 } 5650 
rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; 5651 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; 5652 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; 5653 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; 5654 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; 5655 netstack_rele(ns); 5656 return (0); 5657 } 5658