1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/types.h> 31 #include <sys/stream.h> 32 #include <sys/stropts.h> 33 #include <sys/strlog.h> 34 #include <sys/strsun.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/cmn_err.h> 42 #include <sys/debug.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/priv.h> 46 #include <sys/zone.h> 47 #include <sys/time.h> 48 49 #include <sys/socket.h> 50 #include <sys/isa_defs.h> 51 #include <sys/suntpi.h> 52 #include <sys/xti_inet.h> 53 #include <sys/netstack.h> 54 55 #include <net/route.h> 56 #include <net/if.h> 57 58 #include <netinet/in.h> 59 #include <netinet/ip6.h> 60 #include <netinet/icmp6.h> 61 #include <inet/common.h> 62 #include <inet/ip.h> 63 #include <inet/ip6.h> 64 #include <inet/mi.h> 65 #include <inet/nd.h> 66 #include <inet/optcom.h> 67 #include <inet/snmpcom.h> 68 #include <inet/kstatcom.h> 69 #include <inet/rawip_impl.h> 70 71 #include <netinet/ip_mroute.h> 72 #include <inet/tcp.h> 73 #include <net/pfkeyv2.h> 74 #include <inet/ipsec_info.h> 75 #include <inet/ipclassifier.h> 76 77 #include <sys/tsol/label.h> 78 #include <sys/tsol/tnet.h> 79 80 #include <inet/ip_ire.h> 81 #include <inet/ip_if.h> 82 83 #include <inet/ip_impl.h> 84 85 /* 86 * Synchronization notes: 87 * 88 * RAWIP is MT and uses the usual kernel synchronization primitives. There is 89 * one lock, which is icmp_rwlock. We also use conn_lock when updating things 90 * which affect the IP classifier lookup. 91 * The lock order is icmp_rwlock -> conn_lock. 92 * 93 * The icmp_rwlock: 94 * This protects most of the other fields in the icmp_t. The exact list of 95 * fields which are protected by each of the above locks is documented in 96 * the icmp_t structure definition. 97 * 98 * Plumbing notes: 99 * ICMP is always a device driver. 
For compatibility with mibopen() code
 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough
 * dummy module.
 */

/*
 * Forward declarations.  Everything is file-local except icmp_opt_set()
 * and icmp_opt_get(), which are declared without "static" here.
 */
static void	icmp_addr_req(queue_t *q, mblk_t *mp);
static void	icmp_bind(queue_t *q, mblk_t *mp);
static void	icmp_bind_proto(queue_t *q);
static void	icmp_bind_result(conn_t *, mblk_t *);
static void	icmp_bind_ack(conn_t *, mblk_t *mp);
static void	icmp_bind_error(conn_t *, mblk_t *mp);
static int	icmp_build_hdrs(icmp_t *icmp);
static void	icmp_capability_req(queue_t *q, mblk_t *mp);
static int	icmp_close(queue_t *q);
static void	icmp_connect(queue_t *q, mblk_t *mp);
static void	icmp_disconnect(queue_t *q, mblk_t *mp);
static void	icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
		    t_scalar_t t_error, int sys_error);
static void	icmp_icmp_error(queue_t *q, mblk_t *mp);
static void	icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
static void	icmp_info_req(queue_t *q, mblk_t *mp);
static void	icmp_input(void *, mblk_t *, void *);
static mblk_t	*icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim,
		    t_scalar_t addr_length, in_port_t);
static int	icmp_open(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp, boolean_t isv6);
static int	icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static void	icmp_output(queue_t *q, mblk_t *mp);
static int	icmp_unitdata_opt_process(queue_t *q, mblk_t *mp,
		    int *errorp, void *thisdg_attrs);
static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
int		icmp_opt_set(queue_t *q, uint_t optset_context,
		    int level, int name, uint_t inlen,
		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
		    void *thisdg_attrs, cred_t *cr, mblk_t *mblk);
int		icmp_opt_get(queue_t *q, int level, int name,
		    uchar_t *ptr);
static int	icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt);
static int	icmp_param_set(queue_t *q, mblk_t *mp, char *value,
		    caddr_t cp, cred_t *cr);
static int	icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
		    uchar_t *ptr, int len);
static int	icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static void	icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
static void	icmp_unbind(queue_t *q, mblk_t *mp);
static void	icmp_wput(queue_t *q, mblk_t *mp);
static void	icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6,
		    t_scalar_t tudr_optlen);
static void	icmp_wput_other(queue_t *q, mblk_t *mp);
static void	icmp_wput_iocdata(queue_t *q, mblk_t *mp);
static void	icmp_wput_restricted(queue_t *q, mblk_t *mp);

/* Per-netstack instance setup and teardown. */
static void	*rawip_stack_init(netstackid_t stackid, netstack_t *ns);
static void	rawip_stack_fini(netstackid_t stackid, void *arg);

/* Per-netstack kstat setup, teardown and refresh. */
static void	*rawip_kstat_init(netstackid_t stackid);
static void	rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp);
static int	rawip_kstat_update(kstat_t *kp, int rw);


/*
 * STREAMS module information shared by every qinit below.
 * NOTE(review): the positional initializers are presumably module id,
 * module name, min/max packet size and hi/lo water marks, in the order
 * struct module_info declares them -- confirm against <sys/stream.h>.
 */
static struct module_info icmp_mod_info =  {
	5707, "icmp", 1, INFPSZ, 512, 128
};

/*
 * Entry points for ICMP as a device.
 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices.
 */
/* Read side for /dev/icmp: only open and close are supplied. */
static struct qinit icmprinitv4 = {
	NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info
};

/* Read side for /dev/icmp6: identical apart from the open routine. */
static struct qinit icmprinitv6 = {
	NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info
};

/* Write side, shared by both devices: icmp_wput is the put procedure. */
static struct qinit icmpwinit = {
	(pfi_t)icmp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &icmp_mod_info
};

/* For AF_INET aka /dev/icmp */
struct streamtab icmpinfov4 = {
	&icmprinitv4, &icmpwinit
};

/* For AF_INET6 aka /dev/icmp6 */
struct streamtab icmpinfov6 = {
	&icmprinitv6, &icmpwinit
};

static sin_t	sin_null;	/* Zero address for quick clears */
static sin6_t	sin6_null;	/* Zero address for quick clears */

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack icmp_g_t_info_ack = {
	T_INFO_ACK,
	IP_MAXPACKET,	 /* TSDU_size.  icmp allows maximum size messages. */
	T_INVALID,	/* ETSDU_size.  icmp does not support expedited data. */
	T_INVALID,	/* CDATA_size. icmp does not support connect data. */
	T_INVALID,	/* DDATA_size. icmp does not support disconnect data. */
	0,		/* ADDR_size - filled in later. */
	0,		/* OPT_size - not initialized here */
	IP_MAXPACKET,	/* TIDU_size.  icmp allows maximum size messages. */
	T_CLTS,		/* SERV_type.  icmp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from icmp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/*
 * Table of ND variables supported by icmp.  These are loaded into is_nd
 * when the stack instance is created.
 * All of these are alterable, within the min/max values given, at run time.
 *
 * The is_* accessor macros that follow index this table by position, so
 * the order of the entries and the macro indices must be kept in step.
 */
static icmpparam_t	icmp_param_arr[] = {
	/* min	max	value	name */
	{ 0,	128,	32,	"icmp_wroff_extra" },
	{ 1,	255,	255,	"icmp_ipv4_ttl" },
	{ 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS,	"icmp_ipv6_hoplimit"},
	{ 0,	1,	1,	"icmp_bsd_compat" },
	{ 4096,	65536,	8192,	"icmp_xmit_hiwat"},
	{ 0,	65536,	1024,	"icmp_xmit_lowat"},
	{ 4096,	65536,	8192,	"icmp_recv_hiwat"},
	{ 65536, 1024*1024*1024, 256*1024,	"icmp_max_buf"},
};
/* Current value of each tunable, addressed by its slot in is_param_arr. */
#define	is_wroff_extra		is_param_arr[0].icmp_param_value
#define	is_ipv4_ttl		is_param_arr[1].icmp_param_value
#define	is_ipv6_hoplimit	is_param_arr[2].icmp_param_value
#define	is_bsd_compat		is_param_arr[3].icmp_param_value
#define	is_xmit_hiwat		is_param_arr[4].icmp_param_value
#define	is_xmit_lowat		is_param_arr[5].icmp_param_value
#define	is_recv_hiwat		is_param_arr[6].icmp_param_value
#define	is_max_buf		is_param_arr[7].icmp_param_value

/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to icmp_wput.
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP
 * protocol type placed in the message following the address. A T_BIND_ACK
 * message is returned by ip_bind_v4/v6.
245 */ 246 static void 247 icmp_bind(queue_t *q, mblk_t *mp) 248 { 249 sin_t *sin; 250 sin6_t *sin6; 251 mblk_t *mp1; 252 struct T_bind_req *tbr; 253 icmp_t *icmp; 254 conn_t *connp = Q_TO_CONN(q); 255 256 icmp = connp->conn_icmp; 257 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 258 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 259 "icmp_bind: bad req, len %u", 260 (uint_t)(mp->b_wptr - mp->b_rptr)); 261 icmp_err_ack(q, mp, TPROTO, 0); 262 return; 263 } 264 if (icmp->icmp_state != TS_UNBND) { 265 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 266 "icmp_bind: bad state, %d", icmp->icmp_state); 267 icmp_err_ack(q, mp, TOUTSTATE, 0); 268 return; 269 } 270 /* 271 * Reallocate the message to make sure we have enough room for an 272 * address and the protocol type. 273 */ 274 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 275 if (!mp1) { 276 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 277 return; 278 } 279 mp = mp1; 280 tbr = (struct T_bind_req *)mp->b_rptr; 281 switch (tbr->ADDR_length) { 282 case 0: /* Generic request */ 283 tbr->ADDR_offset = sizeof (struct T_bind_req); 284 if (icmp->icmp_family == AF_INET) { 285 tbr->ADDR_length = sizeof (sin_t); 286 sin = (sin_t *)&tbr[1]; 287 *sin = sin_null; 288 sin->sin_family = AF_INET; 289 mp->b_wptr = (uchar_t *)&sin[1]; 290 } else { 291 ASSERT(icmp->icmp_family == AF_INET6); 292 tbr->ADDR_length = sizeof (sin6_t); 293 sin6 = (sin6_t *)&tbr[1]; 294 *sin6 = sin6_null; 295 sin6->sin6_family = AF_INET6; 296 mp->b_wptr = (uchar_t *)&sin6[1]; 297 } 298 break; 299 case sizeof (sin_t): /* Complete IP address */ 300 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 301 sizeof (sin_t)); 302 if (sin == NULL || !OK_32PTR((char *)sin)) { 303 icmp_err_ack(q, mp, TSYSERR, EINVAL); 304 return; 305 } 306 if (icmp->icmp_family != AF_INET || 307 sin->sin_family != AF_INET) { 308 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 309 return; 310 } 311 break; 312 case sizeof (sin6_t): /* Complete IP address */ 313 sin6 = (sin6_t 
*)mi_offset_param(mp, tbr->ADDR_offset, 314 sizeof (sin6_t)); 315 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 316 icmp_err_ack(q, mp, TSYSERR, EINVAL); 317 return; 318 } 319 if (icmp->icmp_family != AF_INET6 || 320 sin6->sin6_family != AF_INET6) { 321 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 322 return; 323 } 324 /* No support for mapped addresses on raw sockets */ 325 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 326 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 327 return; 328 } 329 break; 330 default: 331 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 332 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 333 icmp_err_ack(q, mp, TBADADDR, 0); 334 return; 335 } 336 337 /* 338 * The state must be TS_UNBND. TPI mandates that users must send 339 * TPI primitives only 1 at a time and wait for the response before 340 * sending the next primitive. 341 */ 342 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 343 if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) { 344 rw_exit(&icmp->icmp_rwlock); 345 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 346 "icmp_bind: bad state, %d", icmp->icmp_state); 347 icmp_err_ack(q, mp, TOUTSTATE, 0); 348 return; 349 } 350 351 icmp->icmp_pending_op = tbr->PRIM_type; 352 353 /* 354 * Copy the source address into our icmp structure. This address 355 * may still be zero; if so, ip will fill in the correct address 356 * each time an outbound packet is passed to it. 357 * If we are binding to a broadcast or multicast address then 358 * icmp_bind_ack will clear the source address when it receives 359 * the T_BIND_ACK. 
360 */ 361 icmp->icmp_state = TS_IDLE; 362 363 if (icmp->icmp_family == AF_INET) { 364 ASSERT(sin != NULL); 365 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 366 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 367 &icmp->icmp_v6src); 368 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 369 icmp->icmp_ip_snd_options_len; 370 icmp->icmp_bound_v6src = icmp->icmp_v6src; 371 } else { 372 int error; 373 374 ASSERT(sin6 != NULL); 375 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 376 icmp->icmp_v6src = sin6->sin6_addr; 377 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 378 icmp->icmp_bound_v6src = icmp->icmp_v6src; 379 380 /* Rebuild the header template */ 381 error = icmp_build_hdrs(icmp); 382 if (error != 0) { 383 icmp->icmp_pending_op = -1; 384 rw_exit(&icmp->icmp_rwlock); 385 icmp_err_ack(q, mp, TSYSERR, error); 386 return; 387 } 388 } 389 /* 390 * Place protocol type in the O_T_BIND_REQ/T_BIND_REQ following 391 * the address. 392 */ 393 *mp->b_wptr++ = icmp->icmp_proto; 394 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 395 /* 396 * Append a request for an IRE if src not 0 (INADDR_ANY) 397 */ 398 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 399 if (!mp->b_cont) { 400 icmp->icmp_pending_op = -1; 401 rw_exit(&icmp->icmp_rwlock); 402 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 403 return; 404 } 405 mp->b_cont->b_wptr += sizeof (ire_t); 406 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 407 } 408 rw_exit(&icmp->icmp_rwlock); 409 410 /* Pass the O_T_BIND_REQ/T_BIND_REQ to ip. */ 411 if (icmp->icmp_family == AF_INET6) 412 mp = ip_bind_v6(q, mp, connp, NULL); 413 else 414 mp = ip_bind_v4(q, mp, connp); 415 416 /* The above return NULL if the bind needs to be deferred */ 417 if (mp != NULL) 418 icmp_bind_result(connp, mp); 419 else 420 CONN_INC_REF(connp); 421 } 422 423 /* 424 * Send message to IP to just bind to the protocol. 
425 */ 426 static void 427 icmp_bind_proto(queue_t *q) 428 { 429 mblk_t *mp; 430 struct T_bind_req *tbr; 431 icmp_t *icmp; 432 conn_t *connp = Q_TO_CONN(q); 433 434 icmp = connp->conn_icmp; 435 436 mp = allocb(sizeof (struct T_bind_req) + sizeof (sin6_t) + 1, 437 BPRI_MED); 438 if (!mp) { 439 return; 440 } 441 mp->b_datap->db_type = M_PROTO; 442 tbr = (struct T_bind_req *)mp->b_rptr; 443 tbr->PRIM_type = O_T_BIND_REQ; /* change to T_BIND_REQ ? */ 444 tbr->ADDR_offset = sizeof (struct T_bind_req); 445 446 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 447 if (icmp->icmp_ipversion == IPV4_VERSION) { 448 sin_t *sin; 449 450 tbr->ADDR_length = sizeof (sin_t); 451 sin = (sin_t *)&tbr[1]; 452 *sin = sin_null; 453 sin->sin_family = AF_INET; 454 mp->b_wptr = (uchar_t *)&sin[1]; 455 } else { 456 sin6_t *sin6; 457 458 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 459 tbr->ADDR_length = sizeof (sin6_t); 460 sin6 = (sin6_t *)&tbr[1]; 461 *sin6 = sin6_null; 462 sin6->sin6_family = AF_INET6; 463 mp->b_wptr = (uchar_t *)&sin6[1]; 464 } 465 466 /* Place protocol type in the O_T_BIND_REQ following the address. */ 467 *mp->b_wptr++ = icmp->icmp_proto; 468 rw_exit(&icmp->icmp_rwlock); 469 470 /* Pass the O_T_BIND_REQ to ip. */ 471 if (icmp->icmp_family == AF_INET6) 472 mp = ip_bind_v6(q, mp, connp, NULL); 473 else 474 mp = ip_bind_v4(q, mp, connp); 475 476 /* The above return NULL if the bind needs to be deferred */ 477 if (mp != NULL) 478 icmp_bind_result(connp, mp); 479 else 480 CONN_INC_REF(connp); 481 } 482 483 /* 484 * This is called from ip_wput_nondata to handle the results of a 485 * deferred RAWIP bind. It is called once the bind has been completed. 486 */ 487 void 488 rawip_resume_bind(conn_t *connp, mblk_t *mp) 489 { 490 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 491 492 icmp_bind_result(connp, mp); 493 494 CONN_OPER_PENDING_DONE(connp); 495 } 496 497 /* 498 * This routine handles each T_CONN_REQ message passed to icmp. 
It 499 * associates a default destination address with the stream. 500 * 501 * This routine sends down a T_BIND_REQ to IP with the following mblks: 502 * T_BIND_REQ - specifying local and remote address. 503 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 504 * T_OK_ACK - for the T_CONN_REQ 505 * T_CONN_CON - to keep the TPI user happy 506 * 507 * The connect completes in icmp_bind_result. 508 * When a T_BIND_ACK is received information is extracted from the IRE 509 * and the two appended messages are sent to the TPI user. 510 * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 511 * convert it to an error ack for the appropriate primitive. 512 */ 513 static void 514 icmp_connect(queue_t *q, mblk_t *mp) 515 { 516 sin_t *sin; 517 sin6_t *sin6; 518 mblk_t *mp1, *mp2; 519 struct T_conn_req *tcr; 520 icmp_t *icmp; 521 ipaddr_t v4dst; 522 in6_addr_t v6dst; 523 uint32_t flowinfo; 524 conn_t *connp = Q_TO_CONN(q); 525 526 icmp = connp->conn_icmp; 527 tcr = (struct T_conn_req *)mp->b_rptr; 528 /* Sanity checks */ 529 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 530 icmp_err_ack(q, mp, TPROTO, 0); 531 return; 532 } 533 534 if (tcr->OPT_length != 0) { 535 icmp_err_ack(q, mp, TBADOPT, 0); 536 return; 537 } 538 539 switch (tcr->DEST_length) { 540 default: 541 icmp_err_ack(q, mp, TBADADDR, 0); 542 return; 543 544 case sizeof (sin_t): 545 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 546 sizeof (sin_t)); 547 if (sin == NULL || !OK_32PTR((char *)sin)) { 548 icmp_err_ack(q, mp, TSYSERR, EINVAL); 549 return; 550 } 551 if (icmp->icmp_family != AF_INET || 552 sin->sin_family != AF_INET) { 553 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 554 return; 555 } 556 v4dst = sin->sin_addr.s_addr; 557 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 558 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 559 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 560 icmp->icmp_ip_snd_options_len; 561 break; 562 563 case sizeof (sin6_t): 564 sin6 = 
(sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 565 sizeof (sin6_t)); 566 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 567 icmp_err_ack(q, mp, TSYSERR, EINVAL); 568 return; 569 } 570 if (icmp->icmp_family != AF_INET6 || 571 sin6->sin6_family != AF_INET6) { 572 icmp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 573 return; 574 } 575 /* No support for mapped addresses on raw sockets */ 576 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 577 icmp_err_ack(q, mp, TSYSERR, EADDRNOTAVAIL); 578 return; 579 } 580 v6dst = sin6->sin6_addr; 581 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 582 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 583 flowinfo = sin6->sin6_flowinfo; 584 break; 585 } 586 if (icmp->icmp_ipversion == IPV4_VERSION) { 587 /* 588 * Interpret a zero destination to mean loopback. 589 * Update the T_CONN_REQ (sin/sin6) since it is used to 590 * generate the T_CONN_CON. 591 */ 592 if (v4dst == INADDR_ANY) { 593 v4dst = htonl(INADDR_LOOPBACK); 594 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 595 if (icmp->icmp_family == AF_INET) { 596 sin->sin_addr.s_addr = v4dst; 597 } else { 598 sin6->sin6_addr = v6dst; 599 } 600 } 601 icmp->icmp_v6dst = v6dst; 602 icmp->icmp_flowinfo = 0; 603 604 /* 605 * If the destination address is multicast and 606 * an outgoing multicast interface has been set, 607 * use the address of that interface as our 608 * source address if no source address has been set. 609 */ 610 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 611 CLASSD(v4dst) && 612 icmp->icmp_multicast_if_addr != INADDR_ANY) { 613 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 614 &icmp->icmp_v6src); 615 } 616 } else { 617 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 618 /* 619 * Interpret a zero destination to mean loopback. 620 * Update the T_CONN_REQ (sin/sin6) since it is used to 621 * generate the T_CONN_CON. 
622 */ 623 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 624 v6dst = ipv6_loopback; 625 sin6->sin6_addr = v6dst; 626 } 627 icmp->icmp_v6dst = v6dst; 628 icmp->icmp_flowinfo = flowinfo; 629 /* 630 * If the destination address is multicast and 631 * an outgoing multicast interface has been set, 632 * then the ip bind logic will pick the correct source 633 * address (i.e. matching the outgoing multicast interface). 634 */ 635 } 636 637 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 638 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 639 rw_exit(&icmp->icmp_rwlock); 640 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 641 "icmp_connect: bad state, %d", icmp->icmp_state); 642 icmp_err_ack(q, mp, TOUTSTATE, 0); 643 return; 644 } 645 icmp->icmp_pending_op = T_CONN_REQ; 646 647 if (icmp->icmp_state == TS_DATA_XFER) { 648 /* Already connected - clear out state */ 649 icmp->icmp_v6src = icmp->icmp_bound_v6src; 650 icmp->icmp_state = TS_IDLE; 651 } 652 653 /* 654 * Send down bind to IP to verify that there is a route 655 * and to determine the source address. 656 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 657 */ 658 if (icmp->icmp_family == AF_INET) { 659 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa_conn_t), 660 sin->sin_port); 661 } else { 662 ASSERT(icmp->icmp_family == AF_INET6); 663 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (ipa6_conn_t), 664 sin6->sin6_port); 665 } 666 if (mp1 == NULL) { 667 icmp->icmp_pending_op = -1; 668 rw_exit(&icmp->icmp_rwlock); 669 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 670 return; 671 } 672 673 /* 674 * We also have to send a connection confirmation to 675 * keep TLI happy. Prepare it for icmp_bind_result. 
676 */ 677 if (icmp->icmp_family == AF_INET) { 678 mp2 = mi_tpi_conn_con(NULL, (char *)sin, sizeof (*sin), NULL, 679 0); 680 } else { 681 ASSERT(icmp->icmp_family == AF_INET6); 682 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, sizeof (*sin6), NULL, 683 0); 684 } 685 if (mp2 == NULL) { 686 freemsg(mp1); 687 icmp->icmp_pending_op = -1; 688 rw_exit(&icmp->icmp_rwlock); 689 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 690 return; 691 } 692 693 mp = mi_tpi_ok_ack_alloc(mp); 694 if (mp == NULL) { 695 /* Unable to reuse the T_CONN_REQ for the ack. */ 696 freemsg(mp2); 697 icmp->icmp_pending_op = -1; 698 rw_exit(&icmp->icmp_rwlock); 699 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 700 return; 701 } 702 703 icmp->icmp_state = TS_DATA_XFER; 704 rw_exit(&icmp->icmp_rwlock); 705 706 /* Hang onto the T_OK_ACK and T_CONN_CON for later. */ 707 linkb(mp1, mp); 708 linkb(mp1, mp2); 709 710 mblk_setcred(mp1, connp->conn_cred); 711 if (icmp->icmp_family == AF_INET) 712 mp1 = ip_bind_v4(q, mp1, connp); 713 else 714 mp1 = ip_bind_v6(q, mp1, connp, NULL); 715 716 /* The above return NULL if the bind needs to be deferred */ 717 if (mp1 != NULL) 718 icmp_bind_result(connp, mp1); 719 else 720 CONN_INC_REF(connp); 721 } 722 723 static void 724 icmp_close_free(conn_t *connp) 725 { 726 icmp_t *icmp = connp->conn_icmp; 727 728 /* If there are any options associated with the stream, free them. 
*/ 729 if (icmp->icmp_ip_snd_options != NULL) { 730 mi_free((char *)icmp->icmp_ip_snd_options); 731 icmp->icmp_ip_snd_options = NULL; 732 icmp->icmp_ip_snd_options_len = 0; 733 } 734 735 if (icmp->icmp_filter != NULL) { 736 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 737 icmp->icmp_filter = NULL; 738 } 739 /* Free memory associated with sticky options */ 740 if (icmp->icmp_sticky_hdrs_len != 0) { 741 kmem_free(icmp->icmp_sticky_hdrs, 742 icmp->icmp_sticky_hdrs_len); 743 icmp->icmp_sticky_hdrs = NULL; 744 icmp->icmp_sticky_hdrs_len = 0; 745 } 746 ip6_pkt_free(&icmp->icmp_sticky_ipp); 747 748 /* 749 * Clear any fields which the kmem_cache constructor clears. 750 * Only icmp_connp needs to be preserved. 751 * TBD: We should make this more efficient to avoid clearing 752 * everything. 753 */ 754 ASSERT(icmp->icmp_connp == connp); 755 bzero(icmp, sizeof (icmp_t)); 756 icmp->icmp_connp = connp; 757 } 758 759 static int 760 icmp_close(queue_t *q) 761 { 762 conn_t *connp = (conn_t *)q->q_ptr; 763 764 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 765 766 ip_quiesce_conn(connp); 767 768 qprocsoff(connp->conn_rq); 769 770 icmp_close_free(connp); 771 772 /* 773 * Now we are truly single threaded on this stream, and can 774 * delete the things hanging off the connp, and finally the connp. 775 * We removed this connp from the fanout list, it cannot be 776 * accessed thru the fanouts, and we already waited for the 777 * conn_ref to drop to 0. We are already in close, so 778 * there cannot be any other thread from the top. qprocsoff 779 * has completed, and service has completed or won't run in 780 * future. 781 */ 782 ASSERT(connp->conn_ref == 1); 783 784 inet_minor_free(ip_minor_arena, connp->conn_dev); 785 786 connp->conn_ref--; 787 ipcl_conn_destroy(connp); 788 789 q->q_ptr = WR(q)->q_ptr = NULL; 790 return (0); 791 } 792 793 /* 794 * This routine handles each T_DISCON_REQ message passed to icmp 795 * as an indicating that ICMP is no longer connected. 
This results 796 * in sending a T_BIND_REQ to IP to restore the binding to just 797 * the local address. 798 * 799 * This routine sends down a T_BIND_REQ to IP with the following mblks: 800 * T_BIND_REQ - specifying just the local address. 801 * T_OK_ACK - for the T_DISCON_REQ 802 * 803 * The disconnect completes in icmp_bind_result. 804 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 805 * Should icmp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 806 * convert it to an error ack for the appropriate primitive. 807 */ 808 static void 809 icmp_disconnect(queue_t *q, mblk_t *mp) 810 { 811 icmp_t *icmp; 812 mblk_t *mp1; 813 conn_t *connp = Q_TO_CONN(q); 814 815 icmp = connp->conn_icmp; 816 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 817 if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) { 818 rw_exit(&icmp->icmp_rwlock); 819 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 820 "icmp_disconnect: bad state, %d", icmp->icmp_state); 821 icmp_err_ack(q, mp, TOUTSTATE, 0); 822 return; 823 } 824 icmp->icmp_pending_op = T_DISCON_REQ; 825 icmp->icmp_v6src = icmp->icmp_bound_v6src; 826 icmp->icmp_state = TS_IDLE; 827 828 /* 829 * Send down bind to IP to remove the full binding and revert 830 * to the local address binding. 831 */ 832 if (icmp->icmp_family == AF_INET) { 833 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin_t), 0); 834 } else { 835 ASSERT(icmp->icmp_family == AF_INET6); 836 mp1 = icmp_ip_bind_mp(icmp, O_T_BIND_REQ, sizeof (sin6_t), 0); 837 } 838 if (mp1 == NULL) { 839 icmp->icmp_pending_op = -1; 840 rw_exit(&icmp->icmp_rwlock); 841 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 842 return; 843 } 844 mp = mi_tpi_ok_ack_alloc(mp); 845 if (mp == NULL) { 846 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 847 icmp->icmp_pending_op = -1; 848 rw_exit(&icmp->icmp_rwlock); 849 icmp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 850 return; 851 } 852 853 if (icmp->icmp_family == AF_INET6) { 854 int error; 855 856 /* Rebuild the header template */ 857 error = icmp_build_hdrs(icmp); 858 if (error != 0) { 859 icmp->icmp_pending_op = -1; 860 rw_exit(&icmp->icmp_rwlock); 861 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 862 freemsg(mp1); 863 return; 864 } 865 } 866 867 rw_exit(&icmp->icmp_rwlock); 868 /* Append the T_OK_ACK to the T_BIND_REQ for icmp_bind_result */ 869 linkb(mp1, mp); 870 871 if (icmp->icmp_family == AF_INET6) 872 mp1 = ip_bind_v6(q, mp1, connp, NULL); 873 else 874 mp1 = ip_bind_v4(q, mp1, connp); 875 876 /* The above return NULL if the bind needs to be deferred */ 877 if (mp1 != NULL) 878 icmp_bind_result(connp, mp1); 879 else 880 CONN_INC_REF(connp); 881 } 882 883 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 884 static void 885 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 886 { 887 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 888 qreply(q, mp); 889 } 890 891 /* Shorthand to generate and send TPI error acks to our client */ 892 static void 893 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 894 t_scalar_t t_error, int sys_error) 895 { 896 struct T_error_ack *teackp; 897 898 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 899 M_PCPROTO, T_ERROR_ACK)) != NULL) { 900 teackp = (struct T_error_ack *)mp->b_rptr; 901 teackp->ERROR_prim = primitive; 902 teackp->TLI_error = t_error; 903 teackp->UNIX_error = sys_error; 904 qreply(q, mp); 905 } 906 } 907 908 /* 909 * icmp_icmp_error is called by icmp_input to process ICMP 910 * messages passed up by IP. 911 * Generates the appropriate T_UDERROR_IND for permanent 912 * (non-transient) errors. 913 * Assumes that IP has pulled up everything up to and including 914 * the ICMP header. 
915 */ 916 static void 917 icmp_icmp_error(queue_t *q, mblk_t *mp) 918 { 919 icmph_t *icmph; 920 ipha_t *ipha; 921 int iph_hdr_length; 922 sin_t sin; 923 sin6_t sin6; 924 mblk_t *mp1; 925 int error = 0; 926 icmp_t *icmp = Q_TO_ICMP(q); 927 928 ipha = (ipha_t *)mp->b_rptr; 929 930 ASSERT(OK_32PTR(mp->b_rptr)); 931 932 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 933 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 934 icmp_icmp_error_ipv6(q, mp); 935 return; 936 } 937 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 938 939 /* Skip past the outer IP and ICMP headers */ 940 iph_hdr_length = IPH_HDR_LENGTH(ipha); 941 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 942 ipha = (ipha_t *)&icmph[1]; 943 iph_hdr_length = IPH_HDR_LENGTH(ipha); 944 945 switch (icmph->icmph_type) { 946 case ICMP_DEST_UNREACHABLE: 947 switch (icmph->icmph_code) { 948 case ICMP_FRAGMENTATION_NEEDED: 949 /* 950 * IP has already adjusted the path MTU. 951 */ 952 break; 953 case ICMP_PORT_UNREACHABLE: 954 case ICMP_PROTOCOL_UNREACHABLE: 955 error = ECONNREFUSED; 956 break; 957 default: 958 /* Transient errors */ 959 break; 960 } 961 break; 962 default: 963 /* Transient errors */ 964 break; 965 } 966 if (error == 0) { 967 freemsg(mp); 968 return; 969 } 970 971 /* 972 * Deliver T_UDERROR_IND when the application has asked for it. 973 * The socket layer enables this automatically when connected. 
974 */ 975 if (!icmp->icmp_dgram_errind) { 976 freemsg(mp); 977 return; 978 } 979 980 switch (icmp->icmp_family) { 981 case AF_INET: 982 sin = sin_null; 983 sin.sin_family = AF_INET; 984 sin.sin_addr.s_addr = ipha->ipha_dst; 985 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 986 error); 987 break; 988 case AF_INET6: 989 sin6 = sin6_null; 990 sin6.sin6_family = AF_INET6; 991 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 992 993 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 994 NULL, 0, error); 995 break; 996 } 997 if (mp1) 998 putnext(q, mp1); 999 freemsg(mp); 1000 } 1001 1002 /* 1003 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 1004 * for IPv6 packets. 1005 * Send permanent (non-transient) errors upstream. 1006 * Assumes that IP has pulled up all the extension headers as well 1007 * as the ICMPv6 header. 1008 */ 1009 static void 1010 icmp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 1011 { 1012 icmp6_t *icmp6; 1013 ip6_t *ip6h, *outer_ip6h; 1014 uint16_t iph_hdr_length; 1015 uint8_t *nexthdrp; 1016 sin6_t sin6; 1017 mblk_t *mp1; 1018 int error = 0; 1019 icmp_t *icmp = Q_TO_ICMP(q); 1020 1021 outer_ip6h = (ip6_t *)mp->b_rptr; 1022 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1023 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1024 else 1025 iph_hdr_length = IPV6_HDR_LEN; 1026 1027 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1028 ip6h = (ip6_t *)&icmp6[1]; 1029 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1030 freemsg(mp); 1031 return; 1032 } 1033 1034 switch (icmp6->icmp6_type) { 1035 case ICMP6_DST_UNREACH: 1036 switch (icmp6->icmp6_code) { 1037 case ICMP6_DST_UNREACH_NOPORT: 1038 error = ECONNREFUSED; 1039 break; 1040 case ICMP6_DST_UNREACH_ADMIN: 1041 case ICMP6_DST_UNREACH_NOROUTE: 1042 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1043 case ICMP6_DST_UNREACH_ADDR: 1044 /* Transient errors */ 1045 break; 1046 default: 1047 break; 1048 } 1049 break; 1050 case ICMP6_PACKET_TOO_BIG: { 
1051 struct T_unitdata_ind *tudi; 1052 struct T_opthdr *toh; 1053 size_t udi_size; 1054 mblk_t *newmp; 1055 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1056 sizeof (struct ip6_mtuinfo); 1057 sin6_t *sin6; 1058 struct ip6_mtuinfo *mtuinfo; 1059 1060 /* 1061 * If the application has requested to receive path mtu 1062 * information, send up an empty message containing an 1063 * IPV6_PATHMTU ancillary data item. 1064 */ 1065 if (!icmp->icmp_ipv6_recvpathmtu) 1066 break; 1067 1068 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1069 opt_length; 1070 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1071 BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors); 1072 break; 1073 } 1074 1075 /* 1076 * newmp->b_cont is left to NULL on purpose. This is an 1077 * empty message containing only ancillary data. 1078 */ 1079 newmp->b_datap->db_type = M_PROTO; 1080 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1081 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1082 tudi->PRIM_type = T_UNITDATA_IND; 1083 tudi->SRC_length = sizeof (sin6_t); 1084 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1085 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1086 tudi->OPT_length = opt_length; 1087 1088 sin6 = (sin6_t *)&tudi[1]; 1089 bzero(sin6, sizeof (sin6_t)); 1090 sin6->sin6_family = AF_INET6; 1091 sin6->sin6_addr = icmp->icmp_v6dst; 1092 1093 toh = (struct T_opthdr *)&sin6[1]; 1094 toh->level = IPPROTO_IPV6; 1095 toh->name = IPV6_PATHMTU; 1096 toh->len = opt_length; 1097 toh->status = 0; 1098 1099 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1100 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1101 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1102 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1103 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1104 /* 1105 * We've consumed everything we need from the original 1106 * message. Free it, then send our empty message. 
1107 */ 1108 freemsg(mp); 1109 putnext(q, newmp); 1110 return; 1111 } 1112 case ICMP6_TIME_EXCEEDED: 1113 /* Transient errors */ 1114 break; 1115 case ICMP6_PARAM_PROB: 1116 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1117 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1118 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1119 (uchar_t *)nexthdrp) { 1120 error = ECONNREFUSED; 1121 break; 1122 } 1123 break; 1124 } 1125 if (error == 0) { 1126 freemsg(mp); 1127 return; 1128 } 1129 1130 /* 1131 * Deliver T_UDERROR_IND when the application has asked for it. 1132 * The socket layer enables this automatically when connected. 1133 */ 1134 if (!icmp->icmp_dgram_errind) { 1135 freemsg(mp); 1136 return; 1137 } 1138 1139 sin6 = sin6_null; 1140 sin6.sin6_family = AF_INET6; 1141 sin6.sin6_addr = ip6h->ip6_dst; 1142 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1143 1144 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 1145 error); 1146 if (mp1) 1147 putnext(q, mp1); 1148 freemsg(mp); 1149 } 1150 1151 /* 1152 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1153 * The local address is filled in if endpoint is bound. The remote address 1154 * is filled in if remote address has been precified ("connected endpoint") 1155 * (The concept of connected CLTS sockets is alien to published TPI 1156 * but we support it anyway). 
1157 */ 1158 static void 1159 icmp_addr_req(queue_t *q, mblk_t *mp) 1160 { 1161 icmp_t *icmp = Q_TO_ICMP(q); 1162 mblk_t *ackmp; 1163 struct T_addr_ack *taa; 1164 1165 /* Make it large enough for worst case */ 1166 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1167 2 * sizeof (sin6_t), 1); 1168 if (ackmp == NULL) { 1169 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1170 return; 1171 } 1172 taa = (struct T_addr_ack *)ackmp->b_rptr; 1173 1174 bzero(taa, sizeof (struct T_addr_ack)); 1175 ackmp->b_wptr = (uchar_t *)&taa[1]; 1176 1177 taa->PRIM_type = T_ADDR_ACK; 1178 ackmp->b_datap->db_type = M_PCPROTO; 1179 rw_enter(&icmp->icmp_rwlock, RW_READER); 1180 /* 1181 * Note: Following code assumes 32 bit alignment of basic 1182 * data structures like sin_t and struct T_addr_ack. 1183 */ 1184 if (icmp->icmp_state != TS_UNBND) { 1185 /* 1186 * Fill in local address 1187 */ 1188 taa->LOCADDR_offset = sizeof (*taa); 1189 if (icmp->icmp_family == AF_INET) { 1190 sin_t *sin; 1191 1192 taa->LOCADDR_length = sizeof (sin_t); 1193 sin = (sin_t *)&taa[1]; 1194 /* Fill zeroes and then intialize non-zero fields */ 1195 *sin = sin_null; 1196 sin->sin_family = AF_INET; 1197 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1198 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1199 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1200 sin->sin_addr.s_addr); 1201 } else { 1202 /* 1203 * INADDR_ANY 1204 * icmp_v6src is not set, we might be bound to 1205 * broadcast/multicast. 
Use icmp_bound_v6src as 1206 * local address instead (that could 1207 * also still be INADDR_ANY) 1208 */ 1209 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1210 sin->sin_addr.s_addr); 1211 } 1212 ackmp->b_wptr = (uchar_t *)&sin[1]; 1213 } else { 1214 sin6_t *sin6; 1215 1216 ASSERT(icmp->icmp_family == AF_INET6); 1217 taa->LOCADDR_length = sizeof (sin6_t); 1218 sin6 = (sin6_t *)&taa[1]; 1219 /* Fill zeroes and then intialize non-zero fields */ 1220 *sin6 = sin6_null; 1221 sin6->sin6_family = AF_INET6; 1222 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1223 sin6->sin6_addr = icmp->icmp_v6src; 1224 } else { 1225 /* 1226 * UNSPECIFIED 1227 * icmp_v6src is not set, we might be bound to 1228 * broadcast/multicast. Use icmp_bound_v6src as 1229 * local address instead (that could 1230 * also still be UNSPECIFIED) 1231 */ 1232 sin6->sin6_addr = icmp->icmp_bound_v6src; 1233 } 1234 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1235 } 1236 } 1237 rw_exit(&icmp->icmp_rwlock); 1238 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1239 qreply(q, ackmp); 1240 } 1241 1242 static void 1243 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1244 { 1245 *tap = icmp_g_t_info_ack; 1246 1247 if (icmp->icmp_family == AF_INET6) 1248 tap->ADDR_size = sizeof (sin6_t); 1249 else 1250 tap->ADDR_size = sizeof (sin_t); 1251 tap->CURRENT_state = icmp->icmp_state; 1252 tap->OPT_size = icmp_max_optsize; 1253 } 1254 1255 /* 1256 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1257 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1258 * icmp_g_t_info_ack. The current state of the stream is copied from 1259 * icmp_state. 
1260 */ 1261 static void 1262 icmp_capability_req(queue_t *q, mblk_t *mp) 1263 { 1264 icmp_t *icmp = Q_TO_ICMP(q); 1265 t_uscalar_t cap_bits1; 1266 struct T_capability_ack *tcap; 1267 1268 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1269 1270 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1271 mp->b_datap->db_type, T_CAPABILITY_ACK); 1272 if (!mp) 1273 return; 1274 1275 tcap = (struct T_capability_ack *)mp->b_rptr; 1276 tcap->CAP_bits1 = 0; 1277 1278 if (cap_bits1 & TC1_INFO) { 1279 icmp_copy_info(&tcap->INFO_ack, icmp); 1280 tcap->CAP_bits1 |= TC1_INFO; 1281 } 1282 1283 qreply(q, mp); 1284 } 1285 1286 /* 1287 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1288 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1289 * The current state of the stream is copied from icmp_state. 1290 */ 1291 static void 1292 icmp_info_req(queue_t *q, mblk_t *mp) 1293 { 1294 icmp_t *icmp = Q_TO_ICMP(q); 1295 1296 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1297 T_INFO_ACK); 1298 if (!mp) 1299 return; 1300 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1301 qreply(q, mp); 1302 } 1303 1304 /* 1305 * IP recognizes seven kinds of bind requests: 1306 * 1307 * - A zero-length address binds only to the protocol number. 1308 * 1309 * - A 4-byte address is treated as a request to 1310 * validate that the address is a valid local IPv4 1311 * address, appropriate for an application to bind to. 1312 * IP does the verification, but does not make any note 1313 * of the address at this time. 1314 * 1315 * - A 16-byte address contains is treated as a request 1316 * to validate a local IPv6 address, as the 4-byte 1317 * address case above. 1318 * 1319 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1320 * use it for the inbound fanout of packets. 1321 * 1322 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1323 * use it for the inbound fanout of packets. 
1324 * 1325 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1326 * information consisting of local and remote addresses 1327 * and ports (unused for raw sockets). In this case, the addresses are both 1328 * validated as appropriate for this operation, and, if 1329 * so, the information is retained for use in the 1330 * inbound fanout. 1331 * 1332 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1333 * fanout information, like the 12-byte case above. 1334 * 1335 * IP will also fill in the IRE request mblk with information 1336 * regarding our peer. In all cases, we notify IP of our protocol 1337 * type by appending a single protocol byte to the bind request. 1338 */ 1339 static mblk_t * 1340 icmp_ip_bind_mp(icmp_t *icmp, t_scalar_t bind_prim, t_scalar_t addr_length, 1341 in_port_t fport) 1342 { 1343 char *cp; 1344 mblk_t *mp; 1345 struct T_bind_req *tbr; 1346 ipa_conn_t *ac; 1347 ipa6_conn_t *ac6; 1348 sin_t *sin; 1349 sin6_t *sin6; 1350 1351 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1352 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 1353 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1354 if (mp == NULL) 1355 return (NULL); 1356 mp->b_datap->db_type = M_PROTO; 1357 tbr = (struct T_bind_req *)mp->b_rptr; 1358 tbr->PRIM_type = bind_prim; 1359 tbr->ADDR_offset = sizeof (*tbr); 1360 tbr->CONIND_number = 0; 1361 tbr->ADDR_length = addr_length; 1362 cp = (char *)&tbr[1]; 1363 switch (addr_length) { 1364 case sizeof (ipa_conn_t): 1365 ASSERT(icmp->icmp_family == AF_INET); 1366 /* Append a request for an IRE */ 1367 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1368 if (mp->b_cont == NULL) { 1369 freemsg(mp); 1370 return (NULL); 1371 } 1372 mp->b_cont->b_wptr += sizeof (ire_t); 1373 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1374 1375 /* cp known to be 32 bit aligned */ 1376 ac = (ipa_conn_t *)cp; 1377 ac->ac_laddr = V4_PART_OF_V6(icmp->icmp_v6src); 1378 ac->ac_faddr = V4_PART_OF_V6(icmp->icmp_v6dst); 1379 
ac->ac_fport = fport; 1380 ac->ac_lport = 0; 1381 break; 1382 1383 case sizeof (ipa6_conn_t): 1384 ASSERT(icmp->icmp_family == AF_INET6); 1385 /* Append a request for an IRE */ 1386 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1387 if (mp->b_cont == NULL) { 1388 freemsg(mp); 1389 return (NULL); 1390 } 1391 mp->b_cont->b_wptr += sizeof (ire_t); 1392 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1393 1394 /* cp known to be 32 bit aligned */ 1395 ac6 = (ipa6_conn_t *)cp; 1396 ac6->ac6_laddr = icmp->icmp_v6src; 1397 ac6->ac6_faddr = icmp->icmp_v6dst; 1398 ac6->ac6_fport = fport; 1399 ac6->ac6_lport = 0; 1400 break; 1401 1402 case sizeof (sin_t): 1403 ASSERT(icmp->icmp_family == AF_INET); 1404 /* Append a request for an IRE */ 1405 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1406 if (!mp->b_cont) { 1407 freemsg(mp); 1408 return (NULL); 1409 } 1410 mp->b_cont->b_wptr += sizeof (ire_t); 1411 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1412 1413 sin = (sin_t *)cp; 1414 *sin = sin_null; 1415 sin->sin_family = AF_INET; 1416 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 1417 break; 1418 1419 case sizeof (sin6_t): 1420 ASSERT(icmp->icmp_family == AF_INET6); 1421 /* Append a request for an IRE */ 1422 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1423 if (!mp->b_cont) { 1424 freemsg(mp); 1425 return (NULL); 1426 } 1427 mp->b_cont->b_wptr += sizeof (ire_t); 1428 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1429 1430 sin6 = (sin6_t *)cp; 1431 *sin6 = sin6_null; 1432 sin6->sin6_family = AF_INET6; 1433 sin6->sin6_addr = icmp->icmp_bound_v6src; 1434 break; 1435 } 1436 /* Add protocol number to end */ 1437 cp[addr_length] = icmp->icmp_proto; 1438 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1439 return (mp); 1440 } 1441 1442 /* For /dev/icmp aka AF_INET open */ 1443 static int 1444 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1445 { 1446 return (icmp_open(q, devp, flag, sflag, credp, B_FALSE)); 1447 } 1448 1449 /* For 
/dev/icmp6 aka AF_INET6 open */ 1450 static int 1451 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1452 { 1453 return (icmp_open(q, devp, flag, sflag, credp, B_TRUE)); 1454 } 1455 1456 /* 1457 * This is the open routine for icmp. It allocates a icmp_t structure for 1458 * the stream and, on the first open of the module, creates an ND table. 1459 */ 1460 /*ARGSUSED2*/ 1461 static int 1462 icmp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1463 boolean_t isv6) 1464 { 1465 int err; 1466 icmp_t *icmp; 1467 conn_t *connp; 1468 dev_t conn_dev; 1469 zoneid_t zoneid; 1470 netstack_t *ns; 1471 icmp_stack_t *is; 1472 1473 /* If the stream is already open, return immediately. */ 1474 if (q->q_ptr != NULL) 1475 return (0); 1476 1477 if (sflag == MODOPEN) 1478 return (EINVAL); 1479 1480 ns = netstack_find_by_cred(credp); 1481 ASSERT(ns != NULL); 1482 is = ns->netstack_icmp; 1483 ASSERT(is != NULL); 1484 1485 /* 1486 * For exclusive stacks we set the zoneid to zero 1487 * to make ICMP operate as if in the global zone. 1488 */ 1489 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 1490 zoneid = GLOBAL_ZONEID; 1491 else 1492 zoneid = crgetzoneid(credp); 1493 1494 if ((conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) { 1495 netstack_rele(ns); 1496 return (EBUSY); 1497 } 1498 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1499 1500 connp = ipcl_conn_create(IPCL_RAWIPCONN, KM_SLEEP, ns); 1501 connp->conn_dev = conn_dev; 1502 icmp = connp->conn_icmp; 1503 1504 /* 1505 * ipcl_conn_create did a netstack_hold. Undo the hold that was 1506 * done by netstack_find_by_cred() 1507 */ 1508 netstack_rele(ns); 1509 1510 /* 1511 * Initialize the icmp_t structure for this stream. 
1512 */ 1513 q->q_ptr = connp; 1514 WR(q)->q_ptr = connp; 1515 connp->conn_rq = q; 1516 connp->conn_wq = WR(q); 1517 1518 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1519 ASSERT(connp->conn_ulp == IPPROTO_ICMP); 1520 ASSERT(connp->conn_icmp == icmp); 1521 ASSERT(icmp->icmp_connp == connp); 1522 1523 /* Set the initial state of the stream and the privilege status. */ 1524 icmp->icmp_state = TS_UNBND; 1525 if (isv6) { 1526 icmp->icmp_ipversion = IPV6_VERSION; 1527 icmp->icmp_family = AF_INET6; 1528 connp->conn_ulp = IPPROTO_ICMPV6; 1529 /* May be changed by a SO_PROTOTYPE socket option. */ 1530 icmp->icmp_proto = IPPROTO_ICMPV6; 1531 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1532 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1533 icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; 1534 connp->conn_af_isv6 = B_TRUE; 1535 connp->conn_flags |= IPCL_ISV6; 1536 } else { 1537 icmp->icmp_ipversion = IPV4_VERSION; 1538 icmp->icmp_family = AF_INET; 1539 /* May be changed by a SO_PROTOTYPE socket option. */ 1540 icmp->icmp_proto = IPPROTO_ICMP; 1541 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1542 icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; 1543 connp->conn_af_isv6 = B_FALSE; 1544 connp->conn_flags &= ~IPCL_ISV6; 1545 } 1546 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1547 icmp->icmp_pending_op = -1; 1548 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1549 connp->conn_zoneid = zoneid; 1550 1551 /* 1552 * If the caller has the process-wide flag set, then default to MAC 1553 * exempt mode. This allows read-down to unlabeled hosts. 
1554 */ 1555 if (getpflags(NET_MAC_AWARE, credp) != 0) 1556 icmp->icmp_mac_exempt = B_TRUE; 1557 1558 connp->conn_ulp_labeled = is_system_labeled(); 1559 1560 icmp->icmp_is = is; 1561 1562 q->q_hiwat = is->is_recv_hiwat; 1563 WR(q)->q_hiwat = is->is_xmit_hiwat; 1564 WR(q)->q_lowat = is->is_xmit_lowat; 1565 1566 connp->conn_recv = icmp_input; 1567 crhold(credp); 1568 connp->conn_cred = credp; 1569 1570 mutex_enter(&connp->conn_lock); 1571 connp->conn_state_flags &= ~CONN_INCIPIENT; 1572 mutex_exit(&connp->conn_lock); 1573 1574 qprocson(q); 1575 1576 if (icmp->icmp_family == AF_INET6) { 1577 /* Build initial header template for transmit */ 1578 if ((err = icmp_build_hdrs(icmp)) != 0) { 1579 rw_exit(&icmp->icmp_rwlock); 1580 qprocsoff(q); 1581 ipcl_conn_destroy(connp); 1582 return (err); 1583 } 1584 } 1585 rw_exit(&icmp->icmp_rwlock); 1586 1587 /* Set the Stream head write offset. */ 1588 (void) mi_set_sth_wroff(q, 1589 icmp->icmp_max_hdr_len + is->is_wroff_extra); 1590 (void) mi_set_sth_hiwat(q, q->q_hiwat); 1591 1592 return (0); 1593 } 1594 1595 /* 1596 * Which ICMP options OK to set through T_UNITDATA_REQ... 
1597 */ 1598 /* ARGSUSED */ 1599 static boolean_t 1600 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1601 { 1602 return (B_TRUE); 1603 } 1604 1605 /* 1606 * This routine gets default values of certain options whose default 1607 * values are maintained by protcol specific code 1608 */ 1609 /* ARGSUSED */ 1610 int 1611 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1612 { 1613 icmp_t *icmp = Q_TO_ICMP(q); 1614 icmp_stack_t *is = icmp->icmp_is; 1615 int *i1 = (int *)ptr; 1616 1617 switch (level) { 1618 case IPPROTO_IP: 1619 switch (name) { 1620 case IP_MULTICAST_TTL: 1621 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1622 return (sizeof (uchar_t)); 1623 case IP_MULTICAST_LOOP: 1624 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1625 return (sizeof (uchar_t)); 1626 } 1627 break; 1628 case IPPROTO_IPV6: 1629 switch (name) { 1630 case IPV6_MULTICAST_HOPS: 1631 *i1 = IP_DEFAULT_MULTICAST_TTL; 1632 return (sizeof (int)); 1633 case IPV6_MULTICAST_LOOP: 1634 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1635 return (sizeof (int)); 1636 case IPV6_UNICAST_HOPS: 1637 *i1 = is->is_ipv6_hoplimit; 1638 return (sizeof (int)); 1639 } 1640 break; 1641 case IPPROTO_ICMPV6: 1642 switch (name) { 1643 case ICMP6_FILTER: 1644 /* Make it look like "pass all" */ 1645 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1646 return (sizeof (icmp6_filter_t)); 1647 } 1648 break; 1649 } 1650 return (-1); 1651 } 1652 1653 /* 1654 * This routine retrieves the current status of socket options. 1655 * It returns the size of the option retrieved. 
1656 */ 1657 int 1658 icmp_opt_get_locked(queue_t *q, int level, int name, uchar_t *ptr) 1659 { 1660 conn_t *connp = Q_TO_CONN(q); 1661 icmp_t *icmp = connp->conn_icmp; 1662 icmp_stack_t *is = icmp->icmp_is; 1663 int *i1 = (int *)ptr; 1664 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1665 1666 switch (level) { 1667 case SOL_SOCKET: 1668 switch (name) { 1669 case SO_DEBUG: 1670 *i1 = icmp->icmp_debug; 1671 break; 1672 case SO_TYPE: 1673 *i1 = SOCK_RAW; 1674 break; 1675 case SO_PROTOTYPE: 1676 *i1 = icmp->icmp_proto; 1677 break; 1678 case SO_REUSEADDR: 1679 *i1 = icmp->icmp_reuseaddr; 1680 break; 1681 1682 /* 1683 * The following three items are available here, 1684 * but are only meaningful to IP. 1685 */ 1686 case SO_DONTROUTE: 1687 *i1 = icmp->icmp_dontroute; 1688 break; 1689 case SO_USELOOPBACK: 1690 *i1 = icmp->icmp_useloopback; 1691 break; 1692 case SO_BROADCAST: 1693 *i1 = icmp->icmp_broadcast; 1694 break; 1695 1696 case SO_SNDBUF: 1697 ASSERT(q->q_hiwat <= INT_MAX); 1698 *i1 = (int)q->q_hiwat; 1699 break; 1700 case SO_RCVBUF: 1701 ASSERT(RD(q)->q_hiwat <= INT_MAX); 1702 *i1 = (int)RD(q)->q_hiwat; 1703 break; 1704 case SO_DGRAM_ERRIND: 1705 *i1 = icmp->icmp_dgram_errind; 1706 break; 1707 case SO_TIMESTAMP: 1708 *i1 = icmp->icmp_timestamp; 1709 break; 1710 case SO_MAC_EXEMPT: 1711 *i1 = icmp->icmp_mac_exempt; 1712 break; 1713 case SO_DOMAIN: 1714 *i1 = icmp->icmp_family; 1715 break; 1716 1717 /* 1718 * Following four not meaningful for icmp 1719 * Action is same as "default" to which we fallthrough 1720 * so we keep them in comments. 1721 * case SO_LINGER: 1722 * case SO_KEEPALIVE: 1723 * case SO_OOBINLINE: 1724 * case SO_ALLZONES: 1725 */ 1726 default: 1727 return (-1); 1728 } 1729 break; 1730 case IPPROTO_IP: 1731 /* 1732 * Only allow IPv4 option processing on IPv4 sockets. 
1733 */ 1734 if (icmp->icmp_family != AF_INET) 1735 return (-1); 1736 1737 switch (name) { 1738 case IP_OPTIONS: 1739 case T_IP_OPTIONS: 1740 /* Options are passed up with each packet */ 1741 return (0); 1742 case IP_HDRINCL: 1743 *i1 = (int)icmp->icmp_hdrincl; 1744 break; 1745 case IP_TOS: 1746 case T_IP_TOS: 1747 *i1 = (int)icmp->icmp_type_of_service; 1748 break; 1749 case IP_TTL: 1750 *i1 = (int)icmp->icmp_ttl; 1751 break; 1752 case IP_MULTICAST_IF: 1753 /* 0 address if not set */ 1754 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1755 return (sizeof (ipaddr_t)); 1756 case IP_MULTICAST_TTL: 1757 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1758 return (sizeof (uchar_t)); 1759 case IP_MULTICAST_LOOP: 1760 *ptr = connp->conn_multicast_loop; 1761 return (sizeof (uint8_t)); 1762 case IP_BOUND_IF: 1763 /* Zero if not set */ 1764 *i1 = icmp->icmp_bound_if; 1765 break; /* goto sizeof (int) option return */ 1766 case IP_UNSPEC_SRC: 1767 *ptr = icmp->icmp_unspec_source; 1768 break; /* goto sizeof (int) option return */ 1769 case IP_XMIT_IF: 1770 *i1 = icmp->icmp_xmit_if; 1771 break; /* goto sizeof (int) option return */ 1772 case IP_RECVIF: 1773 *ptr = icmp->icmp_recvif; 1774 break; /* goto sizeof (int) option return */ 1775 case IP_RECVPKTINFO: 1776 /* 1777 * This also handles IP_PKTINFO. 1778 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1779 * Differentiation is based on the size of the argument 1780 * passed in. 1781 * This option is handled in IP which will return an 1782 * error for IP_PKTINFO as it's not supported as a 1783 * sticky option. 1784 */ 1785 return (-EINVAL); 1786 /* 1787 * Cannot "get" the value of following options 1788 * at this level. Action is same as "default" to 1789 * which we fallthrough so we keep them in comments. 
1790 * 1791 * case IP_ADD_MEMBERSHIP: 1792 * case IP_DROP_MEMBERSHIP: 1793 * case IP_BLOCK_SOURCE: 1794 * case IP_UNBLOCK_SOURCE: 1795 * case IP_ADD_SOURCE_MEMBERSHIP: 1796 * case IP_DROP_SOURCE_MEMBERSHIP: 1797 * case MCAST_JOIN_GROUP: 1798 * case MCAST_LEAVE_GROUP: 1799 * case MCAST_BLOCK_SOURCE: 1800 * case MCAST_UNBLOCK_SOURCE: 1801 * case MCAST_JOIN_SOURCE_GROUP: 1802 * case MCAST_LEAVE_SOURCE_GROUP: 1803 * case MRT_INIT: 1804 * case MRT_DONE: 1805 * case MRT_ADD_VIF: 1806 * case MRT_DEL_VIF: 1807 * case MRT_ADD_MFC: 1808 * case MRT_DEL_MFC: 1809 * case MRT_VERSION: 1810 * case MRT_ASSERT: 1811 * case IP_SEC_OPT: 1812 * case IP_DONTFAILOVER_IF: 1813 * case IP_NEXTHOP: 1814 */ 1815 default: 1816 return (-1); 1817 } 1818 break; 1819 case IPPROTO_IPV6: 1820 /* 1821 * Only allow IPv6 option processing on native IPv6 sockets. 1822 */ 1823 if (icmp->icmp_family != AF_INET6) 1824 return (-1); 1825 switch (name) { 1826 case IPV6_UNICAST_HOPS: 1827 *i1 = (unsigned int)icmp->icmp_ttl; 1828 break; 1829 case IPV6_MULTICAST_IF: 1830 /* 0 index if not set */ 1831 *i1 = icmp->icmp_multicast_if_index; 1832 break; 1833 case IPV6_MULTICAST_HOPS: 1834 *i1 = icmp->icmp_multicast_ttl; 1835 break; 1836 case IPV6_MULTICAST_LOOP: 1837 *i1 = connp->conn_multicast_loop; 1838 break; 1839 case IPV6_BOUND_IF: 1840 /* Zero if not set */ 1841 *i1 = icmp->icmp_bound_if; 1842 break; 1843 case IPV6_UNSPEC_SRC: 1844 *i1 = icmp->icmp_unspec_source; 1845 break; 1846 case IPV6_CHECKSUM: 1847 /* 1848 * Return offset or -1 if no checksum offset. 
1849 * Does not apply to IPPROTO_ICMPV6 1850 */ 1851 if (icmp->icmp_proto == IPPROTO_ICMPV6) 1852 return (-1); 1853 1854 if (icmp->icmp_raw_checksum) { 1855 *i1 = icmp->icmp_checksum_off; 1856 } else { 1857 *i1 = -1; 1858 } 1859 break; 1860 case IPV6_JOIN_GROUP: 1861 case IPV6_LEAVE_GROUP: 1862 case MCAST_JOIN_GROUP: 1863 case MCAST_LEAVE_GROUP: 1864 case MCAST_BLOCK_SOURCE: 1865 case MCAST_UNBLOCK_SOURCE: 1866 case MCAST_JOIN_SOURCE_GROUP: 1867 case MCAST_LEAVE_SOURCE_GROUP: 1868 /* cannot "get" the value for these */ 1869 return (-1); 1870 case IPV6_RECVPKTINFO: 1871 *i1 = icmp->icmp_ip_recvpktinfo; 1872 break; 1873 case IPV6_RECVTCLASS: 1874 *i1 = icmp->icmp_ipv6_recvtclass; 1875 break; 1876 case IPV6_RECVPATHMTU: 1877 *i1 = icmp->icmp_ipv6_recvpathmtu; 1878 break; 1879 case IPV6_V6ONLY: 1880 *i1 = 1; 1881 break; 1882 case IPV6_RECVHOPLIMIT: 1883 *i1 = icmp->icmp_ipv6_recvhoplimit; 1884 break; 1885 case IPV6_RECVHOPOPTS: 1886 *i1 = icmp->icmp_ipv6_recvhopopts; 1887 break; 1888 case IPV6_RECVDSTOPTS: 1889 *i1 = icmp->icmp_ipv6_recvdstopts; 1890 break; 1891 case _OLD_IPV6_RECVDSTOPTS: 1892 *i1 = icmp->icmp_old_ipv6_recvdstopts; 1893 break; 1894 case IPV6_RECVRTHDRDSTOPTS: 1895 *i1 = icmp->icmp_ipv6_recvrtdstopts; 1896 break; 1897 case IPV6_RECVRTHDR: 1898 *i1 = icmp->icmp_ipv6_recvrthdr; 1899 break; 1900 case IPV6_PKTINFO: { 1901 /* XXX assumes that caller has room for max size! 
*/ 1902 struct in6_pktinfo *pkti; 1903 1904 pkti = (struct in6_pktinfo *)ptr; 1905 if (ipp->ipp_fields & IPPF_IFINDEX) 1906 pkti->ipi6_ifindex = ipp->ipp_ifindex; 1907 else 1908 pkti->ipi6_ifindex = 0; 1909 if (ipp->ipp_fields & IPPF_ADDR) 1910 pkti->ipi6_addr = ipp->ipp_addr; 1911 else 1912 pkti->ipi6_addr = ipv6_all_zeros; 1913 return (sizeof (struct in6_pktinfo)); 1914 } 1915 case IPV6_NEXTHOP: { 1916 sin6_t *sin6 = (sin6_t *)ptr; 1917 1918 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 1919 return (0); 1920 *sin6 = sin6_null; 1921 sin6->sin6_family = AF_INET6; 1922 sin6->sin6_addr = ipp->ipp_nexthop; 1923 return (sizeof (sin6_t)); 1924 } 1925 case IPV6_HOPOPTS: 1926 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 1927 return (0); 1928 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 1929 return (0); 1930 bcopy((char *)ipp->ipp_hopopts + 1931 icmp->icmp_label_len_v6, ptr, 1932 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1933 if (icmp->icmp_label_len_v6 > 0) { 1934 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 1935 ptr[1] = (ipp->ipp_hopoptslen - 1936 icmp->icmp_label_len_v6 + 7) / 8 - 1; 1937 } 1938 return (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 1939 case IPV6_RTHDRDSTOPTS: 1940 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 1941 return (0); 1942 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 1943 return (ipp->ipp_rtdstoptslen); 1944 case IPV6_RTHDR: 1945 if (!(ipp->ipp_fields & IPPF_RTHDR)) 1946 return (0); 1947 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 1948 return (ipp->ipp_rthdrlen); 1949 case IPV6_DSTOPTS: 1950 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 1951 return (0); 1952 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 1953 return (ipp->ipp_dstoptslen); 1954 case IPV6_PATHMTU: 1955 if (!(ipp->ipp_fields & IPPF_PATHMTU)) 1956 return (0); 1957 1958 return (ip_fill_mtuinfo(&icmp->icmp_v6dst, 0, 1959 (struct ip6_mtuinfo *)ptr, 1960 is->is_netstack)); 1961 case IPV6_TCLASS: 1962 if (ipp->ipp_fields & IPPF_TCLASS) 1963 *i1 = ipp->ipp_tclass; 1964 else 1965 *i1 = 
IPV6_FLOW_TCLASS( 1966 IPV6_DEFAULT_VERS_AND_FLOW); 1967 break; 1968 default: 1969 return (-1); 1970 } 1971 break; 1972 case IPPROTO_ICMPV6: 1973 /* 1974 * Only allow IPv6 option processing on native IPv6 sockets. 1975 */ 1976 if (icmp->icmp_family != AF_INET6) 1977 return (-1); 1978 1979 if (icmp->icmp_proto != IPPROTO_ICMPV6) 1980 return (-1); 1981 1982 switch (name) { 1983 case ICMP6_FILTER: 1984 if (icmp->icmp_filter == NULL) { 1985 /* Make it look like "pass all" */ 1986 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1987 } else { 1988 (void) bcopy(icmp->icmp_filter, ptr, 1989 sizeof (icmp6_filter_t)); 1990 } 1991 return (sizeof (icmp6_filter_t)); 1992 default: 1993 return (-1); 1994 } 1995 default: 1996 return (-1); 1997 } 1998 return (sizeof (int)); 1999 } 2000 2001 /* 2002 * This routine retrieves the current status of socket options. 2003 * It returns the size of the option retrieved. 2004 */ 2005 int 2006 icmp_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 2007 { 2008 icmp_t *icmp = Q_TO_ICMP(q); 2009 int err; 2010 2011 rw_enter(&icmp->icmp_rwlock, RW_READER); 2012 err = icmp_opt_get_locked(q, level, name, ptr); 2013 rw_exit(&icmp->icmp_rwlock); 2014 return (err); 2015 } 2016 2017 2018 /* This routine sets socket options. */ 2019 /* ARGSUSED */ 2020 int 2021 icmp_opt_set_locked(queue_t *q, uint_t optset_context, int level, int name, 2022 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2023 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2024 { 2025 conn_t *connp = Q_TO_CONN(q); 2026 icmp_t *icmp = connp->conn_icmp; 2027 icmp_stack_t *is = icmp->icmp_is; 2028 int *i1 = (int *)invalp; 2029 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2030 boolean_t checkonly; 2031 int error; 2032 2033 switch (optset_context) { 2034 case SETFN_OPTCOM_CHECKONLY: 2035 checkonly = B_TRUE; 2036 /* 2037 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 2038 * inlen != 0 implies value supplied and 2039 * we have to "pretend" to set it. 
2040 * inlen == 0 implies that there is no 2041 * value part in T_CHECK request and just validation 2042 * done elsewhere should be enough, we just return here. 2043 */ 2044 if (inlen == 0) { 2045 *outlenp = 0; 2046 return (0); 2047 } 2048 break; 2049 case SETFN_OPTCOM_NEGOTIATE: 2050 checkonly = B_FALSE; 2051 break; 2052 case SETFN_UD_NEGOTIATE: 2053 case SETFN_CONN_NEGOTIATE: 2054 checkonly = B_FALSE; 2055 /* 2056 * Negotiating local and "association-related" options 2057 * through T_UNITDATA_REQ. 2058 * 2059 * Following routine can filter out ones we do not 2060 * want to be "set" this way. 2061 */ 2062 if (!icmp_opt_allow_udr_set(level, name)) { 2063 *outlenp = 0; 2064 return (EINVAL); 2065 } 2066 break; 2067 default: 2068 /* 2069 * We should never get here 2070 */ 2071 *outlenp = 0; 2072 return (EINVAL); 2073 } 2074 2075 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 2076 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 2077 2078 /* 2079 * For fixed length options, no sanity check 2080 * of passed in length is done. It is assumed *_optcom_req() 2081 * routines do the right thing. 
2082 */ 2083 2084 switch (level) { 2085 case SOL_SOCKET: 2086 switch (name) { 2087 case SO_DEBUG: 2088 if (!checkonly) 2089 icmp->icmp_debug = onoff; 2090 break; 2091 case SO_PROTOTYPE: 2092 if ((*i1 & 0xFF) != IPPROTO_ICMP && 2093 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 2094 secpolicy_net_rawaccess(cr) != 0) { 2095 *outlenp = 0; 2096 return (EACCES); 2097 } 2098 /* Can't use IPPROTO_RAW with IPv6 */ 2099 if ((*i1 & 0xFF) == IPPROTO_RAW && 2100 icmp->icmp_family == AF_INET6) { 2101 *outlenp = 0; 2102 return (EPROTONOSUPPORT); 2103 } 2104 if (checkonly) { 2105 /* T_CHECK case */ 2106 *(int *)outvalp = (*i1 & 0xFF); 2107 break; 2108 } 2109 icmp->icmp_proto = *i1 & 0xFF; 2110 if ((icmp->icmp_proto == IPPROTO_RAW || 2111 icmp->icmp_proto == IPPROTO_IGMP) && 2112 icmp->icmp_family == AF_INET) 2113 icmp->icmp_hdrincl = 1; 2114 else 2115 icmp->icmp_hdrincl = 0; 2116 2117 if (icmp->icmp_family == AF_INET6 && 2118 icmp->icmp_proto == IPPROTO_ICMPV6) { 2119 /* Set offset for icmp6_cksum */ 2120 icmp->icmp_raw_checksum = 0; 2121 icmp->icmp_checksum_off = 2; 2122 } 2123 if (icmp->icmp_proto == IPPROTO_UDP || 2124 icmp->icmp_proto == IPPROTO_TCP || 2125 icmp->icmp_proto == IPPROTO_SCTP) { 2126 icmp->icmp_no_tp_cksum = 1; 2127 icmp->icmp_sticky_ipp.ipp_fields |= 2128 IPPF_NO_CKSUM; 2129 } else { 2130 icmp->icmp_no_tp_cksum = 0; 2131 icmp->icmp_sticky_ipp.ipp_fields &= 2132 ~IPPF_NO_CKSUM; 2133 } 2134 2135 if (icmp->icmp_filter != NULL && 2136 icmp->icmp_proto != IPPROTO_ICMPV6) { 2137 kmem_free(icmp->icmp_filter, 2138 sizeof (icmp6_filter_t)); 2139 icmp->icmp_filter = NULL; 2140 } 2141 2142 /* Rebuild the header template */ 2143 error = icmp_build_hdrs(icmp); 2144 if (error != 0) { 2145 *outlenp = 0; 2146 return (error); 2147 } 2148 2149 /* 2150 * For SCTP, we don't use icmp_bind_proto() for 2151 * raw socket binding. Note that we do not need 2152 * to set *outlenp. 2153 * FIXME: how does SCTP work? 
2154 */ 2155 if (icmp->icmp_proto == IPPROTO_SCTP) 2156 return (0); 2157 2158 *outlenp = sizeof (int); 2159 *(int *)outvalp = *i1 & 0xFF; 2160 2161 /* Drop lock across the bind operation */ 2162 rw_exit(&icmp->icmp_rwlock); 2163 icmp_bind_proto(q); 2164 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2165 return (0); 2166 case SO_REUSEADDR: 2167 if (!checkonly) 2168 icmp->icmp_reuseaddr = onoff; 2169 break; 2170 2171 /* 2172 * The following three items are available here, 2173 * but are only meaningful to IP. 2174 */ 2175 case SO_DONTROUTE: 2176 if (!checkonly) 2177 icmp->icmp_dontroute = onoff; 2178 break; 2179 case SO_USELOOPBACK: 2180 if (!checkonly) 2181 icmp->icmp_useloopback = onoff; 2182 break; 2183 case SO_BROADCAST: 2184 if (!checkonly) 2185 icmp->icmp_broadcast = onoff; 2186 break; 2187 2188 case SO_SNDBUF: 2189 if (*i1 > is->is_max_buf) { 2190 *outlenp = 0; 2191 return (ENOBUFS); 2192 } 2193 if (!checkonly) { 2194 q->q_hiwat = *i1; 2195 } 2196 break; 2197 case SO_RCVBUF: 2198 if (*i1 > is->is_max_buf) { 2199 *outlenp = 0; 2200 return (ENOBUFS); 2201 } 2202 if (!checkonly) { 2203 RD(q)->q_hiwat = *i1; 2204 rw_exit(&icmp->icmp_rwlock); 2205 (void) mi_set_sth_hiwat(RD(q), *i1); 2206 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2207 } 2208 break; 2209 case SO_DGRAM_ERRIND: 2210 if (!checkonly) 2211 icmp->icmp_dgram_errind = onoff; 2212 break; 2213 case SO_ALLZONES: 2214 /* 2215 * "soft" error (negative) 2216 * option not handled at this level 2217 * Note: Do not modify *outlenp 2218 */ 2219 return (-EINVAL); 2220 case SO_TIMESTAMP: 2221 if (!checkonly) { 2222 icmp->icmp_timestamp = onoff; 2223 } 2224 break; 2225 case SO_MAC_EXEMPT: 2226 if (secpolicy_net_mac_aware(cr) != 0 || 2227 icmp->icmp_state != TS_UNBND) 2228 return (EACCES); 2229 if (!checkonly) 2230 icmp->icmp_mac_exempt = onoff; 2231 break; 2232 /* 2233 * Following three not meaningful for icmp 2234 * Action is same as "default" so we keep them 2235 * in comments. 
2236 * case SO_LINGER: 2237 * case SO_KEEPALIVE: 2238 * case SO_OOBINLINE: 2239 */ 2240 default: 2241 *outlenp = 0; 2242 return (EINVAL); 2243 } 2244 break; 2245 case IPPROTO_IP: 2246 /* 2247 * Only allow IPv4 option processing on IPv4 sockets. 2248 */ 2249 if (icmp->icmp_family != AF_INET) { 2250 *outlenp = 0; 2251 return (ENOPROTOOPT); 2252 } 2253 switch (name) { 2254 case IP_OPTIONS: 2255 case T_IP_OPTIONS: 2256 /* Save options for use by IP. */ 2257 if ((inlen & 0x3) || 2258 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2259 *outlenp = 0; 2260 return (EINVAL); 2261 } 2262 if (checkonly) 2263 break; 2264 2265 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2266 &icmp->icmp_ip_snd_options_len, 2267 icmp->icmp_label_len, invalp, inlen)) { 2268 *outlenp = 0; 2269 return (ENOMEM); 2270 } 2271 2272 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2273 icmp->icmp_ip_snd_options_len; 2274 rw_exit(&icmp->icmp_rwlock); 2275 (void) mi_set_sth_wroff(RD(q), icmp->icmp_max_hdr_len + 2276 is->is_wroff_extra); 2277 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2278 break; 2279 case IP_HDRINCL: 2280 if (!checkonly) 2281 icmp->icmp_hdrincl = onoff; 2282 break; 2283 case IP_TOS: 2284 case T_IP_TOS: 2285 if (!checkonly) { 2286 icmp->icmp_type_of_service = (uint8_t)*i1; 2287 } 2288 break; 2289 case IP_TTL: 2290 if (!checkonly) { 2291 icmp->icmp_ttl = (uint8_t)*i1; 2292 } 2293 break; 2294 case IP_MULTICAST_IF: 2295 /* 2296 * TODO should check OPTMGMT reply and undo this if 2297 * there is an error. 2298 */ 2299 if (!checkonly) 2300 icmp->icmp_multicast_if_addr = *i1; 2301 break; 2302 case IP_MULTICAST_TTL: 2303 if (!checkonly) 2304 icmp->icmp_multicast_ttl = *invalp; 2305 break; 2306 case IP_MULTICAST_LOOP: 2307 if (!checkonly) { 2308 connp->conn_multicast_loop = 2309 (*invalp == 0) ? 
0 : 1; 2310 } 2311 break; 2312 case IP_BOUND_IF: 2313 if (!checkonly) 2314 icmp->icmp_bound_if = *i1; 2315 break; 2316 case IP_UNSPEC_SRC: 2317 if (!checkonly) 2318 icmp->icmp_unspec_source = onoff; 2319 break; 2320 case IP_XMIT_IF: 2321 if (!checkonly) 2322 icmp->icmp_xmit_if = *i1; 2323 break; 2324 case IP_RECVIF: 2325 if (!checkonly) 2326 icmp->icmp_recvif = onoff; 2327 /* 2328 * pass to ip 2329 */ 2330 return (-EINVAL); 2331 case IP_PKTINFO: { 2332 /* 2333 * This also handles IP_RECVPKTINFO. 2334 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2335 * Differentiation is based on the size of the argument 2336 * passed in. 2337 */ 2338 struct in_pktinfo *pktinfop; 2339 ip4_pkt_t *attr_pktinfop; 2340 2341 if (checkonly) 2342 break; 2343 2344 if (inlen == sizeof (int)) { 2345 /* 2346 * This is IP_RECVPKTINFO option. 2347 * Keep a local copy of wether this option is 2348 * set or not and pass it down to IP for 2349 * processing. 2350 */ 2351 icmp->icmp_ip_recvpktinfo = onoff; 2352 return (-EINVAL); 2353 } 2354 2355 2356 if (inlen != sizeof (struct in_pktinfo)) 2357 return (EINVAL); 2358 2359 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2360 == NULL) { 2361 /* 2362 * sticky option is not supported 2363 */ 2364 return (EINVAL); 2365 } 2366 2367 pktinfop = (struct in_pktinfo *)invalp; 2368 2369 /* 2370 * Atleast one of the values should be specified 2371 */ 2372 if (pktinfop->ipi_ifindex == 0 && 2373 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2374 return (EINVAL); 2375 } 2376 2377 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2378 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2379 } 2380 break; 2381 case IP_ADD_MEMBERSHIP: 2382 case IP_DROP_MEMBERSHIP: 2383 case IP_BLOCK_SOURCE: 2384 case IP_UNBLOCK_SOURCE: 2385 case IP_ADD_SOURCE_MEMBERSHIP: 2386 case IP_DROP_SOURCE_MEMBERSHIP: 2387 case MCAST_JOIN_GROUP: 2388 case MCAST_LEAVE_GROUP: 2389 case MCAST_BLOCK_SOURCE: 2390 case MCAST_UNBLOCK_SOURCE: 2391 case MCAST_JOIN_SOURCE_GROUP: 2392 case 
MCAST_LEAVE_SOURCE_GROUP: 2393 case MRT_INIT: 2394 case MRT_DONE: 2395 case MRT_ADD_VIF: 2396 case MRT_DEL_VIF: 2397 case MRT_ADD_MFC: 2398 case MRT_DEL_MFC: 2399 case MRT_VERSION: 2400 case MRT_ASSERT: 2401 case IP_SEC_OPT: 2402 case IP_DONTFAILOVER_IF: 2403 case IP_NEXTHOP: 2404 /* 2405 * "soft" error (negative) 2406 * option not handled at this level 2407 * Note: Do not modify *outlenp 2408 */ 2409 return (-EINVAL); 2410 default: 2411 *outlenp = 0; 2412 return (EINVAL); 2413 } 2414 break; 2415 case IPPROTO_IPV6: { 2416 ip6_pkt_t *ipp; 2417 boolean_t sticky; 2418 2419 if (icmp->icmp_family != AF_INET6) { 2420 *outlenp = 0; 2421 return (ENOPROTOOPT); 2422 } 2423 /* 2424 * Deal with both sticky options and ancillary data 2425 */ 2426 if (thisdg_attrs == NULL) { 2427 /* sticky options, or none */ 2428 ipp = &icmp->icmp_sticky_ipp; 2429 sticky = B_TRUE; 2430 } else { 2431 /* ancillary data */ 2432 ipp = (ip6_pkt_t *)thisdg_attrs; 2433 sticky = B_FALSE; 2434 } 2435 2436 switch (name) { 2437 case IPV6_MULTICAST_IF: 2438 if (!checkonly) 2439 icmp->icmp_multicast_if_index = *i1; 2440 break; 2441 case IPV6_UNICAST_HOPS: 2442 /* -1 means use default */ 2443 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2444 *outlenp = 0; 2445 return (EINVAL); 2446 } 2447 if (!checkonly) { 2448 if (*i1 == -1) { 2449 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2450 is->is_ipv6_hoplimit; 2451 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2452 /* Pass modified value to IP. 
*/ 2453 *i1 = ipp->ipp_hoplimit; 2454 } else { 2455 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2456 (uint8_t)*i1; 2457 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2458 } 2459 /* Rebuild the header template */ 2460 error = icmp_build_hdrs(icmp); 2461 if (error != 0) { 2462 *outlenp = 0; 2463 return (error); 2464 } 2465 } 2466 break; 2467 case IPV6_MULTICAST_HOPS: 2468 /* -1 means use default */ 2469 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2470 *outlenp = 0; 2471 return (EINVAL); 2472 } 2473 if (!checkonly) { 2474 if (*i1 == -1) { 2475 icmp->icmp_multicast_ttl = 2476 ipp->ipp_multicast_hops = 2477 IP_DEFAULT_MULTICAST_TTL; 2478 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2479 /* Pass modified value to IP. */ 2480 *i1 = icmp->icmp_multicast_ttl; 2481 } else { 2482 icmp->icmp_multicast_ttl = 2483 ipp->ipp_multicast_hops = 2484 (uint8_t)*i1; 2485 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2486 } 2487 } 2488 break; 2489 case IPV6_MULTICAST_LOOP: 2490 if (*i1 != 0 && *i1 != 1) { 2491 *outlenp = 0; 2492 return (EINVAL); 2493 } 2494 if (!checkonly) 2495 connp->conn_multicast_loop = *i1; 2496 break; 2497 case IPV6_CHECKSUM: 2498 /* 2499 * Integer offset into the user data of where the 2500 * checksum is located. 2501 * Offset of -1 disables option. 2502 * Does not apply to IPPROTO_ICMPV6. 
2503 */ 2504 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2505 *outlenp = 0; 2506 return (EINVAL); 2507 } 2508 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2509 /* Negative or not 16 bit aligned offset */ 2510 *outlenp = 0; 2511 return (EINVAL); 2512 } 2513 if (checkonly) 2514 break; 2515 2516 if (*i1 == -1) { 2517 icmp->icmp_raw_checksum = 0; 2518 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2519 } else { 2520 icmp->icmp_raw_checksum = 1; 2521 icmp->icmp_checksum_off = *i1; 2522 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2523 } 2524 /* Rebuild the header template */ 2525 error = icmp_build_hdrs(icmp); 2526 if (error != 0) { 2527 *outlenp = 0; 2528 return (error); 2529 } 2530 break; 2531 case IPV6_JOIN_GROUP: 2532 case IPV6_LEAVE_GROUP: 2533 case MCAST_JOIN_GROUP: 2534 case MCAST_LEAVE_GROUP: 2535 case MCAST_BLOCK_SOURCE: 2536 case MCAST_UNBLOCK_SOURCE: 2537 case MCAST_JOIN_SOURCE_GROUP: 2538 case MCAST_LEAVE_SOURCE_GROUP: 2539 /* 2540 * "soft" error (negative) 2541 * option not handled at this level 2542 * Note: Do not modify *outlenp 2543 */ 2544 return (-EINVAL); 2545 case IPV6_BOUND_IF: 2546 if (!checkonly) 2547 icmp->icmp_bound_if = *i1; 2548 break; 2549 case IPV6_UNSPEC_SRC: 2550 if (!checkonly) 2551 icmp->icmp_unspec_source = onoff; 2552 break; 2553 case IPV6_RECVTCLASS: 2554 if (!checkonly) 2555 icmp->icmp_ipv6_recvtclass = onoff; 2556 break; 2557 /* 2558 * Set boolean switches for ancillary data delivery 2559 */ 2560 case IPV6_RECVPKTINFO: 2561 if (!checkonly) 2562 icmp->icmp_ip_recvpktinfo = onoff; 2563 break; 2564 case IPV6_RECVPATHMTU: 2565 if (!checkonly) 2566 icmp->icmp_ipv6_recvpathmtu = onoff; 2567 break; 2568 case IPV6_RECVHOPLIMIT: 2569 if (!checkonly) 2570 icmp->icmp_ipv6_recvhoplimit = onoff; 2571 break; 2572 case IPV6_RECVHOPOPTS: 2573 if (!checkonly) 2574 icmp->icmp_ipv6_recvhopopts = onoff; 2575 break; 2576 case IPV6_RECVDSTOPTS: 2577 if (!checkonly) 2578 icmp->icmp_ipv6_recvdstopts = onoff; 2579 break; 2580 case _OLD_IPV6_RECVDSTOPTS: 2581 
if (!checkonly) 2582 icmp->icmp_old_ipv6_recvdstopts = onoff; 2583 break; 2584 case IPV6_RECVRTHDRDSTOPTS: 2585 if (!checkonly) 2586 icmp->icmp_ipv6_recvrtdstopts = onoff; 2587 break; 2588 case IPV6_RECVRTHDR: 2589 if (!checkonly) 2590 icmp->icmp_ipv6_recvrthdr = onoff; 2591 break; 2592 /* 2593 * Set sticky options or ancillary data. 2594 * If sticky options, (re)build any extension headers 2595 * that might be needed as a result. 2596 */ 2597 case IPV6_PKTINFO: 2598 /* 2599 * The source address and ifindex are verified 2600 * in ip_opt_set(). For ancillary data the 2601 * source address is checked in ip_wput_v6. 2602 */ 2603 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2604 return (EINVAL); 2605 if (checkonly) 2606 break; 2607 2608 if (inlen == 0) { 2609 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2610 ipp->ipp_sticky_ignored |= 2611 (IPPF_IFINDEX|IPPF_ADDR); 2612 } else { 2613 struct in6_pktinfo *pkti; 2614 2615 pkti = (struct in6_pktinfo *)invalp; 2616 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2617 ipp->ipp_addr = pkti->ipi6_addr; 2618 if (ipp->ipp_ifindex != 0) 2619 ipp->ipp_fields |= IPPF_IFINDEX; 2620 else 2621 ipp->ipp_fields &= ~IPPF_IFINDEX; 2622 if (!IN6_IS_ADDR_UNSPECIFIED( 2623 &ipp->ipp_addr)) 2624 ipp->ipp_fields |= IPPF_ADDR; 2625 else 2626 ipp->ipp_fields &= ~IPPF_ADDR; 2627 } 2628 if (sticky) { 2629 error = icmp_build_hdrs(icmp); 2630 if (error != 0) 2631 return (error); 2632 } 2633 break; 2634 case IPV6_HOPLIMIT: 2635 /* This option can only be used as ancillary data. 
*/ 2636 if (sticky) 2637 return (EINVAL); 2638 if (inlen != 0 && inlen != sizeof (int)) 2639 return (EINVAL); 2640 if (checkonly) 2641 break; 2642 2643 if (inlen == 0) { 2644 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2645 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2646 } else { 2647 if (*i1 > 255 || *i1 < -1) 2648 return (EINVAL); 2649 if (*i1 == -1) 2650 ipp->ipp_hoplimit = 2651 is->is_ipv6_hoplimit; 2652 else 2653 ipp->ipp_hoplimit = *i1; 2654 ipp->ipp_fields |= IPPF_HOPLIMIT; 2655 } 2656 break; 2657 case IPV6_TCLASS: 2658 /* 2659 * IPV6_RECVTCLASS accepts -1 as use kernel default 2660 * and [0, 255] as the actualy traffic class. 2661 */ 2662 if (inlen != 0 && inlen != sizeof (int)) 2663 return (EINVAL); 2664 if (checkonly) 2665 break; 2666 2667 if (inlen == 0) { 2668 ipp->ipp_fields &= ~IPPF_TCLASS; 2669 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2670 } else { 2671 if (*i1 >= 256 || *i1 < -1) 2672 return (EINVAL); 2673 if (*i1 == -1) { 2674 ipp->ipp_tclass = 2675 IPV6_FLOW_TCLASS( 2676 IPV6_DEFAULT_VERS_AND_FLOW); 2677 } else { 2678 ipp->ipp_tclass = *i1; 2679 } 2680 ipp->ipp_fields |= IPPF_TCLASS; 2681 } 2682 if (sticky) { 2683 error = icmp_build_hdrs(icmp); 2684 if (error != 0) 2685 return (error); 2686 } 2687 break; 2688 case IPV6_NEXTHOP: 2689 /* 2690 * IP will verify that the nexthop is reachable 2691 * and fail for sticky options. 
2692 */ 2693 if (inlen != 0 && inlen != sizeof (sin6_t)) 2694 return (EINVAL); 2695 if (checkonly) 2696 break; 2697 2698 if (inlen == 0) { 2699 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2700 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2701 } else { 2702 sin6_t *sin6 = (sin6_t *)invalp; 2703 2704 if (sin6->sin6_family != AF_INET6) 2705 return (EAFNOSUPPORT); 2706 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 2707 return (EADDRNOTAVAIL); 2708 ipp->ipp_nexthop = sin6->sin6_addr; 2709 if (!IN6_IS_ADDR_UNSPECIFIED( 2710 &ipp->ipp_nexthop)) 2711 ipp->ipp_fields |= IPPF_NEXTHOP; 2712 else 2713 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2714 } 2715 if (sticky) { 2716 error = icmp_build_hdrs(icmp); 2717 if (error != 0) 2718 return (error); 2719 } 2720 break; 2721 case IPV6_HOPOPTS: { 2722 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2723 /* 2724 * Sanity checks - minimum size, size a multiple of 2725 * eight bytes, and matching size passed in. 2726 */ 2727 if (inlen != 0 && 2728 inlen != (8 * (hopts->ip6h_len + 1))) 2729 return (EINVAL); 2730 2731 if (checkonly) 2732 break; 2733 error = optcom_pkt_set(invalp, inlen, sticky, 2734 (uchar_t **)&ipp->ipp_hopopts, 2735 &ipp->ipp_hopoptslen, 2736 sticky ? icmp->icmp_label_len_v6 : 0); 2737 if (error != 0) 2738 return (error); 2739 if (ipp->ipp_hopoptslen == 0) { 2740 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2741 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2742 } else { 2743 ipp->ipp_fields |= IPPF_HOPOPTS; 2744 } 2745 if (sticky) { 2746 error = icmp_build_hdrs(icmp); 2747 if (error != 0) 2748 return (error); 2749 } 2750 break; 2751 } 2752 case IPV6_RTHDRDSTOPTS: { 2753 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2754 2755 /* 2756 * Sanity checks - minimum size, size a multiple of 2757 * eight bytes, and matching size passed in. 
2758 */ 2759 if (inlen != 0 && 2760 inlen != (8 * (dopts->ip6d_len + 1))) 2761 return (EINVAL); 2762 2763 if (checkonly) 2764 break; 2765 2766 if (inlen == 0) { 2767 if (sticky && 2768 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2769 kmem_free(ipp->ipp_rtdstopts, 2770 ipp->ipp_rtdstoptslen); 2771 ipp->ipp_rtdstopts = NULL; 2772 ipp->ipp_rtdstoptslen = 0; 2773 } 2774 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2775 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2776 } else { 2777 error = optcom_pkt_set(invalp, inlen, sticky, 2778 (uchar_t **)&ipp->ipp_rtdstopts, 2779 &ipp->ipp_rtdstoptslen, 0); 2780 if (error != 0) 2781 return (error); 2782 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2783 } 2784 if (sticky) { 2785 error = icmp_build_hdrs(icmp); 2786 if (error != 0) 2787 return (error); 2788 } 2789 break; 2790 } 2791 case IPV6_DSTOPTS: { 2792 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2793 2794 /* 2795 * Sanity checks - minimum size, size a multiple of 2796 * eight bytes, and matching size passed in. 2797 */ 2798 if (inlen != 0 && 2799 inlen != (8 * (dopts->ip6d_len + 1))) 2800 return (EINVAL); 2801 2802 if (checkonly) 2803 break; 2804 2805 if (inlen == 0) { 2806 if (sticky && 2807 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2808 kmem_free(ipp->ipp_dstopts, 2809 ipp->ipp_dstoptslen); 2810 ipp->ipp_dstopts = NULL; 2811 ipp->ipp_dstoptslen = 0; 2812 } 2813 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2814 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2815 } else { 2816 error = optcom_pkt_set(invalp, inlen, sticky, 2817 (uchar_t **)&ipp->ipp_dstopts, 2818 &ipp->ipp_dstoptslen, 0); 2819 if (error != 0) 2820 return (error); 2821 ipp->ipp_fields |= IPPF_DSTOPTS; 2822 } 2823 if (sticky) { 2824 error = icmp_build_hdrs(icmp); 2825 if (error != 0) 2826 return (error); 2827 } 2828 break; 2829 } 2830 case IPV6_RTHDR: { 2831 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2832 2833 /* 2834 * Sanity checks - minimum size, size a multiple of 2835 * eight bytes, and matching size passed in. 
2836 */ 2837 if (inlen != 0 && 2838 inlen != (8 * (rt->ip6r_len + 1))) 2839 return (EINVAL); 2840 2841 if (checkonly) 2842 break; 2843 2844 if (inlen == 0) { 2845 if (sticky && 2846 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2847 kmem_free(ipp->ipp_rthdr, 2848 ipp->ipp_rthdrlen); 2849 ipp->ipp_rthdr = NULL; 2850 ipp->ipp_rthdrlen = 0; 2851 } 2852 ipp->ipp_fields &= ~IPPF_RTHDR; 2853 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2854 } else { 2855 error = optcom_pkt_set(invalp, inlen, sticky, 2856 (uchar_t **)&ipp->ipp_rthdr, 2857 &ipp->ipp_rthdrlen, 0); 2858 if (error != 0) 2859 return (error); 2860 ipp->ipp_fields |= IPPF_RTHDR; 2861 } 2862 if (sticky) { 2863 error = icmp_build_hdrs(icmp); 2864 if (error != 0) 2865 return (error); 2866 } 2867 break; 2868 } 2869 2870 case IPV6_DONTFRAG: 2871 if (checkonly) 2872 break; 2873 2874 if (onoff) { 2875 ipp->ipp_fields |= IPPF_DONTFRAG; 2876 } else { 2877 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2878 } 2879 break; 2880 2881 case IPV6_USE_MIN_MTU: 2882 if (inlen != sizeof (int)) 2883 return (EINVAL); 2884 2885 if (*i1 < -1 || *i1 > 1) 2886 return (EINVAL); 2887 2888 if (checkonly) 2889 break; 2890 2891 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 2892 ipp->ipp_use_min_mtu = *i1; 2893 break; 2894 2895 /* 2896 * This option can't be set. Its only returned via 2897 * getsockopt() or ancillary data. 2898 */ 2899 case IPV6_PATHMTU: 2900 return (EINVAL); 2901 2902 case IPV6_BOUND_PIF: 2903 case IPV6_SEC_OPT: 2904 case IPV6_DONTFAILOVER_IF: 2905 case IPV6_SRC_PREFERENCES: 2906 case IPV6_V6ONLY: 2907 /* Handled at IP level */ 2908 return (-EINVAL); 2909 default: 2910 *outlenp = 0; 2911 return (EINVAL); 2912 } 2913 break; 2914 } /* end IPPROTO_IPV6 */ 2915 2916 case IPPROTO_ICMPV6: 2917 /* 2918 * Only allow IPv6 option processing on IPv6 sockets. 
2919 */ 2920 if (icmp->icmp_family != AF_INET6) { 2921 *outlenp = 0; 2922 return (ENOPROTOOPT); 2923 } 2924 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2925 *outlenp = 0; 2926 return (ENOPROTOOPT); 2927 } 2928 switch (name) { 2929 case ICMP6_FILTER: 2930 if (!checkonly) { 2931 if ((inlen != 0) && 2932 (inlen != sizeof (icmp6_filter_t))) 2933 return (EINVAL); 2934 2935 if (inlen == 0) { 2936 if (icmp->icmp_filter != NULL) { 2937 kmem_free(icmp->icmp_filter, 2938 sizeof (icmp6_filter_t)); 2939 icmp->icmp_filter = NULL; 2940 } 2941 } else { 2942 if (icmp->icmp_filter == NULL) { 2943 icmp->icmp_filter = kmem_alloc( 2944 sizeof (icmp6_filter_t), 2945 KM_NOSLEEP); 2946 if (icmp->icmp_filter == NULL) { 2947 *outlenp = 0; 2948 return (ENOBUFS); 2949 } 2950 } 2951 (void) bcopy(invalp, icmp->icmp_filter, 2952 inlen); 2953 } 2954 } 2955 break; 2956 2957 default: 2958 *outlenp = 0; 2959 return (EINVAL); 2960 } 2961 break; 2962 default: 2963 *outlenp = 0; 2964 return (EINVAL); 2965 } 2966 /* 2967 * Common case of OK return with outval same as inval. 2968 */ 2969 if (invalp != outvalp) { 2970 /* don't trust bcopy for identical src/dst */ 2971 (void) bcopy(invalp, outvalp, inlen); 2972 } 2973 *outlenp = inlen; 2974 return (0); 2975 } 2976 /* This routine sets socket options. */ 2977 /* ARGSUSED */ 2978 int 2979 icmp_opt_set(queue_t *q, uint_t optset_context, int level, int name, 2980 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2981 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2982 { 2983 icmp_t *icmp; 2984 int err; 2985 2986 icmp = Q_TO_ICMP(q); 2987 2988 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2989 err = icmp_opt_set_locked(q, optset_context, level, name, inlen, invalp, 2990 outlenp, outvalp, thisdg_attrs, cr, mblk); 2991 rw_exit(&icmp->icmp_rwlock); 2992 return (err); 2993 } 2994 2995 /* 2996 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 2997 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 
2998 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 2999 * headers. 3000 * Returns failure if can't allocate memory. 3001 */ 3002 static int 3003 icmp_build_hdrs(icmp_t *icmp) 3004 { 3005 icmp_stack_t *is = icmp->icmp_is; 3006 uchar_t *hdrs; 3007 uint_t hdrs_len; 3008 ip6_t *ip6h; 3009 ip6i_t *ip6i; 3010 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 3011 3012 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 3013 hdrs_len = ip_total_hdrs_len_v6(ipp); 3014 ASSERT(hdrs_len != 0); 3015 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 3016 /* Need to reallocate */ 3017 if (hdrs_len != 0) { 3018 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3019 if (hdrs == NULL) 3020 return (ENOMEM); 3021 } else { 3022 hdrs = NULL; 3023 } 3024 if (icmp->icmp_sticky_hdrs_len != 0) { 3025 kmem_free(icmp->icmp_sticky_hdrs, 3026 icmp->icmp_sticky_hdrs_len); 3027 } 3028 icmp->icmp_sticky_hdrs = hdrs; 3029 icmp->icmp_sticky_hdrs_len = hdrs_len; 3030 } 3031 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 3032 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 3033 3034 /* Set header fields not in ipp */ 3035 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3036 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 3037 ip6h = (ip6_t *)&ip6i[1]; 3038 3039 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 3040 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 3041 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 3042 } 3043 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 3044 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 3045 } 3046 } else { 3047 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 3048 } 3049 3050 if (!(ipp->ipp_fields & IPPF_ADDR)) 3051 ip6h->ip6_src = icmp->icmp_v6src; 3052 3053 /* Try to get everything in a single mblk */ 3054 if (hdrs_len > icmp->icmp_max_hdr_len) { 3055 icmp->icmp_max_hdr_len = hdrs_len; 3056 rw_exit(&icmp->icmp_rwlock); 3057 (void) mi_set_sth_wroff(icmp->icmp_connp->conn_rq, 3058 icmp->icmp_max_hdr_len + is->is_wroff_extra); 3059 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3060 } 3061 return (0); 3062 } 3063 3064 /* 3065 * 
This routine retrieves the value of an ND variable in a icmpparam_t 3066 * structure. It is called through nd_getset when a user reads the 3067 * variable. 3068 */ 3069 /* ARGSUSED */ 3070 static int 3071 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3072 { 3073 icmpparam_t *icmppa = (icmpparam_t *)cp; 3074 3075 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 3076 return (0); 3077 } 3078 3079 /* 3080 * Walk through the param array specified registering each element with the 3081 * named dispatch (ND) handler. 3082 */ 3083 static boolean_t 3084 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) 3085 { 3086 for (; cnt-- > 0; icmppa++) { 3087 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 3088 if (!nd_load(ndp, icmppa->icmp_param_name, 3089 icmp_param_get, icmp_param_set, 3090 (caddr_t)icmppa)) { 3091 nd_free(ndp); 3092 return (B_FALSE); 3093 } 3094 } 3095 } 3096 if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL, 3097 NULL)) { 3098 nd_free(ndp); 3099 return (B_FALSE); 3100 } 3101 return (B_TRUE); 3102 } 3103 3104 /* This routine sets an ND variable in a icmpparam_t structure. */ 3105 /* ARGSUSED */ 3106 static int 3107 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3108 { 3109 long new_value; 3110 icmpparam_t *icmppa = (icmpparam_t *)cp; 3111 3112 /* 3113 * Fail the request if the new value does not lie within the 3114 * required bounds. 
3115 */ 3116 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3117 new_value < icmppa->icmp_param_min || 3118 new_value > icmppa->icmp_param_max) { 3119 return (EINVAL); 3120 } 3121 /* Set the new value */ 3122 icmppa->icmp_param_value = new_value; 3123 return (0); 3124 } 3125 /*ARGSUSED2*/ 3126 static void 3127 icmp_input(void *arg1, mblk_t *mp, void *arg2) 3128 { 3129 conn_t *connp = (conn_t *)arg1; 3130 struct T_unitdata_ind *tudi; 3131 uchar_t *rptr; 3132 icmp_t *icmp; 3133 icmp_stack_t *is; 3134 sin_t *sin; 3135 sin6_t *sin6; 3136 ip6_t *ip6h; 3137 ip6i_t *ip6i; 3138 mblk_t *mp1; 3139 int hdr_len; 3140 ipha_t *ipha; 3141 int udi_size; /* Size of T_unitdata_ind */ 3142 uint_t ipvers; 3143 ip6_pkt_t ipp; 3144 uint8_t nexthdr; 3145 ip_pktinfo_t *pinfo = NULL; 3146 mblk_t *options_mp = NULL; 3147 uint_t icmp_opt = 0; 3148 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 3149 uint_t hopstrip; 3150 3151 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 3152 3153 icmp = connp->conn_icmp; 3154 is = icmp->icmp_is; 3155 rptr = mp->b_rptr; 3156 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3157 ASSERT(OK_32PTR(rptr)); 3158 3159 /* 3160 * IP should have prepended the options data in an M_CTL 3161 * Check M_CTL "type" to make sure are not here bcos of 3162 * a valid ICMP message 3163 */ 3164 if (DB_TYPE(mp) == M_CTL) { 3165 /* 3166 * FIXME: does IP still do this? 3167 * IP sends up the IPSEC_IN message for handling IPSEC 3168 * policy at the TCP level. We don't need it here. 3169 */ 3170 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 3171 mp1 = mp->b_cont; 3172 freeb(mp); 3173 mp = mp1; 3174 rptr = mp->b_rptr; 3175 } else if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3176 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3177 IN_PKTINFO) { 3178 /* 3179 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3180 * has been prepended to the packet by IP. 
We need to 3181 * extract the mblk and adjust the rptr 3182 */ 3183 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3184 options_mp = mp; 3185 mp = mp->b_cont; 3186 rptr = mp->b_rptr; 3187 } else { 3188 /* 3189 * ICMP messages. 3190 */ 3191 icmp_icmp_error(connp->conn_rq, mp); 3192 return; 3193 } 3194 } 3195 3196 /* 3197 * Discard message if it is misaligned or smaller than the IP header. 3198 */ 3199 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3200 freemsg(mp); 3201 if (options_mp != NULL) 3202 freeb(options_mp); 3203 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3204 return; 3205 } 3206 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3207 3208 /* Handle M_DATA messages containing IP packets messages */ 3209 if (ipvers == IPV4_VERSION) { 3210 /* 3211 * Special case where IP attaches 3212 * the IRE needs to be handled so that we don't send up 3213 * IRE to the user land. 3214 */ 3215 ipha = (ipha_t *)rptr; 3216 hdr_len = IPH_HDR_LENGTH(ipha); 3217 3218 if (ipha->ipha_protocol == IPPROTO_TCP) { 3219 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3220 3221 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3222 TH_SYN) && mp->b_cont != NULL) { 3223 mp1 = mp->b_cont; 3224 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3225 freeb(mp1); 3226 mp->b_cont = NULL; 3227 } 3228 } 3229 } 3230 if (is->is_bsd_compat) { 3231 ushort_t len; 3232 len = ntohs(ipha->ipha_length); 3233 3234 if (mp->b_datap->db_ref > 1) { 3235 /* 3236 * Allocate a new IP header so that we can 3237 * modify ipha_length. 
3238 */ 3239 mblk_t *mp1; 3240 3241 mp1 = allocb(hdr_len, BPRI_MED); 3242 if (!mp1) { 3243 freemsg(mp); 3244 if (options_mp != NULL) 3245 freeb(options_mp); 3246 BUMP_MIB(&is->is_rawip_mib, 3247 rawipInErrors); 3248 return; 3249 } 3250 bcopy(rptr, mp1->b_rptr, hdr_len); 3251 mp->b_rptr = rptr + hdr_len; 3252 rptr = mp1->b_rptr; 3253 ipha = (ipha_t *)rptr; 3254 mp1->b_cont = mp; 3255 mp1->b_wptr = rptr + hdr_len; 3256 mp = mp1; 3257 } 3258 len -= hdr_len; 3259 ipha->ipha_length = htons(len); 3260 } 3261 } 3262 3263 /* 3264 * This is the inbound data path. Packets are passed upstream as 3265 * T_UNITDATA_IND messages with full IP headers still attached. 3266 */ 3267 if (icmp->icmp_family == AF_INET) { 3268 ASSERT(ipvers == IPV4_VERSION); 3269 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3270 if (icmp->icmp_recvif && (pinfo != NULL) && 3271 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3272 udi_size += sizeof (struct T_opthdr) + 3273 sizeof (uint_t); 3274 } 3275 3276 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3277 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3278 udi_size += sizeof (struct T_opthdr) + 3279 sizeof (struct in_pktinfo); 3280 } 3281 3282 /* 3283 * If SO_TIMESTAMP is set allocate the appropriate sized 3284 * buffer. Since gethrestime() expects a pointer aligned 3285 * argument, we allocate space necessary for extra 3286 * alignment (even though it might not be used). 
3287 */ 3288 if (icmp->icmp_timestamp) { 3289 udi_size += sizeof (struct T_opthdr) + 3290 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3291 } 3292 mp1 = allocb(udi_size, BPRI_MED); 3293 if (mp1 == NULL) { 3294 freemsg(mp); 3295 if (options_mp != NULL) 3296 freeb(options_mp); 3297 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3298 return; 3299 } 3300 mp1->b_cont = mp; 3301 mp = mp1; 3302 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3303 mp->b_datap->db_type = M_PROTO; 3304 mp->b_wptr = (uchar_t *)tudi + udi_size; 3305 tudi->PRIM_type = T_UNITDATA_IND; 3306 tudi->SRC_length = sizeof (sin_t); 3307 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3308 sin = (sin_t *)&tudi[1]; 3309 *sin = sin_null; 3310 sin->sin_family = AF_INET; 3311 sin->sin_addr.s_addr = ipha->ipha_src; 3312 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3313 sizeof (sin_t); 3314 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3315 tudi->OPT_length = udi_size; 3316 3317 /* 3318 * Add options if IP_RECVIF is set 3319 */ 3320 if (udi_size != 0) { 3321 char *dstopt; 3322 3323 dstopt = (char *)&sin[1]; 3324 if (icmp->icmp_recvif && (pinfo != NULL) && 3325 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3326 3327 struct T_opthdr *toh; 3328 uint_t *dstptr; 3329 3330 toh = (struct T_opthdr *)dstopt; 3331 toh->level = IPPROTO_IP; 3332 toh->name = IP_RECVIF; 3333 toh->len = sizeof (struct T_opthdr) + 3334 sizeof (uint_t); 3335 toh->status = 0; 3336 dstopt += sizeof (struct T_opthdr); 3337 dstptr = (uint_t *)dstopt; 3338 *dstptr = pinfo->ip_pkt_ifindex; 3339 dstopt += sizeof (uint_t); 3340 udi_size -= toh->len; 3341 } 3342 if (icmp->icmp_timestamp) { 3343 struct T_opthdr *toh; 3344 3345 toh = (struct T_opthdr *)dstopt; 3346 toh->level = SOL_SOCKET; 3347 toh->name = SCM_TIMESTAMP; 3348 toh->len = sizeof (struct T_opthdr) + 3349 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3350 toh->status = 0; 3351 dstopt += sizeof (struct T_opthdr); 3352 /* Align for gethrestime() */ 3353 dstopt = (char 
*)P2ROUNDUP((intptr_t)dstopt, 3354 sizeof (intptr_t)); 3355 gethrestime((timestruc_t *)dstopt); 3356 dstopt = (char *)toh + toh->len; 3357 udi_size -= toh->len; 3358 } 3359 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3360 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3361 struct T_opthdr *toh; 3362 struct in_pktinfo *pktinfop; 3363 3364 toh = (struct T_opthdr *)dstopt; 3365 toh->level = IPPROTO_IP; 3366 toh->name = IP_PKTINFO; 3367 toh->len = sizeof (struct T_opthdr) + 3368 sizeof (in_pktinfo_t); 3369 toh->status = 0; 3370 dstopt += sizeof (struct T_opthdr); 3371 pktinfop = (struct in_pktinfo *)dstopt; 3372 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3373 pktinfop->ipi_spec_dst = 3374 pinfo->ip_pkt_match_addr; 3375 3376 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3377 3378 dstopt += sizeof (struct in_pktinfo); 3379 udi_size -= toh->len; 3380 } 3381 3382 /* Consumed all of allocated space */ 3383 ASSERT(udi_size == 0); 3384 } 3385 3386 if (options_mp != NULL) 3387 freeb(options_mp); 3388 3389 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3390 putnext(connp->conn_rq, mp); 3391 return; 3392 } 3393 3394 /* 3395 * We don't need options_mp in the IPv6 path. 3396 */ 3397 if (options_mp != NULL) { 3398 freeb(options_mp); 3399 options_mp = NULL; 3400 } 3401 3402 /* 3403 * Discard message if it is smaller than the IPv6 header 3404 * or if the header is malformed. 3405 */ 3406 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3407 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3408 icmp->icmp_family != AF_INET6) { 3409 freemsg(mp); 3410 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3411 return; 3412 } 3413 3414 /* Initialize */ 3415 ipp.ipp_fields = 0; 3416 hopstrip = 0; 3417 3418 ip6h = (ip6_t *)rptr; 3419 /* 3420 * Call on ip_find_hdr_v6 which gets the total hdr len 3421 * as well as individual lenghts of ext hdrs (and ptrs to 3422 * them). 
3423 */ 3424 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3425 /* Look for ifindex information */ 3426 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3427 ip6i = (ip6i_t *)ip6h; 3428 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3429 ASSERT(ip6i->ip6i_ifindex != 0); 3430 ipp.ipp_fields |= IPPF_IFINDEX; 3431 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3432 } 3433 rptr = (uchar_t *)&ip6i[1]; 3434 mp->b_rptr = rptr; 3435 if (rptr == mp->b_wptr) { 3436 mp1 = mp->b_cont; 3437 freeb(mp); 3438 mp = mp1; 3439 rptr = mp->b_rptr; 3440 } 3441 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3442 ip6h = (ip6_t *)rptr; 3443 } 3444 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3445 3446 /* 3447 * We need to lie a bit to the user because users inside 3448 * labeled compartments should not see their own labels. We 3449 * assume that in all other respects IP has checked the label, 3450 * and that the label is always first among the options. (If 3451 * it's not first, then this code won't see it, and the option 3452 * will be passed along to the user.) 3453 * 3454 * If we had multilevel ICMP sockets, then the following code 3455 * should be skipped for them to allow the user to see the 3456 * label. 3457 * 3458 * Alignment restrictions in the definition of IP options 3459 * (namely, the requirement that the 4-octet DOI goes on a 3460 * 4-octet boundary) mean that we know exactly where the option 3461 * should start, but we're lenient for other hosts. 3462 * 3463 * Note that there are no multilevel ICMP or raw IP sockets 3464 * yet, thus nobody ever sees the IP6OPT_LS option. 
3465 */ 3466 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3467 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3468 const uchar_t *ucp = 3469 (const uchar_t *)ipp.ipp_hopopts + 2; 3470 int remlen = ipp.ipp_hopoptslen - 2; 3471 3472 while (remlen > 0) { 3473 if (*ucp == IP6OPT_PAD1) { 3474 remlen--; 3475 ucp++; 3476 } else if (*ucp == IP6OPT_PADN) { 3477 remlen -= ucp[1] + 2; 3478 ucp += ucp[1] + 2; 3479 } else if (*ucp == ip6opt_ls) { 3480 hopstrip = (ucp - 3481 (const uchar_t *)ipp.ipp_hopopts) + 3482 ucp[1] + 2; 3483 hopstrip = (hopstrip + 7) & ~7; 3484 break; 3485 } else { 3486 /* label option must be first */ 3487 break; 3488 } 3489 } 3490 } 3491 } else { 3492 hdr_len = IPV6_HDR_LEN; 3493 ip6i = NULL; 3494 nexthdr = ip6h->ip6_nxt; 3495 } 3496 /* 3497 * One special case where IP attaches the IRE needs to 3498 * be handled so that we don't send up IRE to the user land. 3499 */ 3500 if (nexthdr == IPPROTO_TCP) { 3501 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3502 3503 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3504 mp->b_cont != NULL) { 3505 mp1 = mp->b_cont; 3506 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3507 freeb(mp1); 3508 mp->b_cont = NULL; 3509 } 3510 } 3511 } 3512 /* 3513 * Check a filter for ICMPv6 types if needed. 3514 * Verify raw checksums if needed. 
3515 */ 3516 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3517 if (icmp->icmp_filter != NULL) { 3518 int type; 3519 3520 /* Assumes that IP has done the pullupmsg */ 3521 type = mp->b_rptr[hdr_len]; 3522 3523 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3524 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3525 freemsg(mp); 3526 return; 3527 } 3528 } else { 3529 /* Checksum */ 3530 uint16_t *up; 3531 uint32_t sum; 3532 int remlen; 3533 3534 up = (uint16_t *)&ip6h->ip6_src; 3535 3536 remlen = msgdsize(mp) - hdr_len; 3537 sum = htons(icmp->icmp_proto + remlen) 3538 + up[0] + up[1] + up[2] + up[3] 3539 + up[4] + up[5] + up[6] + up[7] 3540 + up[8] + up[9] + up[10] + up[11] 3541 + up[12] + up[13] + up[14] + up[15]; 3542 sum = (sum & 0xffff) + (sum >> 16); 3543 sum = IP_CSUM(mp, hdr_len, sum); 3544 if (sum != 0) { 3545 /* IPv6 RAW checksum failed */ 3546 ip0dbg(("icmp_rput: RAW checksum " 3547 "failed %x\n", sum)); 3548 freemsg(mp); 3549 BUMP_MIB(&is->is_rawip_mib, 3550 rawipInCksumErrs); 3551 return; 3552 } 3553 } 3554 } 3555 /* Skip all the IPv6 headers per API */ 3556 mp->b_rptr += hdr_len; 3557 3558 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3559 3560 /* 3561 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3562 * maintain state information, instead of relying on icmp_t 3563 * structure, since there arent any locks protecting these members 3564 * and there is a window where there might be a race between a 3565 * thread setting options on the write side and a thread reading 3566 * these options on the read size. 
3567 */ 3568 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3569 IPPF_RTHDR|IPPF_IFINDEX)) { 3570 if (icmp->icmp_ipv6_recvhopopts && 3571 (ipp.ipp_fields & IPPF_HOPOPTS) && 3572 ipp.ipp_hopoptslen > hopstrip) { 3573 udi_size += sizeof (struct T_opthdr) + 3574 ipp.ipp_hopoptslen - hopstrip; 3575 icmp_opt |= IPPF_HOPOPTS; 3576 } 3577 if ((icmp->icmp_ipv6_recvdstopts || 3578 icmp->icmp_old_ipv6_recvdstopts) && 3579 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3580 udi_size += sizeof (struct T_opthdr) + 3581 ipp.ipp_dstoptslen; 3582 icmp_opt |= IPPF_DSTOPTS; 3583 } 3584 if (((icmp->icmp_ipv6_recvdstopts && 3585 icmp->icmp_ipv6_recvrthdr && 3586 (ipp.ipp_fields & IPPF_RTHDR)) || 3587 icmp->icmp_ipv6_recvrtdstopts) && 3588 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3589 udi_size += sizeof (struct T_opthdr) + 3590 ipp.ipp_rtdstoptslen; 3591 icmp_opt |= IPPF_RTDSTOPTS; 3592 } 3593 if (icmp->icmp_ipv6_recvrthdr && 3594 (ipp.ipp_fields & IPPF_RTHDR)) { 3595 udi_size += sizeof (struct T_opthdr) + 3596 ipp.ipp_rthdrlen; 3597 icmp_opt |= IPPF_RTHDR; 3598 } 3599 if (icmp->icmp_ip_recvpktinfo && 3600 (ipp.ipp_fields & IPPF_IFINDEX)) { 3601 udi_size += sizeof (struct T_opthdr) + 3602 sizeof (struct in6_pktinfo); 3603 icmp_opt |= IPPF_IFINDEX; 3604 } 3605 } 3606 if (icmp->icmp_ipv6_recvhoplimit) { 3607 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3608 icmp_ipv6_recvhoplimit = B_TRUE; 3609 } 3610 3611 if (icmp->icmp_ipv6_recvtclass) 3612 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3613 3614 mp1 = allocb(udi_size, BPRI_MED); 3615 if (mp1 == NULL) { 3616 freemsg(mp); 3617 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3618 return; 3619 } 3620 mp1->b_cont = mp; 3621 mp = mp1; 3622 mp->b_datap->db_type = M_PROTO; 3623 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3624 mp->b_wptr = (uchar_t *)tudi + udi_size; 3625 tudi->PRIM_type = T_UNITDATA_IND; 3626 tudi->SRC_length = sizeof (sin6_t); 3627 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3628 tudi->OPT_offset = sizeof 
(struct T_unitdata_ind) + sizeof (sin6_t); 3629 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3630 tudi->OPT_length = udi_size; 3631 sin6 = (sin6_t *)&tudi[1]; 3632 sin6->sin6_port = 0; 3633 sin6->sin6_family = AF_INET6; 3634 3635 sin6->sin6_addr = ip6h->ip6_src; 3636 /* No sin6_flowinfo per API */ 3637 sin6->sin6_flowinfo = 0; 3638 /* For link-scope source pass up scope id */ 3639 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3640 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3641 sin6->sin6_scope_id = ipp.ipp_ifindex; 3642 else 3643 sin6->sin6_scope_id = 0; 3644 3645 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3646 icmp->icmp_zoneid, is->is_netstack); 3647 3648 if (udi_size != 0) { 3649 uchar_t *dstopt; 3650 3651 dstopt = (uchar_t *)&sin6[1]; 3652 if (icmp_opt & IPPF_IFINDEX) { 3653 struct T_opthdr *toh; 3654 struct in6_pktinfo *pkti; 3655 3656 toh = (struct T_opthdr *)dstopt; 3657 toh->level = IPPROTO_IPV6; 3658 toh->name = IPV6_PKTINFO; 3659 toh->len = sizeof (struct T_opthdr) + 3660 sizeof (*pkti); 3661 toh->status = 0; 3662 dstopt += sizeof (struct T_opthdr); 3663 pkti = (struct in6_pktinfo *)dstopt; 3664 pkti->ipi6_addr = ip6h->ip6_dst; 3665 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3666 dstopt += sizeof (*pkti); 3667 udi_size -= toh->len; 3668 } 3669 if (icmp_ipv6_recvhoplimit) { 3670 struct T_opthdr *toh; 3671 3672 toh = (struct T_opthdr *)dstopt; 3673 toh->level = IPPROTO_IPV6; 3674 toh->name = IPV6_HOPLIMIT; 3675 toh->len = sizeof (struct T_opthdr) + 3676 sizeof (uint_t); 3677 toh->status = 0; 3678 dstopt += sizeof (struct T_opthdr); 3679 *(uint_t *)dstopt = ip6h->ip6_hops; 3680 dstopt += sizeof (uint_t); 3681 udi_size -= toh->len; 3682 } 3683 if (icmp->icmp_ipv6_recvtclass) { 3684 struct T_opthdr *toh; 3685 3686 toh = (struct T_opthdr *)dstopt; 3687 toh->level = IPPROTO_IPV6; 3688 toh->name = IPV6_TCLASS; 3689 toh->len = sizeof (struct T_opthdr) + 3690 sizeof (uint_t); 3691 toh->status = 0; 3692 dstopt += sizeof (struct T_opthdr); 3693 
*(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3694 dstopt += sizeof (uint_t); 3695 udi_size -= toh->len; 3696 } 3697 if (icmp_opt & IPPF_HOPOPTS) { 3698 struct T_opthdr *toh; 3699 3700 toh = (struct T_opthdr *)dstopt; 3701 toh->level = IPPROTO_IPV6; 3702 toh->name = IPV6_HOPOPTS; 3703 toh->len = sizeof (struct T_opthdr) + 3704 ipp.ipp_hopoptslen - hopstrip; 3705 toh->status = 0; 3706 dstopt += sizeof (struct T_opthdr); 3707 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 3708 ipp.ipp_hopoptslen - hopstrip); 3709 if (hopstrip > 0) { 3710 /* copy next header value and fake length */ 3711 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 3712 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 3713 hopstrip / 8; 3714 } 3715 dstopt += ipp.ipp_hopoptslen - hopstrip; 3716 udi_size -= toh->len; 3717 } 3718 if (icmp_opt & IPPF_RTDSTOPTS) { 3719 struct T_opthdr *toh; 3720 3721 toh = (struct T_opthdr *)dstopt; 3722 toh->level = IPPROTO_IPV6; 3723 toh->name = IPV6_DSTOPTS; 3724 toh->len = sizeof (struct T_opthdr) + 3725 ipp.ipp_rtdstoptslen; 3726 toh->status = 0; 3727 dstopt += sizeof (struct T_opthdr); 3728 bcopy(ipp.ipp_rtdstopts, dstopt, 3729 ipp.ipp_rtdstoptslen); 3730 dstopt += ipp.ipp_rtdstoptslen; 3731 udi_size -= toh->len; 3732 } 3733 if (icmp_opt & IPPF_RTHDR) { 3734 struct T_opthdr *toh; 3735 3736 toh = (struct T_opthdr *)dstopt; 3737 toh->level = IPPROTO_IPV6; 3738 toh->name = IPV6_RTHDR; 3739 toh->len = sizeof (struct T_opthdr) + 3740 ipp.ipp_rthdrlen; 3741 toh->status = 0; 3742 dstopt += sizeof (struct T_opthdr); 3743 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 3744 dstopt += ipp.ipp_rthdrlen; 3745 udi_size -= toh->len; 3746 } 3747 if (icmp_opt & IPPF_DSTOPTS) { 3748 struct T_opthdr *toh; 3749 3750 toh = (struct T_opthdr *)dstopt; 3751 toh->level = IPPROTO_IPV6; 3752 toh->name = IPV6_DSTOPTS; 3753 toh->len = sizeof (struct T_opthdr) + 3754 ipp.ipp_dstoptslen; 3755 toh->status = 0; 3756 dstopt += sizeof (struct T_opthdr); 3757 bcopy(ipp.ipp_dstopts, dstopt, 
3758 ipp.ipp_dstoptslen); 3759 dstopt += ipp.ipp_dstoptslen; 3760 udi_size -= toh->len; 3761 } 3762 /* Consumed all of allocated space */ 3763 ASSERT(udi_size == 0); 3764 } 3765 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3766 putnext(connp->conn_rq, mp); 3767 } 3768 3769 /* 3770 * Handle the results of a T_BIND_REQ whether deferred by IP or handled 3771 * immediately. 3772 */ 3773 static void 3774 icmp_bind_result(conn_t *connp, mblk_t *mp) 3775 { 3776 struct T_error_ack *tea; 3777 3778 switch (mp->b_datap->db_type) { 3779 case M_PROTO: 3780 case M_PCPROTO: 3781 /* M_PROTO messages contain some type of TPI message. */ 3782 if ((mp->b_wptr - mp->b_rptr) < sizeof (t_scalar_t)) { 3783 freemsg(mp); 3784 return; 3785 } 3786 tea = (struct T_error_ack *)mp->b_rptr; 3787 3788 switch (tea->PRIM_type) { 3789 case T_ERROR_ACK: 3790 switch (tea->ERROR_prim) { 3791 case O_T_BIND_REQ: 3792 case T_BIND_REQ: 3793 icmp_bind_error(connp, mp); 3794 return; 3795 default: 3796 break; 3797 } 3798 ASSERT(0); 3799 freemsg(mp); 3800 return; 3801 3802 case T_BIND_ACK: 3803 icmp_bind_ack(connp, mp); 3804 return; 3805 3806 default: 3807 break; 3808 } 3809 freemsg(mp); 3810 return; 3811 default: 3812 /* FIXME: other cases? */ 3813 ASSERT(0); 3814 freemsg(mp); 3815 return; 3816 } 3817 } 3818 3819 /* 3820 * Process a T_BIND_ACK 3821 */ 3822 static void 3823 icmp_bind_ack(conn_t *connp, mblk_t *mp) 3824 { 3825 icmp_t *icmp = connp->conn_icmp; 3826 mblk_t *mp1; 3827 ire_t *ire; 3828 struct T_bind_ack *tba; 3829 uchar_t *addrp; 3830 ipa_conn_t *ac; 3831 ipa6_conn_t *ac6; 3832 3833 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3834 /* 3835 * We know if headers are included or not so we can 3836 * safely do this. 3837 */ 3838 if (icmp->icmp_state == TS_UNBND) { 3839 /* 3840 * TPI has not yet bound - bind sent by 3841 * icmp_bind_proto. 
3842 */ 3843 freemsg(mp); 3844 rw_exit(&icmp->icmp_rwlock); 3845 return; 3846 } 3847 ASSERT(icmp->icmp_pending_op != -1); 3848 3849 /* 3850 * If a broadcast/multicast address was bound set 3851 * the source address to 0. 3852 * This ensures no datagrams with broadcast address 3853 * as source address are emitted (which would violate 3854 * RFC1122 - Hosts requirements) 3855 * 3856 * Note that when connecting the returned IRE is 3857 * for the destination address and we only perform 3858 * the broadcast check for the source address (it 3859 * is OK to connect to a broadcast/multicast address.) 3860 */ 3861 mp1 = mp->b_cont; 3862 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 3863 ire = (ire_t *)mp1->b_rptr; 3864 3865 /* 3866 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 3867 * local address. 3868 */ 3869 if (ire->ire_type == IRE_BROADCAST && 3870 icmp->icmp_state != TS_DATA_XFER) { 3871 ASSERT(icmp->icmp_pending_op == T_BIND_REQ || 3872 icmp->icmp_pending_op == O_T_BIND_REQ); 3873 /* This was just a local bind to a MC/broadcast addr */ 3874 V6_SET_ZERO(icmp->icmp_v6src); 3875 if (icmp->icmp_family == AF_INET6) 3876 (void) icmp_build_hdrs(icmp); 3877 } else if (V6_OR_V4_INADDR_ANY(icmp->icmp_v6src)) { 3878 /* 3879 * Local address not yet set - pick it from the 3880 * T_bind_ack 3881 */ 3882 tba = (struct T_bind_ack *)mp->b_rptr; 3883 addrp = &mp->b_rptr[tba->ADDR_offset]; 3884 switch (icmp->icmp_family) { 3885 case AF_INET: 3886 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 3887 ac = (ipa_conn_t *)addrp; 3888 } else { 3889 ASSERT(tba->ADDR_length == 3890 sizeof (ipa_conn_x_t)); 3891 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 3892 } 3893 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 3894 &icmp->icmp_v6src); 3895 break; 3896 case AF_INET6: 3897 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 3898 ac6 = (ipa6_conn_t *)addrp; 3899 } else { 3900 ASSERT(tba->ADDR_length == 3901 sizeof (ipa6_conn_x_t)); 3902 ac6 = &((ipa6_conn_x_t *) 3903 addrp)->ac6x_conn; 
3904 } 3905 icmp->icmp_v6src = ac6->ac6_laddr; 3906 (void) icmp_build_hdrs(icmp); 3907 } 3908 } 3909 mp1 = mp1->b_cont; 3910 } 3911 icmp->icmp_pending_op = -1; 3912 rw_exit(&icmp->icmp_rwlock); 3913 /* 3914 * Look for one or more appended ACK message added by 3915 * icmp_connect or icmp_disconnect. 3916 * If none found just send up the T_BIND_ACK. 3917 * icmp_connect has appended a T_OK_ACK and a 3918 * T_CONN_CON. 3919 * icmp_disconnect has appended a T_OK_ACK. 3920 */ 3921 if (mp1 != NULL) { 3922 if (mp->b_cont == mp1) 3923 mp->b_cont = NULL; 3924 else { 3925 ASSERT(mp->b_cont->b_cont == mp1); 3926 mp->b_cont->b_cont = NULL; 3927 } 3928 freemsg(mp); 3929 mp = mp1; 3930 while (mp != NULL) { 3931 mp1 = mp->b_cont; 3932 mp->b_cont = NULL; 3933 putnext(connp->conn_rq, mp); 3934 mp = mp1; 3935 } 3936 return; 3937 } 3938 freemsg(mp->b_cont); 3939 mp->b_cont = NULL; 3940 putnext(connp->conn_rq, mp); 3941 } 3942 3943 static void 3944 icmp_bind_error(conn_t *connp, mblk_t *mp) 3945 { 3946 icmp_t *icmp = connp->conn_icmp; 3947 struct T_error_ack *tea; 3948 3949 tea = (struct T_error_ack *)mp->b_rptr; 3950 /* 3951 * If our O_T_BIND_REQ/T_BIND_REQ fails, 3952 * clear out the source address before 3953 * passing the message upstream. 3954 * If this was caused by a T_CONN_REQ 3955 * revert back to bound state. 3956 */ 3957 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3958 if (icmp->icmp_state == TS_UNBND) { 3959 /* 3960 * TPI has not yet bound - bind sent by icmp_bind_proto. 
3961 */ 3962 freemsg(mp); 3963 rw_exit(&icmp->icmp_rwlock); 3964 return; 3965 } 3966 ASSERT(icmp->icmp_pending_op != -1); 3967 tea->ERROR_prim = icmp->icmp_pending_op; 3968 icmp->icmp_pending_op = -1; 3969 3970 switch (tea->ERROR_prim) { 3971 case T_CONN_REQ: 3972 ASSERT(icmp->icmp_state == TS_DATA_XFER); 3973 /* Connect failed */ 3974 /* Revert back to the bound source */ 3975 icmp->icmp_v6src = icmp->icmp_bound_v6src; 3976 icmp->icmp_state = TS_IDLE; 3977 if (icmp->icmp_family == AF_INET6) 3978 (void) icmp_build_hdrs(icmp); 3979 break; 3980 3981 case T_DISCON_REQ: 3982 case T_BIND_REQ: 3983 case O_T_BIND_REQ: 3984 V6_SET_ZERO(icmp->icmp_v6src); 3985 V6_SET_ZERO(icmp->icmp_bound_v6src); 3986 icmp->icmp_state = TS_UNBND; 3987 if (icmp->icmp_family == AF_INET6) 3988 (void) icmp_build_hdrs(icmp); 3989 break; 3990 default: 3991 break; 3992 } 3993 rw_exit(&icmp->icmp_rwlock); 3994 putnext(connp->conn_rq, mp); 3995 } 3996 3997 /* 3998 * return SNMP stuff in buffer in mpdata 3999 */ 4000 mblk_t * 4001 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 4002 { 4003 mblk_t *mpdata; 4004 struct opthdr *optp; 4005 conn_t *connp = Q_TO_CONN(q); 4006 icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 4007 mblk_t *mp2ctl; 4008 4009 /* 4010 * make a copy of the original message 4011 */ 4012 mp2ctl = copymsg(mpctl); 4013 4014 if (mpctl == NULL || 4015 (mpdata = mpctl->b_cont) == NULL) { 4016 freemsg(mpctl); 4017 freemsg(mp2ctl); 4018 return (0); 4019 } 4020 4021 /* fixed length structure for IPv4 and IPv6 counters */ 4022 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4023 optp->level = EXPER_RAWIP; 4024 optp->name = 0; 4025 (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, 4026 sizeof (is->is_rawip_mib)); 4027 optp->len = msgdsize(mpdata); 4028 qreply(q, mpctl); 4029 4030 return (mp2ctl); 4031 } 4032 4033 /* 4034 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 
4035 * TODO: If this ever actually tries to set anything, it needs to be 4036 * to do the appropriate locking. 4037 */ 4038 /* ARGSUSED */ 4039 int 4040 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4041 uchar_t *ptr, int len) 4042 { 4043 switch (level) { 4044 case EXPER_RAWIP: 4045 return (0); 4046 default: 4047 return (1); 4048 } 4049 } 4050 4051 /* Report for ndd "icmp_status" */ 4052 /* ARGSUSED */ 4053 static int 4054 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4055 { 4056 conn_t *connp; 4057 ip_stack_t *ipst; 4058 char laddrbuf[INET6_ADDRSTRLEN]; 4059 char faddrbuf[INET6_ADDRSTRLEN]; 4060 int i; 4061 4062 (void) mi_mpprintf(mp, 4063 "RAWIP " MI_COL_HDRPAD_STR 4064 /* 01234567[89ABCDEF] */ 4065 " src addr dest addr state"); 4066 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 4067 4068 connp = Q_TO_CONN(q); 4069 ipst = connp->conn_netstack->netstack_ip; 4070 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4071 connf_t *connfp; 4072 char *state; 4073 4074 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4075 connp = NULL; 4076 4077 while ((connp = ipcl_get_next_conn(connfp, connp, 4078 IPCL_RAWIPCONN)) != NULL) { 4079 icmp_t *icmp; 4080 4081 mutex_enter(&(connp)->conn_lock); 4082 icmp = connp->conn_icmp; 4083 4084 if (icmp->icmp_state == TS_UNBND) 4085 state = "UNBOUND"; 4086 else if (icmp->icmp_state == TS_IDLE) 4087 state = "IDLE"; 4088 else if (icmp->icmp_state == TS_DATA_XFER) 4089 state = "CONNECTED"; 4090 else 4091 state = "UnkState"; 4092 4093 (void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s", 4094 (void *)icmp, 4095 inet_ntop(AF_INET6, &icmp->icmp_v6dst, faddrbuf, 4096 sizeof (faddrbuf)), 4097 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 4098 sizeof (laddrbuf)), 4099 state); 4100 mutex_exit(&(connp)->conn_lock); 4101 } 4102 } 4103 return (0); 4104 } 4105 4106 /* 4107 * This routine creates a T_UDERROR_IND message and passes it upstream. 
4108 * The address and options are copied from the T_UNITDATA_REQ message 4109 * passed in mp. This message is freed. 4110 */ 4111 static void 4112 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 4113 { 4114 mblk_t *mp1; 4115 uchar_t *rptr = mp->b_rptr; 4116 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 4117 4118 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 4119 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 4120 tudr->OPT_length, err); 4121 if (mp1) 4122 qreply(q, mp1); 4123 freemsg(mp); 4124 } 4125 4126 /* 4127 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 4128 * After some error checking, the message is passed downstream to ip. 4129 */ 4130 static void 4131 icmp_unbind(queue_t *q, mblk_t *mp) 4132 { 4133 icmp_t *icmp = Q_TO_ICMP(q); 4134 4135 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4136 /* If a bind has not been done, we can't unbind. */ 4137 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 4138 rw_exit(&icmp->icmp_rwlock); 4139 icmp_err_ack(q, mp, TOUTSTATE, 0); 4140 return; 4141 } 4142 icmp->icmp_pending_op = T_UNBIND_REQ; 4143 rw_exit(&icmp->icmp_rwlock); 4144 4145 /* 4146 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 4147 * and therefore ip_unbind must never return NULL. 4148 */ 4149 mp = ip_unbind(q, mp); 4150 ASSERT(mp != NULL); 4151 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4152 4153 /* 4154 * Once we're unbound from IP, the pending operation may be cleared 4155 * here. 4156 */ 4157 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4158 V6_SET_ZERO(icmp->icmp_v6src); 4159 V6_SET_ZERO(icmp->icmp_bound_v6src); 4160 icmp->icmp_pending_op = -1; 4161 icmp->icmp_state = TS_UNBND; 4162 if (icmp->icmp_family == AF_INET6) 4163 (void) icmp_build_hdrs(icmp); 4164 rw_exit(&icmp->icmp_rwlock); 4165 4166 qreply(q, mp); 4167 } 4168 4169 /* 4170 * Process IPv4 packets that already include an IP header. 
4171 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 4172 * IPPROTO_IGMP). 4173 */ 4174 static void 4175 icmp_wput_hdrincl(queue_t *q, mblk_t *mp, icmp_t *icmp, ip4_pkt_t *pktinfop) 4176 { 4177 icmp_stack_t *is = icmp->icmp_is; 4178 ipha_t *ipha; 4179 int ip_hdr_length; 4180 int tp_hdr_len; 4181 mblk_t *mp1; 4182 uint_t pkt_len; 4183 ip_opt_info_t optinfo; 4184 conn_t *connp = icmp->icmp_connp; 4185 4186 optinfo.ip_opt_flags = 0; 4187 optinfo.ip_opt_ill_index = 0; 4188 ipha = (ipha_t *)mp->b_rptr; 4189 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4190 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 4191 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4192 ASSERT(icmp != NULL); 4193 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4194 freemsg(mp); 4195 return; 4196 } 4197 ipha = (ipha_t *)mp->b_rptr; 4198 } 4199 ipha->ipha_version_and_hdr_length = 4200 (IP_VERSION<<4) | (ip_hdr_length>>2); 4201 4202 /* 4203 * For the socket of SOCK_RAW type, the checksum is provided in the 4204 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4205 * tell IP that the application has sent a complete IP header and not 4206 * to compute the transport checksum nor change the DF flag. 4207 */ 4208 ipha->ipha_ident = IP_HDR_INCLUDED; 4209 ipha->ipha_hdr_checksum = 0; 4210 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 4211 /* Insert options if any */ 4212 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4213 /* 4214 * Put the IP header plus any transport header that is 4215 * checksumed by ip_wput into the first mblk. (ip_wput assumes 4216 * that at least the checksum field is in the first mblk.) 4217 */ 4218 switch (ipha->ipha_protocol) { 4219 case IPPROTO_UDP: 4220 tp_hdr_len = 8; 4221 break; 4222 case IPPROTO_TCP: 4223 tp_hdr_len = 20; 4224 break; 4225 default: 4226 tp_hdr_len = 0; 4227 break; 4228 } 4229 /* 4230 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 4231 * tp_hdr_len bytes will be in a single mblk. 
4232 */ 4233 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 4234 tp_hdr_len)) { 4235 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 4236 tp_hdr_len)) { 4237 BUMP_MIB(&is->is_rawip_mib, 4238 rawipOutErrors); 4239 freemsg(mp); 4240 return; 4241 } 4242 ipha = (ipha_t *)mp->b_rptr; 4243 } 4244 4245 /* 4246 * if the length is larger then the max allowed IP packet, 4247 * then send an error and abort the processing. 4248 */ 4249 pkt_len = ntohs(ipha->ipha_length) 4250 + icmp->icmp_ip_snd_options_len; 4251 if (pkt_len > IP_MAXPACKET) { 4252 icmp_ud_err(q, mp, EMSGSIZE); 4253 return; 4254 } 4255 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra + 4256 tp_hdr_len, BPRI_LO))) { 4257 icmp_ud_err(q, mp, ENOMEM); 4258 return; 4259 } 4260 mp1->b_rptr += is->is_wroff_extra; 4261 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4262 4263 ipha->ipha_length = htons((uint16_t)pkt_len); 4264 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4265 4266 /* Copy transport header if any */ 4267 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4268 mp1->b_wptr += tp_hdr_len; 4269 4270 /* Add options */ 4271 ipha = (ipha_t *)mp1->b_rptr; 4272 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 4273 icmp->icmp_ip_snd_options_len); 4274 4275 /* Drop IP header and transport header from original */ 4276 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4277 4278 mp1->b_cont = mp; 4279 mp = mp1; 4280 /* 4281 * Massage source route putting first source 4282 * route in ipha_dst. 
4283 */ 4284 (void) ip_massage_options(ipha, is->is_netstack); 4285 } 4286 4287 if (pktinfop != NULL) { 4288 /* 4289 * Over write the source address provided in the header 4290 */ 4291 if (pktinfop->ip4_addr != INADDR_ANY) { 4292 ipha->ipha_src = pktinfop->ip4_addr; 4293 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4294 } 4295 4296 if (pktinfop->ip4_ill_index != 0) { 4297 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4298 } 4299 } 4300 4301 mblk_setcred(mp, connp->conn_cred); 4302 ip_output_options(connp, mp, q, IP_WPUT, 4303 &optinfo); 4304 } 4305 4306 static boolean_t 4307 icmp_update_label(queue_t *q, icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4308 { 4309 int err; 4310 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4311 icmp_stack_t *is = icmp->icmp_is; 4312 conn_t *connp = icmp->icmp_connp; 4313 4314 err = tsol_compute_label(DB_CREDDEF(mp, connp->conn_cred), dst, 4315 opt_storage, icmp->icmp_mac_exempt, 4316 is->is_netstack->netstack_ip); 4317 if (err == 0) { 4318 err = tsol_update_options(&icmp->icmp_ip_snd_options, 4319 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4320 opt_storage); 4321 } 4322 if (err != 0) { 4323 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4324 DTRACE_PROBE4( 4325 tx__ip__log__drop__updatelabel__icmp, 4326 char *, "queue(1) failed to update options(2) on mp(3)", 4327 queue_t *, q, char *, opt_storage, mblk_t *, mp); 4328 icmp_ud_err(q, mp, err); 4329 return (B_FALSE); 4330 } 4331 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4332 return (B_TRUE); 4333 } 4334 4335 /* 4336 * This routine handles all messages passed downstream. It either 4337 * consumes the message or passes it downstream; it never queues a 4338 * a message. 
4339 */ 4340 static void 4341 icmp_wput(queue_t *q, mblk_t *mp) 4342 { 4343 uchar_t *rptr = mp->b_rptr; 4344 ipha_t *ipha; 4345 mblk_t *mp1; 4346 int ip_hdr_length; 4347 #define tudr ((struct T_unitdata_req *)rptr) 4348 size_t ip_len; 4349 conn_t *connp = Q_TO_CONN(q); 4350 icmp_t *icmp = connp->conn_icmp; 4351 icmp_stack_t *is = icmp->icmp_is; 4352 sin6_t *sin6; 4353 sin_t *sin; 4354 ipaddr_t v4dst; 4355 ip4_pkt_t pktinfo; 4356 ip4_pkt_t *pktinfop = &pktinfo; 4357 ip_opt_info_t optinfo; 4358 4359 switch (mp->b_datap->db_type) { 4360 case M_DATA: 4361 if (icmp->icmp_hdrincl) { 4362 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4363 ipha = (ipha_t *)mp->b_rptr; 4364 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4365 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4366 BUMP_MIB(&is->is_rawip_mib, 4367 rawipOutErrors); 4368 freemsg(mp); 4369 return; 4370 } 4371 ipha = (ipha_t *)mp->b_rptr; 4372 } 4373 /* 4374 * If this connection was used for v6 (inconceivable!) 4375 * or if we have a new destination, then it's time to 4376 * figure a new label. 4377 */ 4378 if (is_system_labeled() && 4379 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4380 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4381 ipha->ipha_dst) && 4382 !icmp_update_label(q, icmp, mp, ipha->ipha_dst)) { 4383 return; 4384 } 4385 icmp_wput_hdrincl(q, mp, icmp, NULL); 4386 return; 4387 } 4388 freemsg(mp); 4389 return; 4390 case M_PROTO: 4391 case M_PCPROTO: 4392 ip_len = mp->b_wptr - rptr; 4393 if (ip_len >= sizeof (struct T_unitdata_req)) { 4394 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4395 if (((union T_primitives *)rptr)->type 4396 == T_UNITDATA_REQ) 4397 break; 4398 } 4399 /* FALLTHRU */ 4400 default: 4401 icmp_wput_other(q, mp); 4402 return; 4403 } 4404 4405 /* Handle T_UNITDATA_REQ messages here. */ 4406 4407 4408 4409 if (icmp->icmp_state == TS_UNBND) { 4410 /* If a port has not been bound to the stream, fail. 
*/ 4411 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4412 icmp_ud_err(q, mp, EPROTO); 4413 return; 4414 } 4415 mp1 = mp->b_cont; 4416 if (mp1 == NULL) { 4417 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4418 icmp_ud_err(q, mp, EPROTO); 4419 return; 4420 } 4421 4422 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4423 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4424 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4425 return; 4426 } 4427 4428 switch (icmp->icmp_family) { 4429 case AF_INET6: 4430 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4431 if (!OK_32PTR((char *)sin6) || 4432 tudr->DEST_length != sizeof (sin6_t) || 4433 sin6->sin6_family != AF_INET6) { 4434 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4435 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4436 return; 4437 } 4438 4439 /* No support for mapped addresses on raw sockets */ 4440 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4441 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4442 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4443 return; 4444 } 4445 4446 /* 4447 * Destination is a native IPv6 address. 4448 * Send out an IPv6 format packet. 
4449 */ 4450 icmp_wput_ipv6(q, mp, sin6, tudr->OPT_length); 4451 return; 4452 4453 case AF_INET: 4454 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4455 if (!OK_32PTR((char *)sin) || 4456 tudr->DEST_length != sizeof (sin_t) || 4457 sin->sin_family != AF_INET) { 4458 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4459 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4460 return; 4461 } 4462 /* Extract and ipaddr */ 4463 v4dst = sin->sin_addr.s_addr; 4464 break; 4465 4466 default: 4467 ASSERT(0); 4468 } 4469 4470 pktinfop->ip4_ill_index = 0; 4471 pktinfop->ip4_addr = INADDR_ANY; 4472 optinfo.ip_opt_flags = 0; 4473 optinfo.ip_opt_ill_index = 0; 4474 4475 4476 /* 4477 * If options passed in, feed it for verification and handling 4478 */ 4479 if (tudr->OPT_length != 0) { 4480 int error; 4481 4482 error = 0; 4483 if (icmp_unitdata_opt_process(q, mp, &error, 4484 (void *)pktinfop) < 0) { 4485 /* failure */ 4486 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4487 icmp_ud_err(q, mp, error); 4488 return; 4489 } 4490 ASSERT(error == 0); 4491 /* 4492 * Note: Success in processing options. 
4493 * mp option buffer represented by 4494 * OPT_length/offset now potentially modified 4495 * and contain option setting results 4496 */ 4497 4498 } 4499 4500 if (v4dst == INADDR_ANY) 4501 v4dst = htonl(INADDR_LOOPBACK); 4502 4503 /* Check if our saved options are valid; update if not */ 4504 if (is_system_labeled() && 4505 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4506 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst) && 4507 !icmp_update_label(q, icmp, mp, v4dst)) { 4508 return; 4509 } 4510 4511 /* Protocol 255 contains full IP headers */ 4512 if (icmp->icmp_hdrincl) { 4513 freeb(mp); 4514 icmp_wput_hdrincl(q, mp1, icmp, pktinfop); 4515 return; 4516 } 4517 4518 4519 /* Add an IP header */ 4520 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4521 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 4522 if ((uchar_t *)ipha < mp1->b_datap->db_base || 4523 mp1->b_datap->db_ref != 1 || 4524 !OK_32PTR(ipha)) { 4525 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, 4526 BPRI_LO))) { 4527 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4528 icmp_ud_err(q, mp, ENOMEM); 4529 return; 4530 } 4531 mp1->b_cont = mp->b_cont; 4532 ipha = (ipha_t *)mp1->b_datap->db_lim; 4533 mp1->b_wptr = (uchar_t *)ipha; 4534 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4535 } 4536 #ifdef _BIG_ENDIAN 4537 /* Set version, header length, and tos */ 4538 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4539 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4540 icmp->icmp_type_of_service); 4541 /* Set ttl and protocol */ 4542 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4543 #else 4544 /* Set version, header length, and tos */ 4545 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4546 ((icmp->icmp_type_of_service << 8) | 4547 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4548 /* Set ttl and protocol */ 4549 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4550 #endif 4551 if (pktinfop->ip4_addr != INADDR_ANY) { 4552 
ipha->ipha_src = pktinfop->ip4_addr; 4553 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4554 } else { 4555 4556 /* 4557 * Copy our address into the packet. If this is zero, 4558 * ip will fill in the real source address. 4559 */ 4560 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4561 } 4562 4563 ipha->ipha_fragment_offset_and_flags = 0; 4564 4565 if (pktinfop->ip4_ill_index != 0) { 4566 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4567 } 4568 4569 4570 /* 4571 * For the socket of SOCK_RAW type, the checksum is provided in the 4572 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4573 * tell IP that the application has sent a complete IP header and not 4574 * to compute the transport checksum nor change the DF flag. 4575 */ 4576 ipha->ipha_ident = IP_HDR_INCLUDED; 4577 4578 /* Finish common formatting of the packet. */ 4579 mp1->b_rptr = (uchar_t *)ipha; 4580 4581 ip_len = mp1->b_wptr - (uchar_t *)ipha; 4582 if (mp1->b_cont != NULL) 4583 ip_len += msgdsize(mp1->b_cont); 4584 4585 /* 4586 * Set the length into the IP header. 4587 * If the length is greater than the maximum allowed by IP, 4588 * then free the message and return. Do not try and send it 4589 * as this can cause problems in layers below. 4590 */ 4591 if (ip_len > IP_MAXPACKET) { 4592 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4593 icmp_ud_err(q, mp, EMSGSIZE); 4594 return; 4595 } 4596 ipha->ipha_length = htons((uint16_t)ip_len); 4597 /* 4598 * Copy in the destination address from the T_UNITDATA 4599 * request 4600 */ 4601 ipha->ipha_dst = v4dst; 4602 4603 /* 4604 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4605 */ 4606 if (CLASSD(v4dst)) 4607 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4608 4609 /* Copy in options if any */ 4610 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4611 bcopy(icmp->icmp_ip_snd_options, 4612 &ipha[1], icmp->icmp_ip_snd_options_len); 4613 /* 4614 * Massage source route putting first source route in ipha_dst. 
4615 * Ignore the destination in the T_unitdata_req. 4616 */ 4617 (void) ip_massage_options(ipha, is->is_netstack); 4618 } 4619 4620 freeb(mp); 4621 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 4622 mblk_setcred(mp1, connp->conn_cred); 4623 ip_output_options(Q_TO_CONN(q), mp1, q, IP_WPUT, &optinfo); 4624 #undef ipha 4625 #undef tudr 4626 } 4627 4628 static boolean_t 4629 icmp_update_label_v6(queue_t *wq, icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4630 { 4631 int err; 4632 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4633 icmp_stack_t *is = icmp->icmp_is; 4634 conn_t *connp = icmp->icmp_connp; 4635 4636 err = tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), dst, 4637 opt_storage, icmp->icmp_mac_exempt, 4638 is->is_netstack->netstack_ip); 4639 if (err == 0) { 4640 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4641 &icmp->icmp_label_len_v6, opt_storage); 4642 } 4643 if (err != 0) { 4644 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4645 DTRACE_PROBE4( 4646 tx__ip__log__drop__updatelabel__icmp6, 4647 char *, "queue(1) failed to update options(2) on mp(3)", 4648 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4649 icmp_ud_err(wq, mp, err); 4650 return (B_FALSE); 4651 } 4652 4653 icmp->icmp_v6lastdst = *dst; 4654 return (B_TRUE); 4655 } 4656 4657 /* 4658 * icmp_wput_ipv6(): 4659 * Assumes that icmp_wput did some sanity checking on the destination 4660 * address, but that the label may not yet be correct. 
4661 */ 4662 void 4663 icmp_wput_ipv6(queue_t *q, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen) 4664 { 4665 ip6_t *ip6h; 4666 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 4667 mblk_t *mp1; 4668 int ip_hdr_len = IPV6_HDR_LEN; 4669 size_t ip_len; 4670 icmp_t *icmp = Q_TO_ICMP(q); 4671 icmp_stack_t *is = icmp->icmp_is; 4672 ip6_pkt_t ipp_s; /* For ancillary data options */ 4673 ip6_pkt_t *ipp = &ipp_s; 4674 ip6_pkt_t *tipp; 4675 uint32_t csum = 0; 4676 uint_t ignore = 0; 4677 uint_t option_exists = 0, is_sticky = 0; 4678 uint8_t *cp; 4679 uint8_t *nxthdr_ptr; 4680 in6_addr_t ip6_dst; 4681 4682 /* 4683 * If the local address is a mapped address return 4684 * an error. 4685 * It would be possible to send an IPv6 packet but the 4686 * response would never make it back to the application 4687 * since it is bound to a mapped address. 4688 */ 4689 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4690 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4691 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4692 return; 4693 } 4694 4695 ipp->ipp_fields = 0; 4696 ipp->ipp_sticky_ignored = 0; 4697 4698 /* 4699 * If TPI options passed in, feed it for verification and handling 4700 */ 4701 if (tudr_optlen != 0) { 4702 int error; 4703 4704 if (icmp_unitdata_opt_process(q, mp, &error, 4705 (void *)ipp) < 0) { 4706 /* failure */ 4707 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4708 icmp_ud_err(q, mp, error); 4709 return; 4710 } 4711 ignore = ipp->ipp_sticky_ignored; 4712 ASSERT(error == 0); 4713 } 4714 4715 if (sin6->sin6_scope_id != 0 && 4716 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4717 /* 4718 * IPPF_SCOPE_ID is special. It's neither a sticky 4719 * option nor ancillary data. It needs to be 4720 * explicitly set in options_exists. 
4721 */ 4722 option_exists |= IPPF_SCOPE_ID; 4723 } 4724 4725 /* 4726 * Compute the destination address 4727 */ 4728 ip6_dst = sin6->sin6_addr; 4729 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4730 ip6_dst = ipv6_loopback; 4731 4732 /* 4733 * If we're not going to the same destination as last time, then 4734 * recompute the label required. This is done in a separate routine to 4735 * avoid blowing up our stack here. 4736 */ 4737 if (is_system_labeled() && 4738 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst) && 4739 !icmp_update_label_v6(q, icmp, mp, &ip6_dst)) { 4740 return; 4741 } 4742 4743 /* 4744 * If there's a security label here, then we ignore any options the 4745 * user may try to set. We keep the peer's label as a hidden sticky 4746 * option. 4747 */ 4748 if (icmp->icmp_label_len_v6 > 0) { 4749 ignore &= ~IPPF_HOPOPTS; 4750 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4751 } 4752 4753 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4754 (ipp->ipp_fields == 0)) { 4755 /* No sticky options nor ancillary data. */ 4756 goto no_options; 4757 } 4758 4759 /* 4760 * Go through the options figuring out where each is going to 4761 * come from and build two masks. The first mask indicates if 4762 * the option exists at all. The second mask indicates if the 4763 * option is sticky or ancillary. 
4764 */ 4765 if (!(ignore & IPPF_HOPOPTS)) { 4766 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4767 option_exists |= IPPF_HOPOPTS; 4768 ip_hdr_len += ipp->ipp_hopoptslen; 4769 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4770 option_exists |= IPPF_HOPOPTS; 4771 is_sticky |= IPPF_HOPOPTS; 4772 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4773 } 4774 } 4775 4776 if (!(ignore & IPPF_RTHDR)) { 4777 if (ipp->ipp_fields & IPPF_RTHDR) { 4778 option_exists |= IPPF_RTHDR; 4779 ip_hdr_len += ipp->ipp_rthdrlen; 4780 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4781 option_exists |= IPPF_RTHDR; 4782 is_sticky |= IPPF_RTHDR; 4783 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4784 } 4785 } 4786 4787 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4788 /* 4789 * Need to have a router header to use these. 4790 */ 4791 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4792 option_exists |= IPPF_RTDSTOPTS; 4793 ip_hdr_len += ipp->ipp_rtdstoptslen; 4794 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4795 option_exists |= IPPF_RTDSTOPTS; 4796 is_sticky |= IPPF_RTDSTOPTS; 4797 ip_hdr_len += 4798 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4799 } 4800 } 4801 4802 if (!(ignore & IPPF_DSTOPTS)) { 4803 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4804 option_exists |= IPPF_DSTOPTS; 4805 ip_hdr_len += ipp->ipp_dstoptslen; 4806 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4807 option_exists |= IPPF_DSTOPTS; 4808 is_sticky |= IPPF_DSTOPTS; 4809 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4810 } 4811 } 4812 4813 if (!(ignore & IPPF_IFINDEX)) { 4814 if (ipp->ipp_fields & IPPF_IFINDEX) { 4815 option_exists |= IPPF_IFINDEX; 4816 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4817 option_exists |= IPPF_IFINDEX; 4818 is_sticky |= IPPF_IFINDEX; 4819 } 4820 } 4821 4822 if (!(ignore & IPPF_ADDR)) { 4823 if (ipp->ipp_fields & IPPF_ADDR) { 4824 option_exists |= IPPF_ADDR; 4825 } else if 
(icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4826 option_exists |= IPPF_ADDR; 4827 is_sticky |= IPPF_ADDR; 4828 } 4829 } 4830 4831 if (!(ignore & IPPF_DONTFRAG)) { 4832 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4833 option_exists |= IPPF_DONTFRAG; 4834 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4835 option_exists |= IPPF_DONTFRAG; 4836 is_sticky |= IPPF_DONTFRAG; 4837 } 4838 } 4839 4840 if (!(ignore & IPPF_USE_MIN_MTU)) { 4841 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4842 option_exists |= IPPF_USE_MIN_MTU; 4843 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4844 IPPF_USE_MIN_MTU) { 4845 option_exists |= IPPF_USE_MIN_MTU; 4846 is_sticky |= IPPF_USE_MIN_MTU; 4847 } 4848 } 4849 4850 if (!(ignore & IPPF_NEXTHOP)) { 4851 if (ipp->ipp_fields & IPPF_NEXTHOP) { 4852 option_exists |= IPPF_NEXTHOP; 4853 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) { 4854 option_exists |= IPPF_NEXTHOP; 4855 is_sticky |= IPPF_NEXTHOP; 4856 } 4857 } 4858 4859 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 4860 option_exists |= IPPF_HOPLIMIT; 4861 /* IPV6_HOPLIMIT can never be sticky */ 4862 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 4863 4864 if (!(ignore & IPPF_UNICAST_HOPS) && 4865 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 4866 option_exists |= IPPF_UNICAST_HOPS; 4867 is_sticky |= IPPF_UNICAST_HOPS; 4868 } 4869 4870 if (!(ignore & IPPF_MULTICAST_HOPS) && 4871 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 4872 option_exists |= IPPF_MULTICAST_HOPS; 4873 is_sticky |= IPPF_MULTICAST_HOPS; 4874 } 4875 4876 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 4877 /* This is a sticky socket option only */ 4878 option_exists |= IPPF_NO_CKSUM; 4879 is_sticky |= IPPF_NO_CKSUM; 4880 } 4881 4882 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 4883 /* This is a sticky socket option only */ 4884 option_exists |= IPPF_RAW_CKSUM; 4885 is_sticky |= IPPF_RAW_CKSUM; 4886 } 4887 4888 if (!(ignore 
& IPPF_TCLASS)) { 4889 if (ipp->ipp_fields & IPPF_TCLASS) { 4890 option_exists |= IPPF_TCLASS; 4891 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 4892 option_exists |= IPPF_TCLASS; 4893 is_sticky |= IPPF_TCLASS; 4894 } 4895 } 4896 4897 no_options: 4898 4899 /* 4900 * If any options carried in the ip6i_t were specified, we 4901 * need to account for the ip6i_t in the data we'll be sending 4902 * down. 4903 */ 4904 if (option_exists & IPPF_HAS_IP6I) 4905 ip_hdr_len += sizeof (ip6i_t); 4906 4907 /* check/fix buffer config, setup pointers into it */ 4908 mp1 = mp->b_cont; 4909 ip6h = (ip6_t *)&mp1->b_rptr[-ip_hdr_len]; 4910 if ((mp1->b_datap->db_ref != 1) || 4911 ((unsigned char *)ip6h < mp1->b_datap->db_base) || 4912 !OK_32PTR(ip6h)) { 4913 /* Try to get everything in a single mblk next time */ 4914 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 4915 icmp->icmp_max_hdr_len = ip_hdr_len; 4916 (void) mi_set_sth_wroff(RD(q), 4917 icmp->icmp_max_hdr_len + is->is_wroff_extra); 4918 } 4919 mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); 4920 if (!mp1) { 4921 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4922 icmp_ud_err(q, mp, ENOMEM); 4923 return; 4924 } 4925 mp1->b_cont = mp->b_cont; 4926 mp1->b_wptr = mp1->b_datap->db_lim; 4927 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 4928 } 4929 mp1->b_rptr = (unsigned char *)ip6h; 4930 ip6i = (ip6i_t *)ip6h; 4931 4932 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 4933 if (option_exists & IPPF_HAS_IP6I) { 4934 ip6h = (ip6_t *)&ip6i[1]; 4935 ip6i->ip6i_flags = 0; 4936 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4937 4938 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 4939 if (option_exists & IPPF_SCOPE_ID) { 4940 ip6i->ip6i_flags |= IP6I_IFINDEX; 4941 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 4942 } else if (option_exists & IPPF_IFINDEX) { 4943 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 4944 ASSERT(tipp->ipp_ifindex != 0); 4945 ip6i->ip6i_flags |= IP6I_IFINDEX; 4946 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 4947 } 4948 4949 if (option_exists & IPPF_RAW_CKSUM) { 4950 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 4951 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 4952 } 4953 4954 if (option_exists & IPPF_NO_CKSUM) { 4955 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 4956 } 4957 4958 if (option_exists & IPPF_ADDR) { 4959 /* 4960 * Enable per-packet source address verification if 4961 * IPV6_PKTINFO specified the source address. 4962 * ip6_src is set in the transport's _wput function. 4963 */ 4964 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 4965 } 4966 4967 if (option_exists & IPPF_DONTFRAG) { 4968 ip6i->ip6i_flags |= IP6I_DONTFRAG; 4969 } 4970 4971 if (option_exists & IPPF_USE_MIN_MTU) { 4972 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 4973 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 4974 } 4975 4976 if (option_exists & IPPF_NEXTHOP) { 4977 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 4978 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 4979 ip6i->ip6i_flags |= IP6I_NEXTHOP; 4980 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 4981 } 4982 4983 /* 4984 * tell IP this is an ip6i_t private header 4985 */ 4986 ip6i->ip6i_nxt = IPPROTO_RAW; 4987 } 4988 4989 /* Initialize IPv6 header */ 4990 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 4991 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 4992 4993 /* Set the hoplimit of the outgoing packet. 
*/ 4994 if (option_exists & IPPF_HOPLIMIT) { 4995 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 4996 ip6h->ip6_hops = ipp->ipp_hoplimit; 4997 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 4998 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 4999 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 5000 if (option_exists & IPPF_MULTICAST_HOPS) 5001 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5002 } else { 5003 ip6h->ip6_hops = icmp->icmp_ttl; 5004 if (option_exists & IPPF_UNICAST_HOPS) 5005 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5006 } 5007 5008 if (option_exists & IPPF_ADDR) { 5009 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 5010 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 5011 ip6h->ip6_src = tipp->ipp_addr; 5012 } else { 5013 /* 5014 * The source address was not set using IPV6_PKTINFO. 5015 * First look at the bound source. 5016 * If unspecified fallback to __sin6_src_id. 5017 */ 5018 ip6h->ip6_src = icmp->icmp_v6src; 5019 if (sin6->__sin6_src_id != 0 && 5020 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 5021 ip_srcid_find_id(sin6->__sin6_src_id, 5022 &ip6h->ip6_src, icmp->icmp_zoneid, 5023 is->is_netstack); 5024 } 5025 } 5026 5027 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 5028 cp = (uint8_t *)&ip6h[1]; 5029 5030 /* 5031 * Here's where we have to start stringing together 5032 * any extension headers in the right order: 5033 * Hop-by-hop, destination, routing, and final destination opts. 
5034 */ 5035 if (option_exists & IPPF_HOPOPTS) { 5036 /* Hop-by-hop options */ 5037 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 5038 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 5039 5040 *nxthdr_ptr = IPPROTO_HOPOPTS; 5041 nxthdr_ptr = &hbh->ip6h_nxt; 5042 5043 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 5044 cp += tipp->ipp_hopoptslen; 5045 } 5046 /* 5047 * En-route destination options 5048 * Only do them if there's a routing header as well 5049 */ 5050 if (option_exists & IPPF_RTDSTOPTS) { 5051 ip6_dest_t *dst = (ip6_dest_t *)cp; 5052 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 5053 5054 *nxthdr_ptr = IPPROTO_DSTOPTS; 5055 nxthdr_ptr = &dst->ip6d_nxt; 5056 5057 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 5058 cp += tipp->ipp_rtdstoptslen; 5059 } 5060 /* 5061 * Routing header next 5062 */ 5063 if (option_exists & IPPF_RTHDR) { 5064 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 5065 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 5066 5067 *nxthdr_ptr = IPPROTO_ROUTING; 5068 nxthdr_ptr = &rt->ip6r_nxt; 5069 5070 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 5071 cp += tipp->ipp_rthdrlen; 5072 } 5073 /* 5074 * Do ultimate destination options 5075 */ 5076 if (option_exists & IPPF_DSTOPTS) { 5077 ip6_dest_t *dest = (ip6_dest_t *)cp; 5078 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 5079 5080 *nxthdr_ptr = IPPROTO_DSTOPTS; 5081 nxthdr_ptr = &dest->ip6d_nxt; 5082 5083 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 5084 cp += tipp->ipp_dstoptslen; 5085 } 5086 5087 /* 5088 * Now set the last header pointer to the proto passed in 5089 */ 5090 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 5091 *nxthdr_ptr = icmp->icmp_proto; 5092 5093 /* 5094 * Copy in the destination address 5095 */ 5096 ip6h->ip6_dst = ip6_dst; 5097 5098 ip6h->ip6_vcf = 5099 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 5100 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 5101 5102 if (option_exists & IPPF_TCLASS) { 5103 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 5104 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 5105 tipp->ipp_tclass); 5106 } 5107 if (option_exists & IPPF_RTHDR) { 5108 ip6_rthdr_t *rth; 5109 5110 /* 5111 * Perform any processing needed for source routing. 5112 * We know that all extension headers will be in the same mblk 5113 * as the IPv6 header. 5114 */ 5115 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 5116 if (rth != NULL && rth->ip6r_segleft != 0) { 5117 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 5118 /* 5119 * Drop packet - only support Type 0 routing. 5120 * Notify the application as well. 5121 */ 5122 icmp_ud_err(q, mp, EPROTO); 5123 BUMP_MIB(&is->is_rawip_mib, 5124 rawipOutErrors); 5125 return; 5126 } 5127 /* 5128 * rth->ip6r_len is twice the number of 5129 * addresses in the header 5130 */ 5131 if (rth->ip6r_len & 0x1) { 5132 icmp_ud_err(q, mp, EPROTO); 5133 BUMP_MIB(&is->is_rawip_mib, 5134 rawipOutErrors); 5135 return; 5136 } 5137 /* 5138 * Shuffle the routing header and ip6_dst 5139 * addresses, and get the checksum difference 5140 * between the first hop (in ip6_dst) and 5141 * the destination (in the last routing hdr entry). 5142 */ 5143 csum = ip_massage_options_v6(ip6h, rth, 5144 is->is_netstack); 5145 /* 5146 * Verify that the first hop isn't a mapped address. 5147 * Routers along the path need to do this verification 5148 * for subsequent hops. 5149 */ 5150 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 5151 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5152 BUMP_MIB(&is->is_rawip_mib, 5153 rawipOutErrors); 5154 return; 5155 } 5156 } 5157 } 5158 5159 ip_len = mp1->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 5160 if (mp1->b_cont != NULL) 5161 ip_len += msgdsize(mp1->b_cont); 5162 5163 /* 5164 * Set the length into the IP header. 5165 * If the length is greater than the maximum allowed by IP, 5166 * then free the message and return. Do not try and send it 5167 * as this can cause problems in layers below. 
5168 */ 5169 if (ip_len > IP_MAXPACKET) { 5170 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5171 icmp_ud_err(q, mp, EMSGSIZE); 5172 return; 5173 } 5174 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 5175 uint_t cksum_off; /* From ip6i == mp1->b_rptr */ 5176 uint16_t *cksum_ptr; 5177 uint_t ext_hdrs_len; 5178 5179 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 5180 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 5181 icmp->icmp_checksum_off == 2); 5182 5183 /* 5184 * We make it easy for IP to include our pseudo header 5185 * by putting our length in uh_checksum, modified (if 5186 * we have a routing header) by the checksum difference 5187 * between the ultimate destination and first hop addresses. 5188 * Note: ICMPv6 must always checksum the packet. 5189 */ 5190 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 5191 if (cksum_off + sizeof (uint16_t) > mp1->b_wptr - mp1->b_rptr) { 5192 if (!pullupmsg(mp1, cksum_off + sizeof (uint16_t))) { 5193 BUMP_MIB(&is->is_rawip_mib, 5194 rawipOutErrors); 5195 freemsg(mp); 5196 return; 5197 } 5198 ip6i = (ip6i_t *)mp1->b_rptr; 5199 if (ip6i->ip6i_nxt == IPPROTO_RAW) 5200 ip6h = (ip6_t *)&ip6i[1]; 5201 else 5202 ip6h = (ip6_t *)ip6i; 5203 } 5204 /* Add payload length to checksum */ 5205 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 5206 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 5207 csum += htons(ip_len - ext_hdrs_len); 5208 5209 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 5210 csum = (csum & 0xFFFF) + (csum >> 16); 5211 *cksum_ptr = (uint16_t)csum; 5212 } 5213 5214 #ifdef _LITTLE_ENDIAN 5215 ip_len = htons(ip_len); 5216 #endif 5217 ip6h->ip6_plen = (uint16_t)ip_len; 5218 5219 freeb(mp); 5220 5221 /* We're done. 
Pass the packet to IP */ 5222 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 5223 ip_output_v6(icmp->icmp_connp, mp1, q, IP_WPUT); 5224 } 5225 5226 static void 5227 icmp_wput_other(queue_t *q, mblk_t *mp) 5228 { 5229 uchar_t *rptr = mp->b_rptr; 5230 struct iocblk *iocp; 5231 #define tudr ((struct T_unitdata_req *)rptr) 5232 conn_t *connp = Q_TO_CONN(q); 5233 icmp_t *icmp = connp->conn_icmp; 5234 icmp_stack_t *is = icmp->icmp_is; 5235 cred_t *cr; 5236 5237 cr = DB_CREDDEF(mp, connp->conn_cred); 5238 5239 switch (mp->b_datap->db_type) { 5240 case M_PROTO: 5241 case M_PCPROTO: 5242 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5243 /* 5244 * If the message does not contain a PRIM_type, 5245 * throw it away. 5246 */ 5247 freemsg(mp); 5248 return; 5249 } 5250 switch (((union T_primitives *)rptr)->type) { 5251 case T_ADDR_REQ: 5252 icmp_addr_req(q, mp); 5253 return; 5254 case O_T_BIND_REQ: 5255 case T_BIND_REQ: 5256 icmp_bind(q, mp); 5257 return; 5258 case T_CONN_REQ: 5259 icmp_connect(q, mp); 5260 return; 5261 case T_CAPABILITY_REQ: 5262 icmp_capability_req(q, mp); 5263 return; 5264 case T_INFO_REQ: 5265 icmp_info_req(q, mp); 5266 return; 5267 case T_UNITDATA_REQ: 5268 /* 5269 * If a T_UNITDATA_REQ gets here, the address must 5270 * be bad. Valid T_UNITDATA_REQs are found above 5271 * and break to below this switch. 5272 */ 5273 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5274 return; 5275 case T_UNBIND_REQ: 5276 icmp_unbind(q, mp); 5277 return; 5278 5279 case T_SVR4_OPTMGMT_REQ: 5280 if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get, 5281 cr)) { 5282 /* Only IP can return anything meaningful */ 5283 (void) svr4_optcom_req(q, mp, cr, 5284 &icmp_opt_obj, B_TRUE); 5285 } 5286 return; 5287 5288 case T_OPTMGMT_REQ: 5289 /* Only IP can return anything meaningful */ 5290 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE); 5291 return; 5292 5293 case T_DISCON_REQ: 5294 icmp_disconnect(q, mp); 5295 return; 5296 5297 /* The following TPI message is not supported by icmp. 
*/ 5298 case O_T_CONN_RES: 5299 case T_CONN_RES: 5300 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5301 return; 5302 5303 /* The following 3 TPI requests are illegal for icmp. */ 5304 case T_DATA_REQ: 5305 case T_EXDATA_REQ: 5306 case T_ORDREL_REQ: 5307 freemsg(mp); 5308 (void) putctl1(RD(q), M_ERROR, EPROTO); 5309 return; 5310 default: 5311 break; 5312 } 5313 break; 5314 case M_IOCTL: 5315 iocp = (struct iocblk *)mp->b_rptr; 5316 switch (iocp->ioc_cmd) { 5317 case TI_GETPEERNAME: 5318 if (icmp->icmp_state != TS_DATA_XFER) { 5319 /* 5320 * If a default destination address has not 5321 * been associated with the stream, then we 5322 * don't know the peer's name. 5323 */ 5324 iocp->ioc_error = ENOTCONN; 5325 err_ret:; 5326 iocp->ioc_count = 0; 5327 mp->b_datap->db_type = M_IOCACK; 5328 qreply(q, mp); 5329 return; 5330 } 5331 /* FALLTHRU */ 5332 case TI_GETMYNAME: 5333 /* 5334 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5335 * need to copyin the user's strbuf structure. 5336 * Processing will continue in the M_IOCDATA case 5337 * below. 5338 */ 5339 mi_copyin(q, mp, NULL, 5340 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5341 return; 5342 case ND_SET: 5343 /* nd_getset performs the necessary error checking */ 5344 case ND_GET: 5345 if (nd_getset(q, is->is_nd, mp)) { 5346 qreply(q, mp); 5347 return; 5348 } 5349 break; 5350 default: 5351 break; 5352 } 5353 break; 5354 case M_IOCDATA: 5355 icmp_wput_iocdata(q, mp); 5356 return; 5357 default: 5358 break; 5359 } 5360 ip_wput(q, mp); 5361 } 5362 5363 /* 5364 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5365 * messages. 5366 */ 5367 static void 5368 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5369 { 5370 mblk_t *mp1; 5371 STRUCT_HANDLE(strbuf, sb); 5372 icmp_t *icmp; 5373 in6_addr_t v6addr; 5374 ipaddr_t v4addr; 5375 uint32_t flowinfo = 0; 5376 int addrlen; 5377 5378 /* Make sure it is one of ours. 
*/ 5379 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5380 case TI_GETMYNAME: 5381 case TI_GETPEERNAME: 5382 break; 5383 default: 5384 icmp = Q_TO_ICMP(q); 5385 ip_output(icmp->icmp_connp, mp, q, IP_WPUT); 5386 return; 5387 } 5388 switch (mi_copy_state(q, mp, &mp1)) { 5389 case -1: 5390 return; 5391 case MI_COPY_CASE(MI_COPY_IN, 1): 5392 break; 5393 case MI_COPY_CASE(MI_COPY_OUT, 1): 5394 /* 5395 * The address has been copied out, so now 5396 * copyout the strbuf. 5397 */ 5398 mi_copyout(q, mp); 5399 return; 5400 case MI_COPY_CASE(MI_COPY_OUT, 2): 5401 /* 5402 * The address and strbuf have been copied out. 5403 * We're done, so just acknowledge the original 5404 * M_IOCTL. 5405 */ 5406 mi_copy_done(q, mp, 0); 5407 return; 5408 default: 5409 /* 5410 * Something strange has happened, so acknowledge 5411 * the original M_IOCTL with an EPROTO error. 5412 */ 5413 mi_copy_done(q, mp, EPROTO); 5414 return; 5415 } 5416 /* 5417 * Now we have the strbuf structure for TI_GETMYNAME 5418 * and TI_GETPEERNAME. Next we copyout the requested 5419 * address and then we'll copyout the strbuf. 5420 */ 5421 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5422 (void *)mp1->b_rptr); 5423 icmp = Q_TO_ICMP(q); 5424 if (icmp->icmp_family == AF_INET) 5425 addrlen = sizeof (sin_t); 5426 else 5427 addrlen = sizeof (sin6_t); 5428 5429 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5430 mi_copy_done(q, mp, EINVAL); 5431 return; 5432 } 5433 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5434 case TI_GETMYNAME: 5435 if (icmp->icmp_family == AF_INET) { 5436 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5437 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 5438 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5439 v4addr = V4_PART_OF_V6(icmp->icmp_v6src); 5440 } else { 5441 /* 5442 * INADDR_ANY 5443 * icmp_v6src is not set, we might be bound to 5444 * broadcast/multicast. 
Use icmp_bound_v6src as 5445 * local address instead (that could 5446 * also still be INADDR_ANY) 5447 */ 5448 v4addr = V4_PART_OF_V6(icmp->icmp_bound_v6src); 5449 } 5450 } else { 5451 /* icmp->icmp_family == AF_INET6 */ 5452 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 5453 v6addr = icmp->icmp_v6src; 5454 } else { 5455 /* 5456 * UNSPECIFIED 5457 * icmp_v6src is not set, we might be bound to 5458 * broadcast/multicast. Use icmp_bound_v6src as 5459 * local address instead (that could 5460 * also still be UNSPECIFIED) 5461 */ 5462 v6addr = icmp->icmp_bound_v6src; 5463 } 5464 } 5465 break; 5466 case TI_GETPEERNAME: 5467 if (icmp->icmp_family == AF_INET) { 5468 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 5469 v4addr = V4_PART_OF_V6(icmp->icmp_v6dst); 5470 } else { 5471 /* icmp->icmp_family == AF_INET6) */ 5472 v6addr = icmp->icmp_v6dst; 5473 flowinfo = icmp->icmp_flowinfo; 5474 } 5475 break; 5476 default: 5477 mi_copy_done(q, mp, EPROTO); 5478 return; 5479 } 5480 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5481 if (!mp1) 5482 return; 5483 5484 if (icmp->icmp_family == AF_INET) { 5485 sin_t *sin; 5486 5487 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 5488 sin = (sin_t *)mp1->b_rptr; 5489 mp1->b_wptr = (uchar_t *)&sin[1]; 5490 *sin = sin_null; 5491 sin->sin_family = AF_INET; 5492 sin->sin_addr.s_addr = v4addr; 5493 } else { 5494 /* icmp->icmp_family == AF_INET6 */ 5495 sin6_t *sin6; 5496 5497 ASSERT(icmp->icmp_family == AF_INET6); 5498 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 5499 sin6 = (sin6_t *)mp1->b_rptr; 5500 mp1->b_wptr = (uchar_t *)&sin6[1]; 5501 *sin6 = sin6_null; 5502 sin6->sin6_family = AF_INET6; 5503 sin6->sin6_flowinfo = flowinfo; 5504 sin6->sin6_addr = v6addr; 5505 } 5506 /* Copy out the address */ 5507 mi_copyout(q, mp); 5508 } 5509 5510 static int 5511 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5512 void *thisdg_attrs) 5513 { 5514 conn_t *connp = Q_TO_CONN(q); 5515 struct T_unitdata_req *udreqp; 5516 
int is_absreq_failure; 5517 cred_t *cr; 5518 5519 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5520 *errorp = 0; 5521 5522 cr = DB_CREDDEF(mp, connp->conn_cred); 5523 5524 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5525 udreqp->OPT_offset, cr, &icmp_opt_obj, 5526 thisdg_attrs, &is_absreq_failure); 5527 5528 if (*errorp != 0) { 5529 /* 5530 * Note: No special action needed in this 5531 * module for "is_absreq_failure" 5532 */ 5533 return (-1); /* failure */ 5534 } 5535 ASSERT(is_absreq_failure == 0); 5536 return (0); /* success */ 5537 } 5538 5539 void 5540 icmp_ddi_init(void) 5541 { 5542 icmp_max_optsize = 5543 optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5544 icmp_opt_obj.odb_opt_arr_cnt); 5545 5546 /* 5547 * We want to be informed each time a stack is created or 5548 * destroyed in the kernel, so we can maintain the 5549 * set of icmp_stack_t's. 5550 */ 5551 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); 5552 } 5553 5554 void 5555 icmp_ddi_destroy(void) 5556 { 5557 netstack_unregister(NS_ICMP); 5558 } 5559 5560 /* 5561 * Initialize the ICMP stack instance. 5562 */ 5563 static void * 5564 rawip_stack_init(netstackid_t stackid, netstack_t *ns) 5565 { 5566 icmp_stack_t *is; 5567 icmpparam_t *pa; 5568 5569 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); 5570 is->is_netstack = ns; 5571 5572 pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); 5573 is->is_param_arr = pa; 5574 bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); 5575 5576 (void) icmp_param_register(&is->is_nd, 5577 is->is_param_arr, A_CNT(icmp_param_arr)); 5578 is->is_ksp = rawip_kstat_init(stackid); 5579 return (is); 5580 } 5581 5582 /* 5583 * Free the ICMP stack instance. 
5584 */ 5585 static void 5586 rawip_stack_fini(netstackid_t stackid, void *arg) 5587 { 5588 icmp_stack_t *is = (icmp_stack_t *)arg; 5589 5590 nd_free(&is->is_nd); 5591 kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); 5592 is->is_param_arr = NULL; 5593 5594 rawip_kstat_fini(stackid, is->is_ksp); 5595 is->is_ksp = NULL; 5596 kmem_free(is, sizeof (*is)); 5597 } 5598 5599 static void * 5600 rawip_kstat_init(netstackid_t stackid) { 5601 kstat_t *ksp; 5602 5603 rawip_named_kstat_t template = { 5604 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5605 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5606 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5607 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5608 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5609 }; 5610 5611 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", 5612 KSTAT_TYPE_NAMED, 5613 NUM_OF_FIELDS(rawip_named_kstat_t), 5614 0, stackid); 5615 if (ksp == NULL || ksp->ks_data == NULL) 5616 return (NULL); 5617 5618 bcopy(&template, ksp->ks_data, sizeof (template)); 5619 ksp->ks_update = rawip_kstat_update; 5620 ksp->ks_private = (void *)(uintptr_t)stackid; 5621 5622 kstat_install(ksp); 5623 return (ksp); 5624 } 5625 5626 static void 5627 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) 5628 { 5629 if (ksp != NULL) { 5630 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 5631 kstat_delete_netstack(ksp, stackid); 5632 } 5633 } 5634 5635 static int 5636 rawip_kstat_update(kstat_t *ksp, int rw) 5637 { 5638 rawip_named_kstat_t *rawipkp; 5639 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; 5640 netstack_t *ns; 5641 icmp_stack_t *is; 5642 5643 if ((ksp == NULL) || (ksp->ks_data == NULL)) 5644 return (EIO); 5645 5646 if (rw == KSTAT_WRITE) 5647 return (EACCES); 5648 5649 rawipkp = (rawip_named_kstat_t *)ksp->ks_data; 5650 5651 ns = netstack_find_by_stackid(stackid); 5652 if (ns == NULL) 5653 return (-1); 5654 is = ns->netstack_icmp; 5655 if (is == NULL) { 5656 netstack_rele(ns); 5657 return (-1); 5658 } 5659 
rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; 5660 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; 5661 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; 5662 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; 5663 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; 5664 netstack_rele(ns); 5665 return (0); 5666 } 5667