1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/stropts.h> 30 #include <sys/strlog.h> 31 #include <sys/strsun.h> 32 #define _SUN_TPI_VERSION 2 33 #include <sys/tihdr.h> 34 #include <sys/timod.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/kmem.h> 41 #include <sys/policy.h> 42 #include <sys/priv.h> 43 #include <sys/zone.h> 44 #include <sys/time.h> 45 46 #include <sys/sockio.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/isa_defs.h> 50 #include <sys/suntpi.h> 51 #include <sys/xti_inet.h> 52 #include <sys/netstack.h> 53 54 #include <net/route.h> 55 #include <net/if.h> 56 57 #include <netinet/in.h> 58 #include <netinet/ip6.h> 59 #include <netinet/icmp6.h> 60 #include <inet/common.h> 61 #include <inet/ip.h> 62 #include <inet/ip6.h> 63 #include <inet/proto_set.h> 64 #include <inet/nd.h> 65 #include <inet/optcom.h> 66 #include <inet/snmpcom.h> 67 #include <inet/kstatcom.h> 68 #include <inet/rawip_impl.h> 69 70 #include <netinet/ip_mroute.h> 71 #include <inet/tcp.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 #include <inet/ipclassifier.h> 75 76 #include <sys/tsol/label.h> 77 #include <sys/tsol/tnet.h> 78 79 #include <inet/ip_ire.h> 80 #include <inet/ip_if.h> 81 82 #include <inet/ip_impl.h> 83 #include <sys/disp.h> 84 85 /* 86 * Synchronization notes: 87 * 88 * RAWIP is MT and uses the usual kernel synchronization primitives. There is 89 * one RAWIP-specific lock, icmp_rwlock. We also use conn_lock when updating things 90 * which affect the IP classifier lookup. 91 * The lock order is icmp_rwlock -> conn_lock. 92 * 93 * The icmp_rwlock: 94 * This protects most of the other fields in the icmp_t. The exact list of 95 * fields which are protected by each of the above locks is documented in 96 * the icmp_t structure definition. 97 * 98 * Plumbing notes: 99 * ICMP is always a device driver. 
For compatibility with mibopen() code
 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough
 * dummy module.
 */

/*
 * Forward declarations.
 *
 * TPI primitive handlers (icmp_tpi_*), option processing, ND parameter
 * access and the STREAMS write-side put procedures used by the tables
 * further down in this file.
 */
static void	icmp_addr_req(queue_t *q, mblk_t *mp);
static void	icmp_tpi_bind(queue_t *q, mblk_t *mp);
static int	icmp_bind_proto(conn_t *connp);
static int	icmp_build_hdrs(icmp_t *icmp);
static void	icmp_capability_req(queue_t *q, mblk_t *mp);
static int	icmp_close(queue_t *q, int flags);
static void	icmp_tpi_connect(queue_t *q, mblk_t *mp);
static void	icmp_tpi_disconnect(queue_t *q, mblk_t *mp);
static void	icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
		    t_scalar_t t_error, int sys_error);
static void	icmp_icmp_error(conn_t *connp, mblk_t *mp);
static void	icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp);
static void	icmp_info_req(queue_t *q, mblk_t *mp);
static void	icmp_input(void *, mblk_t *, void *);
static conn_t	*icmp_open(int family, cred_t *credp, int *err, int flags);
static int	icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	icmp_unitdata_opt_process(queue_t *q, mblk_t *mp,
		    int *errorp, void *thisdg_attrs);
static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
/* Not static: icmp_opt_set/icmp_opt_get have external linkage. */
int		icmp_opt_set(conn_t *connp, uint_t optset_context,
		    int level, int name, uint_t inlen,
		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
		    void *thisdg_attrs, cred_t *cr);
int		icmp_opt_get(conn_t *connp, int level, int name,
		    uchar_t *ptr);
static int	icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt);
static int	icmp_param_set(queue_t *q, mblk_t *mp, char *value,
		    caddr_t cp, cred_t *cr);
static int	icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
		    uchar_t *ptr, int len);
static int	icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static void	icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
static void	icmp_tpi_unbind(queue_t *q, mblk_t *mp);
static void	icmp_wput(queue_t *q, mblk_t *mp);
static void	icmp_wput_fallback(queue_t *q, mblk_t *mp);
static int	raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp,
		    sin6_t *sin6, ip6_pkt_t *ipp);
static int	raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp,
		    ipaddr_t v4dst, ip4_pkt_t *pktinfop);
static void	icmp_wput_other(queue_t *q, mblk_t *mp);
static void	icmp_wput_iocdata(queue_t *q, mblk_t *mp);
static void	icmp_wput_restricted(queue_t *q, mblk_t *mp);

/* Per-netstack instance setup and teardown. */
static void	*rawip_stack_init(netstackid_t stackid, netstack_t *ns);
static void	rawip_stack_fini(netstackid_t stackid, void *arg);

/* Per-netstack kstat support. */
static void	*rawip_kstat_init(netstackid_t stackid);
static void	rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp);
static int	rawip_kstat_update(kstat_t *kp, int rw);
static void	rawip_stack_shutdown(netstackid_t stackid, void *arg);
static int	rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa,
		    uint_t *salenp);
static int	rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa,
		    uint_t *salenp);

/* Not static: these two take a sock_lower_handle_t and have external linkage. */
int		rawip_getsockname(sock_lower_handle_t, struct sockaddr *,
		    socklen_t *, cred_t *);
int		rawip_getpeername(sock_lower_handle_t, struct sockaddr *,
		    socklen_t *, cred_t *);

/*
 * STREAMS module information shared by every qinit table in this file.
 * NOTE(review): initializer order presumably follows module_info(9S) -
 * id number, name, min packet size, max packet size, high water mark,
 * low water mark - confirm against <sys/stream.h>.
 */
static struct module_info icmp_mod_info =  {
	5707, "icmp", 1, INFPSZ, 512, 128
};

/*
 * Entry points for ICMP as a device.
 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices.
*/
/* Read side for /dev/icmp: only open (icmp_openv4) and close are supplied. */
static struct qinit icmprinitv4 = {
	NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info
};

/* Read side for /dev/icmp6: same as above but opened via icmp_openv6. */
static struct qinit icmprinitv6 = {
	NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info
};

/* Write side shared by both devices: messages are handled by icmp_wput. */
static struct qinit icmpwinit = {
	(pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &icmp_mod_info
};

/* ICMP entry point during fallback */
static struct qinit icmp_fallback_sock_winit = {
	(pfi_t)icmp_wput_fallback, NULL, NULL, NULL, NULL, &icmp_mod_info
};

/* For AF_INET aka /dev/icmp */
struct streamtab icmpinfov4 = {
	&icmprinitv4, &icmpwinit
};

/* For AF_INET6 aka /dev/icmp6 */
struct streamtab icmpinfov6 = {
	&icmprinitv6, &icmpwinit
};

/*
 * File-scope statics are zero-initialized, so these serve as all-zero
 * templates that are structure-assigned to clear an address.
 */
static sin_t	sin_null;	/* Zero address for quick clears */
static sin6_t	sin6_null;	/* Zero address for quick clears */

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack icmp_g_t_info_ack = {
	T_INFO_ACK,
	IP_MAXPACKET,	 /* TSDU_size.  icmp allows maximum size messages. */
	T_INVALID,	/* ETSDU_size.  icmp does not support expedited data. */
	T_INVALID,	/* CDATA_size. icmp does not support connect data. */
	T_INVALID,	/* DDATA_size. icmp does not support disconnect data. */
	0,		/* ADDR_size - filled in later. */
	0,		/* OPT_size - not initialized here */
	IP_MAXPACKET,	/* TIDU_size.  icmp allows maximum size messages. */
	T_CLTS,		/* SERV_type.  icmp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from icmp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/*
 * Table of ND variables supported by icmp.  These are loaded into is_nd
 * when the stack instance is created.
 * All of these are alterable, within the min/max values given, at run time.
*/
/*
 * Each row is { minimum, maximum, default value, ND name }.  The row order
 * must match the is_* accessor macros defined right after the table, which
 * index the per-stack copy (is_param_arr) by position.
 */
static icmpparam_t	icmp_param_arr[] = {
	/* min	max	value	name */
	{ 0,	128,	32,	"icmp_wroff_extra" },
	{ 1,	255,	255,	"icmp_ipv4_ttl" },
	{ 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS,	"icmp_ipv6_hoplimit"},
	{ 0,	1,	1,	"icmp_bsd_compat" },
	{ 4096,	65536,	8192,	"icmp_xmit_hiwat"},
	{ 0,	65536,	1024,	"icmp_xmit_lowat"},
	{ 4096,	65536,	8192,	"icmp_recv_hiwat"},
	{ 65536, 1024*1024*1024, 256*1024,	"icmp_max_buf"},
};
/* Positional accessors: index N here corresponds to row N of the table. */
#define	is_wroff_extra		is_param_arr[0].icmp_param_value
#define	is_ipv4_ttl		is_param_arr[1].icmp_param_value
#define	is_ipv6_hoplimit	is_param_arr[2].icmp_param_value
#define	is_bsd_compat		is_param_arr[3].icmp_param_value
#define	is_xmit_hiwat		is_param_arr[4].icmp_param_value
#define	is_xmit_lowat		is_param_arr[5].icmp_param_value
#define	is_recv_hiwat		is_param_arr[6].icmp_param_value
#define	is_max_buf		is_param_arr[7].icmp_param_value

/* Shared bind/connect workers used by the TPI handlers below. */
static int rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len);
static int rawip_do_connect(conn_t *connp, const struct sockaddr *sa,
    socklen_t len);
static void rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error);

/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to icmp_wput.
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP
 * protocol type placed in the message following the address. A T_BIND_ACK
 * message is returned by ip_bind_v4/v6.
260 */ 261 static void 262 icmp_tpi_bind(queue_t *q, mblk_t *mp) 263 { 264 int error; 265 struct sockaddr *sa; 266 struct T_bind_req *tbr; 267 socklen_t len; 268 sin_t *sin; 269 sin6_t *sin6; 270 icmp_t *icmp; 271 conn_t *connp = Q_TO_CONN(q); 272 mblk_t *mp1; 273 274 icmp = connp->conn_icmp; 275 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 276 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 277 "icmp_bind: bad req, len %u", 278 (uint_t)(mp->b_wptr - mp->b_rptr)); 279 icmp_err_ack(q, mp, TPROTO, 0); 280 return; 281 } 282 283 if (icmp->icmp_state != TS_UNBND) { 284 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 285 "icmp_bind: bad state, %d", icmp->icmp_state); 286 icmp_err_ack(q, mp, TOUTSTATE, 0); 287 return; 288 } 289 290 /* 291 * Reallocate the message to make sure we have enough room for an 292 * address and the protocol type. 293 */ 294 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 295 if (!mp1) { 296 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 297 return; 298 } 299 mp = mp1; 300 301 /* Reset the message type in preparation for shipping it back. 
*/ 302 DB_TYPE(mp) = M_PCPROTO; 303 tbr = (struct T_bind_req *)mp->b_rptr; 304 len = tbr->ADDR_length; 305 switch (len) { 306 case 0: /* request for a generic port */ 307 tbr->ADDR_offset = sizeof (struct T_bind_req); 308 if (icmp->icmp_family == AF_INET) { 309 tbr->ADDR_length = sizeof (sin_t); 310 sin = (sin_t *)&tbr[1]; 311 *sin = sin_null; 312 sin->sin_family = AF_INET; 313 mp->b_wptr = (uchar_t *)&sin[1]; 314 sa = (struct sockaddr *)sin; 315 len = sizeof (sin_t); 316 } else { 317 ASSERT(icmp->icmp_family == AF_INET6); 318 tbr->ADDR_length = sizeof (sin6_t); 319 sin6 = (sin6_t *)&tbr[1]; 320 *sin6 = sin6_null; 321 sin6->sin6_family = AF_INET6; 322 mp->b_wptr = (uchar_t *)&sin6[1]; 323 sa = (struct sockaddr *)sin6; 324 len = sizeof (sin6_t); 325 } 326 break; 327 328 case sizeof (sin_t): /* Complete IPv4 address */ 329 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 330 sizeof (sin_t)); 331 break; 332 333 case sizeof (sin6_t): /* Complete IPv6 address */ 334 sa = (struct sockaddr *)mi_offset_param(mp, 335 tbr->ADDR_offset, sizeof (sin6_t)); 336 break; 337 338 default: 339 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 340 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 341 icmp_err_ack(q, mp, TBADADDR, 0); 342 return; 343 } 344 345 error = rawip_do_bind(connp, sa, len); 346 done: 347 ASSERT(mp->b_cont == NULL); 348 if (error != 0) { 349 if (error > 0) { 350 icmp_err_ack(q, mp, TSYSERR, error); 351 } else { 352 icmp_err_ack(q, mp, -error, 0); 353 } 354 } else { 355 tbr->PRIM_type = T_BIND_ACK; 356 qreply(q, mp); 357 } 358 } 359 360 static int 361 rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len) 362 { 363 sin_t *sin; 364 sin6_t *sin6; 365 icmp_t *icmp; 366 int error = 0; 367 mblk_t *ire_mp; 368 369 370 icmp = connp->conn_icmp; 371 372 if (sa == NULL || !OK_32PTR((char *)sa)) { 373 return (EINVAL); 374 } 375 376 /* 377 * The state must be TS_UNBND. 
TPI mandates that users must send 378 * TPI primitives only 1 at a time and wait for the response before 379 * sending the next primitive. 380 */ 381 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 382 if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) { 383 error = -TOUTSTATE; 384 goto done; 385 } 386 387 ASSERT(len != 0); 388 switch (len) { 389 case sizeof (sin_t): /* Complete IPv4 address */ 390 sin = (sin_t *)sa; 391 if (sin->sin_family != AF_INET || 392 icmp->icmp_family != AF_INET) { 393 /* TSYSERR, EAFNOSUPPORT */ 394 error = EAFNOSUPPORT; 395 goto done; 396 } 397 break; 398 case sizeof (sin6_t): /* Complete IPv6 address */ 399 sin6 = (sin6_t *)sa; 400 if (sin6->sin6_family != AF_INET6 || 401 icmp->icmp_family != AF_INET6) { 402 /* TSYSERR, EAFNOSUPPORT */ 403 error = EAFNOSUPPORT; 404 goto done; 405 } 406 /* No support for mapped addresses on raw sockets */ 407 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 408 /* TSYSERR, EADDRNOTAVAIL */ 409 error = EADDRNOTAVAIL; 410 goto done; 411 } 412 break; 413 414 default: 415 /* TBADADDR */ 416 error = EADDRNOTAVAIL; 417 goto done; 418 } 419 420 icmp->icmp_pending_op = T_BIND_REQ; 421 icmp->icmp_state = TS_IDLE; 422 423 /* 424 * Copy the source address into our icmp structure. This address 425 * may still be zero; if so, ip will fill in the correct address 426 * each time an outbound packet is passed to it. 427 * If we are binding to a broadcast or multicast address then 428 * rawip_post_ip_bind_connect will clear the source address. 
429 */ 430 431 if (icmp->icmp_family == AF_INET) { 432 ASSERT(sin != NULL); 433 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 434 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 435 &icmp->icmp_v6src); 436 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 437 icmp->icmp_ip_snd_options_len; 438 icmp->icmp_bound_v6src = icmp->icmp_v6src; 439 } else { 440 int error; 441 442 ASSERT(sin6 != NULL); 443 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 444 icmp->icmp_v6src = sin6->sin6_addr; 445 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 446 icmp->icmp_bound_v6src = icmp->icmp_v6src; 447 448 /* Rebuild the header template */ 449 error = icmp_build_hdrs(icmp); 450 if (error != 0) { 451 icmp->icmp_pending_op = -1; 452 /* 453 * TSYSERR 454 */ 455 goto done; 456 } 457 } 458 459 ire_mp = NULL; 460 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 461 /* 462 * request an IRE if src not 0 (INADDR_ANY) 463 */ 464 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 465 if (ire_mp == NULL) { 466 icmp->icmp_pending_op = -1; 467 error = ENOMEM; 468 goto done; 469 } 470 DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE; 471 } 472 done: 473 rw_exit(&icmp->icmp_rwlock); 474 if (error != 0) 475 return (error); 476 477 if (icmp->icmp_family == AF_INET6) { 478 error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto, 479 &sin6->sin6_addr, sin6->sin6_port, B_TRUE); 480 } else { 481 error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto, 482 sin->sin_addr.s_addr, sin->sin_port, B_TRUE); 483 } 484 rawip_post_ip_bind_connect(icmp, ire_mp, error); 485 return (error); 486 } 487 488 static void 489 rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error) 490 { 491 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 492 if (icmp->icmp_state == TS_UNBND) { 493 /* 494 * not yet bound - bind sent by icmp_bind_proto. 
495 */ 496 rw_exit(&icmp->icmp_rwlock); 497 return; 498 } 499 ASSERT(icmp->icmp_pending_op != -1); 500 icmp->icmp_pending_op = -1; 501 502 if (error != 0) { 503 if (icmp->icmp_state == TS_DATA_XFER) { 504 /* Connect failed */ 505 /* Revert back to the bound source */ 506 icmp->icmp_v6src = icmp->icmp_bound_v6src; 507 icmp->icmp_state = TS_IDLE; 508 if (icmp->icmp_family == AF_INET6) 509 (void) icmp_build_hdrs(icmp); 510 } else { 511 V6_SET_ZERO(icmp->icmp_v6src); 512 V6_SET_ZERO(icmp->icmp_bound_v6src); 513 icmp->icmp_state = TS_UNBND; 514 if (icmp->icmp_family == AF_INET6) 515 (void) icmp_build_hdrs(icmp); 516 } 517 } else { 518 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 519 ire_t *ire; 520 521 ire = (ire_t *)ire_mp->b_rptr; 522 /* 523 * If a broadcast/multicast address was bound set 524 * the source address to 0. 525 * This ensures no datagrams with broadcast address 526 * as source address are emitted (which would violate 527 * RFC1122 - Hosts requirements) 528 * Note: we get IRE_BROADCAST for IPv6 529 * to "mark" a multicast local address. 530 */ 531 532 533 if (ire->ire_type == IRE_BROADCAST && 534 icmp->icmp_state != TS_DATA_XFER) { 535 /* 536 * This was just a local bind to a 537 * MC/broadcast addr 538 */ 539 V6_SET_ZERO(icmp->icmp_v6src); 540 if (icmp->icmp_family == AF_INET6) 541 (void) icmp_build_hdrs(icmp); 542 } 543 } 544 545 } 546 rw_exit(&icmp->icmp_rwlock); 547 if (ire_mp != NULL) 548 freeb(ire_mp); 549 } 550 551 /* 552 * Send message to IP to just bind to the protocol. 
553 */ 554 static int 555 icmp_bind_proto(conn_t *connp) 556 { 557 icmp_t *icmp; 558 int error; 559 560 icmp = connp->conn_icmp; 561 562 if (icmp->icmp_family == AF_INET6) 563 error = ip_proto_bind_laddr_v6(connp, NULL, icmp->icmp_proto, 564 &sin6_null.sin6_addr, 0, B_TRUE); 565 else 566 error = ip_proto_bind_laddr_v4(connp, NULL, icmp->icmp_proto, 567 sin_null.sin_addr.s_addr, 0, B_TRUE); 568 569 rawip_post_ip_bind_connect(icmp, NULL, error); 570 return (error); 571 } 572 573 static void 574 icmp_tpi_connect(queue_t *q, mblk_t *mp) 575 { 576 conn_t *connp = Q_TO_CONN(q); 577 struct T_conn_req *tcr; 578 icmp_t *icmp; 579 struct sockaddr *sa; 580 socklen_t len; 581 int error; 582 583 icmp = connp->conn_icmp; 584 tcr = (struct T_conn_req *)mp->b_rptr; 585 /* Sanity checks */ 586 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 587 icmp_err_ack(q, mp, TPROTO, 0); 588 return; 589 } 590 591 if (tcr->OPT_length != 0) { 592 icmp_err_ack(q, mp, TBADOPT, 0); 593 return; 594 } 595 596 len = tcr->DEST_length; 597 598 switch (len) { 599 default: 600 icmp_err_ack(q, mp, TBADADDR, 0); 601 return; 602 case sizeof (sin_t): 603 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 604 sizeof (sin_t)); 605 break; 606 case sizeof (sin6_t): 607 sa = (struct sockaddr *)mi_offset_param(mp, 608 tcr->DEST_offset, sizeof (sin6_t)); 609 break; 610 } 611 612 error = proto_verify_ip_addr(icmp->icmp_family, sa, len); 613 if (error != 0) { 614 icmp_err_ack(q, mp, TSYSERR, error); 615 return; 616 } 617 618 error = rawip_do_connect(connp, sa, len); 619 if (error != 0) { 620 if (error < 0) { 621 icmp_err_ack(q, mp, -error, 0); 622 } else { 623 icmp_err_ack(q, mp, 0, error); 624 } 625 } else { 626 mblk_t *mp1; 627 628 /* 629 * We have to send a connection confirmation to 630 * keep TLI happy. 
631 */ 632 if (icmp->icmp_family == AF_INET) { 633 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 634 sizeof (sin_t), NULL, 0); 635 } else { 636 ASSERT(icmp->icmp_family == AF_INET6); 637 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 638 sizeof (sin6_t), NULL, 0); 639 } 640 if (mp1 == NULL) { 641 rw_exit(&icmp->icmp_rwlock); 642 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 643 return; 644 } 645 646 /* 647 * Send ok_ack for T_CONN_REQ 648 */ 649 mp = mi_tpi_ok_ack_alloc(mp); 650 if (mp == NULL) { 651 /* Unable to reuse the T_CONN_REQ for the ack. */ 652 freemsg(mp1); 653 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 654 return; 655 } 656 putnext(connp->conn_rq, mp); 657 putnext(connp->conn_rq, mp1); 658 } 659 } 660 661 static int 662 rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len) 663 { 664 icmp_t *icmp; 665 sin_t *sin; 666 sin6_t *sin6; 667 mblk_t *ire_mp; 668 int error; 669 ipaddr_t v4dst; 670 in6_addr_t v6dst; 671 672 icmp = connp->conn_icmp; 673 674 if (sa == NULL || !OK_32PTR((char *)sa)) { 675 return (EINVAL); 676 } 677 678 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 679 if (ire_mp == NULL) 680 return (ENOMEM); 681 DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE; 682 683 684 ASSERT(sa != NULL && len != 0); 685 686 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 687 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 688 rw_exit(&icmp->icmp_rwlock); 689 freeb(ire_mp); 690 return (-TOUTSTATE); 691 } 692 693 switch (len) { 694 case sizeof (sin_t): 695 sin = (sin_t *)sa; 696 697 ASSERT(icmp->icmp_family == AF_INET); 698 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 699 700 v4dst = sin->sin_addr.s_addr; 701 /* 702 * Interpret a zero destination to mean loopback. 703 * Update the T_CONN_REQ (sin/sin6) since it is used to 704 * generate the T_CONN_CON. 
705 */ 706 if (v4dst == INADDR_ANY) { 707 v4dst = htonl(INADDR_LOOPBACK); 708 } 709 710 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 711 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 712 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 713 icmp->icmp_ip_snd_options_len; 714 icmp->icmp_v6dst.sin6_addr = v6dst; 715 icmp->icmp_v6dst.sin6_family = AF_INET6; 716 icmp->icmp_v6dst.sin6_flowinfo = 0; 717 icmp->icmp_v6dst.sin6_port = 0; 718 719 /* 720 * If the destination address is multicast and 721 * an outgoing multicast interface has been set, 722 * use the address of that interface as our 723 * source address if no source address has been set. 724 */ 725 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 726 CLASSD(v4dst) && 727 icmp->icmp_multicast_if_addr != INADDR_ANY) { 728 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 729 &icmp->icmp_v6src); 730 } 731 break; 732 case sizeof (sin6_t): 733 sin6 = (sin6_t *)sa; 734 735 /* No support for mapped addresses on raw sockets */ 736 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 737 rw_exit(&icmp->icmp_rwlock); 738 freeb(ire_mp); 739 return (EADDRNOTAVAIL); 740 } 741 742 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 743 ASSERT(icmp->icmp_family == AF_INET6); 744 745 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 746 747 icmp->icmp_v6dst = *sin6; 748 icmp->icmp_v6dst.sin6_port = 0; 749 750 /* 751 * Interpret a zero destination to mean loopback. 752 * Update the T_CONN_REQ (sin/sin6) since it is used to 753 * generate the T_CONN_CON. 754 */ 755 if (IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6dst.sin6_addr)) { 756 icmp->icmp_v6dst.sin6_addr = ipv6_loopback; 757 } 758 /* 759 * If the destination address is multicast and 760 * an outgoing multicast interface has been set, 761 * then the ip bind logic will pick the correct source 762 * address (i.e. matching the outgoing multicast interface). 
763 */ 764 break; 765 } 766 767 icmp->icmp_pending_op = T_CONN_REQ; 768 769 if (icmp->icmp_state == TS_DATA_XFER) { 770 /* Already connected - clear out state */ 771 icmp->icmp_v6src = icmp->icmp_bound_v6src; 772 icmp->icmp_state = TS_IDLE; 773 } 774 775 icmp->icmp_state = TS_DATA_XFER; 776 rw_exit(&icmp->icmp_rwlock); 777 778 if (icmp->icmp_family == AF_INET6) { 779 error = ip_proto_bind_connected_v6(connp, &ire_mp, 780 icmp->icmp_proto, &icmp->icmp_v6src, 0, 781 &icmp->icmp_v6dst.sin6_addr, 782 NULL, sin6->sin6_port, B_TRUE, B_TRUE); 783 } else { 784 error = ip_proto_bind_connected_v4(connp, &ire_mp, 785 icmp->icmp_proto, &V4_PART_OF_V6(icmp->icmp_v6src), 0, 786 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr), sin->sin_port, 787 B_TRUE, B_TRUE); 788 } 789 rawip_post_ip_bind_connect(icmp, ire_mp, error); 790 return (error); 791 } 792 793 static void 794 icmp_close_free(conn_t *connp) 795 { 796 icmp_t *icmp = connp->conn_icmp; 797 798 /* If there are any options associated with the stream, free them. */ 799 if (icmp->icmp_ip_snd_options != NULL) { 800 mi_free((char *)icmp->icmp_ip_snd_options); 801 icmp->icmp_ip_snd_options = NULL; 802 icmp->icmp_ip_snd_options_len = 0; 803 } 804 805 if (icmp->icmp_filter != NULL) { 806 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 807 icmp->icmp_filter = NULL; 808 } 809 810 /* Free memory associated with sticky options */ 811 if (icmp->icmp_sticky_hdrs_len != 0) { 812 kmem_free(icmp->icmp_sticky_hdrs, 813 icmp->icmp_sticky_hdrs_len); 814 icmp->icmp_sticky_hdrs = NULL; 815 icmp->icmp_sticky_hdrs_len = 0; 816 } 817 ip6_pkt_free(&icmp->icmp_sticky_ipp); 818 819 /* 820 * Clear any fields which the kmem_cache constructor clears. 821 * Only icmp_connp needs to be preserved. 822 * TBD: We should make this more efficient to avoid clearing 823 * everything. 
824 */ 825 ASSERT(icmp->icmp_connp == connp); 826 bzero(icmp, sizeof (icmp_t)); 827 icmp->icmp_connp = connp; 828 } 829 830 static int 831 rawip_do_close(conn_t *connp) 832 { 833 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 834 835 ip_quiesce_conn(connp); 836 837 if (!IPCL_IS_NONSTR(connp)) { 838 qprocsoff(connp->conn_rq); 839 } 840 841 ASSERT(connp->conn_icmp->icmp_fallback_queue_head == NULL && 842 connp->conn_icmp->icmp_fallback_queue_tail == NULL); 843 icmp_close_free(connp); 844 845 /* 846 * Now we are truly single threaded on this stream, and can 847 * delete the things hanging off the connp, and finally the connp. 848 * We removed this connp from the fanout list, it cannot be 849 * accessed thru the fanouts, and we already waited for the 850 * conn_ref to drop to 0. We are already in close, so 851 * there cannot be any other thread from the top. qprocsoff 852 * has completed, and service has completed or won't run in 853 * future. 854 */ 855 ASSERT(connp->conn_ref == 1); 856 857 if (!IPCL_IS_NONSTR(connp)) { 858 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 859 } else { 860 ip_close_helper_stream(connp); 861 } 862 863 connp->conn_ref--; 864 ipcl_conn_destroy(connp); 865 866 return (0); 867 } 868 869 static int 870 icmp_close(queue_t *q, int flags) 871 { 872 conn_t *connp; 873 874 if (flags & SO_FALLBACK) { 875 /* 876 * stream is being closed while in fallback 877 * simply free the resources that were allocated 878 */ 879 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 880 qprocsoff(q); 881 goto done; 882 } 883 884 connp = Q_TO_CONN(q); 885 (void) rawip_do_close(connp); 886 done: 887 q->q_ptr = WR(q)->q_ptr = NULL; 888 return (0); 889 } 890 891 /* 892 * This routine handles each T_DISCON_REQ message passed to icmp 893 * as an indicating that ICMP is no longer connected. This results 894 * in sending a T_BIND_REQ to IP to restore the binding to just 895 * the local address. 
896 * 897 * The disconnect completes in rawip_post_ip_bind_connect. 898 */ 899 static int 900 icmp_do_disconnect(conn_t *connp) 901 { 902 icmp_t *icmp; 903 mblk_t *ire_mp; 904 int error; 905 906 icmp = connp->conn_icmp; 907 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 908 if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) { 909 rw_exit(&icmp->icmp_rwlock); 910 return (-TOUTSTATE); 911 } 912 icmp->icmp_pending_op = T_DISCON_REQ; 913 icmp->icmp_v6src = icmp->icmp_bound_v6src; 914 icmp->icmp_state = TS_IDLE; 915 916 917 if (icmp->icmp_family == AF_INET6) { 918 /* Rebuild the header template */ 919 error = icmp_build_hdrs(icmp); 920 if (error != 0) { 921 icmp->icmp_pending_op = -1; 922 rw_exit(&icmp->icmp_rwlock); 923 return (error); 924 } 925 } 926 927 rw_exit(&icmp->icmp_rwlock); 928 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 929 if (ire_mp == NULL) { 930 return (ENOMEM); 931 } 932 933 if (icmp->icmp_family == AF_INET6) { 934 error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto, 935 &icmp->icmp_bound_v6src, 0, B_TRUE); 936 } else { 937 938 error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto, 939 V4_PART_OF_V6(icmp->icmp_bound_v6src), 0, B_TRUE); 940 } 941 942 rawip_post_ip_bind_connect(icmp, ire_mp, error); 943 944 return (error); 945 } 946 947 static void 948 icmp_tpi_disconnect(queue_t *q, mblk_t *mp) 949 { 950 conn_t *connp = Q_TO_CONN(q); 951 int error; 952 953 /* 954 * Allocate the largest primitive we need to send back 955 * T_error_ack is > than T_ok_ack 956 */ 957 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 958 if (mp == NULL) { 959 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 960 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 961 return; 962 } 963 964 error = icmp_do_disconnect(connp); 965 966 if (error != 0) { 967 if (error > 0) { 968 icmp_err_ack(q, mp, 0, error); 969 } else { 970 icmp_err_ack(q, mp, -error, 0); 971 } 972 } else { 973 mp = mi_tpi_ok_ack_alloc(mp); 974 ASSERT(mp != NULL); 975 qreply(q, mp); 976 } 977 978 } 979 980 static int 981 icmp_disconnect(conn_t *connp) 982 { 983 int error; 984 icmp_t *icmp = connp->conn_icmp; 985 986 icmp->icmp_dgram_errind = B_FALSE; 987 988 error = icmp_do_disconnect(connp); 989 990 if (error < 0) 991 error = proto_tlitosyserr(-error); 992 return (error); 993 } 994 995 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 996 static void 997 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 998 { 999 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1000 qreply(q, mp); 1001 } 1002 1003 /* Shorthand to generate and send TPI error acks to our client */ 1004 static void 1005 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 1006 t_scalar_t t_error, int sys_error) 1007 { 1008 struct T_error_ack *teackp; 1009 1010 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1011 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1012 teackp = (struct T_error_ack *)mp->b_rptr; 1013 teackp->ERROR_prim = primitive; 1014 teackp->TLI_error = t_error; 1015 teackp->UNIX_error = sys_error; 1016 qreply(q, mp); 1017 } 1018 } 1019 1020 /* 1021 * icmp_icmp_error is called by icmp_input to process ICMP 1022 * messages passed up by IP. 1023 * Generates the appropriate permanent (non-transient) errors. 1024 * Assumes that IP has pulled up everything up to and including 1025 * the ICMP header. 
1026 */ 1027 static void 1028 icmp_icmp_error(conn_t *connp, mblk_t *mp) 1029 { 1030 icmph_t *icmph; 1031 ipha_t *ipha; 1032 int iph_hdr_length; 1033 sin_t sin; 1034 mblk_t *mp1; 1035 int error = 0; 1036 icmp_t *icmp = connp->conn_icmp; 1037 1038 ipha = (ipha_t *)mp->b_rptr; 1039 1040 ASSERT(OK_32PTR(mp->b_rptr)); 1041 1042 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1043 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1044 icmp_icmp_error_ipv6(connp, mp); 1045 return; 1046 } 1047 1048 /* 1049 * icmp does not support v4 mapped addresses 1050 * so we can never be here for a V6 socket 1051 * i.e. icmp_family == AF_INET6 1052 */ 1053 ASSERT((IPH_HDR_VERSION(ipha) == IPV4_VERSION) && 1054 (icmp->icmp_family == AF_INET)); 1055 1056 ASSERT(icmp->icmp_family == AF_INET); 1057 1058 /* Skip past the outer IP and ICMP headers */ 1059 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1060 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 1061 ipha = (ipha_t *)&icmph[1]; 1062 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1063 1064 switch (icmph->icmph_type) { 1065 case ICMP_DEST_UNREACHABLE: 1066 switch (icmph->icmph_code) { 1067 case ICMP_FRAGMENTATION_NEEDED: 1068 /* 1069 * IP has already adjusted the path MTU. 1070 */ 1071 break; 1072 case ICMP_PORT_UNREACHABLE: 1073 case ICMP_PROTOCOL_UNREACHABLE: 1074 error = ECONNREFUSED; 1075 break; 1076 default: 1077 /* Transient errors */ 1078 break; 1079 } 1080 break; 1081 default: 1082 /* Transient errors */ 1083 break; 1084 } 1085 if (error == 0) { 1086 freemsg(mp); 1087 return; 1088 } 1089 1090 /* 1091 * Deliver T_UDERROR_IND when the application has asked for it. 1092 * The socket layer enables this automatically when connected. 
1093 */ 1094 if (!icmp->icmp_dgram_errind) { 1095 freemsg(mp); 1096 return; 1097 } 1098 1099 sin = sin_null; 1100 sin.sin_family = AF_INET; 1101 sin.sin_addr.s_addr = ipha->ipha_dst; 1102 if (IPCL_IS_NONSTR(connp)) { 1103 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1104 if (icmp->icmp_state == TS_DATA_XFER) { 1105 if (sin.sin_addr.s_addr == 1106 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr)) { 1107 rw_exit(&icmp->icmp_rwlock); 1108 (*connp->conn_upcalls->su_set_error) 1109 (connp->conn_upper_handle, error); 1110 goto done; 1111 } 1112 } else { 1113 icmp->icmp_delayed_error = error; 1114 *((sin_t *)&icmp->icmp_delayed_addr) = sin; 1115 } 1116 rw_exit(&icmp->icmp_rwlock); 1117 } else { 1118 1119 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 1120 0, error); 1121 if (mp1 != NULL) 1122 putnext(connp->conn_rq, mp1); 1123 } 1124 done: 1125 freemsg(mp); 1126 } 1127 1128 /* 1129 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 1130 * for IPv6 packets. 1131 * Send permanent (non-transient) errors upstream. 1132 * Assumes that IP has pulled up all the extension headers as well 1133 * as the ICMPv6 header. 
1134 */ 1135 static void 1136 icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1137 { 1138 icmp6_t *icmp6; 1139 ip6_t *ip6h, *outer_ip6h; 1140 uint16_t iph_hdr_length; 1141 uint8_t *nexthdrp; 1142 sin6_t sin6; 1143 mblk_t *mp1; 1144 int error = 0; 1145 icmp_t *icmp = connp->conn_icmp; 1146 1147 outer_ip6h = (ip6_t *)mp->b_rptr; 1148 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1149 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1150 else 1151 iph_hdr_length = IPV6_HDR_LEN; 1152 1153 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1154 ip6h = (ip6_t *)&icmp6[1]; 1155 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1156 freemsg(mp); 1157 return; 1158 } 1159 1160 switch (icmp6->icmp6_type) { 1161 case ICMP6_DST_UNREACH: 1162 switch (icmp6->icmp6_code) { 1163 case ICMP6_DST_UNREACH_NOPORT: 1164 error = ECONNREFUSED; 1165 break; 1166 case ICMP6_DST_UNREACH_ADMIN: 1167 case ICMP6_DST_UNREACH_NOROUTE: 1168 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1169 case ICMP6_DST_UNREACH_ADDR: 1170 /* Transient errors */ 1171 break; 1172 default: 1173 break; 1174 } 1175 break; 1176 case ICMP6_PACKET_TOO_BIG: { 1177 struct T_unitdata_ind *tudi; 1178 struct T_opthdr *toh; 1179 size_t udi_size; 1180 mblk_t *newmp; 1181 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1182 sizeof (struct ip6_mtuinfo); 1183 sin6_t *sin6; 1184 struct ip6_mtuinfo *mtuinfo; 1185 1186 /* 1187 * If the application has requested to receive path mtu 1188 * information, send up an empty message containing an 1189 * IPV6_PATHMTU ancillary data item. 1190 */ 1191 if (!icmp->icmp_ipv6_recvpathmtu) 1192 break; 1193 1194 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1195 opt_length; 1196 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1197 BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors); 1198 break; 1199 } 1200 1201 /* 1202 * newmp->b_cont is left to NULL on purpose. This is an 1203 * empty message containing only ancillary data. 
1204 */ 1205 newmp->b_datap->db_type = M_PROTO; 1206 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1207 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1208 tudi->PRIM_type = T_UNITDATA_IND; 1209 tudi->SRC_length = sizeof (sin6_t); 1210 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1211 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1212 tudi->OPT_length = opt_length; 1213 1214 sin6 = (sin6_t *)&tudi[1]; 1215 bzero(sin6, sizeof (sin6_t)); 1216 sin6->sin6_family = AF_INET6; 1217 sin6->sin6_addr = icmp->icmp_v6dst.sin6_addr; 1218 1219 toh = (struct T_opthdr *)&sin6[1]; 1220 toh->level = IPPROTO_IPV6; 1221 toh->name = IPV6_PATHMTU; 1222 toh->len = opt_length; 1223 toh->status = 0; 1224 1225 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1226 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1227 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1228 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1229 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1230 /* 1231 * We've consumed everything we need from the original 1232 * message. Free it, then send our empty message. 1233 */ 1234 freemsg(mp); 1235 if (!IPCL_IS_NONSTR(connp)) { 1236 putnext(connp->conn_rq, newmp); 1237 } else { 1238 (*connp->conn_upcalls->su_recv) 1239 (connp->conn_upper_handle, newmp, 0, 0, &error, 1240 NULL); 1241 ASSERT(error == 0); 1242 } 1243 return; 1244 } 1245 case ICMP6_TIME_EXCEEDED: 1246 /* Transient errors */ 1247 break; 1248 case ICMP6_PARAM_PROB: 1249 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1250 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1251 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1252 (uchar_t *)nexthdrp) { 1253 error = ECONNREFUSED; 1254 break; 1255 } 1256 break; 1257 } 1258 if (error == 0) { 1259 freemsg(mp); 1260 return; 1261 } 1262 1263 /* 1264 * Deliver T_UDERROR_IND when the application has asked for it. 1265 * The socket layer enables this automatically when connected. 
1266 */ 1267 if (!icmp->icmp_dgram_errind) { 1268 freemsg(mp); 1269 return; 1270 } 1271 1272 sin6 = sin6_null; 1273 sin6.sin6_family = AF_INET6; 1274 sin6.sin6_addr = ip6h->ip6_dst; 1275 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1276 1277 if (IPCL_IS_NONSTR(connp)) { 1278 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1279 if (icmp->icmp_state == TS_DATA_XFER) { 1280 if (IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1281 &icmp->icmp_v6dst.sin6_addr)) { 1282 rw_exit(&icmp->icmp_rwlock); 1283 (*connp->conn_upcalls->su_set_error) 1284 (connp->conn_upper_handle, error); 1285 goto done; 1286 } 1287 } else { 1288 icmp->icmp_delayed_error = error; 1289 *((sin6_t *)&icmp->icmp_delayed_addr) = sin6; 1290 } 1291 rw_exit(&icmp->icmp_rwlock); 1292 } else { 1293 1294 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1295 NULL, 0, error); 1296 if (mp1 != NULL) 1297 putnext(connp->conn_rq, mp1); 1298 } 1299 done: 1300 freemsg(mp); 1301 } 1302 1303 /* 1304 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1305 * The local address is filled in if endpoint is bound. The remote address 1306 * is filled in if remote address has been precified ("connected endpoint") 1307 * (The concept of connected CLTS sockets is alien to published TPI 1308 * but we support it anyway). 
1309 */ 1310 static void 1311 icmp_addr_req(queue_t *q, mblk_t *mp) 1312 { 1313 icmp_t *icmp = Q_TO_ICMP(q); 1314 mblk_t *ackmp; 1315 struct T_addr_ack *taa; 1316 1317 /* Make it large enough for worst case */ 1318 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1319 2 * sizeof (sin6_t), 1); 1320 if (ackmp == NULL) { 1321 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1322 return; 1323 } 1324 taa = (struct T_addr_ack *)ackmp->b_rptr; 1325 1326 bzero(taa, sizeof (struct T_addr_ack)); 1327 ackmp->b_wptr = (uchar_t *)&taa[1]; 1328 1329 taa->PRIM_type = T_ADDR_ACK; 1330 ackmp->b_datap->db_type = M_PCPROTO; 1331 rw_enter(&icmp->icmp_rwlock, RW_READER); 1332 /* 1333 * Note: Following code assumes 32 bit alignment of basic 1334 * data structures like sin_t and struct T_addr_ack. 1335 */ 1336 if (icmp->icmp_state != TS_UNBND) { 1337 /* 1338 * Fill in local address 1339 */ 1340 taa->LOCADDR_offset = sizeof (*taa); 1341 if (icmp->icmp_family == AF_INET) { 1342 sin_t *sin; 1343 1344 taa->LOCADDR_length = sizeof (sin_t); 1345 sin = (sin_t *)&taa[1]; 1346 /* Fill zeroes and then intialize non-zero fields */ 1347 *sin = sin_null; 1348 sin->sin_family = AF_INET; 1349 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1350 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1351 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1352 sin->sin_addr.s_addr); 1353 } else { 1354 /* 1355 * INADDR_ANY 1356 * icmp_v6src is not set, we might be bound to 1357 * broadcast/multicast. 
Use icmp_bound_v6src as 1358 * local address instead (that could 1359 * also still be INADDR_ANY) 1360 */ 1361 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1362 sin->sin_addr.s_addr); 1363 } 1364 ackmp->b_wptr = (uchar_t *)&sin[1]; 1365 } else { 1366 sin6_t *sin6; 1367 1368 ASSERT(icmp->icmp_family == AF_INET6); 1369 taa->LOCADDR_length = sizeof (sin6_t); 1370 sin6 = (sin6_t *)&taa[1]; 1371 /* Fill zeroes and then intialize non-zero fields */ 1372 *sin6 = sin6_null; 1373 sin6->sin6_family = AF_INET6; 1374 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1375 sin6->sin6_addr = icmp->icmp_v6src; 1376 } else { 1377 /* 1378 * UNSPECIFIED 1379 * icmp_v6src is not set, we might be bound to 1380 * broadcast/multicast. Use icmp_bound_v6src as 1381 * local address instead (that could 1382 * also still be UNSPECIFIED) 1383 */ 1384 sin6->sin6_addr = icmp->icmp_bound_v6src; 1385 } 1386 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1387 } 1388 } 1389 rw_exit(&icmp->icmp_rwlock); 1390 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1391 qreply(q, ackmp); 1392 } 1393 1394 static void 1395 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1396 { 1397 *tap = icmp_g_t_info_ack; 1398 1399 if (icmp->icmp_family == AF_INET6) 1400 tap->ADDR_size = sizeof (sin6_t); 1401 else 1402 tap->ADDR_size = sizeof (sin_t); 1403 tap->CURRENT_state = icmp->icmp_state; 1404 tap->OPT_size = icmp_max_optsize; 1405 } 1406 1407 static void 1408 icmp_do_capability_ack(icmp_t *icmp, struct T_capability_ack *tcap, 1409 t_uscalar_t cap_bits1) 1410 { 1411 tcap->CAP_bits1 = 0; 1412 1413 if (cap_bits1 & TC1_INFO) { 1414 icmp_copy_info(&tcap->INFO_ack, icmp); 1415 tcap->CAP_bits1 |= TC1_INFO; 1416 } 1417 } 1418 1419 /* 1420 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1421 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1422 * icmp_g_t_info_ack. The current state of the stream is copied from 1423 * icmp_state. 
1424 */ 1425 static void 1426 icmp_capability_req(queue_t *q, mblk_t *mp) 1427 { 1428 icmp_t *icmp = Q_TO_ICMP(q); 1429 t_uscalar_t cap_bits1; 1430 struct T_capability_ack *tcap; 1431 1432 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1433 1434 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1435 mp->b_datap->db_type, T_CAPABILITY_ACK); 1436 if (!mp) 1437 return; 1438 1439 tcap = (struct T_capability_ack *)mp->b_rptr; 1440 1441 icmp_do_capability_ack(icmp, tcap, cap_bits1); 1442 1443 qreply(q, mp); 1444 } 1445 1446 /* 1447 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1448 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1449 * The current state of the stream is copied from icmp_state. 1450 */ 1451 static void 1452 icmp_info_req(queue_t *q, mblk_t *mp) 1453 { 1454 icmp_t *icmp = Q_TO_ICMP(q); 1455 1456 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1457 T_INFO_ACK); 1458 if (!mp) 1459 return; 1460 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1461 qreply(q, mp); 1462 } 1463 1464 /* For /dev/icmp aka AF_INET open */ 1465 static int 1466 icmp_tpi_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1467 int family) 1468 { 1469 conn_t *connp; 1470 dev_t conn_dev; 1471 icmp_stack_t *is; 1472 int error; 1473 1474 conn_dev = NULL; 1475 1476 /* If the stream is already open, return immediately. */ 1477 if (q->q_ptr != NULL) 1478 return (0); 1479 1480 if (sflag == MODOPEN) 1481 return (EINVAL); 1482 1483 /* 1484 * Since ICMP is not used so heavily, allocating from the small 1485 * arena should be sufficient. 
1486 */ 1487 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 1488 return (EBUSY); 1489 } 1490 1491 if (flag & SO_FALLBACK) { 1492 /* 1493 * Non streams socket needs a stream to fallback to 1494 */ 1495 RD(q)->q_ptr = (void *)conn_dev; 1496 WR(q)->q_qinfo = &icmp_fallback_sock_winit; 1497 WR(q)->q_ptr = (void *)ip_minor_arena_sa; 1498 qprocson(q); 1499 return (0); 1500 } 1501 1502 connp = icmp_open(family, credp, &error, KM_SLEEP); 1503 if (connp == NULL) { 1504 ASSERT(error != NULL); 1505 inet_minor_free(ip_minor_arena_sa, connp->conn_dev); 1506 return (error); 1507 } 1508 1509 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1510 connp->conn_dev = conn_dev; 1511 connp->conn_minor_arena = ip_minor_arena_sa; 1512 1513 is = connp->conn_icmp->icmp_is; 1514 1515 /* 1516 * Initialize the icmp_t structure for this stream. 1517 */ 1518 q->q_ptr = connp; 1519 WR(q)->q_ptr = connp; 1520 connp->conn_rq = q; 1521 connp->conn_wq = WR(q); 1522 1523 if (connp->conn_icmp->icmp_family == AF_INET6) { 1524 /* Build initial header template for transmit */ 1525 rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER); 1526 if ((error = icmp_build_hdrs(connp->conn_icmp)) != 0) { 1527 rw_exit(&connp->conn_icmp->icmp_rwlock); 1528 inet_minor_free(ip_minor_arena_sa, connp->conn_dev); 1529 ipcl_conn_destroy(connp); 1530 return (error); 1531 } 1532 rw_exit(&connp->conn_icmp->icmp_rwlock); 1533 } 1534 1535 1536 q->q_hiwat = is->is_recv_hiwat; 1537 WR(q)->q_hiwat = is->is_xmit_hiwat; 1538 WR(q)->q_lowat = is->is_xmit_lowat; 1539 1540 qprocson(q); 1541 1542 /* Set the Stream head write offset. 
*/ 1543 (void) proto_set_tx_wroff(q, connp, 1544 connp->conn_icmp->icmp_max_hdr_len + is->is_wroff_extra); 1545 (void) proto_set_rx_hiwat(connp->conn_rq, connp, q->q_hiwat); 1546 1547 mutex_enter(&connp->conn_lock); 1548 connp->conn_state_flags &= ~CONN_INCIPIENT; 1549 mutex_exit(&connp->conn_lock); 1550 1551 return (0); 1552 } 1553 1554 /* For /dev/icmp4 aka AF_INET open */ 1555 static int 1556 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1557 { 1558 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET)); 1559 } 1560 1561 /* For /dev/icmp6 aka AF_INET6 open */ 1562 static int 1563 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1564 { 1565 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET6)); 1566 } 1567 1568 /* 1569 * This is the open routine for icmp. It allocates a icmp_t structure for 1570 * the stream and, on the first open of the module, creates an ND table. 1571 */ 1572 /* ARGSUSED */ 1573 static conn_t * 1574 icmp_open(int family, cred_t *credp, int *err, int flags) 1575 { 1576 icmp_t *icmp; 1577 conn_t *connp; 1578 zoneid_t zoneid; 1579 netstack_t *ns; 1580 icmp_stack_t *is; 1581 boolean_t isv6 = B_FALSE; 1582 1583 *err = secpolicy_net_icmpaccess(credp); 1584 if (*err != 0) 1585 return (NULL); 1586 1587 if (family == AF_INET6) 1588 isv6 = B_TRUE; 1589 ns = netstack_find_by_cred(credp); 1590 ASSERT(ns != NULL); 1591 is = ns->netstack_icmp; 1592 ASSERT(is != NULL); 1593 1594 /* 1595 * For exclusive stacks we set the zoneid to zero 1596 * to make ICMP operate as if in the global zone. 1597 */ 1598 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 1599 zoneid = GLOBAL_ZONEID; 1600 else 1601 zoneid = crgetzoneid(credp); 1602 1603 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 1604 1605 connp = ipcl_conn_create(IPCL_RAWIPCONN, flags, ns); 1606 icmp = connp->conn_icmp; 1607 icmp->icmp_v6dst = sin6_null; 1608 1609 /* 1610 * ipcl_conn_create did a netstack_hold. 
Undo the hold that was 1611 * done by netstack_find_by_cred() 1612 */ 1613 netstack_rele(ns); 1614 1615 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1616 ASSERT(connp->conn_ulp == IPPROTO_ICMP); 1617 ASSERT(connp->conn_icmp == icmp); 1618 ASSERT(icmp->icmp_connp == connp); 1619 1620 /* Set the initial state of the stream and the privilege status. */ 1621 icmp->icmp_state = TS_UNBND; 1622 if (isv6) { 1623 icmp->icmp_ipversion = IPV6_VERSION; 1624 icmp->icmp_family = AF_INET6; 1625 connp->conn_ulp = IPPROTO_ICMPV6; 1626 /* May be changed by a SO_PROTOTYPE socket option. */ 1627 icmp->icmp_proto = IPPROTO_ICMPV6; 1628 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1629 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1630 icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; 1631 connp->conn_af_isv6 = B_TRUE; 1632 connp->conn_flags |= IPCL_ISV6; 1633 } else { 1634 icmp->icmp_ipversion = IPV4_VERSION; 1635 icmp->icmp_family = AF_INET; 1636 /* May be changed by a SO_PROTOTYPE socket option. */ 1637 icmp->icmp_proto = IPPROTO_ICMP; 1638 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1639 icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; 1640 connp->conn_af_isv6 = B_FALSE; 1641 connp->conn_flags &= ~IPCL_ISV6; 1642 } 1643 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1644 icmp->icmp_pending_op = -1; 1645 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1646 connp->conn_zoneid = zoneid; 1647 1648 /* 1649 * If the caller has the process-wide flag set, then default to MAC 1650 * exempt mode. This allows read-down to unlabeled hosts. 
1651 */ 1652 if (getpflags(NET_MAC_AWARE, credp) != 0) 1653 connp->conn_mac_exempt = B_TRUE; 1654 1655 connp->conn_ulp_labeled = is_system_labeled(); 1656 1657 icmp->icmp_is = is; 1658 1659 connp->conn_recv = icmp_input; 1660 crhold(credp); 1661 connp->conn_cred = credp; 1662 1663 rw_exit(&icmp->icmp_rwlock); 1664 1665 connp->conn_flow_cntrld = B_FALSE; 1666 return (connp); 1667 } 1668 1669 /* 1670 * Which ICMP options OK to set through T_UNITDATA_REQ... 1671 */ 1672 /* ARGSUSED */ 1673 static boolean_t 1674 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1675 { 1676 return (B_TRUE); 1677 } 1678 1679 /* 1680 * This routine gets default values of certain options whose default 1681 * values are maintained by protcol specific code 1682 */ 1683 /* ARGSUSED */ 1684 int 1685 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1686 { 1687 icmp_t *icmp = Q_TO_ICMP(q); 1688 icmp_stack_t *is = icmp->icmp_is; 1689 int *i1 = (int *)ptr; 1690 1691 switch (level) { 1692 case IPPROTO_IP: 1693 switch (name) { 1694 case IP_MULTICAST_TTL: 1695 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1696 return (sizeof (uchar_t)); 1697 case IP_MULTICAST_LOOP: 1698 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1699 return (sizeof (uchar_t)); 1700 } 1701 break; 1702 case IPPROTO_IPV6: 1703 switch (name) { 1704 case IPV6_MULTICAST_HOPS: 1705 *i1 = IP_DEFAULT_MULTICAST_TTL; 1706 return (sizeof (int)); 1707 case IPV6_MULTICAST_LOOP: 1708 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1709 return (sizeof (int)); 1710 case IPV6_UNICAST_HOPS: 1711 *i1 = is->is_ipv6_hoplimit; 1712 return (sizeof (int)); 1713 } 1714 break; 1715 case IPPROTO_ICMPV6: 1716 switch (name) { 1717 case ICMP6_FILTER: 1718 /* Make it look like "pass all" */ 1719 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1720 return (sizeof (icmp6_filter_t)); 1721 } 1722 break; 1723 } 1724 return (-1); 1725 } 1726 1727 /* 1728 * This routine retrieves the current status of socket options. 1729 * It returns the size of the option retrieved. 
1730 */ 1731 int 1732 icmp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 1733 { 1734 icmp_t *icmp = connp->conn_icmp; 1735 icmp_stack_t *is = icmp->icmp_is; 1736 int *i1 = (int *)ptr; 1737 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1738 int ret = 0; 1739 1740 ASSERT(RW_READ_HELD(&icmp->icmp_rwlock)); 1741 switch (level) { 1742 case SOL_SOCKET: 1743 switch (name) { 1744 case SO_DEBUG: 1745 *i1 = icmp->icmp_debug; 1746 break; 1747 case SO_TYPE: 1748 *i1 = SOCK_RAW; 1749 break; 1750 case SO_PROTOTYPE: 1751 *i1 = icmp->icmp_proto; 1752 break; 1753 case SO_REUSEADDR: 1754 *i1 = icmp->icmp_reuseaddr; 1755 break; 1756 1757 /* 1758 * The following three items are available here, 1759 * but are only meaningful to IP. 1760 */ 1761 case SO_DONTROUTE: 1762 *i1 = icmp->icmp_dontroute; 1763 break; 1764 case SO_USELOOPBACK: 1765 *i1 = icmp->icmp_useloopback; 1766 break; 1767 case SO_BROADCAST: 1768 *i1 = icmp->icmp_broadcast; 1769 break; 1770 1771 case SO_SNDBUF: 1772 ASSERT(icmp->icmp_xmit_hiwat <= INT_MAX); 1773 *i1 = icmp->icmp_xmit_hiwat; 1774 break; 1775 case SO_RCVBUF: 1776 ASSERT(icmp->icmp_recv_hiwat <= INT_MAX); 1777 *i1 = icmp->icmp_recv_hiwat; 1778 break; 1779 case SO_DGRAM_ERRIND: 1780 *i1 = icmp->icmp_dgram_errind; 1781 break; 1782 case SO_TIMESTAMP: 1783 *i1 = icmp->icmp_timestamp; 1784 break; 1785 case SO_MAC_EXEMPT: 1786 *i1 = connp->conn_mac_exempt; 1787 break; 1788 case SO_DOMAIN: 1789 *i1 = icmp->icmp_family; 1790 break; 1791 1792 /* 1793 * Following four not meaningful for icmp 1794 * Action is same as "default" to which we fallthrough 1795 * so we keep them in comments. 1796 * case SO_LINGER: 1797 * case SO_KEEPALIVE: 1798 * case SO_OOBINLINE: 1799 * case SO_ALLZONES: 1800 */ 1801 default: 1802 ret = -1; 1803 goto done; 1804 } 1805 break; 1806 case IPPROTO_IP: 1807 /* 1808 * Only allow IPv4 option processing on IPv4 sockets. 
1809 */ 1810 if (icmp->icmp_family != AF_INET) { 1811 ret = -1; 1812 goto done; 1813 } 1814 1815 switch (name) { 1816 case IP_OPTIONS: 1817 case T_IP_OPTIONS: 1818 /* Options are passed up with each packet */ 1819 ret = 0; 1820 goto done; 1821 case IP_HDRINCL: 1822 *i1 = (int)icmp->icmp_hdrincl; 1823 break; 1824 case IP_TOS: 1825 case T_IP_TOS: 1826 *i1 = (int)icmp->icmp_type_of_service; 1827 break; 1828 case IP_TTL: 1829 *i1 = (int)icmp->icmp_ttl; 1830 break; 1831 case IP_MULTICAST_IF: 1832 /* 0 address if not set */ 1833 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1834 ret = sizeof (ipaddr_t); 1835 goto done; 1836 case IP_MULTICAST_TTL: 1837 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1838 ret = sizeof (uchar_t); 1839 goto done; 1840 case IP_MULTICAST_LOOP: 1841 *ptr = connp->conn_multicast_loop; 1842 ret = sizeof (uint8_t); 1843 goto done; 1844 case IP_BOUND_IF: 1845 /* Zero if not set */ 1846 *i1 = icmp->icmp_bound_if; 1847 break; /* goto sizeof (int) option return */ 1848 case IP_UNSPEC_SRC: 1849 *ptr = icmp->icmp_unspec_source; 1850 break; /* goto sizeof (int) option return */ 1851 case IP_RECVIF: 1852 *ptr = icmp->icmp_recvif; 1853 break; /* goto sizeof (int) option return */ 1854 case IP_BROADCAST_TTL: 1855 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 1856 return (sizeof (uchar_t)); 1857 case IP_RECVPKTINFO: 1858 /* 1859 * This also handles IP_PKTINFO. 1860 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1861 * Differentiation is based on the size of the argument 1862 * passed in. 1863 * This option is handled in IP which will return an 1864 * error for IP_PKTINFO as it's not supported as a 1865 * sticky option. 1866 */ 1867 ret = -EINVAL; 1868 goto done; 1869 /* 1870 * Cannot "get" the value of following options 1871 * at this level. Action is same as "default" to 1872 * which we fallthrough so we keep them in comments. 
1873 * 1874 * case IP_ADD_MEMBERSHIP: 1875 * case IP_DROP_MEMBERSHIP: 1876 * case IP_BLOCK_SOURCE: 1877 * case IP_UNBLOCK_SOURCE: 1878 * case IP_ADD_SOURCE_MEMBERSHIP: 1879 * case IP_DROP_SOURCE_MEMBERSHIP: 1880 * case MCAST_JOIN_GROUP: 1881 * case MCAST_LEAVE_GROUP: 1882 * case MCAST_BLOCK_SOURCE: 1883 * case MCAST_UNBLOCK_SOURCE: 1884 * case MCAST_JOIN_SOURCE_GROUP: 1885 * case MCAST_LEAVE_SOURCE_GROUP: 1886 * case MRT_INIT: 1887 * case MRT_DONE: 1888 * case MRT_ADD_VIF: 1889 * case MRT_DEL_VIF: 1890 * case MRT_ADD_MFC: 1891 * case MRT_DEL_MFC: 1892 * case MRT_VERSION: 1893 * case MRT_ASSERT: 1894 * case IP_SEC_OPT: 1895 * case IP_DONTFAILOVER_IF: 1896 * case IP_NEXTHOP: 1897 */ 1898 default: 1899 ret = -1; 1900 goto done; 1901 } 1902 break; 1903 case IPPROTO_IPV6: 1904 /* 1905 * Only allow IPv6 option processing on native IPv6 sockets. 1906 */ 1907 if (icmp->icmp_family != AF_INET6) { 1908 ret = -1; 1909 goto done; 1910 } 1911 switch (name) { 1912 case IPV6_UNICAST_HOPS: 1913 *i1 = (unsigned int)icmp->icmp_ttl; 1914 break; 1915 case IPV6_MULTICAST_IF: 1916 /* 0 index if not set */ 1917 *i1 = icmp->icmp_multicast_if_index; 1918 break; 1919 case IPV6_MULTICAST_HOPS: 1920 *i1 = icmp->icmp_multicast_ttl; 1921 break; 1922 case IPV6_MULTICAST_LOOP: 1923 *i1 = connp->conn_multicast_loop; 1924 break; 1925 case IPV6_BOUND_IF: 1926 /* Zero if not set */ 1927 *i1 = icmp->icmp_bound_if; 1928 break; 1929 case IPV6_UNSPEC_SRC: 1930 *i1 = icmp->icmp_unspec_source; 1931 break; 1932 case IPV6_CHECKSUM: 1933 /* 1934 * Return offset or -1 if no checksum offset. 
1935 * Does not apply to IPPROTO_ICMPV6 1936 */ 1937 if (icmp->icmp_proto == IPPROTO_ICMPV6) { 1938 ret = -1; 1939 goto done; 1940 } 1941 1942 if (icmp->icmp_raw_checksum) { 1943 *i1 = icmp->icmp_checksum_off; 1944 } else { 1945 *i1 = -1; 1946 } 1947 break; 1948 case IPV6_JOIN_GROUP: 1949 case IPV6_LEAVE_GROUP: 1950 case MCAST_JOIN_GROUP: 1951 case MCAST_LEAVE_GROUP: 1952 case MCAST_BLOCK_SOURCE: 1953 case MCAST_UNBLOCK_SOURCE: 1954 case MCAST_JOIN_SOURCE_GROUP: 1955 case MCAST_LEAVE_SOURCE_GROUP: 1956 /* cannot "get" the value for these */ 1957 ret = -1; 1958 goto done; 1959 case IPV6_RECVPKTINFO: 1960 *i1 = icmp->icmp_ip_recvpktinfo; 1961 break; 1962 case IPV6_RECVTCLASS: 1963 *i1 = icmp->icmp_ipv6_recvtclass; 1964 break; 1965 case IPV6_RECVPATHMTU: 1966 *i1 = icmp->icmp_ipv6_recvpathmtu; 1967 break; 1968 case IPV6_V6ONLY: 1969 *i1 = 1; 1970 break; 1971 case IPV6_RECVHOPLIMIT: 1972 *i1 = icmp->icmp_ipv6_recvhoplimit; 1973 break; 1974 case IPV6_RECVHOPOPTS: 1975 *i1 = icmp->icmp_ipv6_recvhopopts; 1976 break; 1977 case IPV6_RECVDSTOPTS: 1978 *i1 = icmp->icmp_ipv6_recvdstopts; 1979 break; 1980 case _OLD_IPV6_RECVDSTOPTS: 1981 *i1 = icmp->icmp_old_ipv6_recvdstopts; 1982 break; 1983 case IPV6_RECVRTHDRDSTOPTS: 1984 *i1 = icmp->icmp_ipv6_recvrtdstopts; 1985 break; 1986 case IPV6_RECVRTHDR: 1987 *i1 = icmp->icmp_ipv6_recvrthdr; 1988 break; 1989 case IPV6_PKTINFO: { 1990 /* XXX assumes that caller has room for max size! 
*/ 1991 struct in6_pktinfo *pkti; 1992 1993 pkti = (struct in6_pktinfo *)ptr; 1994 if (ipp->ipp_fields & IPPF_IFINDEX) 1995 pkti->ipi6_ifindex = ipp->ipp_ifindex; 1996 else 1997 pkti->ipi6_ifindex = 0; 1998 if (ipp->ipp_fields & IPPF_ADDR) 1999 pkti->ipi6_addr = ipp->ipp_addr; 2000 else 2001 pkti->ipi6_addr = ipv6_all_zeros; 2002 ret = sizeof (struct in6_pktinfo); 2003 goto done; 2004 } 2005 case IPV6_NEXTHOP: { 2006 sin6_t *sin6 = (sin6_t *)ptr; 2007 2008 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2009 return (0); 2010 *sin6 = sin6_null; 2011 sin6->sin6_family = AF_INET6; 2012 sin6->sin6_addr = ipp->ipp_nexthop; 2013 ret = (sizeof (sin6_t)); 2014 goto done; 2015 } 2016 case IPV6_HOPOPTS: 2017 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2018 return (0); 2019 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 2020 return (0); 2021 bcopy((char *)ipp->ipp_hopopts + 2022 icmp->icmp_label_len_v6, ptr, 2023 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 2024 if (icmp->icmp_label_len_v6 > 0) { 2025 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2026 ptr[1] = (ipp->ipp_hopoptslen - 2027 icmp->icmp_label_len_v6 + 7) / 8 - 1; 2028 } 2029 ret = (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 2030 goto done; 2031 case IPV6_RTHDRDSTOPTS: 2032 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2033 return (0); 2034 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2035 ret = ipp->ipp_rtdstoptslen; 2036 goto done; 2037 case IPV6_RTHDR: 2038 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2039 return (0); 2040 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2041 ret = ipp->ipp_rthdrlen; 2042 goto done; 2043 case IPV6_DSTOPTS: 2044 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 2045 ret = 0; 2046 goto done; 2047 } 2048 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2049 ret = ipp->ipp_dstoptslen; 2050 goto done; 2051 case IPV6_PATHMTU: 2052 if (!(ipp->ipp_fields & IPPF_PATHMTU)) { 2053 ret = 0; 2054 } else { 2055 ret = ip_fill_mtuinfo( 2056 &icmp->icmp_v6dst.sin6_addr, 0, 2057 (struct ip6_mtuinfo *)ptr, 2058 
is->is_netstack); 2059 } 2060 goto done; 2061 case IPV6_TCLASS: 2062 if (ipp->ipp_fields & IPPF_TCLASS) 2063 *i1 = ipp->ipp_tclass; 2064 else 2065 *i1 = IPV6_FLOW_TCLASS( 2066 IPV6_DEFAULT_VERS_AND_FLOW); 2067 break; 2068 default: 2069 ret = -1; 2070 goto done; 2071 } 2072 break; 2073 case IPPROTO_ICMPV6: 2074 /* 2075 * Only allow IPv6 option processing on native IPv6 sockets. 2076 */ 2077 if (icmp->icmp_family != AF_INET6) { 2078 ret = -1; 2079 } 2080 2081 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2082 ret = -1; 2083 } 2084 2085 switch (name) { 2086 case ICMP6_FILTER: 2087 if (icmp->icmp_filter == NULL) { 2088 /* Make it look like "pass all" */ 2089 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 2090 } else { 2091 (void) bcopy(icmp->icmp_filter, ptr, 2092 sizeof (icmp6_filter_t)); 2093 } 2094 ret = sizeof (icmp6_filter_t); 2095 goto done; 2096 default: 2097 ret = -1; 2098 goto done; 2099 } 2100 default: 2101 ret = -1; 2102 goto done; 2103 } 2104 ret = sizeof (int); 2105 done: 2106 return (ret); 2107 } 2108 2109 /* 2110 * This routine retrieves the current status of socket options. 2111 * It returns the size of the option retrieved. 2112 */ 2113 int 2114 icmp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 2115 { 2116 conn_t *connp = Q_TO_CONN(q); 2117 icmp_t *icmp = connp->conn_icmp; 2118 int err; 2119 2120 rw_enter(&icmp->icmp_rwlock, RW_READER); 2121 err = icmp_opt_get(connp, level, name, ptr); 2122 rw_exit(&icmp->icmp_rwlock); 2123 return (err); 2124 } 2125 2126 int 2127 icmp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2128 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2129 void *thisdg_attrs, boolean_t checkonly) 2130 { 2131 2132 int *i1 = (int *)invalp; 2133 boolean_t onoff = (*i1 == 0) ? 
0 : 1; 2134 icmp_t *icmp = connp->conn_icmp; 2135 icmp_stack_t *is = icmp->icmp_is; 2136 int error; 2137 2138 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 2139 /* 2140 * For fixed length options, no sanity check 2141 * of passed in length is done. It is assumed *_optcom_req() 2142 * routines do the right thing. 2143 */ 2144 switch (level) { 2145 case SOL_SOCKET: 2146 switch (name) { 2147 case SO_DEBUG: 2148 if (!checkonly) 2149 icmp->icmp_debug = onoff; 2150 break; 2151 case SO_PROTOTYPE: 2152 if ((*i1 & 0xFF) != IPPROTO_ICMP && 2153 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 2154 secpolicy_net_rawaccess(cr) != 0) { 2155 *outlenp = 0; 2156 return (EACCES); 2157 } 2158 /* Can't use IPPROTO_RAW with IPv6 */ 2159 if ((*i1 & 0xFF) == IPPROTO_RAW && 2160 icmp->icmp_family == AF_INET6) { 2161 *outlenp = 0; 2162 return (EPROTONOSUPPORT); 2163 } 2164 if (checkonly) { 2165 /* T_CHECK case */ 2166 *(int *)outvalp = (*i1 & 0xFF); 2167 break; 2168 } 2169 icmp->icmp_proto = *i1 & 0xFF; 2170 if ((icmp->icmp_proto == IPPROTO_RAW || 2171 icmp->icmp_proto == IPPROTO_IGMP) && 2172 icmp->icmp_family == AF_INET) 2173 icmp->icmp_hdrincl = 1; 2174 else 2175 icmp->icmp_hdrincl = 0; 2176 2177 if (icmp->icmp_family == AF_INET6 && 2178 icmp->icmp_proto == IPPROTO_ICMPV6) { 2179 /* Set offset for icmp6_cksum */ 2180 icmp->icmp_raw_checksum = 0; 2181 icmp->icmp_checksum_off = 2; 2182 } 2183 if (icmp->icmp_proto == IPPROTO_UDP || 2184 icmp->icmp_proto == IPPROTO_TCP || 2185 icmp->icmp_proto == IPPROTO_SCTP) { 2186 icmp->icmp_no_tp_cksum = 1; 2187 icmp->icmp_sticky_ipp.ipp_fields |= 2188 IPPF_NO_CKSUM; 2189 } else { 2190 icmp->icmp_no_tp_cksum = 0; 2191 icmp->icmp_sticky_ipp.ipp_fields &= 2192 ~IPPF_NO_CKSUM; 2193 } 2194 2195 if (icmp->icmp_filter != NULL && 2196 icmp->icmp_proto != IPPROTO_ICMPV6) { 2197 kmem_free(icmp->icmp_filter, 2198 sizeof (icmp6_filter_t)); 2199 icmp->icmp_filter = NULL; 2200 } 2201 2202 /* Rebuild the header template */ 2203 error = icmp_build_hdrs(icmp); 2204 if (error != 0) { 
2205 *outlenp = 0; 2206 return (error); 2207 } 2208 2209 /* 2210 * For SCTP, we don't use icmp_bind_proto() for 2211 * raw socket binding. Note that we do not need 2212 * to set *outlenp. 2213 * FIXME: how does SCTP work? 2214 */ 2215 if (icmp->icmp_proto == IPPROTO_SCTP) 2216 return (0); 2217 2218 *outlenp = sizeof (int); 2219 *(int *)outvalp = *i1 & 0xFF; 2220 2221 /* Drop lock across the bind operation */ 2222 rw_exit(&icmp->icmp_rwlock); 2223 (void) icmp_bind_proto(connp); 2224 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2225 return (0); 2226 case SO_REUSEADDR: 2227 if (!checkonly) { 2228 icmp->icmp_reuseaddr = onoff; 2229 PASS_OPT_TO_IP(connp); 2230 } 2231 break; 2232 2233 /* 2234 * The following three items are available here, 2235 * but are only meaningful to IP. 2236 */ 2237 case SO_DONTROUTE: 2238 if (!checkonly) { 2239 icmp->icmp_dontroute = onoff; 2240 PASS_OPT_TO_IP(connp); 2241 } 2242 break; 2243 case SO_USELOOPBACK: 2244 if (!checkonly) { 2245 icmp->icmp_useloopback = onoff; 2246 PASS_OPT_TO_IP(connp); 2247 } 2248 break; 2249 case SO_BROADCAST: 2250 if (!checkonly) { 2251 icmp->icmp_broadcast = onoff; 2252 PASS_OPT_TO_IP(connp); 2253 } 2254 break; 2255 2256 case SO_SNDBUF: 2257 if (*i1 > is->is_max_buf) { 2258 *outlenp = 0; 2259 return (ENOBUFS); 2260 } 2261 if (!checkonly) { 2262 if (!IPCL_IS_NONSTR(connp)) { 2263 connp->conn_wq->q_hiwat = *i1; 2264 } 2265 icmp->icmp_xmit_hiwat = *i1; 2266 } 2267 break; 2268 case SO_RCVBUF: 2269 if (*i1 > is->is_max_buf) { 2270 *outlenp = 0; 2271 return (ENOBUFS); 2272 } 2273 if (!checkonly) { 2274 icmp->icmp_recv_hiwat = *i1; 2275 rw_exit(&icmp->icmp_rwlock); 2276 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2277 *i1); 2278 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2279 } 2280 break; 2281 case SO_DGRAM_ERRIND: 2282 if (!checkonly) 2283 icmp->icmp_dgram_errind = onoff; 2284 break; 2285 case SO_ALLZONES: 2286 /* 2287 * "soft" error (negative) 2288 * option not handled at this level 2289 * Note: Do not modify *outlenp 
2290 */ 2291 return (-EINVAL); 2292 case SO_TIMESTAMP: 2293 if (!checkonly) { 2294 icmp->icmp_timestamp = onoff; 2295 } 2296 break; 2297 case SO_MAC_EXEMPT: 2298 /* 2299 * "soft" error (negative) 2300 * option not handled at this level 2301 * Note: Do not modify *outlenp 2302 */ 2303 return (-EINVAL); 2304 /* 2305 * Following three not meaningful for icmp 2306 * Action is same as "default" so we keep them 2307 * in comments. 2308 * case SO_LINGER: 2309 * case SO_KEEPALIVE: 2310 * case SO_OOBINLINE: 2311 */ 2312 default: 2313 *outlenp = 0; 2314 return (EINVAL); 2315 } 2316 break; 2317 case IPPROTO_IP: 2318 /* 2319 * Only allow IPv4 option processing on IPv4 sockets. 2320 */ 2321 if (icmp->icmp_family != AF_INET) { 2322 *outlenp = 0; 2323 return (ENOPROTOOPT); 2324 } 2325 switch (name) { 2326 case IP_OPTIONS: 2327 case T_IP_OPTIONS: 2328 /* Save options for use by IP. */ 2329 if ((inlen & 0x3) || 2330 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2331 *outlenp = 0; 2332 return (EINVAL); 2333 } 2334 if (checkonly) 2335 break; 2336 2337 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2338 &icmp->icmp_ip_snd_options_len, 2339 icmp->icmp_label_len, invalp, inlen)) { 2340 *outlenp = 0; 2341 return (ENOMEM); 2342 } 2343 2344 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2345 icmp->icmp_ip_snd_options_len; 2346 rw_exit(&icmp->icmp_rwlock); 2347 (void) proto_set_tx_wroff(connp->conn_rq == NULL ? NULL: 2348 RD(connp->conn_rq), connp, 2349 icmp->icmp_max_hdr_len + is->is_wroff_extra); 2350 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2351 break; 2352 case IP_HDRINCL: 2353 if (!checkonly) 2354 icmp->icmp_hdrincl = onoff; 2355 break; 2356 case IP_TOS: 2357 case T_IP_TOS: 2358 if (!checkonly) { 2359 icmp->icmp_type_of_service = (uint8_t)*i1; 2360 } 2361 break; 2362 case IP_TTL: 2363 if (!checkonly) { 2364 icmp->icmp_ttl = (uint8_t)*i1; 2365 } 2366 break; 2367 case IP_MULTICAST_IF: 2368 /* 2369 * TODO should check OPTMGMT reply and undo this if 2370 * there is an error. 
2371 */ 2372 if (!checkonly) { 2373 icmp->icmp_multicast_if_addr = *i1; 2374 PASS_OPT_TO_IP(connp); 2375 } 2376 break; 2377 case IP_MULTICAST_TTL: 2378 if (!checkonly) 2379 icmp->icmp_multicast_ttl = *invalp; 2380 break; 2381 case IP_MULTICAST_LOOP: 2382 if (!checkonly) { 2383 connp->conn_multicast_loop = 2384 (*invalp == 0) ? 0 : 1; 2385 PASS_OPT_TO_IP(connp); 2386 } 2387 break; 2388 case IP_BOUND_IF: 2389 if (!checkonly) { 2390 icmp->icmp_bound_if = *i1; 2391 PASS_OPT_TO_IP(connp); 2392 } 2393 break; 2394 case IP_UNSPEC_SRC: 2395 if (!checkonly) { 2396 icmp->icmp_unspec_source = onoff; 2397 PASS_OPT_TO_IP(connp); 2398 } 2399 break; 2400 case IP_BROADCAST_TTL: 2401 if (!checkonly) 2402 connp->conn_broadcast_ttl = *invalp; 2403 break; 2404 case IP_RECVIF: 2405 if (!checkonly) { 2406 icmp->icmp_recvif = onoff; 2407 } 2408 /* 2409 * pass to ip 2410 */ 2411 return (-EINVAL); 2412 case IP_PKTINFO: { 2413 /* 2414 * This also handles IP_RECVPKTINFO. 2415 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2416 * Differentiation is based on the size of the argument 2417 * passed in. 2418 */ 2419 struct in_pktinfo *pktinfop; 2420 ip4_pkt_t *attr_pktinfop; 2421 2422 if (checkonly) 2423 break; 2424 2425 if (inlen == sizeof (int)) { 2426 /* 2427 * This is IP_RECVPKTINFO option. 2428 * Keep a local copy of wether this option is 2429 * set or not and pass it down to IP for 2430 * processing. 
2431 */ 2432 icmp->icmp_ip_recvpktinfo = onoff; 2433 return (-EINVAL); 2434 } 2435 2436 2437 if (inlen != sizeof (struct in_pktinfo)) { 2438 return (EINVAL); 2439 } 2440 2441 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2442 == NULL) { 2443 /* 2444 * sticky option is not supported 2445 */ 2446 return (EINVAL); 2447 } 2448 2449 pktinfop = (struct in_pktinfo *)invalp; 2450 2451 /* 2452 * Atleast one of the values should be specified 2453 */ 2454 if (pktinfop->ipi_ifindex == 0 && 2455 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2456 return (EINVAL); 2457 } 2458 2459 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2460 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2461 } 2462 break; 2463 case IP_ADD_MEMBERSHIP: 2464 case IP_DROP_MEMBERSHIP: 2465 case IP_BLOCK_SOURCE: 2466 case IP_UNBLOCK_SOURCE: 2467 case IP_ADD_SOURCE_MEMBERSHIP: 2468 case IP_DROP_SOURCE_MEMBERSHIP: 2469 case MCAST_JOIN_GROUP: 2470 case MCAST_LEAVE_GROUP: 2471 case MCAST_BLOCK_SOURCE: 2472 case MCAST_UNBLOCK_SOURCE: 2473 case MCAST_JOIN_SOURCE_GROUP: 2474 case MCAST_LEAVE_SOURCE_GROUP: 2475 case MRT_INIT: 2476 case MRT_DONE: 2477 case MRT_ADD_VIF: 2478 case MRT_DEL_VIF: 2479 case MRT_ADD_MFC: 2480 case MRT_DEL_MFC: 2481 case MRT_VERSION: 2482 case MRT_ASSERT: 2483 case IP_SEC_OPT: 2484 case IP_DONTFAILOVER_IF: 2485 case IP_NEXTHOP: 2486 /* 2487 * "soft" error (negative) 2488 * option not handled at this level 2489 * Note: Do not modify *outlenp 2490 */ 2491 return (-EINVAL); 2492 default: 2493 *outlenp = 0; 2494 return (EINVAL); 2495 } 2496 break; 2497 case IPPROTO_IPV6: { 2498 ip6_pkt_t *ipp; 2499 boolean_t sticky; 2500 2501 if (icmp->icmp_family != AF_INET6) { 2502 *outlenp = 0; 2503 return (ENOPROTOOPT); 2504 } 2505 /* 2506 * Deal with both sticky options and ancillary data 2507 */ 2508 if (thisdg_attrs == NULL) { 2509 /* sticky options, or none */ 2510 ipp = &icmp->icmp_sticky_ipp; 2511 sticky = B_TRUE; 2512 } else { 2513 /* ancillary data */ 2514 ipp = (ip6_pkt_t 
*)thisdg_attrs; 2515 sticky = B_FALSE; 2516 } 2517 2518 switch (name) { 2519 case IPV6_MULTICAST_IF: 2520 if (!checkonly) { 2521 icmp->icmp_multicast_if_index = *i1; 2522 PASS_OPT_TO_IP(connp); 2523 } 2524 break; 2525 case IPV6_UNICAST_HOPS: 2526 /* -1 means use default */ 2527 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2528 *outlenp = 0; 2529 return (EINVAL); 2530 } 2531 if (!checkonly) { 2532 if (*i1 == -1) { 2533 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2534 is->is_ipv6_hoplimit; 2535 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2536 /* Pass modified value to IP. */ 2537 *i1 = ipp->ipp_hoplimit; 2538 } else { 2539 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2540 (uint8_t)*i1; 2541 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2542 } 2543 /* Rebuild the header template */ 2544 error = icmp_build_hdrs(icmp); 2545 if (error != 0) { 2546 *outlenp = 0; 2547 return (error); 2548 } 2549 } 2550 break; 2551 case IPV6_MULTICAST_HOPS: 2552 /* -1 means use default */ 2553 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2554 *outlenp = 0; 2555 return (EINVAL); 2556 } 2557 if (!checkonly) { 2558 if (*i1 == -1) { 2559 icmp->icmp_multicast_ttl = 2560 ipp->ipp_multicast_hops = 2561 IP_DEFAULT_MULTICAST_TTL; 2562 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2563 /* Pass modified value to IP. */ 2564 *i1 = icmp->icmp_multicast_ttl; 2565 } else { 2566 icmp->icmp_multicast_ttl = 2567 ipp->ipp_multicast_hops = 2568 (uint8_t)*i1; 2569 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2570 } 2571 } 2572 break; 2573 case IPV6_MULTICAST_LOOP: 2574 if (*i1 != 0 && *i1 != 1) { 2575 *outlenp = 0; 2576 return (EINVAL); 2577 } 2578 if (!checkonly) { 2579 connp->conn_multicast_loop = *i1; 2580 PASS_OPT_TO_IP(connp); 2581 } 2582 break; 2583 case IPV6_CHECKSUM: 2584 /* 2585 * Integer offset into the user data of where the 2586 * checksum is located. 2587 * Offset of -1 disables option. 2588 * Does not apply to IPPROTO_ICMPV6. 
2589 */ 2590 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2591 *outlenp = 0; 2592 return (EINVAL); 2593 } 2594 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2595 /* Negative or not 16 bit aligned offset */ 2596 *outlenp = 0; 2597 return (EINVAL); 2598 } 2599 if (checkonly) 2600 break; 2601 2602 if (*i1 == -1) { 2603 icmp->icmp_raw_checksum = 0; 2604 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2605 } else { 2606 icmp->icmp_raw_checksum = 1; 2607 icmp->icmp_checksum_off = *i1; 2608 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2609 } 2610 /* Rebuild the header template */ 2611 error = icmp_build_hdrs(icmp); 2612 if (error != 0) { 2613 *outlenp = 0; 2614 return (error); 2615 } 2616 break; 2617 case IPV6_JOIN_GROUP: 2618 case IPV6_LEAVE_GROUP: 2619 case MCAST_JOIN_GROUP: 2620 case MCAST_LEAVE_GROUP: 2621 case MCAST_BLOCK_SOURCE: 2622 case MCAST_UNBLOCK_SOURCE: 2623 case MCAST_JOIN_SOURCE_GROUP: 2624 case MCAST_LEAVE_SOURCE_GROUP: 2625 /* 2626 * "soft" error (negative) 2627 * option not handled at this level 2628 * Note: Do not modify *outlenp 2629 */ 2630 return (-EINVAL); 2631 case IPV6_BOUND_IF: 2632 if (!checkonly) { 2633 icmp->icmp_bound_if = *i1; 2634 PASS_OPT_TO_IP(connp); 2635 } 2636 break; 2637 case IPV6_UNSPEC_SRC: 2638 if (!checkonly) { 2639 icmp->icmp_unspec_source = onoff; 2640 PASS_OPT_TO_IP(connp); 2641 } 2642 break; 2643 case IPV6_RECVTCLASS: 2644 if (!checkonly) { 2645 icmp->icmp_ipv6_recvtclass = onoff; 2646 PASS_OPT_TO_IP(connp); 2647 } 2648 break; 2649 /* 2650 * Set boolean switches for ancillary data delivery 2651 */ 2652 case IPV6_RECVPKTINFO: 2653 if (!checkonly) { 2654 icmp->icmp_ip_recvpktinfo = onoff; 2655 PASS_OPT_TO_IP(connp); 2656 } 2657 break; 2658 case IPV6_RECVPATHMTU: 2659 if (!checkonly) { 2660 icmp->icmp_ipv6_recvpathmtu = onoff; 2661 PASS_OPT_TO_IP(connp); 2662 } 2663 break; 2664 case IPV6_RECVHOPLIMIT: 2665 if (!checkonly) { 2666 icmp->icmp_ipv6_recvhoplimit = onoff; 2667 PASS_OPT_TO_IP(connp); 2668 } 2669 break; 2670 case 
IPV6_RECVHOPOPTS: 2671 if (!checkonly) { 2672 icmp->icmp_ipv6_recvhopopts = onoff; 2673 PASS_OPT_TO_IP(connp); 2674 } 2675 break; 2676 case IPV6_RECVDSTOPTS: 2677 if (!checkonly) { 2678 icmp->icmp_ipv6_recvdstopts = onoff; 2679 PASS_OPT_TO_IP(connp); 2680 } 2681 break; 2682 case _OLD_IPV6_RECVDSTOPTS: 2683 if (!checkonly) 2684 icmp->icmp_old_ipv6_recvdstopts = onoff; 2685 break; 2686 case IPV6_RECVRTHDRDSTOPTS: 2687 if (!checkonly) { 2688 icmp->icmp_ipv6_recvrtdstopts = onoff; 2689 PASS_OPT_TO_IP(connp); 2690 } 2691 break; 2692 case IPV6_RECVRTHDR: 2693 if (!checkonly) { 2694 icmp->icmp_ipv6_recvrthdr = onoff; 2695 PASS_OPT_TO_IP(connp); 2696 } 2697 break; 2698 /* 2699 * Set sticky options or ancillary data. 2700 * If sticky options, (re)build any extension headers 2701 * that might be needed as a result. 2702 */ 2703 case IPV6_PKTINFO: 2704 /* 2705 * The source address and ifindex are verified 2706 * in ip_opt_set(). For ancillary data the 2707 * source address is checked in ip_wput_v6. 2708 */ 2709 if (inlen != 0 && inlen != 2710 sizeof (struct in6_pktinfo)) { 2711 return (EINVAL); 2712 } 2713 if (checkonly) 2714 break; 2715 2716 if (inlen == 0) { 2717 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2718 ipp->ipp_sticky_ignored |= 2719 (IPPF_IFINDEX|IPPF_ADDR); 2720 } else { 2721 struct in6_pktinfo *pkti; 2722 2723 pkti = (struct in6_pktinfo *)invalp; 2724 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2725 ipp->ipp_addr = pkti->ipi6_addr; 2726 if (ipp->ipp_ifindex != 0) 2727 ipp->ipp_fields |= IPPF_IFINDEX; 2728 else 2729 ipp->ipp_fields &= ~IPPF_IFINDEX; 2730 if (!IN6_IS_ADDR_UNSPECIFIED( 2731 &ipp->ipp_addr)) 2732 ipp->ipp_fields |= IPPF_ADDR; 2733 else 2734 ipp->ipp_fields &= ~IPPF_ADDR; 2735 } 2736 if (sticky) { 2737 error = icmp_build_hdrs(icmp); 2738 if (error != 0) 2739 return (error); 2740 PASS_OPT_TO_IP(connp); 2741 } 2742 break; 2743 case IPV6_HOPLIMIT: 2744 /* This option can only be used as ancillary data. 
*/ 2745 if (sticky) 2746 return (EINVAL); 2747 if (inlen != 0 && inlen != sizeof (int)) 2748 return (EINVAL); 2749 if (checkonly) 2750 break; 2751 2752 if (inlen == 0) { 2753 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2754 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2755 } else { 2756 if (*i1 > 255 || *i1 < -1) 2757 return (EINVAL); 2758 if (*i1 == -1) 2759 ipp->ipp_hoplimit = 2760 is->is_ipv6_hoplimit; 2761 else 2762 ipp->ipp_hoplimit = *i1; 2763 ipp->ipp_fields |= IPPF_HOPLIMIT; 2764 } 2765 break; 2766 case IPV6_TCLASS: 2767 /* 2768 * IPV6_RECVTCLASS accepts -1 as use kernel default 2769 * and [0, 255] as the actualy traffic class. 2770 */ 2771 if (inlen != 0 && inlen != sizeof (int)) { 2772 return (EINVAL); 2773 } 2774 if (checkonly) 2775 break; 2776 2777 if (inlen == 0) { 2778 ipp->ipp_fields &= ~IPPF_TCLASS; 2779 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2780 } else { 2781 if (*i1 >= 256 || *i1 < -1) 2782 return (EINVAL); 2783 if (*i1 == -1) { 2784 ipp->ipp_tclass = 2785 IPV6_FLOW_TCLASS( 2786 IPV6_DEFAULT_VERS_AND_FLOW); 2787 } else { 2788 ipp->ipp_tclass = *i1; 2789 } 2790 ipp->ipp_fields |= IPPF_TCLASS; 2791 } 2792 if (sticky) { 2793 error = icmp_build_hdrs(icmp); 2794 if (error != 0) 2795 return (error); 2796 } 2797 break; 2798 case IPV6_NEXTHOP: 2799 /* 2800 * IP will verify that the nexthop is reachable 2801 * and fail for sticky options. 
2802 */ 2803 if (inlen != 0 && inlen != sizeof (sin6_t)) { 2804 return (EINVAL); 2805 } 2806 if (checkonly) 2807 break; 2808 2809 if (inlen == 0) { 2810 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2811 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2812 } else { 2813 sin6_t *sin6 = (sin6_t *)invalp; 2814 2815 if (sin6->sin6_family != AF_INET6) { 2816 return (EAFNOSUPPORT); 2817 } 2818 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 2819 return (EADDRNOTAVAIL); 2820 } 2821 ipp->ipp_nexthop = sin6->sin6_addr; 2822 if (!IN6_IS_ADDR_UNSPECIFIED( 2823 &ipp->ipp_nexthop)) 2824 ipp->ipp_fields |= IPPF_NEXTHOP; 2825 else 2826 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2827 } 2828 if (sticky) { 2829 error = icmp_build_hdrs(icmp); 2830 if (error != 0) 2831 return (error); 2832 PASS_OPT_TO_IP(connp); 2833 } 2834 break; 2835 case IPV6_HOPOPTS: { 2836 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2837 /* 2838 * Sanity checks - minimum size, size a multiple of 2839 * eight bytes, and matching size passed in. 2840 */ 2841 if (inlen != 0 && 2842 inlen != (8 * (hopts->ip6h_len + 1))) { 2843 return (EINVAL); 2844 } 2845 2846 if (checkonly) 2847 break; 2848 error = optcom_pkt_set(invalp, inlen, sticky, 2849 (uchar_t **)&ipp->ipp_hopopts, 2850 &ipp->ipp_hopoptslen, 2851 sticky ? icmp->icmp_label_len_v6 : 0); 2852 if (error != 0) 2853 return (error); 2854 if (ipp->ipp_hopoptslen == 0) { 2855 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2856 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2857 } else { 2858 ipp->ipp_fields |= IPPF_HOPOPTS; 2859 } 2860 if (sticky) { 2861 error = icmp_build_hdrs(icmp); 2862 if (error != 0) 2863 return (error); 2864 } 2865 break; 2866 } 2867 case IPV6_RTHDRDSTOPTS: { 2868 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2869 2870 /* 2871 * Sanity checks - minimum size, size a multiple of 2872 * eight bytes, and matching size passed in. 
2873 */ 2874 if (inlen != 0 && 2875 inlen != (8 * (dopts->ip6d_len + 1))) 2876 return (EINVAL); 2877 2878 if (checkonly) 2879 break; 2880 2881 if (inlen == 0) { 2882 if (sticky && 2883 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2884 kmem_free(ipp->ipp_rtdstopts, 2885 ipp->ipp_rtdstoptslen); 2886 ipp->ipp_rtdstopts = NULL; 2887 ipp->ipp_rtdstoptslen = 0; 2888 } 2889 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2890 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2891 } else { 2892 error = optcom_pkt_set(invalp, inlen, sticky, 2893 (uchar_t **)&ipp->ipp_rtdstopts, 2894 &ipp->ipp_rtdstoptslen, 0); 2895 if (error != 0) 2896 return (error); 2897 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2898 } 2899 if (sticky) { 2900 error = icmp_build_hdrs(icmp); 2901 if (error != 0) 2902 return (error); 2903 } 2904 break; 2905 } 2906 case IPV6_DSTOPTS: { 2907 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2908 2909 /* 2910 * Sanity checks - minimum size, size a multiple of 2911 * eight bytes, and matching size passed in. 2912 */ 2913 if (inlen != 0 && 2914 inlen != (8 * (dopts->ip6d_len + 1))) 2915 return (EINVAL); 2916 2917 if (checkonly) 2918 break; 2919 2920 if (inlen == 0) { 2921 if (sticky && 2922 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2923 kmem_free(ipp->ipp_dstopts, 2924 ipp->ipp_dstoptslen); 2925 ipp->ipp_dstopts = NULL; 2926 ipp->ipp_dstoptslen = 0; 2927 } 2928 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2929 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2930 } else { 2931 error = optcom_pkt_set(invalp, inlen, sticky, 2932 (uchar_t **)&ipp->ipp_dstopts, 2933 &ipp->ipp_dstoptslen, 0); 2934 if (error != 0) 2935 return (error); 2936 ipp->ipp_fields |= IPPF_DSTOPTS; 2937 } 2938 if (sticky) { 2939 error = icmp_build_hdrs(icmp); 2940 if (error != 0) 2941 return (error); 2942 } 2943 break; 2944 } 2945 case IPV6_RTHDR: { 2946 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2947 2948 /* 2949 * Sanity checks - minimum size, size a multiple of 2950 * eight bytes, and matching size passed in. 
2951 */ 2952 if (inlen != 0 && 2953 inlen != (8 * (rt->ip6r_len + 1))) 2954 return (EINVAL); 2955 2956 if (checkonly) 2957 break; 2958 2959 if (inlen == 0) { 2960 if (sticky && 2961 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2962 kmem_free(ipp->ipp_rthdr, 2963 ipp->ipp_rthdrlen); 2964 ipp->ipp_rthdr = NULL; 2965 ipp->ipp_rthdrlen = 0; 2966 } 2967 ipp->ipp_fields &= ~IPPF_RTHDR; 2968 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2969 } else { 2970 error = optcom_pkt_set(invalp, inlen, sticky, 2971 (uchar_t **)&ipp->ipp_rthdr, 2972 &ipp->ipp_rthdrlen, 0); 2973 if (error != 0) 2974 return (error); 2975 ipp->ipp_fields |= IPPF_RTHDR; 2976 } 2977 if (sticky) { 2978 error = icmp_build_hdrs(icmp); 2979 if (error != 0) 2980 return (error); 2981 } 2982 break; 2983 } 2984 2985 case IPV6_DONTFRAG: 2986 if (checkonly) 2987 break; 2988 2989 if (onoff) { 2990 ipp->ipp_fields |= IPPF_DONTFRAG; 2991 } else { 2992 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2993 } 2994 break; 2995 2996 case IPV6_USE_MIN_MTU: 2997 if (inlen != sizeof (int)) 2998 return (EINVAL); 2999 3000 if (*i1 < -1 || *i1 > 1) 3001 return (EINVAL); 3002 3003 if (checkonly) 3004 break; 3005 3006 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3007 ipp->ipp_use_min_mtu = *i1; 3008 break; 3009 3010 /* 3011 * This option can't be set. Its only returned via 3012 * getsockopt() or ancillary data. 3013 */ 3014 case IPV6_PATHMTU: 3015 return (EINVAL); 3016 3017 case IPV6_BOUND_PIF: 3018 case IPV6_SEC_OPT: 3019 case IPV6_DONTFAILOVER_IF: 3020 case IPV6_SRC_PREFERENCES: 3021 case IPV6_V6ONLY: 3022 /* Handled at IP level */ 3023 return (-EINVAL); 3024 default: 3025 *outlenp = 0; 3026 return (EINVAL); 3027 } 3028 break; 3029 } /* end IPPROTO_IPV6 */ 3030 3031 case IPPROTO_ICMPV6: 3032 /* 3033 * Only allow IPv6 option processing on IPv6 sockets. 
3034 */ 3035 if (icmp->icmp_family != AF_INET6) { 3036 *outlenp = 0; 3037 return (ENOPROTOOPT); 3038 } 3039 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 3040 *outlenp = 0; 3041 return (ENOPROTOOPT); 3042 } 3043 switch (name) { 3044 case ICMP6_FILTER: 3045 if (!checkonly) { 3046 if ((inlen != 0) && 3047 (inlen != sizeof (icmp6_filter_t))) 3048 return (EINVAL); 3049 3050 if (inlen == 0) { 3051 if (icmp->icmp_filter != NULL) { 3052 kmem_free(icmp->icmp_filter, 3053 sizeof (icmp6_filter_t)); 3054 icmp->icmp_filter = NULL; 3055 } 3056 } else { 3057 if (icmp->icmp_filter == NULL) { 3058 icmp->icmp_filter = kmem_alloc( 3059 sizeof (icmp6_filter_t), 3060 KM_NOSLEEP); 3061 if (icmp->icmp_filter == NULL) { 3062 *outlenp = 0; 3063 return (ENOBUFS); 3064 } 3065 } 3066 (void) bcopy(invalp, icmp->icmp_filter, 3067 inlen); 3068 } 3069 } 3070 break; 3071 3072 default: 3073 *outlenp = 0; 3074 return (EINVAL); 3075 } 3076 break; 3077 default: 3078 *outlenp = 0; 3079 return (EINVAL); 3080 } 3081 /* 3082 * Common case of OK return with outval same as inval. 3083 */ 3084 if (invalp != outvalp) { 3085 /* don't trust bcopy for identical src/dst */ 3086 (void) bcopy(invalp, outvalp, inlen); 3087 } 3088 *outlenp = inlen; 3089 return (0); 3090 }

/*
 * Entry point for setting a socket option on a raw IP/ICMP conn.
 *
 * Maps the option-management context onto a "check only" flag, rejects
 * requests that may not be made in that context, and hands the real work
 * to icmp_do_opt_set().
 *
 * Returns 0 or an errno value; whatever icmp_do_opt_set() returns is
 * passed back unchanged.  On every path that returns without calling
 * icmp_do_opt_set(), *outlenp is set to 0.
 *
 * icmp_tpi_opt_set() calls this with icmp_rwlock held as writer.
 */
/* ARGSUSED */
int
icmp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
    uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
    void *thisdg_attrs, cred_t *cr)
{
	boolean_t	validate_only = B_FALSE;

	switch (optset_context) {
	case SETFN_OPTCOM_CHECKONLY:
		/*
		 * T_CHECK semantics for T_OPTCOM_REQ.  With no value part
		 * (inlen == 0) the validation done elsewhere is sufficient
		 * and there is nothing for us to do.  With a value part we
		 * must "pretend" to set it, i.e. validate without applying.
		 */
		if (inlen == 0) {
			*outlenp = 0;
			return (0);
		}
		validate_only = B_TRUE;
		break;

	case SETFN_OPTCOM_NEGOTIATE:
		break;

	case SETFN_UD_NEGOTIATE:
	case SETFN_CONN_NEGOTIATE:
		/*
		 * Local and "association-related" options negotiated
		 * through T_UNITDATA_REQ; filter out the ones that we do
		 * not want to be settable this way.
		 */
		if (!icmp_opt_allow_udr_set(level, name)) {
			*outlenp = 0;
			return (EINVAL);
		}
		break;

	default:
		/* We should never get here */
		*outlenp = 0;
		return (EINVAL);
	}

	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));

	return (icmp_do_opt_set(connp, level, name, inlen, invalp, outlenp,
	    outvalp, cr, thisdg_attrs, validate_only));
}

/*
 * TPI (STREAMS) wrapper around icmp_opt_set(): resolves the conn from the
 * queue and performs the option set with icmp_rwlock held as writer.
 * Returns whatever icmp_opt_set() returns.
 */
/* ARGSUSED */
int
icmp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
    uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
    void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
{
	conn_t	*connp = Q_TO_CONN(q);
	icmp_t	*icmp = connp->conn_icmp;
	int	rval;

	rw_enter(&icmp->icmp_rwlock, RW_WRITER);
	rval = icmp_opt_set(connp, optset_context, level, name, inlen, invalp,
	    outlenp, outvalp, thisdg_attrs, cr);
	rw_exit(&icmp->icmp_rwlock);

	return (rval);
}

/*
 * Rebuild the cached IPv6 header template (icmp_sticky_hdrs) from
 * icmp_sticky_ipp, icmp_v6src, icmp_ttl, icmp_proto, icmp_raw_checksum and
 * icmp_no_tp_cksum.  The template holds an ip6i_t (if needed), the ip6_t,
 * and any sticky extension headers.
 *
 * The caller must hold icmp_rwlock as writer.  If the template outgrows
 * icmp_max_hdr_len the lock is dropped around the call to
 * proto_set_tx_wroff() and re-taken as writer before returning.
 *
 * Returns 0 on success, or ENOMEM if a new template buffer cannot be
 * allocated (the existing template is left untouched in that case).
 */
static int
icmp_build_hdrs(icmp_t *icmp)
{
	icmp_stack_t	*is = icmp->icmp_is;
	ip6_pkt_t	*ipp = &icmp->icmp_sticky_ipp;
	uchar_t		*newhdrs;
	uint_t		hdrs_len;
	ip6_t		*ip6h;
	ip6i_t		*ip6i;

	ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock));
	hdrs_len = ip_total_hdrs_len_v6(ipp);
	ASSERT(hdrs_len != 0);

	/* Only reallocate when the required size has changed */
	if (hdrs_len != icmp->icmp_sticky_hdrs_len) {
		newhdrs = NULL;
		if (hdrs_len != 0) {
			newhdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
			if (newhdrs == NULL)
				return (ENOMEM);
		}
		if (icmp->icmp_sticky_hdrs_len != 0) {
			kmem_free(icmp->icmp_sticky_hdrs,
			    icmp->icmp_sticky_hdrs_len);
		}
		icmp->icmp_sticky_hdrs = newhdrs;
		icmp->icmp_sticky_hdrs_len = hdrs_len;
	}
	ip_build_hdrs_v6(icmp->icmp_sticky_hdrs,
	    icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto);

	/* Fill in the header fields that are not carried in ipp */
	if ((ipp->ipp_fields & IPPF_HAS_IP6I) == 0) {
		ip6h = (ip6_t *)icmp->icmp_sticky_hdrs;
	} else {
		/* The ip6_t immediately follows the leading ip6i_t */
		ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs;
		ip6h = (ip6_t *)(ip6i + 1);

		if ((ipp->ipp_fields & IPPF_RAW_CKSUM) != 0) {
			ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM;
			ip6i->ip6i_checksum_off = icmp->icmp_checksum_off;
		}
		if ((ipp->ipp_fields & IPPF_NO_CKSUM) != 0)
			ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM;
	}

	/* No sticky source address: fall back to the bound source */
	if ((ipp->ipp_fields & IPPF_ADDR) == 0)
		ip6h->ip6_src = icmp->icmp_v6src;

	/* Try to get everything in a single mblk */
	if (hdrs_len > icmp->icmp_max_hdr_len) {
		conn_t	*connp;

		icmp->icmp_max_hdr_len = hdrs_len;
		rw_exit(&icmp->icmp_rwlock);
		connp = icmp->icmp_connp;
		(void) proto_set_tx_wroff(connp->conn_rq, connp,
		    icmp->icmp_max_hdr_len + is->is_wroff_extra);
		rw_enter(&icmp->icmp_rwlock, RW_WRITER);
	}
	return (0);
}

/*
 * ND "get" handler for a single tunable.  cp points at the icmpparam_t
 * that was registered by icmp_param_register(); its current value is
 * formatted as a decimal into the reply message mp.  Always returns 0.
 */
/* ARGSUSED */
static int
icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	(void) mi_mpprintf(mp, "%d", ((icmpparam_t *)cp)->icmp_param_value);
	return (0);
}

/*
 * Register each named element of the icmppa[] array (cnt entries) with
 * the named dispatch (ND) handler, followed by the "icmp_status" report
 * entry.  On any registration failure the ND table is freed and B_FALSE
 * is returned; otherwise B_TRUE.
 */
static boolean_t
icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt)
{
	int	i;

	for (i = 0; i < cnt; i++) {
		icmpparam_t	*pa = &icmppa[i];

		/* Entries without a name are not registered */
		if (pa->icmp_param_name == NULL ||
		    pa->icmp_param_name[0] == '\0')
			continue;

		if (!nd_load(ndp, pa->icmp_param_name, icmp_param_get,
		    icmp_param_set, (caddr_t)pa))
			goto fail;
	}

	if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL, NULL))
		goto fail;

	return (B_TRUE);

fail:
	nd_free(ndp);
	return (B_FALSE);
}

/*
 * ND "set" handler for a single tunable.  value is parsed as a base-10
 * integer and stored in the icmpparam_t that cp points at.
 *
 * Returns EINVAL if ddi_strtol() reports an error or the parsed value
 * lies outside [icmp_param_min, icmp_param_max]; returns 0 once the new
 * value has been stored.
 */
/* ARGSUSED */
static int
icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
{
	icmpparam_t	*pa = (icmpparam_t *)cp;
	long		parsed;

	if (ddi_strtol(value, NULL, 10, &parsed) != 0)
		return (EINVAL);

	/* Fail the request if the new value is out of bounds */
	if (parsed < pa->icmp_param_min || parsed > pa->icmp_param_max)
		return (EINVAL);

	pa->icmp_param_value = parsed;
	return (0);
}

/*
 * Dispose of an inbound message while icmp_recv_lock is held.
 *
 * For a non-STREAMS conn the message is appended (via b_next) to the
 * icmp fallback queue.  Otherwise it is sent up the read queue with
 * putnext() after the lock has been released.
 *
 * Must be entered with icmp_recv_lock held; the lock is always released
 * before this function returns.
 */
static void
icmp_queue_fallback(icmp_t *icmp, mblk_t *mp)
{
	ASSERT(MUTEX_HELD(&icmp->icmp_recv_lock));

	if (!IPCL_IS_NONSTR(icmp->icmp_connp)) {
		/*
		 * no more fallbacks possible, ok to drop lock.
		 */
		mutex_exit(&icmp->icmp_recv_lock);
		putnext(icmp->icmp_connp->conn_rq, mp);
		return;
	}

	/*
	 * fallback has started but messages have not been moved yet
	 */
	if (icmp->icmp_fallback_queue_head != NULL) {
		ASSERT(icmp->icmp_fallback_queue_tail != NULL);
		icmp->icmp_fallback_queue_tail->b_next = mp;
	} else {
		ASSERT(icmp->icmp_fallback_queue_tail == NULL);
		icmp->icmp_fallback_queue_head = mp;
	}
	icmp->icmp_fallback_queue_tail = mp;
	mutex_exit(&icmp->icmp_recv_lock);
}

3334 /*ARGSUSED2*/ 3335 static void 3336 icmp_input(void *arg1, mblk_t *mp, void *arg2) 3337 { 3338 conn_t *connp = (conn_t *)arg1; 3339 struct T_unitdata_ind *tudi; 3340 uchar_t *rptr; 3341 icmp_t *icmp; 3342 icmp_stack_t *is; 3343 sin_t *sin; 3344 sin6_t *sin6; 3345 ip6_t *ip6h; 3346 ip6i_t *ip6i; 3347 mblk_t *mp1; 3348 int hdr_len; 3349 ipha_t *ipha; 3350 int udi_size; /* Size of T_unitdata_ind */ 3351 uint_t ipvers; 3352 ip6_pkt_t ipp; 3353 uint8_t nexthdr; 3354 ip_pktinfo_t *pinfo = NULL; 3355 mblk_t *options_mp = NULL; 3356 uint_t icmp_opt = 0; 3357 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 3358 uint_t hopstrip; 3359 int error; 3360 3361 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 3362 3363 icmp = connp->conn_icmp; 3364 is = icmp->icmp_is; 3365 rptr = mp->b_rptr; 3366 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3367 ASSERT(OK_32PTR(rptr)); 3368 3369 /* 3370 * IP
should have prepended the options data in an M_CTL 3371 * Check M_CTL "type" to make sure are not here bcos of 3372 * a valid ICMP message 3373 */ 3374 if (DB_TYPE(mp) == M_CTL) { 3375 /* 3376 * FIXME: does IP still do this? 3377 * IP sends up the IPSEC_IN message for handling IPSEC 3378 * policy at the TCP level. We don't need it here. 3379 */ 3380 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 3381 mp1 = mp->b_cont; 3382 freeb(mp); 3383 mp = mp1; 3384 rptr = mp->b_rptr; 3385 } else if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3386 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3387 IN_PKTINFO) { 3388 /* 3389 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3390 * has been prepended to the packet by IP. We need to 3391 * extract the mblk and adjust the rptr 3392 */ 3393 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3394 options_mp = mp; 3395 mp = mp->b_cont; 3396 rptr = mp->b_rptr; 3397 } else { 3398 /* 3399 * ICMP messages. 3400 */ 3401 icmp_icmp_error(connp, mp); 3402 return; 3403 } 3404 } 3405 3406 /* 3407 * Discard message if it is misaligned or smaller than the IP header. 3408 */ 3409 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3410 freemsg(mp); 3411 if (options_mp != NULL) 3412 freeb(options_mp); 3413 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3414 return; 3415 } 3416 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3417 3418 /* Handle M_DATA messages containing IP packets messages */ 3419 if (ipvers == IPV4_VERSION) { 3420 /* 3421 * Special case where IP attaches 3422 * the IRE needs to be handled so that we don't send up 3423 * IRE to the user land. 
3424 */ 3425 ipha = (ipha_t *)rptr; 3426 hdr_len = IPH_HDR_LENGTH(ipha); 3427 3428 if (ipha->ipha_protocol == IPPROTO_TCP) { 3429 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3430 3431 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3432 TH_SYN) && mp->b_cont != NULL) { 3433 mp1 = mp->b_cont; 3434 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3435 freeb(mp1); 3436 mp->b_cont = NULL; 3437 } 3438 } 3439 } 3440 if (is->is_bsd_compat) { 3441 ushort_t len; 3442 len = ntohs(ipha->ipha_length); 3443 3444 if (mp->b_datap->db_ref > 1) { 3445 /* 3446 * Allocate a new IP header so that we can 3447 * modify ipha_length. 3448 */ 3449 mblk_t *mp1; 3450 3451 mp1 = allocb(hdr_len, BPRI_MED); 3452 if (!mp1) { 3453 freemsg(mp); 3454 if (options_mp != NULL) 3455 freeb(options_mp); 3456 BUMP_MIB(&is->is_rawip_mib, 3457 rawipInErrors); 3458 return; 3459 } 3460 bcopy(rptr, mp1->b_rptr, hdr_len); 3461 mp->b_rptr = rptr + hdr_len; 3462 rptr = mp1->b_rptr; 3463 ipha = (ipha_t *)rptr; 3464 mp1->b_cont = mp; 3465 mp1->b_wptr = rptr + hdr_len; 3466 mp = mp1; 3467 } 3468 len -= hdr_len; 3469 ipha->ipha_length = htons(len); 3470 } 3471 } 3472 3473 /* 3474 * This is the inbound data path. Packets are passed upstream as 3475 * T_UNITDATA_IND messages with full IP headers still attached. 3476 */ 3477 if (icmp->icmp_family == AF_INET) { 3478 ASSERT(ipvers == IPV4_VERSION); 3479 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3480 if (icmp->icmp_recvif && (pinfo != NULL) && 3481 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3482 udi_size += sizeof (struct T_opthdr) + 3483 sizeof (uint_t); 3484 } 3485 3486 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3487 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3488 udi_size += sizeof (struct T_opthdr) + 3489 sizeof (struct in_pktinfo); 3490 } 3491 3492 /* 3493 * If SO_TIMESTAMP is set allocate the appropriate sized 3494 * buffer. 
Since gethrestime() expects a pointer aligned 3495 * argument, we allocate space necessary for extra 3496 * alignment (even though it might not be used). 3497 */ 3498 if (icmp->icmp_timestamp) { 3499 udi_size += sizeof (struct T_opthdr) + 3500 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3501 } 3502 mp1 = allocb(udi_size, BPRI_MED); 3503 if (mp1 == NULL) { 3504 freemsg(mp); 3505 if (options_mp != NULL) 3506 freeb(options_mp); 3507 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3508 return; 3509 } 3510 mp1->b_cont = mp; 3511 mp = mp1; 3512 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3513 mp->b_datap->db_type = M_PROTO; 3514 mp->b_wptr = (uchar_t *)tudi + udi_size; 3515 tudi->PRIM_type = T_UNITDATA_IND; 3516 tudi->SRC_length = sizeof (sin_t); 3517 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3518 sin = (sin_t *)&tudi[1]; 3519 *sin = sin_null; 3520 sin->sin_family = AF_INET; 3521 sin->sin_addr.s_addr = ipha->ipha_src; 3522 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3523 sizeof (sin_t); 3524 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3525 tudi->OPT_length = udi_size; 3526 3527 /* 3528 * Add options if IP_RECVIF is set 3529 */ 3530 if (udi_size != 0) { 3531 char *dstopt; 3532 3533 dstopt = (char *)&sin[1]; 3534 if (icmp->icmp_recvif && (pinfo != NULL) && 3535 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3536 3537 struct T_opthdr *toh; 3538 uint_t *dstptr; 3539 3540 toh = (struct T_opthdr *)dstopt; 3541 toh->level = IPPROTO_IP; 3542 toh->name = IP_RECVIF; 3543 toh->len = sizeof (struct T_opthdr) + 3544 sizeof (uint_t); 3545 toh->status = 0; 3546 dstopt += sizeof (struct T_opthdr); 3547 dstptr = (uint_t *)dstopt; 3548 *dstptr = pinfo->ip_pkt_ifindex; 3549 dstopt += sizeof (uint_t); 3550 udi_size -= toh->len; 3551 } 3552 if (icmp->icmp_timestamp) { 3553 struct T_opthdr *toh; 3554 3555 toh = (struct T_opthdr *)dstopt; 3556 toh->level = SOL_SOCKET; 3557 toh->name = SCM_TIMESTAMP; 3558 toh->len = sizeof (struct T_opthdr) + 3559 sizeof (timestruc_t) + 
_POINTER_ALIGNMENT; 3560 toh->status = 0; 3561 dstopt += sizeof (struct T_opthdr); 3562 /* Align for gethrestime() */ 3563 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 3564 sizeof (intptr_t)); 3565 gethrestime((timestruc_t *)dstopt); 3566 dstopt = (char *)toh + toh->len; 3567 udi_size -= toh->len; 3568 } 3569 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3570 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3571 struct T_opthdr *toh; 3572 struct in_pktinfo *pktinfop; 3573 3574 toh = (struct T_opthdr *)dstopt; 3575 toh->level = IPPROTO_IP; 3576 toh->name = IP_PKTINFO; 3577 toh->len = sizeof (struct T_opthdr) + 3578 sizeof (in_pktinfo_t); 3579 toh->status = 0; 3580 dstopt += sizeof (struct T_opthdr); 3581 pktinfop = (struct in_pktinfo *)dstopt; 3582 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3583 pktinfop->ipi_spec_dst = 3584 pinfo->ip_pkt_match_addr; 3585 3586 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3587 3588 dstopt += sizeof (struct in_pktinfo); 3589 udi_size -= toh->len; 3590 } 3591 3592 /* Consumed all of allocated space */ 3593 ASSERT(udi_size == 0); 3594 } 3595 3596 if (options_mp != NULL) 3597 freeb(options_mp); 3598 3599 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3600 goto deliver; 3601 } 3602 3603 /* 3604 * We don't need options_mp in the IPv6 path. 3605 */ 3606 if (options_mp != NULL) { 3607 freeb(options_mp); 3608 options_mp = NULL; 3609 } 3610 3611 /* 3612 * Discard message if it is smaller than the IPv6 header 3613 * or if the header is malformed. 3614 */ 3615 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3616 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3617 icmp->icmp_family != AF_INET6) { 3618 freemsg(mp); 3619 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3620 return; 3621 } 3622 3623 /* Initialize */ 3624 ipp.ipp_fields = 0; 3625 hopstrip = 0; 3626 3627 ip6h = (ip6_t *)rptr; 3628 /* 3629 * Call on ip_find_hdr_v6 which gets the total hdr len 3630 * as well as individual lenghts of ext hdrs (and ptrs to 3631 * them). 
3632 */ 3633 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3634 /* Look for ifindex information */ 3635 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3636 ip6i = (ip6i_t *)ip6h; 3637 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3638 ASSERT(ip6i->ip6i_ifindex != 0); 3639 ipp.ipp_fields |= IPPF_IFINDEX; 3640 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3641 } 3642 rptr = (uchar_t *)&ip6i[1]; 3643 mp->b_rptr = rptr; 3644 if (rptr == mp->b_wptr) { 3645 mp1 = mp->b_cont; 3646 freeb(mp); 3647 mp = mp1; 3648 rptr = mp->b_rptr; 3649 } 3650 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3651 ip6h = (ip6_t *)rptr; 3652 } 3653 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3654 3655 /* 3656 * We need to lie a bit to the user because users inside 3657 * labeled compartments should not see their own labels. We 3658 * assume that in all other respects IP has checked the label, 3659 * and that the label is always first among the options. (If 3660 * it's not first, then this code won't see it, and the option 3661 * will be passed along to the user.) 3662 * 3663 * If we had multilevel ICMP sockets, then the following code 3664 * should be skipped for them to allow the user to see the 3665 * label. 3666 * 3667 * Alignment restrictions in the definition of IP options 3668 * (namely, the requirement that the 4-octet DOI goes on a 3669 * 4-octet boundary) mean that we know exactly where the option 3670 * should start, but we're lenient for other hosts. 3671 * 3672 * Note that there are no multilevel ICMP or raw IP sockets 3673 * yet, thus nobody ever sees the IP6OPT_LS option. 
3674 */ 3675 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3676 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3677 const uchar_t *ucp = 3678 (const uchar_t *)ipp.ipp_hopopts + 2; 3679 int remlen = ipp.ipp_hopoptslen - 2; 3680 3681 while (remlen > 0) { 3682 if (*ucp == IP6OPT_PAD1) { 3683 remlen--; 3684 ucp++; 3685 } else if (*ucp == IP6OPT_PADN) { 3686 remlen -= ucp[1] + 2; 3687 ucp += ucp[1] + 2; 3688 } else if (*ucp == ip6opt_ls) { 3689 hopstrip = (ucp - 3690 (const uchar_t *)ipp.ipp_hopopts) + 3691 ucp[1] + 2; 3692 hopstrip = (hopstrip + 7) & ~7; 3693 break; 3694 } else { 3695 /* label option must be first */ 3696 break; 3697 } 3698 } 3699 } 3700 } else { 3701 hdr_len = IPV6_HDR_LEN; 3702 ip6i = NULL; 3703 nexthdr = ip6h->ip6_nxt; 3704 } 3705 /* 3706 * One special case where IP attaches the IRE needs to 3707 * be handled so that we don't send up IRE to the user land. 3708 */ 3709 if (nexthdr == IPPROTO_TCP) { 3710 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3711 3712 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3713 mp->b_cont != NULL) { 3714 mp1 = mp->b_cont; 3715 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3716 freeb(mp1); 3717 mp->b_cont = NULL; 3718 } 3719 } 3720 } 3721 /* 3722 * Check a filter for ICMPv6 types if needed. 3723 * Verify raw checksums if needed. 
3724 */ 3725 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3726 if (icmp->icmp_filter != NULL) { 3727 int type; 3728 3729 /* Assumes that IP has done the pullupmsg */ 3730 type = mp->b_rptr[hdr_len]; 3731 3732 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3733 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3734 freemsg(mp); 3735 return; 3736 } 3737 } else { 3738 /* Checksum */ 3739 uint16_t *up; 3740 uint32_t sum; 3741 int remlen; 3742 3743 up = (uint16_t *)&ip6h->ip6_src; 3744 3745 remlen = msgdsize(mp) - hdr_len; 3746 sum = htons(icmp->icmp_proto + remlen) 3747 + up[0] + up[1] + up[2] + up[3] 3748 + up[4] + up[5] + up[6] + up[7] 3749 + up[8] + up[9] + up[10] + up[11] 3750 + up[12] + up[13] + up[14] + up[15]; 3751 sum = (sum & 0xffff) + (sum >> 16); 3752 sum = IP_CSUM(mp, hdr_len, sum); 3753 if (sum != 0) { 3754 /* IPv6 RAW checksum failed */ 3755 ip0dbg(("icmp_rput: RAW checksum " 3756 "failed %x\n", sum)); 3757 freemsg(mp); 3758 BUMP_MIB(&is->is_rawip_mib, 3759 rawipInCksumErrs); 3760 return; 3761 } 3762 } 3763 } 3764 /* Skip all the IPv6 headers per API */ 3765 mp->b_rptr += hdr_len; 3766 3767 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3768 3769 /* 3770 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3771 * maintain state information, instead of relying on icmp_t 3772 * structure, since there arent any locks protecting these members 3773 * and there is a window where there might be a race between a 3774 * thread setting options on the write side and a thread reading 3775 * these options on the read size. 
3776 */ 3777 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3778 IPPF_RTHDR|IPPF_IFINDEX)) { 3779 if (icmp->icmp_ipv6_recvhopopts && 3780 (ipp.ipp_fields & IPPF_HOPOPTS) && 3781 ipp.ipp_hopoptslen > hopstrip) { 3782 udi_size += sizeof (struct T_opthdr) + 3783 ipp.ipp_hopoptslen - hopstrip; 3784 icmp_opt |= IPPF_HOPOPTS; 3785 } 3786 if ((icmp->icmp_ipv6_recvdstopts || 3787 icmp->icmp_old_ipv6_recvdstopts) && 3788 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3789 udi_size += sizeof (struct T_opthdr) + 3790 ipp.ipp_dstoptslen; 3791 icmp_opt |= IPPF_DSTOPTS; 3792 } 3793 if (((icmp->icmp_ipv6_recvdstopts && 3794 icmp->icmp_ipv6_recvrthdr && 3795 (ipp.ipp_fields & IPPF_RTHDR)) || 3796 icmp->icmp_ipv6_recvrtdstopts) && 3797 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3798 udi_size += sizeof (struct T_opthdr) + 3799 ipp.ipp_rtdstoptslen; 3800 icmp_opt |= IPPF_RTDSTOPTS; 3801 } 3802 if (icmp->icmp_ipv6_recvrthdr && 3803 (ipp.ipp_fields & IPPF_RTHDR)) { 3804 udi_size += sizeof (struct T_opthdr) + 3805 ipp.ipp_rthdrlen; 3806 icmp_opt |= IPPF_RTHDR; 3807 } 3808 if (icmp->icmp_ip_recvpktinfo && 3809 (ipp.ipp_fields & IPPF_IFINDEX)) { 3810 udi_size += sizeof (struct T_opthdr) + 3811 sizeof (struct in6_pktinfo); 3812 icmp_opt |= IPPF_IFINDEX; 3813 } 3814 } 3815 if (icmp->icmp_ipv6_recvhoplimit) { 3816 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3817 icmp_ipv6_recvhoplimit = B_TRUE; 3818 } 3819 3820 if (icmp->icmp_ipv6_recvtclass) 3821 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3822 3823 /* 3824 * If SO_TIMESTAMP is set allocate the appropriate sized 3825 * buffer. Since gethrestime() expects a pointer aligned 3826 * argument, we allocate space necessary for extra 3827 * alignment (even though it might not be used). 
3828 */ 3829 if (icmp->icmp_timestamp) { 3830 udi_size += sizeof (struct T_opthdr) + 3831 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3832 } 3833 3834 mp1 = allocb(udi_size, BPRI_MED); 3835 if (mp1 == NULL) { 3836 freemsg(mp); 3837 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3838 return; 3839 } 3840 mp1->b_cont = mp; 3841 mp = mp1; 3842 mp->b_datap->db_type = M_PROTO; 3843 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3844 mp->b_wptr = (uchar_t *)tudi + udi_size; 3845 tudi->PRIM_type = T_UNITDATA_IND; 3846 tudi->SRC_length = sizeof (sin6_t); 3847 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3848 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3849 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3850 tudi->OPT_length = udi_size; 3851 sin6 = (sin6_t *)&tudi[1]; 3852 sin6->sin6_port = 0; 3853 sin6->sin6_family = AF_INET6; 3854 3855 sin6->sin6_addr = ip6h->ip6_src; 3856 /* No sin6_flowinfo per API */ 3857 sin6->sin6_flowinfo = 0; 3858 /* For link-scope source pass up scope id */ 3859 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3860 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3861 sin6->sin6_scope_id = ipp.ipp_ifindex; 3862 else 3863 sin6->sin6_scope_id = 0; 3864 3865 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3866 icmp->icmp_zoneid, is->is_netstack); 3867 3868 if (udi_size != 0) { 3869 uchar_t *dstopt; 3870 3871 dstopt = (uchar_t *)&sin6[1]; 3872 if (icmp_opt & IPPF_IFINDEX) { 3873 struct T_opthdr *toh; 3874 struct in6_pktinfo *pkti; 3875 3876 toh = (struct T_opthdr *)dstopt; 3877 toh->level = IPPROTO_IPV6; 3878 toh->name = IPV6_PKTINFO; 3879 toh->len = sizeof (struct T_opthdr) + 3880 sizeof (*pkti); 3881 toh->status = 0; 3882 dstopt += sizeof (struct T_opthdr); 3883 pkti = (struct in6_pktinfo *)dstopt; 3884 pkti->ipi6_addr = ip6h->ip6_dst; 3885 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3886 dstopt += sizeof (*pkti); 3887 udi_size -= toh->len; 3888 } 3889 if (icmp_ipv6_recvhoplimit) { 3890 struct T_opthdr *toh; 3891 3892 toh = 
(struct T_opthdr *)dstopt; 3893 toh->level = IPPROTO_IPV6; 3894 toh->name = IPV6_HOPLIMIT; 3895 toh->len = sizeof (struct T_opthdr) + 3896 sizeof (uint_t); 3897 toh->status = 0; 3898 dstopt += sizeof (struct T_opthdr); 3899 *(uint_t *)dstopt = ip6h->ip6_hops; 3900 dstopt += sizeof (uint_t); 3901 udi_size -= toh->len; 3902 } 3903 if (icmp->icmp_ipv6_recvtclass) { 3904 struct T_opthdr *toh; 3905 3906 toh = (struct T_opthdr *)dstopt; 3907 toh->level = IPPROTO_IPV6; 3908 toh->name = IPV6_TCLASS; 3909 toh->len = sizeof (struct T_opthdr) + 3910 sizeof (uint_t); 3911 toh->status = 0; 3912 dstopt += sizeof (struct T_opthdr); 3913 *(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3914 dstopt += sizeof (uint_t); 3915 udi_size -= toh->len; 3916 } 3917 if (icmp->icmp_timestamp) { 3918 struct T_opthdr *toh; 3919 3920 toh = (struct T_opthdr *)dstopt; 3921 toh->level = SOL_SOCKET; 3922 toh->name = SCM_TIMESTAMP; 3923 toh->len = sizeof (struct T_opthdr) + 3924 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3925 toh->status = 0; 3926 dstopt += sizeof (struct T_opthdr); 3927 /* Align for gethrestime() */ 3928 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 3929 sizeof (intptr_t)); 3930 gethrestime((timestruc_t *)dstopt); 3931 dstopt = (uchar_t *)toh + toh->len; 3932 udi_size -= toh->len; 3933 } 3934 3935 if (icmp_opt & IPPF_HOPOPTS) { 3936 struct T_opthdr *toh; 3937 3938 toh = (struct T_opthdr *)dstopt; 3939 toh->level = IPPROTO_IPV6; 3940 toh->name = IPV6_HOPOPTS; 3941 toh->len = sizeof (struct T_opthdr) + 3942 ipp.ipp_hopoptslen - hopstrip; 3943 toh->status = 0; 3944 dstopt += sizeof (struct T_opthdr); 3945 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 3946 ipp.ipp_hopoptslen - hopstrip); 3947 if (hopstrip > 0) { 3948 /* copy next header value and fake length */ 3949 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 3950 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 3951 hopstrip / 8; 3952 } 3953 dstopt += ipp.ipp_hopoptslen - hopstrip; 3954 udi_size -= toh->len; 3955 } 3956 if 
(icmp_opt & IPPF_RTDSTOPTS) { 3957 struct T_opthdr *toh; 3958 3959 toh = (struct T_opthdr *)dstopt; 3960 toh->level = IPPROTO_IPV6; 3961 toh->name = IPV6_DSTOPTS; 3962 toh->len = sizeof (struct T_opthdr) + 3963 ipp.ipp_rtdstoptslen; 3964 toh->status = 0; 3965 dstopt += sizeof (struct T_opthdr); 3966 bcopy(ipp.ipp_rtdstopts, dstopt, 3967 ipp.ipp_rtdstoptslen); 3968 dstopt += ipp.ipp_rtdstoptslen; 3969 udi_size -= toh->len; 3970 } 3971 if (icmp_opt & IPPF_RTHDR) { 3972 struct T_opthdr *toh; 3973 3974 toh = (struct T_opthdr *)dstopt; 3975 toh->level = IPPROTO_IPV6; 3976 toh->name = IPV6_RTHDR; 3977 toh->len = sizeof (struct T_opthdr) + 3978 ipp.ipp_rthdrlen; 3979 toh->status = 0; 3980 dstopt += sizeof (struct T_opthdr); 3981 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 3982 dstopt += ipp.ipp_rthdrlen; 3983 udi_size -= toh->len; 3984 } 3985 if (icmp_opt & IPPF_DSTOPTS) { 3986 struct T_opthdr *toh; 3987 3988 toh = (struct T_opthdr *)dstopt; 3989 toh->level = IPPROTO_IPV6; 3990 toh->name = IPV6_DSTOPTS; 3991 toh->len = sizeof (struct T_opthdr) + 3992 ipp.ipp_dstoptslen; 3993 toh->status = 0; 3994 dstopt += sizeof (struct T_opthdr); 3995 bcopy(ipp.ipp_dstopts, dstopt, 3996 ipp.ipp_dstoptslen); 3997 dstopt += ipp.ipp_dstoptslen; 3998 udi_size -= toh->len; 3999 } 4000 /* Consumed all of allocated space */ 4001 ASSERT(udi_size == 0); 4002 } 4003 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 4004 4005 deliver: 4006 if (IPCL_IS_NONSTR(connp)) { 4007 if ((*connp->conn_upcalls->su_recv) 4008 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 4009 NULL) < 0) { 4010 mutex_enter(&icmp->icmp_recv_lock); 4011 if (error == ENOSPC) { 4012 /* 4013 * let's confirm while holding the lock 4014 */ 4015 if ((*connp->conn_upcalls->su_recv) 4016 (connp->conn_upper_handle, NULL, 0, 0, 4017 &error, NULL) < 0) { 4018 if (error == ENOSPC) { 4019 connp->conn_flow_cntrld = 4020 B_TRUE; 4021 } else { 4022 ASSERT(error == EOPNOTSUPP); 4023 } 4024 } 4025 mutex_exit(&icmp->icmp_recv_lock); 4026 
} else { 4027 ASSERT(error == EOPNOTSUPP); 4028 icmp_queue_fallback(icmp, mp); 4029 } 4030 } 4031 } else { 4032 putnext(connp->conn_rq, mp); 4033 } 4034 ASSERT(MUTEX_NOT_HELD(&icmp->icmp_recv_lock)); 4035 } 4036 4037 /* 4038 * return SNMP stuff in buffer in mpdata 4039 */ 4040 mblk_t * 4041 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 4042 { 4043 mblk_t *mpdata; 4044 struct opthdr *optp; 4045 conn_t *connp = Q_TO_CONN(q); 4046 icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 4047 mblk_t *mp2ctl; 4048 4049 /* 4050 * make a copy of the original message 4051 */ 4052 mp2ctl = copymsg(mpctl); 4053 4054 if (mpctl == NULL || 4055 (mpdata = mpctl->b_cont) == NULL) { 4056 freemsg(mpctl); 4057 freemsg(mp2ctl); 4058 return (0); 4059 } 4060 4061 /* fixed length structure for IPv4 and IPv6 counters */ 4062 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4063 optp->level = EXPER_RAWIP; 4064 optp->name = 0; 4065 (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, 4066 sizeof (is->is_rawip_mib)); 4067 optp->len = msgdsize(mpdata); 4068 qreply(q, mpctl); 4069 4070 return (mp2ctl); 4071 } 4072 4073 /* 4074 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 4075 * TODO: If this ever actually tries to set anything, it needs to be 4076 * to do the appropriate locking. 
4077 */ 4078 /* ARGSUSED */ 4079 int 4080 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4081 uchar_t *ptr, int len) 4082 { 4083 switch (level) { 4084 case EXPER_RAWIP: 4085 return (0); 4086 default: 4087 return (1); 4088 } 4089 } 4090 4091 /* Report for ndd "icmp_status" */ 4092 /* ARGSUSED */ 4093 static int 4094 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4095 { 4096 conn_t *connp; 4097 ip_stack_t *ipst; 4098 char laddrbuf[INET6_ADDRSTRLEN]; 4099 char faddrbuf[INET6_ADDRSTRLEN]; 4100 int i; 4101 4102 (void) mi_mpprintf(mp, 4103 "RAWIP " MI_COL_HDRPAD_STR 4104 /* 01234567[89ABCDEF] */ 4105 " src addr dest addr state"); 4106 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 4107 4108 connp = Q_TO_CONN(q); 4109 ipst = connp->conn_netstack->netstack_ip; 4110 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4111 connf_t *connfp; 4112 char *state; 4113 4114 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4115 connp = NULL; 4116 4117 while ((connp = ipcl_get_next_conn(connfp, connp, 4118 IPCL_RAWIPCONN)) != NULL) { 4119 icmp_t *icmp; 4120 4121 mutex_enter(&(connp)->conn_lock); 4122 icmp = connp->conn_icmp; 4123 4124 if (icmp->icmp_state == TS_UNBND) 4125 state = "UNBOUND"; 4126 else if (icmp->icmp_state == TS_IDLE) 4127 state = "IDLE"; 4128 else if (icmp->icmp_state == TS_DATA_XFER) 4129 state = "CONNECTED"; 4130 else 4131 state = "UnkState"; 4132 4133 (void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s", 4134 (void *)icmp, 4135 inet_ntop(AF_INET6, &icmp->icmp_v6dst.sin6_addr, 4136 faddrbuf, 4137 sizeof (faddrbuf)), 4138 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 4139 sizeof (laddrbuf)), 4140 state); 4141 mutex_exit(&(connp)->conn_lock); 4142 } 4143 } 4144 return (0); 4145 } 4146 4147 /* 4148 * This routine creates a T_UDERROR_IND message and passes it upstream. 4149 * The address and options are copied from the T_UNITDATA_REQ message 4150 * passed in mp. This message is freed. 
4151 */ 4152 static void 4153 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 4154 { 4155 mblk_t *mp1; 4156 uchar_t *rptr = mp->b_rptr; 4157 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 4158 4159 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 4160 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 4161 tudr->OPT_length, err); 4162 if (mp1) 4163 qreply(q, mp1); 4164 freemsg(mp); 4165 } 4166 4167 4168 static int 4169 rawip_do_unbind(conn_t *connp) 4170 { 4171 icmp_t *icmp = connp->conn_icmp; 4172 4173 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4174 /* If a bind has not been done, we can't unbind. */ 4175 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 4176 rw_exit(&icmp->icmp_rwlock); 4177 return (-TOUTSTATE); 4178 } 4179 icmp->icmp_pending_op = T_UNBIND_REQ; 4180 rw_exit(&icmp->icmp_rwlock); 4181 4182 /* 4183 * Call ip to unbind 4184 */ 4185 4186 ip_unbind(connp); 4187 4188 /* 4189 * Once we're unbound from IP, the pending operation may be cleared 4190 * here. 4191 */ 4192 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4193 V6_SET_ZERO(icmp->icmp_v6src); 4194 V6_SET_ZERO(icmp->icmp_bound_v6src); 4195 icmp->icmp_pending_op = -1; 4196 icmp->icmp_state = TS_UNBND; 4197 if (icmp->icmp_family == AF_INET6) 4198 (void) icmp_build_hdrs(icmp); 4199 rw_exit(&icmp->icmp_rwlock); 4200 return (0); 4201 } 4202 4203 /* 4204 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 4205 * After some error checking, the message is passed downstream to ip. 
4206 */ 4207 static void 4208 icmp_tpi_unbind(queue_t *q, mblk_t *mp) 4209 { 4210 conn_t *connp = Q_TO_CONN(q); 4211 int error; 4212 4213 ASSERT(mp->b_cont == NULL); 4214 error = rawip_do_unbind(connp); 4215 if (error) { 4216 if (error < 0) { 4217 icmp_err_ack(q, mp, -error, 0); 4218 } else { 4219 icmp_err_ack(q, mp, 0, error); 4220 } 4221 return; 4222 } 4223 4224 /* 4225 * Convert mp into a T_OK_ACK 4226 */ 4227 4228 mp = mi_tpi_ok_ack_alloc(mp); 4229 4230 /* 4231 * should not happen in practice... T_OK_ACK is smaller than the 4232 * original message. 4233 */ 4234 ASSERT(mp != NULL); 4235 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4236 qreply(q, mp); 4237 } 4238 4239 4240 /* 4241 * Process IPv4 packets that already include an IP header. 4242 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 4243 * IPPROTO_IGMP). 4244 */ 4245 static int 4246 icmp_wput_hdrincl(queue_t *q, conn_t *connp, mblk_t *mp, icmp_t *icmp, 4247 ip4_pkt_t *pktinfop) 4248 { 4249 icmp_stack_t *is = icmp->icmp_is; 4250 ipha_t *ipha; 4251 int ip_hdr_length; 4252 int tp_hdr_len; 4253 mblk_t *mp1; 4254 uint_t pkt_len; 4255 ip_opt_info_t optinfo; 4256 4257 optinfo.ip_opt_flags = 0; 4258 optinfo.ip_opt_ill_index = 0; 4259 ipha = (ipha_t *)mp->b_rptr; 4260 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4261 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 4262 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4263 ASSERT(icmp != NULL); 4264 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4265 freemsg(mp); 4266 return (0); 4267 } 4268 ipha = (ipha_t *)mp->b_rptr; 4269 } 4270 ipha->ipha_version_and_hdr_length = 4271 (IP_VERSION<<4) | (ip_hdr_length>>2); 4272 4273 /* 4274 * For the socket of SOCK_RAW type, the checksum is provided in the 4275 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4276 * tell IP that the application has sent a complete IP header and not 4277 * to compute the transport checksum nor change the DF flag. 
4278 */ 4279 ipha->ipha_ident = IP_HDR_INCLUDED; 4280 ipha->ipha_hdr_checksum = 0; 4281 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 4282 /* Insert options if any */ 4283 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4284 /* 4285 * Put the IP header plus any transport header that is 4286 * checksumed by ip_wput into the first mblk. (ip_wput assumes 4287 * that at least the checksum field is in the first mblk.) 4288 */ 4289 switch (ipha->ipha_protocol) { 4290 case IPPROTO_UDP: 4291 tp_hdr_len = 8; 4292 break; 4293 case IPPROTO_TCP: 4294 tp_hdr_len = 20; 4295 break; 4296 default: 4297 tp_hdr_len = 0; 4298 break; 4299 } 4300 /* 4301 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 4302 * tp_hdr_len bytes will be in a single mblk. 4303 */ 4304 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 4305 tp_hdr_len)) { 4306 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 4307 tp_hdr_len)) { 4308 BUMP_MIB(&is->is_rawip_mib, 4309 rawipOutErrors); 4310 freemsg(mp); 4311 return (0); 4312 } 4313 ipha = (ipha_t *)mp->b_rptr; 4314 } 4315 4316 /* 4317 * if the length is larger then the max allowed IP packet, 4318 * then send an error and abort the processing. 
4319 */ 4320 pkt_len = ntohs(ipha->ipha_length) 4321 + icmp->icmp_ip_snd_options_len; 4322 if (pkt_len > IP_MAXPACKET) { 4323 return (EMSGSIZE); 4324 } 4325 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra + 4326 tp_hdr_len, BPRI_LO))) { 4327 return (ENOMEM); 4328 } 4329 mp1->b_rptr += is->is_wroff_extra; 4330 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4331 4332 ipha->ipha_length = htons((uint16_t)pkt_len); 4333 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4334 4335 /* Copy transport header if any */ 4336 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4337 mp1->b_wptr += tp_hdr_len; 4338 4339 /* Add options */ 4340 ipha = (ipha_t *)mp1->b_rptr; 4341 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 4342 icmp->icmp_ip_snd_options_len); 4343 4344 /* Drop IP header and transport header from original */ 4345 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4346 4347 mp1->b_cont = mp; 4348 mp = mp1; 4349 /* 4350 * Massage source route putting first source 4351 * route in ipha_dst. 4352 */ 4353 (void) ip_massage_options(ipha, is->is_netstack); 4354 } 4355 4356 if (pktinfop != NULL) { 4357 /* 4358 * Over write the source address provided in the header 4359 */ 4360 if (pktinfop->ip4_addr != INADDR_ANY) { 4361 ipha->ipha_src = pktinfop->ip4_addr; 4362 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4363 } 4364 4365 if (pktinfop->ip4_ill_index != 0) { 4366 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4367 } 4368 } 4369 4370 mblk_setcred(mp, connp->conn_cred); 4371 ip_output_options(connp, mp, q, IP_WPUT, &optinfo); 4372 return (0); 4373 } 4374 4375 static int 4376 icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4377 { 4378 int err; 4379 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4380 icmp_stack_t *is = icmp->icmp_is; 4381 conn_t *connp = icmp->icmp_connp; 4382 4383 err = tsol_compute_label(DB_CREDDEF(mp, connp->conn_cred), dst, 4384 opt_storage, connp->conn_mac_exempt, 4385 is->is_netstack->netstack_ip); 4386 if (err == 0) { 4387 err = 
tsol_update_options(&icmp->icmp_ip_snd_options, 4388 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4389 opt_storage); 4390 } 4391 if (err != 0) { 4392 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4393 DTRACE_PROBE4( 4394 tx__ip__log__drop__updatelabel__icmp, 4395 char *, "icmp(1) failed to update options(2) on mp(3)", 4396 icmp_t *, icmp, char *, opt_storage, mblk_t *, mp); 4397 return (err); 4398 } 4399 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4400 return (0); 4401 } 4402 4403 /* 4404 * This routine handles all messages passed downstream. It either 4405 * consumes the message or passes it downstream; it never queues a 4406 * a message. 4407 */ 4408 static void 4409 icmp_wput(queue_t *q, mblk_t *mp) 4410 { 4411 uchar_t *rptr = mp->b_rptr; 4412 ipha_t *ipha; 4413 mblk_t *mp1; 4414 #define tudr ((struct T_unitdata_req *)rptr) 4415 size_t ip_len; 4416 conn_t *connp = Q_TO_CONN(q); 4417 icmp_t *icmp = connp->conn_icmp; 4418 icmp_stack_t *is = icmp->icmp_is; 4419 sin6_t *sin6; 4420 sin_t *sin; 4421 ipaddr_t v4dst; 4422 ip4_pkt_t pktinfo; 4423 ip4_pkt_t *pktinfop = &pktinfo; 4424 ip6_pkt_t ipp_s; /* For ancillary data options */ 4425 ip6_pkt_t *ipp = &ipp_s; 4426 int error; 4427 4428 ipp->ipp_fields = 0; 4429 ipp->ipp_sticky_ignored = 0; 4430 4431 switch (mp->b_datap->db_type) { 4432 case M_DATA: 4433 if (icmp->icmp_hdrincl) { 4434 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4435 ipha = (ipha_t *)mp->b_rptr; 4436 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4437 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4438 BUMP_MIB(&is->is_rawip_mib, 4439 rawipOutErrors); 4440 freemsg(mp); 4441 return; 4442 } 4443 ipha = (ipha_t *)mp->b_rptr; 4444 } 4445 /* 4446 * If this connection was used for v6 (inconceivable!) 4447 * or if we have a new destination, then it's time to 4448 * figure a new label. 
4449 */ 4450 if (is_system_labeled() && 4451 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4452 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4453 ipha->ipha_dst)) { 4454 error = icmp_update_label(icmp, mp, 4455 ipha->ipha_dst); 4456 if (error != 0) { 4457 icmp_ud_err(q, mp, error); 4458 return; 4459 } 4460 } 4461 error = icmp_wput_hdrincl(q, connp, mp, icmp, NULL); 4462 if (error != 0) 4463 icmp_ud_err(q, mp, error); 4464 return; 4465 } 4466 freemsg(mp); 4467 return; 4468 case M_PROTO: 4469 case M_PCPROTO: 4470 ip_len = mp->b_wptr - rptr; 4471 if (ip_len >= sizeof (struct T_unitdata_req)) { 4472 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4473 if (((union T_primitives *)rptr)->type 4474 == T_UNITDATA_REQ) 4475 break; 4476 } 4477 /* FALLTHRU */ 4478 default: 4479 icmp_wput_other(q, mp); 4480 return; 4481 } 4482 4483 /* Handle T_UNITDATA_REQ messages here. */ 4484 4485 mp1 = mp->b_cont; 4486 if (mp1 == NULL) { 4487 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4488 icmp_ud_err(q, mp, EPROTO); 4489 return; 4490 } 4491 4492 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4493 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4494 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4495 return; 4496 } 4497 4498 switch (icmp->icmp_family) { 4499 case AF_INET6: 4500 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4501 if (!OK_32PTR((char *)sin6) || 4502 tudr->DEST_length != sizeof (sin6_t) || 4503 sin6->sin6_family != AF_INET6) { 4504 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4505 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4506 return; 4507 } 4508 4509 /* No support for mapped addresses on raw sockets */ 4510 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4511 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4512 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4513 return; 4514 } 4515 4516 /* 4517 * Destination is a native IPv6 address. 4518 * Send out an IPv6 format packet. 
4519 */ 4520 if (tudr->OPT_length != 0) { 4521 int error; 4522 4523 error = 0; 4524 if (icmp_unitdata_opt_process(q, mp, &error, 4525 (void *)ipp) < 0) { 4526 /* failure */ 4527 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4528 icmp_ud_err(q, mp, error); 4529 return; 4530 } 4531 ASSERT(error == 0); 4532 } 4533 4534 error = raw_ip_send_data_v6(q, connp, mp1, sin6, ipp); 4535 goto done; 4536 4537 case AF_INET: 4538 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4539 if (!OK_32PTR((char *)sin) || 4540 tudr->DEST_length != sizeof (sin_t) || 4541 sin->sin_family != AF_INET) { 4542 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4543 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4544 return; 4545 } 4546 /* Extract and ipaddr */ 4547 v4dst = sin->sin_addr.s_addr; 4548 break; 4549 4550 default: 4551 ASSERT(0); 4552 } 4553 4554 pktinfop->ip4_ill_index = 0; 4555 pktinfop->ip4_addr = INADDR_ANY; 4556 4557 /* 4558 * If options passed in, feed it for verification and handling 4559 */ 4560 if (tudr->OPT_length != 0) { 4561 int error; 4562 4563 error = 0; 4564 if (icmp_unitdata_opt_process(q, mp, &error, 4565 (void *)pktinfop) < 0) { 4566 /* failure */ 4567 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4568 icmp_ud_err(q, mp, error); 4569 return; 4570 } 4571 ASSERT(error == 0); 4572 /* 4573 * Note: Success in processing options. 
4574 * mp option buffer represented by 4575 * OPT_length/offset now potentially modified 4576 * and contain option setting results 4577 */ 4578 } 4579 4580 error = raw_ip_send_data_v4(q, connp, mp1, v4dst, pktinfop); 4581 done: 4582 if (error != 0) { 4583 icmp_ud_err(q, mp, error); 4584 return; 4585 } else { 4586 mp->b_cont = NULL; 4587 freeb(mp); 4588 } 4589 } 4590 4591 4592 /* ARGSUSED */ 4593 static void 4594 icmp_wput_fallback(queue_t *q, mblk_t *mp) 4595 { 4596 #ifdef DEBUG 4597 cmn_err(CE_CONT, "icmp_wput_fallback: Message during fallback \n"); 4598 #endif 4599 freemsg(mp); 4600 } 4601 4602 static int 4603 raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp, ipaddr_t v4dst, 4604 ip4_pkt_t *pktinfop) 4605 { 4606 ipha_t *ipha; 4607 size_t ip_len; 4608 icmp_t *icmp = connp->conn_icmp; 4609 icmp_stack_t *is = icmp->icmp_is; 4610 int ip_hdr_length; 4611 ip_opt_info_t optinfo; 4612 4613 optinfo.ip_opt_flags = 0; 4614 optinfo.ip_opt_ill_index = 0; 4615 4616 if (icmp->icmp_state == TS_UNBND) { 4617 /* If a port has not been bound to the stream, fail. 
*/ 4618 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4619 return (EPROTO); 4620 } 4621 4622 if (v4dst == INADDR_ANY) 4623 v4dst = htonl(INADDR_LOOPBACK); 4624 4625 /* Check if our saved options are valid; update if not */ 4626 if (is_system_labeled() && 4627 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4628 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst)) { 4629 int error = icmp_update_label(icmp, mp, v4dst); 4630 4631 if (error != 0) 4632 return (error); 4633 } 4634 4635 /* Protocol 255 contains full IP headers */ 4636 if (icmp->icmp_hdrincl) 4637 return (icmp_wput_hdrincl(q, connp, mp, icmp, pktinfop)); 4638 4639 /* Add an IP header */ 4640 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4641 ipha = (ipha_t *)&mp->b_rptr[-ip_hdr_length]; 4642 if ((uchar_t *)ipha < mp->b_datap->db_base || 4643 mp->b_datap->db_ref != 1 || 4644 !OK_32PTR(ipha)) { 4645 mblk_t *mp1; 4646 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, 4647 BPRI_LO))) { 4648 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4649 return (ENOMEM); 4650 } 4651 mp1->b_cont = mp; 4652 ipha = (ipha_t *)mp1->b_datap->db_lim; 4653 mp1->b_wptr = (uchar_t *)ipha; 4654 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4655 mp = mp1; 4656 } 4657 #ifdef _BIG_ENDIAN 4658 /* Set version, header length, and tos */ 4659 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4660 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4661 icmp->icmp_type_of_service); 4662 /* Set ttl and protocol */ 4663 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4664 #else 4665 /* Set version, header length, and tos */ 4666 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4667 ((icmp->icmp_type_of_service << 8) | 4668 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4669 /* Set ttl and protocol */ 4670 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4671 #endif 4672 if (pktinfop->ip4_addr != INADDR_ANY) { 4673 ipha->ipha_src = pktinfop->ip4_addr; 4674 
optinfo.ip_opt_flags = IP_VERIFY_SRC; 4675 } else { 4676 4677 /* 4678 * Copy our address into the packet. If this is zero, 4679 * ip will fill in the real source address. 4680 */ 4681 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4682 } 4683 4684 ipha->ipha_fragment_offset_and_flags = 0; 4685 4686 if (pktinfop->ip4_ill_index != 0) { 4687 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4688 } 4689 4690 4691 /* 4692 * For the socket of SOCK_RAW type, the checksum is provided in the 4693 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4694 * tell IP that the application has sent a complete IP header and not 4695 * to compute the transport checksum nor change the DF flag. 4696 */ 4697 ipha->ipha_ident = IP_HDR_INCLUDED; 4698 4699 /* Finish common formatting of the packet. */ 4700 mp->b_rptr = (uchar_t *)ipha; 4701 4702 ip_len = mp->b_wptr - (uchar_t *)ipha; 4703 if (mp->b_cont != NULL) 4704 ip_len += msgdsize(mp->b_cont); 4705 4706 /* 4707 * Set the length into the IP header. 4708 * If the length is greater than the maximum allowed by IP, 4709 * then free the message and return. Do not try and send it 4710 * as this can cause problems in layers below. 4711 */ 4712 if (ip_len > IP_MAXPACKET) { 4713 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4714 return (EMSGSIZE); 4715 } 4716 ipha->ipha_length = htons((uint16_t)ip_len); 4717 /* 4718 * Copy in the destination address request 4719 */ 4720 ipha->ipha_dst = v4dst; 4721 4722 /* 4723 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4724 */ 4725 if (CLASSD(v4dst)) 4726 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4727 4728 /* Copy in options if any */ 4729 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4730 bcopy(icmp->icmp_ip_snd_options, 4731 &ipha[1], icmp->icmp_ip_snd_options_len); 4732 /* 4733 * Massage source route putting first source route in ipha_dst. 4734 * Ignore the destination in the T_unitdata_req. 
4735 */ 4736 (void) ip_massage_options(ipha, is->is_netstack); 4737 } 4738 4739 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 4740 mblk_setcred(mp, connp->conn_cred); 4741 ip_output_options(connp, mp, q, IP_WPUT, &optinfo); 4742 return (0); 4743 } 4744 4745 static int 4746 icmp_update_label_v6(icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4747 { 4748 int err; 4749 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4750 icmp_stack_t *is = icmp->icmp_is; 4751 conn_t *connp = icmp->icmp_connp; 4752 4753 err = tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), dst, 4754 opt_storage, connp->conn_mac_exempt, 4755 is->is_netstack->netstack_ip); 4756 if (err == 0) { 4757 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4758 &icmp->icmp_label_len_v6, opt_storage); 4759 } 4760 if (err != 0) { 4761 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4762 DTRACE_PROBE4( 4763 tx__ip__log__drop__updatelabel__icmp6, 4764 char *, "icmp(1) failed to update options(2) on mp(3)", 4765 icmp_t *, icmp, char *, opt_storage, mblk_t *, mp); 4766 return (err); 4767 } 4768 4769 icmp->icmp_v6lastdst = *dst; 4770 return (0); 4771 } 4772 4773 /* 4774 * raw_ip_send_data_v6(): 4775 * Assumes that icmp_wput did some sanity checking on the destination 4776 * address, but that the label may not yet be correct. 4777 */ 4778 static int 4779 raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp, sin6_t *sin6, 4780 ip6_pkt_t *ipp) 4781 { 4782 ip6_t *ip6h; 4783 ip6i_t *ip6i; /* mp->b_rptr even if no ip6i_t */ 4784 int ip_hdr_len = IPV6_HDR_LEN; 4785 size_t ip_len; 4786 icmp_t *icmp = connp->conn_icmp; 4787 icmp_stack_t *is = icmp->icmp_is; 4788 ip6_pkt_t *tipp; 4789 uint32_t csum = 0; 4790 uint_t ignore = 0; 4791 uint_t option_exists = 0, is_sticky = 0; 4792 uint8_t *cp; 4793 uint8_t *nxthdr_ptr; 4794 in6_addr_t ip6_dst; 4795 4796 /* 4797 * If the local address is a mapped address return 4798 * an error. 
4799 * It would be possible to send an IPv6 packet but the 4800 * response would never make it back to the application 4801 * since it is bound to a mapped address. 4802 */ 4803 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4804 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4805 return (EADDRNOTAVAIL); 4806 } 4807 4808 ignore = ipp->ipp_sticky_ignored; 4809 if (sin6->sin6_scope_id != 0 && 4810 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4811 /* 4812 * IPPF_SCOPE_ID is special. It's neither a sticky 4813 * option nor ancillary data. It needs to be 4814 * explicitly set in options_exists. 4815 */ 4816 option_exists |= IPPF_SCOPE_ID; 4817 } 4818 4819 /* 4820 * Compute the destination address 4821 */ 4822 ip6_dst = sin6->sin6_addr; 4823 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4824 ip6_dst = ipv6_loopback; 4825 4826 /* 4827 * If we're not going to the same destination as last time, then 4828 * recompute the label required. This is done in a separate routine to 4829 * avoid blowing up our stack here. 4830 */ 4831 if (is_system_labeled() && 4832 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst)) { 4833 int error = 0; 4834 4835 error = icmp_update_label_v6(icmp, mp, &ip6_dst); 4836 if (error != 0) 4837 return (error); 4838 } 4839 4840 /* 4841 * If there's a security label here, then we ignore any options the 4842 * user may try to set. We keep the peer's label as a hidden sticky 4843 * option. 4844 */ 4845 if (icmp->icmp_label_len_v6 > 0) { 4846 ignore &= ~IPPF_HOPOPTS; 4847 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4848 } 4849 4850 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4851 (ipp->ipp_fields == 0)) { 4852 /* No sticky options nor ancillary data. */ 4853 goto no_options; 4854 } 4855 4856 /* 4857 * Go through the options figuring out where each is going to 4858 * come from and build two masks. The first mask indicates if 4859 * the option exists at all. The second mask indicates if the 4860 * option is sticky or ancillary. 
4861 */ 4862 if (!(ignore & IPPF_HOPOPTS)) { 4863 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4864 option_exists |= IPPF_HOPOPTS; 4865 ip_hdr_len += ipp->ipp_hopoptslen; 4866 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4867 option_exists |= IPPF_HOPOPTS; 4868 is_sticky |= IPPF_HOPOPTS; 4869 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4870 } 4871 } 4872 4873 if (!(ignore & IPPF_RTHDR)) { 4874 if (ipp->ipp_fields & IPPF_RTHDR) { 4875 option_exists |= IPPF_RTHDR; 4876 ip_hdr_len += ipp->ipp_rthdrlen; 4877 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4878 option_exists |= IPPF_RTHDR; 4879 is_sticky |= IPPF_RTHDR; 4880 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4881 } 4882 } 4883 4884 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4885 /* 4886 * Need to have a router header to use these. 4887 */ 4888 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4889 option_exists |= IPPF_RTDSTOPTS; 4890 ip_hdr_len += ipp->ipp_rtdstoptslen; 4891 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4892 option_exists |= IPPF_RTDSTOPTS; 4893 is_sticky |= IPPF_RTDSTOPTS; 4894 ip_hdr_len += 4895 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4896 } 4897 } 4898 4899 if (!(ignore & IPPF_DSTOPTS)) { 4900 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4901 option_exists |= IPPF_DSTOPTS; 4902 ip_hdr_len += ipp->ipp_dstoptslen; 4903 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4904 option_exists |= IPPF_DSTOPTS; 4905 is_sticky |= IPPF_DSTOPTS; 4906 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4907 } 4908 } 4909 4910 if (!(ignore & IPPF_IFINDEX)) { 4911 if (ipp->ipp_fields & IPPF_IFINDEX) { 4912 option_exists |= IPPF_IFINDEX; 4913 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4914 option_exists |= IPPF_IFINDEX; 4915 is_sticky |= IPPF_IFINDEX; 4916 } 4917 } 4918 4919 if (!(ignore & IPPF_ADDR)) { 4920 if (ipp->ipp_fields & IPPF_ADDR) { 4921 option_exists |= IPPF_ADDR; 4922 } else if 
(icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4923 option_exists |= IPPF_ADDR; 4924 is_sticky |= IPPF_ADDR; 4925 } 4926 } 4927 4928 if (!(ignore & IPPF_DONTFRAG)) { 4929 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4930 option_exists |= IPPF_DONTFRAG; 4931 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4932 option_exists |= IPPF_DONTFRAG; 4933 is_sticky |= IPPF_DONTFRAG; 4934 } 4935 } 4936 4937 if (!(ignore & IPPF_USE_MIN_MTU)) { 4938 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4939 option_exists |= IPPF_USE_MIN_MTU; 4940 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4941 IPPF_USE_MIN_MTU) { 4942 option_exists |= IPPF_USE_MIN_MTU; 4943 is_sticky |= IPPF_USE_MIN_MTU; 4944 } 4945 } 4946 4947 if (!(ignore & IPPF_NEXTHOP)) { 4948 if (ipp->ipp_fields & IPPF_NEXTHOP) { 4949 option_exists |= IPPF_NEXTHOP; 4950 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) { 4951 option_exists |= IPPF_NEXTHOP; 4952 is_sticky |= IPPF_NEXTHOP; 4953 } 4954 } 4955 4956 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 4957 option_exists |= IPPF_HOPLIMIT; 4958 /* IPV6_HOPLIMIT can never be sticky */ 4959 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 4960 4961 if (!(ignore & IPPF_UNICAST_HOPS) && 4962 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 4963 option_exists |= IPPF_UNICAST_HOPS; 4964 is_sticky |= IPPF_UNICAST_HOPS; 4965 } 4966 4967 if (!(ignore & IPPF_MULTICAST_HOPS) && 4968 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 4969 option_exists |= IPPF_MULTICAST_HOPS; 4970 is_sticky |= IPPF_MULTICAST_HOPS; 4971 } 4972 4973 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 4974 /* This is a sticky socket option only */ 4975 option_exists |= IPPF_NO_CKSUM; 4976 is_sticky |= IPPF_NO_CKSUM; 4977 } 4978 4979 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 4980 /* This is a sticky socket option only */ 4981 option_exists |= IPPF_RAW_CKSUM; 4982 is_sticky |= IPPF_RAW_CKSUM; 4983 } 4984 4985 if (!(ignore 
& IPPF_TCLASS)) { 4986 if (ipp->ipp_fields & IPPF_TCLASS) { 4987 option_exists |= IPPF_TCLASS; 4988 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 4989 option_exists |= IPPF_TCLASS; 4990 is_sticky |= IPPF_TCLASS; 4991 } 4992 } 4993 4994 no_options: 4995 4996 /* 4997 * If any options carried in the ip6i_t were specified, we 4998 * need to account for the ip6i_t in the data we'll be sending 4999 * down. 5000 */ 5001 if (option_exists & IPPF_HAS_IP6I) 5002 ip_hdr_len += sizeof (ip6i_t); 5003 5004 /* check/fix buffer config, setup pointers into it */ 5005 ip6h = (ip6_t *)&mp->b_rptr[-ip_hdr_len]; 5006 if ((mp->b_datap->db_ref != 1) || 5007 ((unsigned char *)ip6h < mp->b_datap->db_base) || 5008 !OK_32PTR(ip6h)) { 5009 mblk_t *mp1; 5010 5011 /* Try to get everything in a single mblk next time */ 5012 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 5013 icmp->icmp_max_hdr_len = ip_hdr_len; 5014 5015 (void) proto_set_tx_wroff(q == NULL ? NULL:RD(q), connp, 5016 icmp->icmp_max_hdr_len + is->is_wroff_extra); 5017 } 5018 mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); 5019 if (!mp1) { 5020 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5021 return (ENOMEM); 5022 } 5023 mp1->b_cont = mp; 5024 mp1->b_wptr = mp1->b_datap->db_lim; 5025 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 5026 mp = mp1; 5027 } 5028 mp->b_rptr = (unsigned char *)ip6h; 5029 ip6i = (ip6i_t *)ip6h; 5030 5031 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 5032 if (option_exists & IPPF_HAS_IP6I) { 5033 ip6h = (ip6_t *)&ip6i[1]; 5034 ip6i->ip6i_flags = 0; 5035 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5036 5037 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 5038 if (option_exists & IPPF_SCOPE_ID) { 5039 ip6i->ip6i_flags |= IP6I_IFINDEX; 5040 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 5041 } else if (option_exists & IPPF_IFINDEX) { 5042 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 5043 ASSERT(tipp->ipp_ifindex != 0); 5044 ip6i->ip6i_flags |= IP6I_IFINDEX; 5045 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 5046 } 5047 5048 if (option_exists & IPPF_RAW_CKSUM) { 5049 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 5050 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 5051 } 5052 5053 if (option_exists & IPPF_NO_CKSUM) { 5054 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 5055 } 5056 5057 if (option_exists & IPPF_ADDR) { 5058 /* 5059 * Enable per-packet source address verification if 5060 * IPV6_PKTINFO specified the source address. 5061 * ip6_src is set in the transport's _wput function. 5062 */ 5063 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 5064 } 5065 5066 if (option_exists & IPPF_DONTFRAG) { 5067 ip6i->ip6i_flags |= IP6I_DONTFRAG; 5068 } 5069 5070 if (option_exists & IPPF_USE_MIN_MTU) { 5071 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 5072 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 5073 } 5074 5075 if (option_exists & IPPF_NEXTHOP) { 5076 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 5077 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 5078 ip6i->ip6i_flags |= IP6I_NEXTHOP; 5079 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 5080 } 5081 5082 /* 5083 * tell IP this is an ip6i_t private header 5084 */ 5085 ip6i->ip6i_nxt = IPPROTO_RAW; 5086 } 5087 5088 /* Initialize IPv6 header */ 5089 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5090 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 5091 5092 /* Set the hoplimit of the outgoing packet. 
*/ 5093 if (option_exists & IPPF_HOPLIMIT) { 5094 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 5095 ip6h->ip6_hops = ipp->ipp_hoplimit; 5096 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5097 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 5098 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 5099 if (option_exists & IPPF_MULTICAST_HOPS) 5100 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5101 } else { 5102 ip6h->ip6_hops = icmp->icmp_ttl; 5103 if (option_exists & IPPF_UNICAST_HOPS) 5104 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5105 } 5106 5107 if (option_exists & IPPF_ADDR) { 5108 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 5109 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 5110 ip6h->ip6_src = tipp->ipp_addr; 5111 } else { 5112 /* 5113 * The source address was not set using IPV6_PKTINFO. 5114 * First look at the bound source. 5115 * If unspecified fallback to __sin6_src_id. 5116 */ 5117 ip6h->ip6_src = icmp->icmp_v6src; 5118 if (sin6->__sin6_src_id != 0 && 5119 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 5120 ip_srcid_find_id(sin6->__sin6_src_id, 5121 &ip6h->ip6_src, icmp->icmp_zoneid, 5122 is->is_netstack); 5123 } 5124 } 5125 5126 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 5127 cp = (uint8_t *)&ip6h[1]; 5128 5129 /* 5130 * Here's where we have to start stringing together 5131 * any extension headers in the right order: 5132 * Hop-by-hop, destination, routing, and final destination opts. 
5133 */ 5134 if (option_exists & IPPF_HOPOPTS) { 5135 /* Hop-by-hop options */ 5136 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 5137 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 5138 5139 *nxthdr_ptr = IPPROTO_HOPOPTS; 5140 nxthdr_ptr = &hbh->ip6h_nxt; 5141 5142 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 5143 cp += tipp->ipp_hopoptslen; 5144 } 5145 /* 5146 * En-route destination options 5147 * Only do them if there's a routing header as well 5148 */ 5149 if (option_exists & IPPF_RTDSTOPTS) { 5150 ip6_dest_t *dst = (ip6_dest_t *)cp; 5151 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 5152 5153 *nxthdr_ptr = IPPROTO_DSTOPTS; 5154 nxthdr_ptr = &dst->ip6d_nxt; 5155 5156 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 5157 cp += tipp->ipp_rtdstoptslen; 5158 } 5159 /* 5160 * Routing header next 5161 */ 5162 if (option_exists & IPPF_RTHDR) { 5163 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 5164 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 5165 5166 *nxthdr_ptr = IPPROTO_ROUTING; 5167 nxthdr_ptr = &rt->ip6r_nxt; 5168 5169 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 5170 cp += tipp->ipp_rthdrlen; 5171 } 5172 /* 5173 * Do ultimate destination options 5174 */ 5175 if (option_exists & IPPF_DSTOPTS) { 5176 ip6_dest_t *dest = (ip6_dest_t *)cp; 5177 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 5178 5179 *nxthdr_ptr = IPPROTO_DSTOPTS; 5180 nxthdr_ptr = &dest->ip6d_nxt; 5181 5182 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 5183 cp += tipp->ipp_dstoptslen; 5184 } 5185 5186 /* 5187 * Now set the last header pointer to the proto passed in 5188 */ 5189 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 5190 *nxthdr_ptr = icmp->icmp_proto; 5191 5192 /* 5193 * Copy in the destination address 5194 */ 5195 ip6h->ip6_dst = ip6_dst; 5196 5197 ip6h->ip6_vcf = 5198 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 5199 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 5200 5201 if (option_exists & IPPF_TCLASS) { 5202 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 5203 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 5204 tipp->ipp_tclass); 5205 } 5206 if (option_exists & IPPF_RTHDR) { 5207 ip6_rthdr_t *rth; 5208 5209 /* 5210 * Perform any processing needed for source routing. 5211 * We know that all extension headers will be in the same mblk 5212 * as the IPv6 header. 5213 */ 5214 rth = ip_find_rthdr_v6(ip6h, mp->b_wptr); 5215 if (rth != NULL && rth->ip6r_segleft != 0) { 5216 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 5217 /* 5218 * Drop packet - only support Type 0 routing. 5219 * Notify the application as well. 5220 */ 5221 BUMP_MIB(&is->is_rawip_mib, 5222 rawipOutErrors); 5223 return (EPROTO); 5224 } 5225 /* 5226 * rth->ip6r_len is twice the number of 5227 * addresses in the header 5228 */ 5229 if (rth->ip6r_len & 0x1) { 5230 BUMP_MIB(&is->is_rawip_mib, 5231 rawipOutErrors); 5232 return (EPROTO); 5233 } 5234 /* 5235 * Shuffle the routing header and ip6_dst 5236 * addresses, and get the checksum difference 5237 * between the first hop (in ip6_dst) and 5238 * the destination (in the last routing hdr entry). 5239 */ 5240 csum = ip_massage_options_v6(ip6h, rth, 5241 is->is_netstack); 5242 /* 5243 * Verify that the first hop isn't a mapped address. 5244 * Routers along the path need to do this verification 5245 * for subsequent hops. 5246 */ 5247 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 5248 BUMP_MIB(&is->is_rawip_mib, 5249 rawipOutErrors); 5250 return (EADDRNOTAVAIL); 5251 } 5252 } 5253 } 5254 5255 ip_len = mp->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 5256 if (mp->b_cont != NULL) 5257 ip_len += msgdsize(mp->b_cont); 5258 5259 /* 5260 * Set the length into the IP header. 5261 * If the length is greater than the maximum allowed by IP, 5262 * then free the message and return. Do not try and send it 5263 * as this can cause problems in layers below. 
5264 */ 5265 if (ip_len > IP_MAXPACKET) { 5266 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5267 return (EMSGSIZE); 5268 } 5269 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 5270 uint_t cksum_off; /* From ip6i == mp->b_rptr */ 5271 uint16_t *cksum_ptr; 5272 uint_t ext_hdrs_len; 5273 5274 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 5275 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 5276 icmp->icmp_checksum_off == 2); 5277 5278 /* 5279 * We make it easy for IP to include our pseudo header 5280 * by putting our length in uh_checksum, modified (if 5281 * we have a routing header) by the checksum difference 5282 * between the ultimate destination and first hop addresses. 5283 * Note: ICMPv6 must always checksum the packet. 5284 */ 5285 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 5286 if (cksum_off + sizeof (uint16_t) > mp->b_wptr - mp->b_rptr) { 5287 if (!pullupmsg(mp, cksum_off + sizeof (uint16_t))) { 5288 BUMP_MIB(&is->is_rawip_mib, 5289 rawipOutErrors); 5290 freemsg(mp); 5291 return (0); 5292 } 5293 ip6i = (ip6i_t *)mp->b_rptr; 5294 if (ip6i->ip6i_nxt == IPPROTO_RAW) 5295 ip6h = (ip6_t *)&ip6i[1]; 5296 else 5297 ip6h = (ip6_t *)ip6i; 5298 } 5299 /* Add payload length to checksum */ 5300 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 5301 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 5302 csum += htons(ip_len - ext_hdrs_len); 5303 5304 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 5305 csum = (csum & 0xFFFF) + (csum >> 16); 5306 *cksum_ptr = (uint16_t)csum; 5307 } 5308 5309 #ifdef _LITTLE_ENDIAN 5310 ip_len = htons(ip_len); 5311 #endif 5312 ip6h->ip6_plen = (uint16_t)ip_len; 5313 5314 /* We're done. 
Pass the packet to IP */ 5315 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 5316 ip_output_v6(icmp->icmp_connp, mp, q, IP_WPUT); 5317 return (0); 5318 } 5319 5320 static void 5321 icmp_wput_other(queue_t *q, mblk_t *mp) 5322 { 5323 uchar_t *rptr = mp->b_rptr; 5324 struct iocblk *iocp; 5325 #define tudr ((struct T_unitdata_req *)rptr) 5326 conn_t *connp = Q_TO_CONN(q); 5327 icmp_t *icmp = connp->conn_icmp; 5328 icmp_stack_t *is = icmp->icmp_is; 5329 cred_t *cr; 5330 5331 cr = DB_CREDDEF(mp, connp->conn_cred); 5332 5333 switch (mp->b_datap->db_type) { 5334 case M_PROTO: 5335 case M_PCPROTO: 5336 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5337 /* 5338 * If the message does not contain a PRIM_type, 5339 * throw it away. 5340 */ 5341 freemsg(mp); 5342 return; 5343 } 5344 switch (((union T_primitives *)rptr)->type) { 5345 case T_ADDR_REQ: 5346 icmp_addr_req(q, mp); 5347 return; 5348 case O_T_BIND_REQ: 5349 case T_BIND_REQ: 5350 icmp_tpi_bind(q, mp); 5351 return; 5352 case T_CONN_REQ: 5353 icmp_tpi_connect(q, mp); 5354 return; 5355 case T_CAPABILITY_REQ: 5356 icmp_capability_req(q, mp); 5357 return; 5358 case T_INFO_REQ: 5359 icmp_info_req(q, mp); 5360 return; 5361 case T_UNITDATA_REQ: 5362 /* 5363 * If a T_UNITDATA_REQ gets here, the address must 5364 * be bad. Valid T_UNITDATA_REQs are found above 5365 * and break to below this switch. 
5366 */ 5367 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5368 return; 5369 case T_UNBIND_REQ: 5370 icmp_tpi_unbind(q, mp); 5371 return; 5372 5373 case T_SVR4_OPTMGMT_REQ: 5374 if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get, 5375 cr)) { 5376 /* Only IP can return anything meaningful */ 5377 (void) svr4_optcom_req(q, mp, cr, 5378 &icmp_opt_obj, B_TRUE); 5379 } 5380 return; 5381 5382 case T_OPTMGMT_REQ: 5383 /* Only IP can return anything meaningful */ 5384 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE); 5385 return; 5386 5387 case T_DISCON_REQ: 5388 icmp_tpi_disconnect(q, mp); 5389 return; 5390 5391 /* The following TPI message is not supported by icmp. */ 5392 case O_T_CONN_RES: 5393 case T_CONN_RES: 5394 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5395 return; 5396 5397 /* The following 3 TPI requests are illegal for icmp. */ 5398 case T_DATA_REQ: 5399 case T_EXDATA_REQ: 5400 case T_ORDREL_REQ: 5401 freemsg(mp); 5402 (void) putctl1(RD(q), M_ERROR, EPROTO); 5403 return; 5404 default: 5405 break; 5406 } 5407 break; 5408 case M_IOCTL: 5409 iocp = (struct iocblk *)mp->b_rptr; 5410 switch (iocp->ioc_cmd) { 5411 case TI_GETPEERNAME: 5412 if (icmp->icmp_state != TS_DATA_XFER) { 5413 /* 5414 * If a default destination address has not 5415 * been associated with the stream, then we 5416 * don't know the peer's name. 5417 */ 5418 iocp->ioc_error = ENOTCONN; 5419 err_ret:; 5420 iocp->ioc_count = 0; 5421 mp->b_datap->db_type = M_IOCACK; 5422 qreply(q, mp); 5423 return; 5424 } 5425 /* FALLTHRU */ 5426 case TI_GETMYNAME: 5427 /* 5428 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5429 * need to copyin the user's strbuf structure. 5430 * Processing will continue in the M_IOCDATA case 5431 * below. 
5432 */ 5433 mi_copyin(q, mp, NULL, 5434 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5435 return; 5436 case ND_SET: 5437 /* nd_getset performs the necessary error checking */ 5438 case ND_GET: 5439 if (nd_getset(q, is->is_nd, mp)) { 5440 qreply(q, mp); 5441 return; 5442 } 5443 break; 5444 case _SIOCSOCKFALLBACK: 5445 /* 5446 * socket is falling back to be a 5447 * streams socket. Nothing to do 5448 */ 5449 iocp->ioc_count = 0; 5450 iocp->ioc_rval = 0; 5451 qreply(q, mp); 5452 return; 5453 default: 5454 break; 5455 } 5456 break; 5457 case M_IOCDATA: 5458 icmp_wput_iocdata(q, mp); 5459 return; 5460 default: 5461 break; 5462 } 5463 ip_wput(q, mp); 5464 } 5465 5466 /* 5467 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5468 * messages. 5469 */ 5470 static void 5471 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5472 { 5473 mblk_t *mp1; 5474 STRUCT_HANDLE(strbuf, sb); 5475 icmp_t *icmp; 5476 uint_t addrlen; 5477 uint_t error; 5478 5479 /* Make sure it is one of ours. */ 5480 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5481 case TI_GETMYNAME: 5482 case TI_GETPEERNAME: 5483 break; 5484 default: 5485 icmp = Q_TO_ICMP(q); 5486 ip_output(icmp->icmp_connp, mp, q, IP_WPUT); 5487 return; 5488 } 5489 switch (mi_copy_state(q, mp, &mp1)) { 5490 case -1: 5491 return; 5492 case MI_COPY_CASE(MI_COPY_IN, 1): 5493 break; 5494 case MI_COPY_CASE(MI_COPY_OUT, 1): 5495 /* 5496 * The address has been copied out, so now 5497 * copyout the strbuf. 5498 */ 5499 mi_copyout(q, mp); 5500 return; 5501 case MI_COPY_CASE(MI_COPY_OUT, 2): 5502 /* 5503 * The address and strbuf have been copied out. 5504 * We're done, so just acknowledge the original 5505 * M_IOCTL. 5506 */ 5507 mi_copy_done(q, mp, 0); 5508 return; 5509 default: 5510 /* 5511 * Something strange has happened, so acknowledge 5512 * the original M_IOCTL with an EPROTO error. 
5513 */ 5514 mi_copy_done(q, mp, EPROTO); 5515 return; 5516 } 5517 /* 5518 * Now we have the strbuf structure for TI_GETMYNAME 5519 * and TI_GETPEERNAME. Next we copyout the requested 5520 * address and then we'll copyout the strbuf. 5521 */ 5522 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5523 (void *)mp1->b_rptr); 5524 icmp = Q_TO_ICMP(q); 5525 if (icmp->icmp_family == AF_INET) 5526 addrlen = sizeof (sin_t); 5527 else 5528 addrlen = sizeof (sin6_t); 5529 5530 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5531 mi_copy_done(q, mp, EINVAL); 5532 return; 5533 } 5534 5535 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5536 5537 if (mp1 == NULL) 5538 return; 5539 5540 rw_enter(&icmp->icmp_rwlock, RW_READER); 5541 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5542 case TI_GETMYNAME: 5543 error = rawip_do_getsockname(icmp, (void *)mp1->b_rptr, 5544 &addrlen); 5545 break; 5546 case TI_GETPEERNAME: 5547 error = rawip_do_getpeername(icmp, (void *)mp1->b_rptr, 5548 &addrlen); 5549 break; 5550 } 5551 rw_exit(&icmp->icmp_rwlock); 5552 5553 if (error != 0) { 5554 mi_copy_done(q, mp, error); 5555 } else { 5556 mp1->b_wptr += addrlen; 5557 STRUCT_FSET(sb, len, addrlen); 5558 5559 /* Copy out the address */ 5560 mi_copyout(q, mp); 5561 } 5562 } 5563 5564 static int 5565 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5566 void *thisdg_attrs) 5567 { 5568 conn_t *connp = Q_TO_CONN(q); 5569 struct T_unitdata_req *udreqp; 5570 int is_absreq_failure; 5571 cred_t *cr; 5572 5573 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5574 *errorp = 0; 5575 5576 cr = DB_CREDDEF(mp, connp->conn_cred); 5577 5578 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5579 udreqp->OPT_offset, cr, &icmp_opt_obj, 5580 thisdg_attrs, &is_absreq_failure); 5581 5582 if (*errorp != 0) { 5583 /* 5584 * Note: No special action needed in this 5585 * module for "is_absreq_failure" 5586 */ 5587 return (-1); /* failure */ 5588 } 5589 ASSERT(is_absreq_failure == 
0); 5590 return (0); /* success */ 5591 } 5592 5593 void 5594 icmp_ddi_g_init(void) 5595 { 5596 icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5597 icmp_opt_obj.odb_opt_arr_cnt); 5598 5599 /* 5600 * We want to be informed each time a stack is created or 5601 * destroyed in the kernel, so we can maintain the 5602 * set of icmp_stack_t's. 5603 */ 5604 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); 5605 } 5606 5607 void 5608 icmp_ddi_g_destroy(void) 5609 { 5610 netstack_unregister(NS_ICMP); 5611 } 5612 5613 #define INET_NAME "ip" 5614 5615 /* 5616 * Initialize the ICMP stack instance. 5617 */ 5618 static void * 5619 rawip_stack_init(netstackid_t stackid, netstack_t *ns) 5620 { 5621 icmp_stack_t *is; 5622 icmpparam_t *pa; 5623 int error = 0; 5624 major_t major; 5625 5626 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); 5627 is->is_netstack = ns; 5628 5629 pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); 5630 is->is_param_arr = pa; 5631 bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); 5632 5633 (void) icmp_param_register(&is->is_nd, 5634 is->is_param_arr, A_CNT(icmp_param_arr)); 5635 is->is_ksp = rawip_kstat_init(stackid); 5636 5637 major = mod_name_to_major(INET_NAME); 5638 error = ldi_ident_from_major(major, &is->is_ldi_ident); 5639 ASSERT(error == 0); 5640 return (is); 5641 } 5642 5643 /* 5644 * Free the ICMP stack instance. 
5645 */ 5646 static void 5647 rawip_stack_fini(netstackid_t stackid, void *arg) 5648 { 5649 icmp_stack_t *is = (icmp_stack_t *)arg; 5650 5651 nd_free(&is->is_nd); 5652 kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); 5653 is->is_param_arr = NULL; 5654 5655 rawip_kstat_fini(stackid, is->is_ksp); 5656 is->is_ksp = NULL; 5657 ldi_ident_release(is->is_ldi_ident); 5658 kmem_free(is, sizeof (*is)); 5659 } 5660 5661 static void * 5662 rawip_kstat_init(netstackid_t stackid) { 5663 kstat_t *ksp; 5664 5665 rawip_named_kstat_t template = { 5666 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5667 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5668 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5669 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5670 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5671 }; 5672 5673 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", 5674 KSTAT_TYPE_NAMED, 5675 NUM_OF_FIELDS(rawip_named_kstat_t), 5676 0, stackid); 5677 if (ksp == NULL || ksp->ks_data == NULL) 5678 return (NULL); 5679 5680 bcopy(&template, ksp->ks_data, sizeof (template)); 5681 ksp->ks_update = rawip_kstat_update; 5682 ksp->ks_private = (void *)(uintptr_t)stackid; 5683 5684 kstat_install(ksp); 5685 return (ksp); 5686 } 5687 5688 static void 5689 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) 5690 { 5691 if (ksp != NULL) { 5692 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 5693 kstat_delete_netstack(ksp, stackid); 5694 } 5695 } 5696 5697 static int 5698 rawip_kstat_update(kstat_t *ksp, int rw) 5699 { 5700 rawip_named_kstat_t *rawipkp; 5701 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; 5702 netstack_t *ns; 5703 icmp_stack_t *is; 5704 5705 if ((ksp == NULL) || (ksp->ks_data == NULL)) 5706 return (EIO); 5707 5708 if (rw == KSTAT_WRITE) 5709 return (EACCES); 5710 5711 rawipkp = (rawip_named_kstat_t *)ksp->ks_data; 5712 5713 ns = netstack_find_by_stackid(stackid); 5714 if (ns == NULL) 5715 return (-1); 5716 is = ns->netstack_icmp; 5717 if (is == NULL) { 5718 
netstack_rele(ns); 5719 return (-1); 5720 } 5721 rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; 5722 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; 5723 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; 5724 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; 5725 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; 5726 netstack_rele(ns); 5727 return (0); 5728 } 5729 5730 /* ARGSUSED */ 5731 int 5732 rawip_accept(sock_lower_handle_t lproto_handle, 5733 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 5734 cred_t *cr) 5735 { 5736 return (EOPNOTSUPP); 5737 } 5738 5739 /* ARGSUSED */ 5740 int 5741 rawip_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 5742 socklen_t len, cred_t *cr) 5743 { 5744 conn_t *connp = (conn_t *)proto_handle; 5745 int error; 5746 5747 /* Binding to a NULL address really means unbind */ 5748 if (sa == NULL) 5749 error = rawip_do_unbind(connp); 5750 else 5751 error = rawip_do_bind(connp, sa, len); 5752 5753 if (error < 0) { 5754 if (error == -TOUTSTATE) 5755 error = EINVAL; 5756 else 5757 error = proto_tlitosyserr(-error); 5758 } 5759 return (error); 5760 } 5761 5762 static int 5763 rawip_implicit_bind(conn_t *connp) 5764 { 5765 sin6_t sin6addr; 5766 sin_t *sin; 5767 sin6_t *sin6; 5768 socklen_t len; 5769 int error; 5770 5771 if (connp->conn_icmp->icmp_family == AF_INET) { 5772 len = sizeof (struct sockaddr_in); 5773 sin = (sin_t *)&sin6addr; 5774 *sin = sin_null; 5775 sin->sin_family = AF_INET; 5776 sin->sin_addr.s_addr = INADDR_ANY; 5777 } else { 5778 ASSERT(connp->conn_icmp->icmp_family == AF_INET6); 5779 len = sizeof (sin6_t); 5780 sin6 = (sin6_t *)&sin6addr; 5781 *sin6 = sin6_null; 5782 sin6->sin6_family = AF_INET6; 5783 V6_SET_ZERO(sin6->sin6_addr); 5784 } 5785 5786 error = rawip_do_bind(connp, (struct sockaddr *)&sin6addr, len); 5787 5788 return ((error < 0) ? 
proto_tlitosyserr(-error) : error); 5789 } 5790 5791 static int 5792 rawip_unbind(conn_t *connp) 5793 { 5794 int error; 5795 5796 error = rawip_do_unbind(connp); 5797 if (error < 0) { 5798 error = proto_tlitosyserr(-error); 5799 } 5800 return (error); 5801 } 5802 5803 /* ARGSUSED */ 5804 int 5805 rawip_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 5806 { 5807 return (EOPNOTSUPP); 5808 } 5809 5810 /* ARGSUSED */ 5811 int 5812 rawip_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 5813 socklen_t len, sock_connid_t *id, cred_t *cr) 5814 { 5815 conn_t *connp = (conn_t *)proto_handle; 5816 icmp_t *icmp = connp->conn_icmp; 5817 int error; 5818 boolean_t did_bind = B_FALSE; 5819 5820 if (sa == NULL) { 5821 /* 5822 * Disconnect 5823 * Make sure we are connected 5824 */ 5825 if (icmp->icmp_state != TS_DATA_XFER) 5826 return (EINVAL); 5827 5828 error = icmp_disconnect(connp); 5829 return (error); 5830 } 5831 5832 error = proto_verify_ip_addr(icmp->icmp_family, sa, len); 5833 if (error != 0) 5834 return (error); 5835 5836 /* do an implicit bind if necessary */ 5837 if (icmp->icmp_state == TS_UNBND) { 5838 error = rawip_implicit_bind(connp); 5839 /* 5840 * We could be racing with an actual bind, in which case 5841 * we would see EPROTO. We cross our fingers and try 5842 * to connect. 
5843 */ 5844 if (!(error == 0 || error == EPROTO)) 5845 return (error); 5846 did_bind = B_TRUE; 5847 } 5848 5849 /* 5850 * set SO_DGRAM_ERRIND 5851 */ 5852 icmp->icmp_dgram_errind = B_TRUE; 5853 5854 error = rawip_do_connect(connp, sa, len); 5855 5856 if (error != 0 && did_bind) { 5857 int unbind_err; 5858 5859 unbind_err = rawip_unbind(connp); 5860 ASSERT(unbind_err == 0); 5861 } 5862 5863 if (error == 0) { 5864 *id = 0; 5865 (*connp->conn_upcalls->su_connected) 5866 (connp->conn_upper_handle, 0, NULL, -1); 5867 } else if (error < 0) { 5868 error = proto_tlitosyserr(-error); 5869 } 5870 return (error); 5871 } 5872 5873 /* ARGSUSED */ 5874 void 5875 rawip_fallback(sock_lower_handle_t proto_handle, queue_t *q, 5876 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb) 5877 { 5878 conn_t *connp = (conn_t *)proto_handle; 5879 icmp_t *icmp; 5880 struct T_capability_ack tca; 5881 struct sockaddr_in6 laddr, faddr; 5882 socklen_t laddrlen, faddrlen; 5883 short opts; 5884 struct stroptions *stropt; 5885 mblk_t *stropt_mp; 5886 int error; 5887 5888 icmp = connp->conn_icmp; 5889 5890 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 5891 5892 /* 5893 * setup the fallback stream that was allocated 5894 */ 5895 connp->conn_dev = (dev_t)RD(q)->q_ptr; 5896 connp->conn_minor_arena = WR(q)->q_ptr; 5897 5898 RD(q)->q_ptr = WR(q)->q_ptr = connp; 5899 5900 WR(q)->q_qinfo = &icmpwinit; 5901 5902 connp->conn_rq = RD(q); 5903 connp->conn_wq = WR(q); 5904 5905 /* Notify stream head about options before sending up data */ 5906 stropt_mp->b_datap->db_type = M_SETOPTS; 5907 stropt_mp->b_wptr += sizeof (*stropt); 5908 stropt = (struct stroptions *)stropt_mp->b_rptr; 5909 stropt->so_flags = SO_WROFF | SO_HIWAT; 5910 stropt->so_wroff = 5911 (ushort_t)(icmp->icmp_max_hdr_len + icmp->icmp_is->is_wroff_extra); 5912 stropt->so_hiwat = icmp->icmp_recv_hiwat; 5913 putnext(RD(q), stropt_mp); 5914 5915 /* 5916 * free helper stream 5917 */ 5918 ip_close_helper_stream(connp); 
5919 5920 /* 5921 * Collect the information needed to sync with the sonode 5922 */ 5923 icmp_do_capability_ack(icmp, &tca, TC1_INFO); 5924 5925 laddrlen = faddrlen = sizeof (sin6_t); 5926 (void) rawip_getsockname((sock_lower_handle_t)connp, 5927 (struct sockaddr *)&laddr, &laddrlen, NULL); 5928 error = rawip_getpeername((sock_lower_handle_t)connp, 5929 (struct sockaddr *)&faddr, &faddrlen, NULL); 5930 if (error != 0) 5931 faddrlen = 0; 5932 opts = 0; 5933 if (icmp->icmp_dgram_errind) 5934 opts |= SO_DGRAM_ERRIND; 5935 if (icmp->icmp_dontroute) 5936 opts |= SO_DONTROUTE; 5937 5938 /* 5939 * Once we grab the drain lock, no data will be send up 5940 * to the socket. So we notify the socket that the endpoint 5941 * is quiescent and it's therefore safe move data from 5942 * the socket to the stream head. 5943 */ 5944 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 5945 (struct sockaddr *)&laddr, laddrlen, 5946 (struct sockaddr *)&faddr, faddrlen, opts); 5947 5948 /* 5949 * push up any packets that were queued in icmp_t 5950 */ 5951 5952 mutex_enter(&icmp->icmp_recv_lock); 5953 while (icmp->icmp_fallback_queue_head != NULL) { 5954 mblk_t *mp; 5955 5956 mp = icmp->icmp_fallback_queue_head; 5957 icmp->icmp_fallback_queue_head = mp->b_next; 5958 mp->b_next = NULL; 5959 mutex_exit(&icmp->icmp_recv_lock); 5960 putnext(RD(q), mp); 5961 mutex_enter(&icmp->icmp_recv_lock); 5962 } 5963 icmp->icmp_fallback_queue_tail = icmp->icmp_fallback_queue_head; 5964 /* 5965 * No longer a streams less socket 5966 */ 5967 connp->conn_flags &= ~IPCL_NONSTR; 5968 mutex_exit(&icmp->icmp_recv_lock); 5969 ASSERT(icmp->icmp_fallback_queue_head == NULL && 5970 icmp->icmp_fallback_queue_tail == NULL); 5971 5972 ASSERT(connp->conn_ref >= 1); 5973 } 5974 5975 /* ARGSUSED */ 5976 sock_lower_handle_t 5977 rawip_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 5978 uint_t *smodep, int *errorp, int flags, cred_t *credp) 5979 { 5980 conn_t *connp; 5981 5982 if (type != SOCK_RAW || 
(family != AF_INET && family != AF_INET6)) { 5983 *errorp = EPROTONOSUPPORT; 5984 return (NULL); 5985 } 5986 5987 connp = icmp_open(family, credp, errorp, flags); 5988 if (connp != NULL) { 5989 icmp_stack_t *is; 5990 5991 is = connp->conn_icmp->icmp_is; 5992 connp->conn_flags |= IPCL_NONSTR; 5993 5994 if (connp->conn_icmp->icmp_family == AF_INET6) { 5995 /* Build initial header template for transmit */ 5996 rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER); 5997 if ((*errorp = 5998 icmp_build_hdrs(connp->conn_icmp)) != 0) { 5999 rw_exit(&connp->conn_icmp->icmp_rwlock); 6000 ipcl_conn_destroy(connp); 6001 return (NULL); 6002 } 6003 rw_exit(&connp->conn_icmp->icmp_rwlock); 6004 } 6005 6006 connp->conn_icmp->icmp_recv_hiwat = is->is_recv_hiwat; 6007 connp->conn_icmp->icmp_xmit_hiwat = is->is_xmit_hiwat; 6008 6009 if ((*errorp = ip_create_helper_stream(connp, 6010 is->is_ldi_ident)) != 0) { 6011 cmn_err(CE_CONT, "create of IP helper stream failed\n"); 6012 (void) rawip_do_close(connp); 6013 return (NULL); 6014 } 6015 6016 mutex_enter(&connp->conn_lock); 6017 connp->conn_state_flags &= ~CONN_INCIPIENT; 6018 mutex_exit(&connp->conn_lock); 6019 *sock_downcalls = &sock_rawip_downcalls; 6020 *smodep = SM_ATOMIC; 6021 } else { 6022 ASSERT(*errorp != 0); 6023 } 6024 6025 return ((sock_lower_handle_t)connp); 6026 } 6027 6028 /* ARGSUSED */ 6029 void 6030 rawip_activate(sock_lower_handle_t proto_handle, 6031 sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, int flags, 6032 cred_t *cr) 6033 { 6034 conn_t *connp = (conn_t *)proto_handle; 6035 icmp_stack_t *is = connp->conn_icmp->icmp_is; 6036 struct sock_proto_props sopp; 6037 6038 connp->conn_upcalls = sock_upcalls; 6039 connp->conn_upper_handle = sock_handle; 6040 6041 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 6042 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 6043 sopp.sopp_wroff = connp->conn_icmp->icmp_max_hdr_len + 6044 is->is_wroff_extra; 6045 sopp.sopp_rxhiwat = 
is->is_recv_hiwat; 6046 sopp.sopp_rxlowat = icmp_mod_info.mi_lowat; 6047 sopp.sopp_maxblk = INFPSZ; 6048 sopp.sopp_maxpsz = IP_MAXPACKET; 6049 sopp.sopp_minpsz = (icmp_mod_info.mi_minpsz == 1) ? 0 : 6050 icmp_mod_info.mi_minpsz; 6051 6052 (*connp->conn_upcalls->su_set_proto_props) 6053 (connp->conn_upper_handle, &sopp); 6054 } 6055 6056 static int 6057 rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp) 6058 { 6059 sin_t *sin = (sin_t *)sa; 6060 sin6_t *sin6 = (sin6_t *)sa; 6061 6062 ASSERT(icmp != NULL); 6063 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 6064 6065 switch (icmp->icmp_family) { 6066 case AF_INET: 6067 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 6068 if (*salenp < sizeof (sin_t)) 6069 return (EINVAL); 6070 6071 *salenp = sizeof (sin_t); 6072 *sin = sin_null; 6073 sin->sin_family = AF_INET; 6074 if (icmp->icmp_state == TS_UNBND) { 6075 break; 6076 } 6077 6078 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 6079 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 6080 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_v6src); 6081 } else { 6082 /* 6083 * INADDR_ANY 6084 * icmp_v6src is not set, we might be bound to 6085 * broadcast/multicast. Use icmp_bound_v6src as 6086 * local address instead (that could 6087 * also still be INADDR_ANY) 6088 */ 6089 sin->sin_addr.s_addr = 6090 V4_PART_OF_V6(icmp->icmp_bound_v6src); 6091 } 6092 break; 6093 case AF_INET6: 6094 6095 if (*salenp < sizeof (sin6_t)) 6096 return (EINVAL); 6097 6098 *salenp = sizeof (sin6_t); 6099 *sin6 = sin6_null; 6100 sin6->sin6_family = AF_INET6; 6101 if (icmp->icmp_state == TS_UNBND) { 6102 break; 6103 } 6104 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 6105 sin6->sin6_addr = icmp->icmp_v6src; 6106 } else { 6107 /* 6108 * UNSPECIFIED 6109 * icmp_v6src is not set, we might be bound to 6110 * broadcast/multicast. 
Use icmp_bound_v6src as 6111 * local address instead (that could 6112 * also still be UNSPECIFIED) 6113 */ 6114 6115 sin6->sin6_addr = icmp->icmp_bound_v6src; 6116 } 6117 break; 6118 } 6119 return (0); 6120 } 6121 6122 static int 6123 rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp) 6124 { 6125 sin_t *sin = (sin_t *)sa; 6126 sin6_t *sin6 = (sin6_t *)sa; 6127 6128 ASSERT(icmp != NULL); 6129 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 6130 6131 if (icmp->icmp_state != TS_DATA_XFER) 6132 return (ENOTCONN); 6133 6134 sa->sa_family = icmp->icmp_family; 6135 switch (icmp->icmp_family) { 6136 case AF_INET: 6137 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 6138 6139 if (*salenp < sizeof (sin_t)) 6140 return (EINVAL); 6141 6142 *salenp = sizeof (sin_t); 6143 *sin = sin_null; 6144 sin->sin_family = AF_INET; 6145 sin->sin_addr.s_addr = 6146 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr); 6147 break; 6148 case AF_INET6: 6149 if (*salenp < sizeof (sin6_t)) 6150 return (EINVAL); 6151 6152 *salenp = sizeof (sin6_t); 6153 *sin6 = sin6_null; 6154 *sin6 = icmp->icmp_v6dst; 6155 break; 6156 } 6157 return (0); 6158 } 6159 6160 /* ARGSUSED */ 6161 int 6162 rawip_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6163 socklen_t *salenp, cred_t *cr) 6164 { 6165 conn_t *connp = (conn_t *)proto_handle; 6166 icmp_t *icmp = connp->conn_icmp; 6167 int error; 6168 6169 ASSERT(icmp != NULL); 6170 6171 rw_enter(&icmp->icmp_rwlock, RW_READER); 6172 6173 error = rawip_do_getpeername(icmp, sa, salenp); 6174 6175 rw_exit(&icmp->icmp_rwlock); 6176 6177 return (error); 6178 } 6179 6180 /* ARGSUSED */ 6181 int 6182 rawip_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6183 socklen_t *salenp, cred_t *cr) 6184 { 6185 conn_t *connp = (conn_t *)proto_handle; 6186 icmp_t *icmp = connp->conn_icmp; 6187 int error; 6188 6189 ASSERT(icmp != NULL); 6190 rw_enter(&icmp->icmp_rwlock, RW_READER); 6191 6192 error = rawip_do_getsockname(icmp, sa, salenp); 6193 6194 
rw_exit(&icmp->icmp_rwlock); 6195 6196 return (error); 6197 } 6198 6199 int 6200 rawip_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6201 const void *optvalp, socklen_t optlen, cred_t *cr) 6202 { 6203 conn_t *connp = (conn_t *)proto_handle; 6204 icmp_t *icmp = connp->conn_icmp; 6205 int error; 6206 6207 error = proto_opt_check(level, option_name, optlen, NULL, 6208 icmp_opt_obj.odb_opt_des_arr, 6209 icmp_opt_obj.odb_opt_arr_cnt, 6210 icmp_opt_obj.odb_topmost_tpiprovider, 6211 B_TRUE, B_FALSE, cr); 6212 6213 if (error != 0) { 6214 /* 6215 * option not recognized 6216 */ 6217 if (error < 0) { 6218 error = proto_tlitosyserr(-error); 6219 } 6220 return (error); 6221 } 6222 6223 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6224 error = icmp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, 6225 option_name, optlen, (uchar_t *)optvalp, (uint_t *)&optlen, 6226 (uchar_t *)optvalp, NULL, cr); 6227 rw_exit(&icmp->icmp_rwlock); 6228 6229 if (error < 0) { 6230 /* 6231 * Pass on to ip 6232 */ 6233 error = ip_set_options(connp, level, option_name, optvalp, 6234 optlen, cr); 6235 } 6236 6237 ASSERT(error >= 0); 6238 6239 return (error); 6240 } 6241 6242 int 6243 rawip_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6244 void *optvalp, socklen_t *optlen, cred_t *cr) 6245 { 6246 int error; 6247 conn_t *connp = (conn_t *)proto_handle; 6248 icmp_t *icmp = connp->conn_icmp; 6249 t_uscalar_t max_optbuf_len; 6250 void *optvalp_buf; 6251 int len; 6252 6253 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6254 icmp_opt_obj.odb_opt_des_arr, 6255 icmp_opt_obj.odb_opt_arr_cnt, 6256 icmp_opt_obj.odb_topmost_tpiprovider, 6257 B_FALSE, B_TRUE, cr); 6258 6259 if (error != 0) { 6260 if (error < 0) { 6261 error = proto_tlitosyserr(-error); 6262 } 6263 return (error); 6264 } 6265 6266 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6267 rw_enter(&icmp->icmp_rwlock, RW_READER); 6268 len = icmp_opt_get(connp, level, option_name, 
optvalp_buf); 6269 rw_exit(&icmp->icmp_rwlock); 6270 6271 if (len < 0) { 6272 /* 6273 * Pass on to IP 6274 */ 6275 kmem_free(optvalp_buf, max_optbuf_len); 6276 return (ip_get_options(connp, level, option_name, optvalp, 6277 optlen, cr)); 6278 } else { 6279 /* 6280 * update optlen and copy option value 6281 */ 6282 t_uscalar_t size = MIN(len, *optlen); 6283 bcopy(optvalp_buf, optvalp, size); 6284 bcopy(&size, optlen, sizeof (size)); 6285 6286 kmem_free(optvalp_buf, max_optbuf_len); 6287 return (0); 6288 } 6289 } 6290 6291 /* ARGSUSED */ 6292 int 6293 rawip_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 6294 { 6295 conn_t *connp = (conn_t *)proto_handle; 6296 (void) rawip_do_close(connp); 6297 return (0); 6298 } 6299 6300 /* ARGSUSED */ 6301 int 6302 rawip_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6303 { 6304 conn_t *connp = (conn_t *)proto_handle; 6305 6306 /* shut down the send side */ 6307 if (how != SHUT_RD) 6308 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6309 SOCK_OPCTL_SHUT_SEND, 0); 6310 /* shut down the recv side */ 6311 if (how != SHUT_WR) 6312 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6313 SOCK_OPCTL_SHUT_RECV, 0); 6314 return (0); 6315 } 6316 6317 void 6318 rawip_clr_flowctrl(sock_lower_handle_t proto_handle) 6319 { 6320 conn_t *connp = (conn_t *)proto_handle; 6321 icmp_t *icmp = connp->conn_icmp; 6322 6323 mutex_enter(&icmp->icmp_recv_lock); 6324 connp->conn_flow_cntrld = B_FALSE; 6325 mutex_exit(&icmp->icmp_recv_lock); 6326 } 6327 6328 int 6329 rawip_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6330 int mode, int32_t *rvalp, cred_t *cr) 6331 { 6332 conn_t *connp = (conn_t *)proto_handle; 6333 int error; 6334 6335 switch (cmd) { 6336 case ND_SET: 6337 case ND_GET: 6338 case _SIOCSOCKFALLBACK: 6339 case TI_GETPEERNAME: 6340 case TI_GETMYNAME: 6341 #ifdef DEBUG 6342 cmn_err(CE_CONT, "icmp_ioctl cmd 0x%x on non streams" 6343 " socket", cmd); 6344 #endif 6345 error = 
EINVAL; 6346 break; 6347 default: 6348 /* 6349 * Pass on to IP using helper stream 6350 */ 6351 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6352 cmd, arg, mode, cr, rvalp); 6353 break; 6354 } 6355 return (error); 6356 } 6357 6358 /* ARGSUSED */ 6359 int 6360 rawip_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 6361 cred_t *cr) 6362 { 6363 conn_t *connp = (conn_t *)proto_handle; 6364 icmp_t *icmp = connp->conn_icmp; 6365 icmp_stack_t *is = icmp->icmp_is; 6366 int error = 0; 6367 boolean_t bypass_dgram_errind = B_FALSE; 6368 6369 ASSERT(DB_TYPE(mp) == M_DATA); 6370 6371 if (is_system_labeled()) 6372 msg_setcredpid(mp, cr, curproc->p_pid); 6373 6374 /* do an implicit bind if necessary */ 6375 if (icmp->icmp_state == TS_UNBND) { 6376 error = rawip_implicit_bind(connp); 6377 /* 6378 * We could be racing with an actual bind, in which case 6379 * we would see EPROTO. We cross our fingers and try 6380 * to connect. 6381 */ 6382 if (!(error == 0 || error == EPROTO)) { 6383 freemsg(mp); 6384 return (error); 6385 } 6386 } 6387 6388 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6389 6390 if (msg->msg_name != NULL && icmp->icmp_state == TS_DATA_XFER) { 6391 error = EISCONN; 6392 goto done_lock; 6393 } 6394 6395 switch (icmp->icmp_family) { 6396 case AF_INET6: { 6397 sin6_t *sin6; 6398 ip6_pkt_t ipp_s; /* For ancillary data options */ 6399 ip6_pkt_t *ipp = &ipp_s; 6400 6401 sin6 = (sin6_t *)msg->msg_name; 6402 if (sin6 != NULL) { 6403 error = proto_verify_ip_addr(icmp->icmp_family, 6404 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6405 if (error != 0) { 6406 bypass_dgram_errind = B_TRUE; 6407 goto done_lock; 6408 } 6409 if (icmp->icmp_delayed_error != 0) { 6410 sin6_t *sin1 = (sin6_t *)msg->msg_name; 6411 sin6_t *sin2 = (sin6_t *) 6412 &icmp->icmp_delayed_addr; 6413 6414 error = icmp->icmp_delayed_error; 6415 icmp->icmp_delayed_error = 0; 6416 6417 /* Compare IP address and port */ 6418 6419 if (sin1->sin6_port == sin2->sin6_port && 6420 
IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 6421 &sin2->sin6_addr)) { 6422 goto done_lock; 6423 } 6424 } 6425 } else { 6426 /* 6427 * Use connected address 6428 */ 6429 if (icmp->icmp_state != TS_DATA_XFER) { 6430 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6431 error = EDESTADDRREQ; 6432 bypass_dgram_errind = B_TRUE; 6433 goto done_lock; 6434 } 6435 sin6 = &icmp->icmp_v6dst; 6436 } 6437 6438 /* No support for mapped addresses on raw sockets */ 6439 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6440 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6441 error = EADDRNOTAVAIL; 6442 goto done_lock; 6443 } 6444 6445 ipp->ipp_fields = 0; 6446 ipp->ipp_sticky_ignored = 0; 6447 6448 /* 6449 * If options passed in, feed it for verification and handling 6450 */ 6451 if (msg->msg_controllen != 0) { 6452 error = process_auxiliary_options(connp, 6453 msg->msg_control, msg->msg_controllen, 6454 ipp, &icmp_opt_obj, icmp_opt_set); 6455 if (error != 0) { 6456 goto done_lock; 6457 } 6458 } 6459 6460 rw_exit(&icmp->icmp_rwlock); 6461 6462 /* 6463 * Destination is a native IPv6 address. 6464 * Send out an IPv6 format packet. 
6465 */ 6466 6467 error = raw_ip_send_data_v6(connp->conn_wq, connp, mp, sin6, 6468 ipp); 6469 } 6470 break; 6471 case AF_INET: { 6472 sin_t *sin; 6473 ip4_pkt_t pktinfo; 6474 ip4_pkt_t *pktinfop = &pktinfo; 6475 ipaddr_t v4dst; 6476 6477 sin = (sin_t *)msg->msg_name; 6478 if (sin != NULL) { 6479 error = proto_verify_ip_addr(icmp->icmp_family, 6480 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6481 if (error != 0) { 6482 bypass_dgram_errind = B_TRUE; 6483 goto done_lock; 6484 } 6485 v4dst = sin->sin_addr.s_addr; 6486 if (icmp->icmp_delayed_error != 0) { 6487 sin_t *sin1 = (sin_t *)msg->msg_name; 6488 sin_t *sin2 = (sin_t *)&icmp->icmp_delayed_addr; 6489 6490 error = icmp->icmp_delayed_error; 6491 icmp->icmp_delayed_error = 0; 6492 6493 /* Compare IP address and port */ 6494 if (sin1->sin_port == sin2->sin_port && 6495 sin1->sin_addr.s_addr == 6496 sin2->sin_addr.s_addr) { 6497 goto done_lock; 6498 } 6499 6500 } 6501 } else { 6502 /* 6503 * Use connected address 6504 */ 6505 if (icmp->icmp_state != TS_DATA_XFER) { 6506 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6507 error = EDESTADDRREQ; 6508 bypass_dgram_errind = B_TRUE; 6509 goto done_lock; 6510 } 6511 v4dst = V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr); 6512 } 6513 6514 6515 pktinfop->ip4_ill_index = 0; 6516 pktinfop->ip4_addr = INADDR_ANY; 6517 6518 /* 6519 * If options passed in, feed it for verification and handling 6520 */ 6521 if (msg->msg_controllen != 0) { 6522 error = process_auxiliary_options(connp, 6523 msg->msg_control, msg->msg_controllen, 6524 pktinfop, &icmp_opt_obj, icmp_opt_set); 6525 if (error != 0) { 6526 goto done_lock; 6527 } 6528 } 6529 rw_exit(&icmp->icmp_rwlock); 6530 6531 error = raw_ip_send_data_v4(connp->conn_wq, connp, mp, 6532 v4dst, pktinfop); 6533 break; 6534 } 6535 6536 default: 6537 ASSERT(0); 6538 } 6539 6540 goto done; 6541 6542 done_lock: 6543 rw_exit(&icmp->icmp_rwlock); 6544 if (error != 0) { 6545 ASSERT(mp != NULL); 6546 freemsg(mp); 6547 } 6548 done: 6549 if 
(bypass_dgram_errind) 6550 return (error); 6551 return (icmp->icmp_dgram_errind ? error : 0); 6552 } 6553 6554 sock_downcalls_t sock_rawip_downcalls = { 6555 rawip_activate, 6556 rawip_accept, 6557 rawip_bind, 6558 rawip_listen, 6559 rawip_connect, 6560 rawip_getpeername, 6561 rawip_getsockname, 6562 rawip_getsockopt, 6563 rawip_setsockopt, 6564 rawip_send, 6565 NULL, 6566 NULL, 6567 NULL, 6568 rawip_shutdown, 6569 rawip_clr_flowctrl, 6570 rawip_ioctl, 6571 rawip_close 6572 }; 6573