1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/stropts.h> 30 #include <sys/strlog.h> 31 #include <sys/strsun.h> 32 #define _SUN_TPI_VERSION 2 33 #include <sys/tihdr.h> 34 #include <sys/timod.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/kmem.h> 41 #include <sys/policy.h> 42 #include <sys/priv.h> 43 #include <sys/zone.h> 44 #include <sys/time.h> 45 46 #include <sys/sockio.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/isa_defs.h> 50 #include <sys/suntpi.h> 51 #include <sys/xti_inet.h> 52 #include <sys/netstack.h> 53 54 #include <net/route.h> 55 #include <net/if.h> 56 57 #include <netinet/in.h> 58 #include <netinet/ip6.h> 59 #include <netinet/icmp6.h> 60 #include <inet/common.h> 61 #include <inet/ip.h> 62 #include <inet/ip6.h> 63 #include <inet/proto_set.h> 64 #include <inet/nd.h> 65 #include <inet/optcom.h> 66 #include <inet/snmpcom.h> 67 #include <inet/kstatcom.h> 68 #include <inet/rawip_impl.h> 69 70 #include <netinet/ip_mroute.h> 71 #include <inet/tcp.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 #include <inet/ipclassifier.h> 75 76 #include <sys/tsol/label.h> 77 #include <sys/tsol/tnet.h> 78 79 #include <inet/ip_ire.h> 80 #include <inet/ip_if.h> 81 82 #include <inet/ip_impl.h> 83 #include <sys/disp.h> 84 85 /* 86 * Synchronization notes: 87 * 88 * RAWIP is MT and uses the usual kernel synchronization primitives. There is 89 * one lock, which is icmp_rwlock. We also use conn_lock when updating things 90 * which affect the IP classifier lookup. 91 * The lock order is icmp_rwlock -> conn_lock. 92 * 93 * The icmp_rwlock: 94 * This protects most of the other fields in the icmp_t. The exact list of 95 * fields which are protected by each of the above locks is documented in 96 * the icmp_t structure definition. 97 * 98 * Plumbing notes: 99 * ICMP is always a device driver. 
For compatibility with mibopen() code 100 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough 101 * dummy module. 102 */ 103 104 static void icmp_addr_req(queue_t *q, mblk_t *mp); 105 static void icmp_tpi_bind(queue_t *q, mblk_t *mp); 106 static int icmp_bind_proto(conn_t *connp); 107 static int icmp_build_hdrs(icmp_t *icmp); 108 static void icmp_capability_req(queue_t *q, mblk_t *mp); 109 static int icmp_close(queue_t *q, int flags); 110 static void icmp_tpi_connect(queue_t *q, mblk_t *mp); 111 static void icmp_tpi_disconnect(queue_t *q, mblk_t *mp); 112 static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 113 int sys_error); 114 static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 115 t_scalar_t t_error, int sys_error); 116 static void icmp_icmp_error(conn_t *connp, mblk_t *mp); 117 static void icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp); 118 static void icmp_info_req(queue_t *q, mblk_t *mp); 119 static void icmp_input(void *, mblk_t *, void *); 120 static conn_t *icmp_open(int family, cred_t *credp, int *err, int flags); 121 static int icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 122 cred_t *credp); 123 static int icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 124 cred_t *credp); 125 static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, 126 int *errorp, void *thisdg_attrs); 127 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 128 int icmp_opt_set(conn_t *connp, uint_t optset_context, 129 int level, int name, uint_t inlen, 130 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 131 void *thisdg_attrs, cred_t *cr); 132 int icmp_opt_get(conn_t *connp, int level, int name, 133 uchar_t *ptr); 134 static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 135 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt); 136 static int icmp_param_set(queue_t *q, mblk_t *mp, char *value, 137 caddr_t cp, cred_t 
*cr); 138 static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 139 uchar_t *ptr, int len); 140 static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 141 static void icmp_tpi_unbind(queue_t *q, mblk_t *mp); 142 static int icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst); 143 static void icmp_wput(queue_t *q, mblk_t *mp); 144 static void icmp_wput_fallback(queue_t *q, mblk_t *mp); 145 static int raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp, 146 sin6_t *sin6, ip6_pkt_t *ipp); 147 static int raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp, 148 ipaddr_t v4dst, ip4_pkt_t *pktinfop); 149 static void icmp_wput_other(queue_t *q, mblk_t *mp); 150 static void icmp_wput_iocdata(queue_t *q, mblk_t *mp); 151 static void icmp_wput_restricted(queue_t *q, mblk_t *mp); 152 static void icmp_ulp_recv(conn_t *, mblk_t *); 153 154 static void *rawip_stack_init(netstackid_t stackid, netstack_t *ns); 155 static void rawip_stack_fini(netstackid_t stackid, void *arg); 156 157 static void *rawip_kstat_init(netstackid_t stackid); 158 static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp); 159 static int rawip_kstat_update(kstat_t *kp, int rw); 160 static void rawip_stack_shutdown(netstackid_t stackid, void *arg); 161 static int rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa, 162 uint_t *salenp); 163 static int rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa, 164 uint_t *salenp); 165 166 int rawip_getsockname(sock_lower_handle_t, struct sockaddr *, 167 socklen_t *, cred_t *); 168 int rawip_getpeername(sock_lower_handle_t, struct sockaddr *, 169 socklen_t *, cred_t *); 170 171 static struct module_info icmp_mod_info = { 172 5707, "icmp", 1, INFPSZ, 512, 128 173 }; 174 175 /* 176 * Entry points for ICMP as a device. 177 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices. 
178 */ 179 static struct qinit icmprinitv4 = { 180 NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info 181 }; 182 183 static struct qinit icmprinitv6 = { 184 NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info 185 }; 186 187 static struct qinit icmpwinit = { 188 (pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &icmp_mod_info 189 }; 190 191 /* ICMP entry point during fallback */ 192 static struct qinit icmp_fallback_sock_winit = { 193 (pfi_t)icmp_wput_fallback, NULL, NULL, NULL, NULL, &icmp_mod_info 194 }; 195 196 /* For AF_INET aka /dev/icmp */ 197 struct streamtab icmpinfov4 = { 198 &icmprinitv4, &icmpwinit 199 }; 200 201 /* For AF_INET6 aka /dev/icmp6 */ 202 struct streamtab icmpinfov6 = { 203 &icmprinitv6, &icmpwinit 204 }; 205 206 static sin_t sin_null; /* Zero address for quick clears */ 207 static sin6_t sin6_null; /* Zero address for quick clears */ 208 209 /* Default structure copied into T_INFO_ACK messages */ 210 static struct T_info_ack icmp_g_t_info_ack = { 211 T_INFO_ACK, 212 IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */ 213 T_INVALID, /* ETSDU_size. icmp does not support expedited data. */ 214 T_INVALID, /* CDATA_size. icmp does not support connect data. */ 215 T_INVALID, /* DDATA_size. icmp does not support disconnect data. */ 216 0, /* ADDR_size - filled in later. */ 217 0, /* OPT_size - not initialized here */ 218 IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */ 219 T_CLTS, /* SERV_type. icmp supports connection-less. */ 220 TS_UNBND, /* CURRENT_state. This is set from icmp_state. */ 221 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 222 }; 223 224 /* 225 * Table of ND variables supported by icmp. These are loaded into is_nd 226 * when the stack instance is created. 227 * All of these are alterable, within the min/max values given, at run time. 
228 */ 229 static icmpparam_t icmp_param_arr[] = { 230 /* min max value name */ 231 { 0, 128, 32, "icmp_wroff_extra" }, 232 { 1, 255, 255, "icmp_ipv4_ttl" }, 233 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"}, 234 { 0, 1, 1, "icmp_bsd_compat" }, 235 { 4096, 65536, 8192, "icmp_xmit_hiwat"}, 236 { 0, 65536, 1024, "icmp_xmit_lowat"}, 237 { 4096, 65536, 8192, "icmp_recv_hiwat"}, 238 { 65536, 1024*1024*1024, 256*1024, "icmp_max_buf"}, 239 }; 240 #define is_wroff_extra is_param_arr[0].icmp_param_value 241 #define is_ipv4_ttl is_param_arr[1].icmp_param_value 242 #define is_ipv6_hoplimit is_param_arr[2].icmp_param_value 243 #define is_bsd_compat is_param_arr[3].icmp_param_value 244 #define is_xmit_hiwat is_param_arr[4].icmp_param_value 245 #define is_xmit_lowat is_param_arr[5].icmp_param_value 246 #define is_recv_hiwat is_param_arr[6].icmp_param_value 247 #define is_max_buf is_param_arr[7].icmp_param_value 248 249 static int rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len); 250 static int rawip_do_connect(conn_t *connp, const struct sockaddr *sa, 251 socklen_t len, cred_t *cr); 252 static void rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error); 253 254 /* 255 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 256 * passed to icmp_wput. 257 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP 258 * protocol type placed in the message following the address. A T_BIND_ACK 259 * message is returned by ip_bind_v4/v6. 260 */ 261 static void 262 icmp_tpi_bind(queue_t *q, mblk_t *mp) 263 { 264 int error; 265 struct sockaddr *sa; 266 struct T_bind_req *tbr; 267 socklen_t len; 268 sin_t *sin; 269 sin6_t *sin6; 270 icmp_t *icmp; 271 conn_t *connp = Q_TO_CONN(q); 272 mblk_t *mp1; 273 cred_t *cr; 274 275 /* 276 * All Solaris components should pass a db_credp 277 * for this TPI message, hence we ASSERT. 
278 * But in case there is some other M_PROTO that looks 279 * like a TPI message sent by some other kernel 280 * component, we check and return an error. 281 */ 282 cr = msg_getcred(mp, NULL); 283 ASSERT(cr != NULL); 284 if (cr == NULL) { 285 icmp_err_ack(q, mp, TSYSERR, EINVAL); 286 return; 287 } 288 289 icmp = connp->conn_icmp; 290 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 291 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 292 "icmp_bind: bad req, len %u", 293 (uint_t)(mp->b_wptr - mp->b_rptr)); 294 icmp_err_ack(q, mp, TPROTO, 0); 295 return; 296 } 297 298 if (icmp->icmp_state != TS_UNBND) { 299 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 300 "icmp_bind: bad state, %d", icmp->icmp_state); 301 icmp_err_ack(q, mp, TOUTSTATE, 0); 302 return; 303 } 304 305 /* 306 * Reallocate the message to make sure we have enough room for an 307 * address and the protocol type. 308 */ 309 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 310 if (!mp1) { 311 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 312 return; 313 } 314 mp = mp1; 315 316 /* Reset the message type in preparation for shipping it back. 
*/ 317 DB_TYPE(mp) = M_PCPROTO; 318 tbr = (struct T_bind_req *)mp->b_rptr; 319 len = tbr->ADDR_length; 320 switch (len) { 321 case 0: /* request for a generic port */ 322 tbr->ADDR_offset = sizeof (struct T_bind_req); 323 if (icmp->icmp_family == AF_INET) { 324 tbr->ADDR_length = sizeof (sin_t); 325 sin = (sin_t *)&tbr[1]; 326 *sin = sin_null; 327 sin->sin_family = AF_INET; 328 mp->b_wptr = (uchar_t *)&sin[1]; 329 sa = (struct sockaddr *)sin; 330 len = sizeof (sin_t); 331 } else { 332 ASSERT(icmp->icmp_family == AF_INET6); 333 tbr->ADDR_length = sizeof (sin6_t); 334 sin6 = (sin6_t *)&tbr[1]; 335 *sin6 = sin6_null; 336 sin6->sin6_family = AF_INET6; 337 mp->b_wptr = (uchar_t *)&sin6[1]; 338 sa = (struct sockaddr *)sin6; 339 len = sizeof (sin6_t); 340 } 341 break; 342 343 case sizeof (sin_t): /* Complete IPv4 address */ 344 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 345 sizeof (sin_t)); 346 break; 347 348 case sizeof (sin6_t): /* Complete IPv6 address */ 349 sa = (struct sockaddr *)mi_offset_param(mp, 350 tbr->ADDR_offset, sizeof (sin6_t)); 351 break; 352 353 default: 354 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 355 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 356 icmp_err_ack(q, mp, TBADADDR, 0); 357 return; 358 } 359 360 error = rawip_do_bind(connp, sa, len); 361 done: 362 ASSERT(mp->b_cont == NULL); 363 if (error != 0) { 364 if (error > 0) { 365 icmp_err_ack(q, mp, TSYSERR, error); 366 } else { 367 icmp_err_ack(q, mp, -error, 0); 368 } 369 } else { 370 tbr->PRIM_type = T_BIND_ACK; 371 qreply(q, mp); 372 } 373 } 374 375 static int 376 rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len) 377 { 378 sin_t *sin; 379 sin6_t *sin6; 380 icmp_t *icmp; 381 int error = 0; 382 mblk_t *ire_mp; 383 384 385 icmp = connp->conn_icmp; 386 387 if (sa == NULL || !OK_32PTR((char *)sa)) { 388 return (EINVAL); 389 } 390 391 /* 392 * The state must be TS_UNBND. 
TPI mandates that users must send 393 * TPI primitives only 1 at a time and wait for the response before 394 * sending the next primitive. 395 */ 396 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 397 if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) { 398 error = -TOUTSTATE; 399 goto done; 400 } 401 402 ASSERT(len != 0); 403 switch (len) { 404 case sizeof (sin_t): /* Complete IPv4 address */ 405 sin = (sin_t *)sa; 406 if (sin->sin_family != AF_INET || 407 icmp->icmp_family != AF_INET) { 408 /* TSYSERR, EAFNOSUPPORT */ 409 error = EAFNOSUPPORT; 410 goto done; 411 } 412 break; 413 case sizeof (sin6_t): /* Complete IPv6 address */ 414 sin6 = (sin6_t *)sa; 415 if (sin6->sin6_family != AF_INET6 || 416 icmp->icmp_family != AF_INET6) { 417 /* TSYSERR, EAFNOSUPPORT */ 418 error = EAFNOSUPPORT; 419 goto done; 420 } 421 /* No support for mapped addresses on raw sockets */ 422 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 423 /* TSYSERR, EADDRNOTAVAIL */ 424 error = EADDRNOTAVAIL; 425 goto done; 426 } 427 break; 428 429 default: 430 /* TBADADDR */ 431 error = EADDRNOTAVAIL; 432 goto done; 433 } 434 435 icmp->icmp_pending_op = T_BIND_REQ; 436 icmp->icmp_state = TS_IDLE; 437 438 /* 439 * Copy the source address into our icmp structure. This address 440 * may still be zero; if so, ip will fill in the correct address 441 * each time an outbound packet is passed to it. 442 * If we are binding to a broadcast or multicast address then 443 * rawip_post_ip_bind_connect will clear the source address. 
444 */ 445 446 if (icmp->icmp_family == AF_INET) { 447 ASSERT(sin != NULL); 448 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 449 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 450 &icmp->icmp_v6src); 451 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 452 icmp->icmp_ip_snd_options_len; 453 icmp->icmp_bound_v6src = icmp->icmp_v6src; 454 } else { 455 int error; 456 457 ASSERT(sin6 != NULL); 458 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 459 icmp->icmp_v6src = sin6->sin6_addr; 460 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 461 icmp->icmp_bound_v6src = icmp->icmp_v6src; 462 463 /* Rebuild the header template */ 464 error = icmp_build_hdrs(icmp); 465 if (error != 0) { 466 icmp->icmp_pending_op = -1; 467 /* 468 * TSYSERR 469 */ 470 goto done; 471 } 472 } 473 474 ire_mp = NULL; 475 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 476 /* 477 * request an IRE if src not 0 (INADDR_ANY) 478 */ 479 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 480 if (ire_mp == NULL) { 481 icmp->icmp_pending_op = -1; 482 error = ENOMEM; 483 goto done; 484 } 485 DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE; 486 } 487 done: 488 rw_exit(&icmp->icmp_rwlock); 489 if (error != 0) 490 return (error); 491 492 if (icmp->icmp_family == AF_INET6) { 493 error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto, 494 &sin6->sin6_addr, sin6->sin6_port, B_TRUE); 495 } else { 496 error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto, 497 sin->sin_addr.s_addr, sin->sin_port, B_TRUE); 498 } 499 rawip_post_ip_bind_connect(icmp, ire_mp, error); 500 return (error); 501 } 502 503 static void 504 rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error) 505 { 506 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 507 if (icmp->icmp_state == TS_UNBND) { 508 /* 509 * not yet bound - bind sent by icmp_bind_proto. 
510 */ 511 rw_exit(&icmp->icmp_rwlock); 512 return; 513 } 514 ASSERT(icmp->icmp_pending_op != -1); 515 icmp->icmp_pending_op = -1; 516 517 if (error != 0) { 518 if (icmp->icmp_state == TS_DATA_XFER) { 519 /* Connect failed */ 520 /* Revert back to the bound source */ 521 icmp->icmp_v6src = icmp->icmp_bound_v6src; 522 icmp->icmp_state = TS_IDLE; 523 if (icmp->icmp_family == AF_INET6) 524 (void) icmp_build_hdrs(icmp); 525 } else { 526 V6_SET_ZERO(icmp->icmp_v6src); 527 V6_SET_ZERO(icmp->icmp_bound_v6src); 528 icmp->icmp_state = TS_UNBND; 529 if (icmp->icmp_family == AF_INET6) 530 (void) icmp_build_hdrs(icmp); 531 } 532 } else { 533 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 534 ire_t *ire; 535 536 ire = (ire_t *)ire_mp->b_rptr; 537 /* 538 * If a broadcast/multicast address was bound set 539 * the source address to 0. 540 * This ensures no datagrams with broadcast address 541 * as source address are emitted (which would violate 542 * RFC1122 - Hosts requirements) 543 * Note: we get IRE_BROADCAST for IPv6 544 * to "mark" a multicast local address. 545 */ 546 547 548 if (ire->ire_type == IRE_BROADCAST && 549 icmp->icmp_state != TS_DATA_XFER) { 550 /* 551 * This was just a local bind to a 552 * MC/broadcast addr 553 */ 554 V6_SET_ZERO(icmp->icmp_v6src); 555 if (icmp->icmp_family == AF_INET6) 556 (void) icmp_build_hdrs(icmp); 557 } 558 } 559 560 } 561 rw_exit(&icmp->icmp_rwlock); 562 if (ire_mp != NULL) 563 freeb(ire_mp); 564 } 565 566 /* 567 * Send message to IP to just bind to the protocol. 
568 */ 569 static int 570 icmp_bind_proto(conn_t *connp) 571 { 572 icmp_t *icmp; 573 int error; 574 575 icmp = connp->conn_icmp; 576 577 if (icmp->icmp_family == AF_INET6) 578 error = ip_proto_bind_laddr_v6(connp, NULL, icmp->icmp_proto, 579 &sin6_null.sin6_addr, 0, B_TRUE); 580 else 581 error = ip_proto_bind_laddr_v4(connp, NULL, icmp->icmp_proto, 582 sin_null.sin_addr.s_addr, 0, B_TRUE); 583 584 rawip_post_ip_bind_connect(icmp, NULL, error); 585 return (error); 586 } 587 588 static void 589 icmp_tpi_connect(queue_t *q, mblk_t *mp) 590 { 591 conn_t *connp = Q_TO_CONN(q); 592 struct T_conn_req *tcr; 593 icmp_t *icmp; 594 struct sockaddr *sa; 595 socklen_t len; 596 int error; 597 cred_t *cr; 598 599 /* 600 * All Solaris components should pass a db_credp 601 * for this TPI message, hence we ASSERT. 602 * But in case there is some other M_PROTO that looks 603 * like a TPI message sent by some other kernel 604 * component, we check and return an error. 605 */ 606 cr = msg_getcred(mp, NULL); 607 ASSERT(cr != NULL); 608 if (cr == NULL) { 609 icmp_err_ack(q, mp, TSYSERR, EINVAL); 610 return; 611 } 612 613 icmp = connp->conn_icmp; 614 tcr = (struct T_conn_req *)mp->b_rptr; 615 /* Sanity checks */ 616 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 617 icmp_err_ack(q, mp, TPROTO, 0); 618 return; 619 } 620 621 if (tcr->OPT_length != 0) { 622 icmp_err_ack(q, mp, TBADOPT, 0); 623 return; 624 } 625 626 len = tcr->DEST_length; 627 628 switch (len) { 629 default: 630 icmp_err_ack(q, mp, TBADADDR, 0); 631 return; 632 case sizeof (sin_t): 633 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 634 sizeof (sin_t)); 635 break; 636 case sizeof (sin6_t): 637 sa = (struct sockaddr *)mi_offset_param(mp, 638 tcr->DEST_offset, sizeof (sin6_t)); 639 break; 640 } 641 642 error = proto_verify_ip_addr(icmp->icmp_family, sa, len); 643 if (error != 0) { 644 icmp_err_ack(q, mp, TSYSERR, error); 645 return; 646 } 647 648 error = rawip_do_connect(connp, sa, len, cr); 649 if 
(error != 0) { 650 if (error < 0) { 651 icmp_err_ack(q, mp, -error, 0); 652 } else { 653 icmp_err_ack(q, mp, 0, error); 654 } 655 } else { 656 mblk_t *mp1; 657 658 /* 659 * We have to send a connection confirmation to 660 * keep TLI happy. 661 */ 662 if (icmp->icmp_family == AF_INET) { 663 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 664 sizeof (sin_t), NULL, 0); 665 } else { 666 ASSERT(icmp->icmp_family == AF_INET6); 667 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 668 sizeof (sin6_t), NULL, 0); 669 } 670 if (mp1 == NULL) { 671 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 672 return; 673 } 674 675 /* 676 * Send ok_ack for T_CONN_REQ 677 */ 678 mp = mi_tpi_ok_ack_alloc(mp); 679 if (mp == NULL) { 680 /* Unable to reuse the T_CONN_REQ for the ack. */ 681 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 682 return; 683 } 684 putnext(connp->conn_rq, mp); 685 putnext(connp->conn_rq, mp1); 686 } 687 } 688 689 static int 690 rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 691 cred_t *cr) 692 { 693 icmp_t *icmp; 694 sin_t *sin; 695 sin6_t *sin6; 696 mblk_t *ire_mp; 697 int error; 698 ipaddr_t v4dst; 699 in6_addr_t v6dst; 700 701 icmp = connp->conn_icmp; 702 703 if (sa == NULL || !OK_32PTR((char *)sa)) { 704 return (EINVAL); 705 } 706 707 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 708 if (ire_mp == NULL) 709 return (ENOMEM); 710 DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE; 711 712 713 ASSERT(sa != NULL && len != 0); 714 715 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 716 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 717 rw_exit(&icmp->icmp_rwlock); 718 freeb(ire_mp); 719 return (-TOUTSTATE); 720 } 721 722 switch (len) { 723 case sizeof (sin_t): 724 sin = (sin_t *)sa; 725 726 ASSERT(icmp->icmp_family == AF_INET); 727 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 728 729 v4dst = sin->sin_addr.s_addr; 730 /* 731 * Interpret a zero destination to mean loopback. 732 * Update the T_CONN_REQ (sin/sin6) since it is used to 733 * generate the T_CONN_CON. 
734 */ 735 if (v4dst == INADDR_ANY) { 736 v4dst = htonl(INADDR_LOOPBACK); 737 } 738 739 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 740 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 741 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 742 icmp->icmp_ip_snd_options_len; 743 icmp->icmp_v6dst.sin6_addr = v6dst; 744 icmp->icmp_v6dst.sin6_family = AF_INET6; 745 icmp->icmp_v6dst.sin6_flowinfo = 0; 746 icmp->icmp_v6dst.sin6_port = 0; 747 748 /* 749 * If the destination address is multicast and 750 * an outgoing multicast interface has been set, 751 * use the address of that interface as our 752 * source address if no source address has been set. 753 */ 754 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 755 CLASSD(v4dst) && 756 icmp->icmp_multicast_if_addr != INADDR_ANY) { 757 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 758 &icmp->icmp_v6src); 759 } 760 break; 761 case sizeof (sin6_t): 762 sin6 = (sin6_t *)sa; 763 764 /* No support for mapped addresses on raw sockets */ 765 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 766 rw_exit(&icmp->icmp_rwlock); 767 freeb(ire_mp); 768 return (EADDRNOTAVAIL); 769 } 770 771 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 772 ASSERT(icmp->icmp_family == AF_INET6); 773 774 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 775 776 icmp->icmp_v6dst = *sin6; 777 icmp->icmp_v6dst.sin6_port = 0; 778 779 /* 780 * Interpret a zero destination to mean loopback. 781 * Update the T_CONN_REQ (sin/sin6) since it is used to 782 * generate the T_CONN_CON. 783 */ 784 if (IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6dst.sin6_addr)) { 785 icmp->icmp_v6dst.sin6_addr = ipv6_loopback; 786 } 787 /* 788 * If the destination address is multicast and 789 * an outgoing multicast interface has been set, 790 * then the ip bind logic will pick the correct source 791 * address (i.e. matching the outgoing multicast interface). 
792 */ 793 break; 794 } 795 796 icmp->icmp_pending_op = T_CONN_REQ; 797 798 if (icmp->icmp_state == TS_DATA_XFER) { 799 /* Already connected - clear out state */ 800 icmp->icmp_v6src = icmp->icmp_bound_v6src; 801 icmp->icmp_state = TS_IDLE; 802 } 803 804 icmp->icmp_state = TS_DATA_XFER; 805 rw_exit(&icmp->icmp_rwlock); 806 807 if (icmp->icmp_family == AF_INET6) { 808 error = ip_proto_bind_connected_v6(connp, &ire_mp, 809 icmp->icmp_proto, &icmp->icmp_v6src, 0, 810 &icmp->icmp_v6dst.sin6_addr, 811 NULL, sin6->sin6_port, B_TRUE, B_TRUE, cr); 812 } else { 813 error = ip_proto_bind_connected_v4(connp, &ire_mp, 814 icmp->icmp_proto, &V4_PART_OF_V6(icmp->icmp_v6src), 0, 815 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr), sin->sin_port, 816 B_TRUE, B_TRUE, cr); 817 } 818 rawip_post_ip_bind_connect(icmp, ire_mp, error); 819 return (error); 820 } 821 822 static void 823 icmp_close_free(conn_t *connp) 824 { 825 icmp_t *icmp = connp->conn_icmp; 826 827 /* If there are any options associated with the stream, free them. */ 828 if (icmp->icmp_ip_snd_options != NULL) { 829 mi_free((char *)icmp->icmp_ip_snd_options); 830 icmp->icmp_ip_snd_options = NULL; 831 icmp->icmp_ip_snd_options_len = 0; 832 } 833 834 if (icmp->icmp_filter != NULL) { 835 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 836 icmp->icmp_filter = NULL; 837 } 838 839 /* Free memory associated with sticky options */ 840 if (icmp->icmp_sticky_hdrs_len != 0) { 841 kmem_free(icmp->icmp_sticky_hdrs, 842 icmp->icmp_sticky_hdrs_len); 843 icmp->icmp_sticky_hdrs = NULL; 844 icmp->icmp_sticky_hdrs_len = 0; 845 } 846 847 if (icmp->icmp_last_cred != NULL) { 848 crfree(icmp->icmp_last_cred); 849 icmp->icmp_last_cred = NULL; 850 } 851 852 if (icmp->icmp_effective_cred != NULL) { 853 crfree(icmp->icmp_effective_cred); 854 icmp->icmp_effective_cred = NULL; 855 } 856 857 ip6_pkt_free(&icmp->icmp_sticky_ipp); 858 859 /* 860 * Clear any fields which the kmem_cache constructor clears. 861 * Only icmp_connp needs to be preserved. 
862 * TBD: We should make this more efficient to avoid clearing 863 * everything. 864 */ 865 ASSERT(icmp->icmp_connp == connp); 866 bzero(icmp, sizeof (icmp_t)); 867 icmp->icmp_connp = connp; 868 } 869 870 static int 871 rawip_do_close(conn_t *connp) 872 { 873 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 874 875 ip_quiesce_conn(connp); 876 877 if (!IPCL_IS_NONSTR(connp)) { 878 qprocsoff(connp->conn_rq); 879 } 880 881 ASSERT(connp->conn_icmp->icmp_fallback_queue_head == NULL && 882 connp->conn_icmp->icmp_fallback_queue_tail == NULL); 883 icmp_close_free(connp); 884 885 /* 886 * Now we are truly single threaded on this stream, and can 887 * delete the things hanging off the connp, and finally the connp. 888 * We removed this connp from the fanout list, it cannot be 889 * accessed thru the fanouts, and we already waited for the 890 * conn_ref to drop to 0. We are already in close, so 891 * there cannot be any other thread from the top. qprocsoff 892 * has completed, and service has completed or won't run in 893 * future. 894 */ 895 ASSERT(connp->conn_ref == 1); 896 897 if (!IPCL_IS_NONSTR(connp)) { 898 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 899 } else { 900 ip_free_helper_stream(connp); 901 } 902 903 connp->conn_ref--; 904 ipcl_conn_destroy(connp); 905 906 return (0); 907 } 908 909 static int 910 icmp_close(queue_t *q, int flags) 911 { 912 conn_t *connp; 913 914 if (flags & SO_FALLBACK) { 915 /* 916 * stream is being closed while in fallback 917 * simply free the resources that were allocated 918 */ 919 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 920 qprocsoff(q); 921 goto done; 922 } 923 924 connp = Q_TO_CONN(q); 925 (void) rawip_do_close(connp); 926 done: 927 q->q_ptr = WR(q)->q_ptr = NULL; 928 return (0); 929 } 930 931 /* 932 * This routine handles each T_DISCON_REQ message passed to icmp 933 * as an indicating that ICMP is no longer connected. 
This results 934 * in sending a T_BIND_REQ to IP to restore the binding to just 935 * the local address. 936 * 937 * The disconnect completes in rawip_post_ip_bind_connect. 938 */ 939 static int 940 icmp_do_disconnect(conn_t *connp) 941 { 942 icmp_t *icmp; 943 mblk_t *ire_mp; 944 int error; 945 946 icmp = connp->conn_icmp; 947 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 948 if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) { 949 rw_exit(&icmp->icmp_rwlock); 950 return (-TOUTSTATE); 951 } 952 icmp->icmp_pending_op = T_DISCON_REQ; 953 icmp->icmp_v6src = icmp->icmp_bound_v6src; 954 icmp->icmp_state = TS_IDLE; 955 956 957 if (icmp->icmp_family == AF_INET6) { 958 /* Rebuild the header template */ 959 error = icmp_build_hdrs(icmp); 960 if (error != 0) { 961 icmp->icmp_pending_op = -1; 962 rw_exit(&icmp->icmp_rwlock); 963 return (error); 964 } 965 } 966 967 rw_exit(&icmp->icmp_rwlock); 968 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 969 if (ire_mp == NULL) { 970 return (ENOMEM); 971 } 972 973 if (icmp->icmp_family == AF_INET6) { 974 error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto, 975 &icmp->icmp_bound_v6src, 0, B_TRUE); 976 } else { 977 978 error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto, 979 V4_PART_OF_V6(icmp->icmp_bound_v6src), 0, B_TRUE); 980 } 981 982 rawip_post_ip_bind_connect(icmp, ire_mp, error); 983 984 return (error); 985 } 986 987 static void 988 icmp_tpi_disconnect(queue_t *q, mblk_t *mp) 989 { 990 conn_t *connp = Q_TO_CONN(q); 991 int error; 992 993 /* 994 * Allocate the largest primitive we need to send back 995 * T_error_ack is > than T_ok_ack 996 */ 997 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 998 if (mp == NULL) { 999 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 1000 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 1001 return; 1002 } 1003 1004 error = icmp_do_disconnect(connp); 1005 1006 if (error != 0) { 1007 if (error > 0) { 1008 icmp_err_ack(q, mp, 0, error); 1009 } else { 1010 icmp_err_ack(q, mp, -error, 0); 1011 } 1012 } else { 1013 mp = mi_tpi_ok_ack_alloc(mp); 1014 ASSERT(mp != NULL); 1015 qreply(q, mp); 1016 } 1017 1018 } 1019 1020 static int 1021 icmp_disconnect(conn_t *connp) 1022 { 1023 int error; 1024 icmp_t *icmp = connp->conn_icmp; 1025 1026 icmp->icmp_dgram_errind = B_FALSE; 1027 1028 error = icmp_do_disconnect(connp); 1029 1030 if (error < 0) 1031 error = proto_tlitosyserr(-error); 1032 return (error); 1033 } 1034 1035 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 1036 static void 1037 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1038 { 1039 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1040 qreply(q, mp); 1041 } 1042 1043 /* Shorthand to generate and send TPI error acks to our client */ 1044 static void 1045 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 1046 t_scalar_t t_error, int sys_error) 1047 { 1048 struct T_error_ack *teackp; 1049 1050 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1051 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1052 teackp = (struct T_error_ack *)mp->b_rptr; 1053 teackp->ERROR_prim = primitive; 1054 teackp->TLI_error = t_error; 1055 teackp->UNIX_error = sys_error; 1056 qreply(q, mp); 1057 } 1058 } 1059 1060 /* 1061 * icmp_icmp_error is called by icmp_input to process ICMP 1062 * messages passed up by IP. 1063 * Generates the appropriate permanent (non-transient) errors. 1064 * Assumes that IP has pulled up everything up to and including 1065 * the ICMP header. 
1066 */ 1067 static void 1068 icmp_icmp_error(conn_t *connp, mblk_t *mp) 1069 { 1070 icmph_t *icmph; 1071 ipha_t *ipha; 1072 int iph_hdr_length; 1073 sin_t sin; 1074 mblk_t *mp1; 1075 int error = 0; 1076 icmp_t *icmp = connp->conn_icmp; 1077 1078 ipha = (ipha_t *)mp->b_rptr; 1079 1080 ASSERT(OK_32PTR(mp->b_rptr)); 1081 1082 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1083 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1084 icmp_icmp_error_ipv6(connp, mp); 1085 return; 1086 } 1087 1088 /* 1089 * icmp does not support v4 mapped addresses 1090 * so we can never be here for a V6 socket 1091 * i.e. icmp_family == AF_INET6 1092 */ 1093 ASSERT((IPH_HDR_VERSION(ipha) == IPV4_VERSION) && 1094 (icmp->icmp_family == AF_INET)); 1095 1096 ASSERT(icmp->icmp_family == AF_INET); 1097 1098 /* Skip past the outer IP and ICMP headers */ 1099 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1100 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 1101 ipha = (ipha_t *)&icmph[1]; 1102 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1103 1104 switch (icmph->icmph_type) { 1105 case ICMP_DEST_UNREACHABLE: 1106 switch (icmph->icmph_code) { 1107 case ICMP_FRAGMENTATION_NEEDED: 1108 /* 1109 * IP has already adjusted the path MTU. 1110 */ 1111 break; 1112 case ICMP_PORT_UNREACHABLE: 1113 case ICMP_PROTOCOL_UNREACHABLE: 1114 error = ECONNREFUSED; 1115 break; 1116 default: 1117 /* Transient errors */ 1118 break; 1119 } 1120 break; 1121 default: 1122 /* Transient errors */ 1123 break; 1124 } 1125 if (error == 0) { 1126 freemsg(mp); 1127 return; 1128 } 1129 1130 /* 1131 * Deliver T_UDERROR_IND when the application has asked for it. 1132 * The socket layer enables this automatically when connected. 
1133 */ 1134 if (!icmp->icmp_dgram_errind) { 1135 freemsg(mp); 1136 return; 1137 } 1138 1139 sin = sin_null; 1140 sin.sin_family = AF_INET; 1141 sin.sin_addr.s_addr = ipha->ipha_dst; 1142 1143 if (IPCL_IS_NONSTR(connp)) { 1144 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1145 if (icmp->icmp_state == TS_DATA_XFER) { 1146 if (sin.sin_addr.s_addr == 1147 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr)) { 1148 rw_exit(&icmp->icmp_rwlock); 1149 (*connp->conn_upcalls->su_set_error) 1150 (connp->conn_upper_handle, error); 1151 goto done; 1152 } 1153 } else { 1154 icmp->icmp_delayed_error = error; 1155 *((sin_t *)&icmp->icmp_delayed_addr) = sin; 1156 } 1157 rw_exit(&icmp->icmp_rwlock); 1158 } else { 1159 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 1160 0, error); 1161 if (mp1 != NULL) 1162 putnext(connp->conn_rq, mp1); 1163 } 1164 done: 1165 ASSERT(!RW_ISWRITER(&icmp->icmp_rwlock)); 1166 freemsg(mp); 1167 } 1168 1169 /* 1170 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 1171 * for IPv6 packets. 1172 * Send permanent (non-transient) errors upstream. 1173 * Assumes that IP has pulled up all the extension headers as well 1174 * as the ICMPv6 header. 
1175 */ 1176 static void 1177 icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1178 { 1179 icmp6_t *icmp6; 1180 ip6_t *ip6h, *outer_ip6h; 1181 uint16_t iph_hdr_length; 1182 uint8_t *nexthdrp; 1183 sin6_t sin6; 1184 mblk_t *mp1; 1185 int error = 0; 1186 icmp_t *icmp = connp->conn_icmp; 1187 1188 outer_ip6h = (ip6_t *)mp->b_rptr; 1189 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1190 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1191 else 1192 iph_hdr_length = IPV6_HDR_LEN; 1193 1194 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1195 ip6h = (ip6_t *)&icmp6[1]; 1196 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1197 freemsg(mp); 1198 return; 1199 } 1200 1201 switch (icmp6->icmp6_type) { 1202 case ICMP6_DST_UNREACH: 1203 switch (icmp6->icmp6_code) { 1204 case ICMP6_DST_UNREACH_NOPORT: 1205 error = ECONNREFUSED; 1206 break; 1207 case ICMP6_DST_UNREACH_ADMIN: 1208 case ICMP6_DST_UNREACH_NOROUTE: 1209 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1210 case ICMP6_DST_UNREACH_ADDR: 1211 /* Transient errors */ 1212 break; 1213 default: 1214 break; 1215 } 1216 break; 1217 case ICMP6_PACKET_TOO_BIG: { 1218 struct T_unitdata_ind *tudi; 1219 struct T_opthdr *toh; 1220 size_t udi_size; 1221 mblk_t *newmp; 1222 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1223 sizeof (struct ip6_mtuinfo); 1224 sin6_t *sin6; 1225 struct ip6_mtuinfo *mtuinfo; 1226 1227 /* 1228 * If the application has requested to receive path mtu 1229 * information, send up an empty message containing an 1230 * IPV6_PATHMTU ancillary data item. 1231 */ 1232 if (!icmp->icmp_ipv6_recvpathmtu) 1233 break; 1234 1235 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1236 opt_length; 1237 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1238 BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors); 1239 break; 1240 } 1241 1242 /* 1243 * newmp->b_cont is left to NULL on purpose. This is an 1244 * empty message containing only ancillary data. 
1245 */ 1246 newmp->b_datap->db_type = M_PROTO; 1247 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1248 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1249 tudi->PRIM_type = T_UNITDATA_IND; 1250 tudi->SRC_length = sizeof (sin6_t); 1251 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1252 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1253 tudi->OPT_length = opt_length; 1254 1255 sin6 = (sin6_t *)&tudi[1]; 1256 bzero(sin6, sizeof (sin6_t)); 1257 sin6->sin6_family = AF_INET6; 1258 sin6->sin6_addr = icmp->icmp_v6dst.sin6_addr; 1259 1260 toh = (struct T_opthdr *)&sin6[1]; 1261 toh->level = IPPROTO_IPV6; 1262 toh->name = IPV6_PATHMTU; 1263 toh->len = opt_length; 1264 toh->status = 0; 1265 1266 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1267 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1268 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1269 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1270 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1271 /* 1272 * We've consumed everything we need from the original 1273 * message. Free it, then send our empty message. 1274 */ 1275 freemsg(mp); 1276 icmp_ulp_recv(connp, newmp); 1277 1278 return; 1279 } 1280 case ICMP6_TIME_EXCEEDED: 1281 /* Transient errors */ 1282 break; 1283 case ICMP6_PARAM_PROB: 1284 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1285 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1286 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1287 (uchar_t *)nexthdrp) { 1288 error = ECONNREFUSED; 1289 break; 1290 } 1291 break; 1292 } 1293 if (error == 0) { 1294 freemsg(mp); 1295 return; 1296 } 1297 1298 /* 1299 * Deliver T_UDERROR_IND when the application has asked for it. 1300 * The socket layer enables this automatically when connected. 
1301 */ 1302 if (!icmp->icmp_dgram_errind) { 1303 freemsg(mp); 1304 return; 1305 } 1306 1307 sin6 = sin6_null; 1308 sin6.sin6_family = AF_INET6; 1309 sin6.sin6_addr = ip6h->ip6_dst; 1310 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1311 1312 if (IPCL_IS_NONSTR(connp)) { 1313 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1314 if (icmp->icmp_state == TS_DATA_XFER) { 1315 if (IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1316 &icmp->icmp_v6dst.sin6_addr)) { 1317 rw_exit(&icmp->icmp_rwlock); 1318 (*connp->conn_upcalls->su_set_error) 1319 (connp->conn_upper_handle, error); 1320 goto done; 1321 } 1322 } else { 1323 icmp->icmp_delayed_error = error; 1324 *((sin6_t *)&icmp->icmp_delayed_addr) = sin6; 1325 } 1326 rw_exit(&icmp->icmp_rwlock); 1327 } else { 1328 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1329 NULL, 0, error); 1330 if (mp1 != NULL) 1331 putnext(connp->conn_rq, mp1); 1332 } 1333 done: 1334 ASSERT(!RW_ISWRITER(&icmp->icmp_rwlock)); 1335 freemsg(mp); 1336 } 1337 1338 /* 1339 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1340 * The local address is filled in if endpoint is bound. The remote address 1341 * is filled in if remote address has been precified ("connected endpoint") 1342 * (The concept of connected CLTS sockets is alien to published TPI 1343 * but we support it anyway). 
1344 */ 1345 static void 1346 icmp_addr_req(queue_t *q, mblk_t *mp) 1347 { 1348 icmp_t *icmp = Q_TO_ICMP(q); 1349 mblk_t *ackmp; 1350 struct T_addr_ack *taa; 1351 1352 /* Make it large enough for worst case */ 1353 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1354 2 * sizeof (sin6_t), 1); 1355 if (ackmp == NULL) { 1356 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1357 return; 1358 } 1359 taa = (struct T_addr_ack *)ackmp->b_rptr; 1360 1361 bzero(taa, sizeof (struct T_addr_ack)); 1362 ackmp->b_wptr = (uchar_t *)&taa[1]; 1363 1364 taa->PRIM_type = T_ADDR_ACK; 1365 ackmp->b_datap->db_type = M_PCPROTO; 1366 rw_enter(&icmp->icmp_rwlock, RW_READER); 1367 /* 1368 * Note: Following code assumes 32 bit alignment of basic 1369 * data structures like sin_t and struct T_addr_ack. 1370 */ 1371 if (icmp->icmp_state != TS_UNBND) { 1372 /* 1373 * Fill in local address 1374 */ 1375 taa->LOCADDR_offset = sizeof (*taa); 1376 if (icmp->icmp_family == AF_INET) { 1377 sin_t *sin; 1378 1379 taa->LOCADDR_length = sizeof (sin_t); 1380 sin = (sin_t *)&taa[1]; 1381 /* Fill zeroes and then intialize non-zero fields */ 1382 *sin = sin_null; 1383 sin->sin_family = AF_INET; 1384 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1385 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1386 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1387 sin->sin_addr.s_addr); 1388 } else { 1389 /* 1390 * INADDR_ANY 1391 * icmp_v6src is not set, we might be bound to 1392 * broadcast/multicast. 
Use icmp_bound_v6src as 1393 * local address instead (that could 1394 * also still be INADDR_ANY) 1395 */ 1396 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1397 sin->sin_addr.s_addr); 1398 } 1399 ackmp->b_wptr = (uchar_t *)&sin[1]; 1400 } else { 1401 sin6_t *sin6; 1402 1403 ASSERT(icmp->icmp_family == AF_INET6); 1404 taa->LOCADDR_length = sizeof (sin6_t); 1405 sin6 = (sin6_t *)&taa[1]; 1406 /* Fill zeroes and then intialize non-zero fields */ 1407 *sin6 = sin6_null; 1408 sin6->sin6_family = AF_INET6; 1409 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1410 sin6->sin6_addr = icmp->icmp_v6src; 1411 } else { 1412 /* 1413 * UNSPECIFIED 1414 * icmp_v6src is not set, we might be bound to 1415 * broadcast/multicast. Use icmp_bound_v6src as 1416 * local address instead (that could 1417 * also still be UNSPECIFIED) 1418 */ 1419 sin6->sin6_addr = icmp->icmp_bound_v6src; 1420 } 1421 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1422 } 1423 } 1424 rw_exit(&icmp->icmp_rwlock); 1425 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1426 qreply(q, ackmp); 1427 } 1428 1429 static void 1430 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1431 { 1432 *tap = icmp_g_t_info_ack; 1433 1434 if (icmp->icmp_family == AF_INET6) 1435 tap->ADDR_size = sizeof (sin6_t); 1436 else 1437 tap->ADDR_size = sizeof (sin_t); 1438 tap->CURRENT_state = icmp->icmp_state; 1439 tap->OPT_size = icmp_max_optsize; 1440 } 1441 1442 static void 1443 icmp_do_capability_ack(icmp_t *icmp, struct T_capability_ack *tcap, 1444 t_uscalar_t cap_bits1) 1445 { 1446 tcap->CAP_bits1 = 0; 1447 1448 if (cap_bits1 & TC1_INFO) { 1449 icmp_copy_info(&tcap->INFO_ack, icmp); 1450 tcap->CAP_bits1 |= TC1_INFO; 1451 } 1452 } 1453 1454 /* 1455 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1456 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1457 * icmp_g_t_info_ack. The current state of the stream is copied from 1458 * icmp_state. 
1459 */ 1460 static void 1461 icmp_capability_req(queue_t *q, mblk_t *mp) 1462 { 1463 icmp_t *icmp = Q_TO_ICMP(q); 1464 t_uscalar_t cap_bits1; 1465 struct T_capability_ack *tcap; 1466 1467 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1468 1469 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1470 mp->b_datap->db_type, T_CAPABILITY_ACK); 1471 if (!mp) 1472 return; 1473 1474 tcap = (struct T_capability_ack *)mp->b_rptr; 1475 1476 icmp_do_capability_ack(icmp, tcap, cap_bits1); 1477 1478 qreply(q, mp); 1479 } 1480 1481 /* 1482 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1483 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1484 * The current state of the stream is copied from icmp_state. 1485 */ 1486 static void 1487 icmp_info_req(queue_t *q, mblk_t *mp) 1488 { 1489 icmp_t *icmp = Q_TO_ICMP(q); 1490 1491 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1492 T_INFO_ACK); 1493 if (!mp) 1494 return; 1495 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1496 qreply(q, mp); 1497 } 1498 1499 /* For /dev/icmp aka AF_INET open */ 1500 static int 1501 icmp_tpi_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1502 int family) 1503 { 1504 conn_t *connp; 1505 dev_t conn_dev; 1506 icmp_stack_t *is; 1507 int error; 1508 1509 conn_dev = NULL; 1510 1511 /* If the stream is already open, return immediately. */ 1512 if (q->q_ptr != NULL) 1513 return (0); 1514 1515 if (sflag == MODOPEN) 1516 return (EINVAL); 1517 1518 /* 1519 * Since ICMP is not used so heavily, allocating from the small 1520 * arena should be sufficient. 
1521 */ 1522 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 1523 return (EBUSY); 1524 } 1525 1526 if (flag & SO_FALLBACK) { 1527 /* 1528 * Non streams socket needs a stream to fallback to 1529 */ 1530 RD(q)->q_ptr = (void *)conn_dev; 1531 WR(q)->q_qinfo = &icmp_fallback_sock_winit; 1532 WR(q)->q_ptr = (void *)ip_minor_arena_sa; 1533 qprocson(q); 1534 return (0); 1535 } 1536 1537 connp = icmp_open(family, credp, &error, KM_SLEEP); 1538 if (connp == NULL) { 1539 ASSERT(error != NULL); 1540 inet_minor_free(ip_minor_arena_sa, connp->conn_dev); 1541 return (error); 1542 } 1543 1544 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1545 connp->conn_dev = conn_dev; 1546 connp->conn_minor_arena = ip_minor_arena_sa; 1547 1548 is = connp->conn_icmp->icmp_is; 1549 1550 /* 1551 * Initialize the icmp_t structure for this stream. 1552 */ 1553 q->q_ptr = connp; 1554 WR(q)->q_ptr = connp; 1555 connp->conn_rq = q; 1556 connp->conn_wq = WR(q); 1557 1558 if (connp->conn_icmp->icmp_family == AF_INET6) { 1559 /* Build initial header template for transmit */ 1560 rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER); 1561 if ((error = icmp_build_hdrs(connp->conn_icmp)) != 0) { 1562 rw_exit(&connp->conn_icmp->icmp_rwlock); 1563 inet_minor_free(ip_minor_arena_sa, connp->conn_dev); 1564 ipcl_conn_destroy(connp); 1565 return (error); 1566 } 1567 rw_exit(&connp->conn_icmp->icmp_rwlock); 1568 } 1569 1570 1571 q->q_hiwat = is->is_recv_hiwat; 1572 WR(q)->q_hiwat = is->is_xmit_hiwat; 1573 WR(q)->q_lowat = is->is_xmit_lowat; 1574 1575 qprocson(q); 1576 1577 /* Set the Stream head write offset. 
*/ 1578 (void) proto_set_tx_wroff(q, connp, 1579 connp->conn_icmp->icmp_max_hdr_len + is->is_wroff_extra); 1580 (void) proto_set_rx_hiwat(connp->conn_rq, connp, q->q_hiwat); 1581 1582 mutex_enter(&connp->conn_lock); 1583 connp->conn_state_flags &= ~CONN_INCIPIENT; 1584 mutex_exit(&connp->conn_lock); 1585 1586 return (0); 1587 } 1588 1589 /* For /dev/icmp4 aka AF_INET open */ 1590 static int 1591 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1592 { 1593 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET)); 1594 } 1595 1596 /* For /dev/icmp6 aka AF_INET6 open */ 1597 static int 1598 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1599 { 1600 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET6)); 1601 } 1602 1603 /* 1604 * This is the open routine for icmp. It allocates a icmp_t structure for 1605 * the stream and, on the first open of the module, creates an ND table. 1606 */ 1607 /* ARGSUSED */ 1608 static conn_t * 1609 icmp_open(int family, cred_t *credp, int *err, int flags) 1610 { 1611 icmp_t *icmp; 1612 conn_t *connp; 1613 zoneid_t zoneid; 1614 netstack_t *ns; 1615 icmp_stack_t *is; 1616 boolean_t isv6 = B_FALSE; 1617 1618 *err = secpolicy_net_icmpaccess(credp); 1619 if (*err != 0) 1620 return (NULL); 1621 1622 if (family == AF_INET6) 1623 isv6 = B_TRUE; 1624 ns = netstack_find_by_cred(credp); 1625 ASSERT(ns != NULL); 1626 is = ns->netstack_icmp; 1627 ASSERT(is != NULL); 1628 1629 /* 1630 * For exclusive stacks we set the zoneid to zero 1631 * to make ICMP operate as if in the global zone. 1632 */ 1633 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 1634 zoneid = GLOBAL_ZONEID; 1635 else 1636 zoneid = crgetzoneid(credp); 1637 1638 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 1639 1640 connp = ipcl_conn_create(IPCL_RAWIPCONN, flags, ns); 1641 icmp = connp->conn_icmp; 1642 icmp->icmp_v6dst = sin6_null; 1643 1644 /* 1645 * ipcl_conn_create did a netstack_hold. 
Undo the hold that was 1646 * done by netstack_find_by_cred() 1647 */ 1648 netstack_rele(ns); 1649 1650 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1651 ASSERT(connp->conn_ulp == IPPROTO_ICMP); 1652 ASSERT(connp->conn_icmp == icmp); 1653 ASSERT(icmp->icmp_connp == connp); 1654 1655 /* Set the initial state of the stream and the privilege status. */ 1656 icmp->icmp_state = TS_UNBND; 1657 if (isv6) { 1658 icmp->icmp_ipversion = IPV6_VERSION; 1659 icmp->icmp_family = AF_INET6; 1660 connp->conn_ulp = IPPROTO_ICMPV6; 1661 /* May be changed by a SO_PROTOTYPE socket option. */ 1662 icmp->icmp_proto = IPPROTO_ICMPV6; 1663 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1664 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1665 icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; 1666 connp->conn_af_isv6 = B_TRUE; 1667 } else { 1668 icmp->icmp_ipversion = IPV4_VERSION; 1669 icmp->icmp_family = AF_INET; 1670 /* May be changed by a SO_PROTOTYPE socket option. */ 1671 icmp->icmp_proto = IPPROTO_ICMP; 1672 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1673 icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; 1674 connp->conn_af_isv6 = B_FALSE; 1675 } 1676 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1677 icmp->icmp_pending_op = -1; 1678 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1679 connp->conn_zoneid = zoneid; 1680 1681 /* 1682 * If the caller has the process-wide flag set, then default to MAC 1683 * exempt mode. This allows read-down to unlabeled hosts. 1684 */ 1685 if (getpflags(NET_MAC_AWARE, credp) != 0) 1686 connp->conn_mac_exempt = B_TRUE; 1687 1688 connp->conn_ulp_labeled = is_system_labeled(); 1689 1690 icmp->icmp_is = is; 1691 1692 connp->conn_recv = icmp_input; 1693 crhold(credp); 1694 connp->conn_cred = credp; 1695 1696 rw_exit(&icmp->icmp_rwlock); 1697 1698 connp->conn_flow_cntrld = B_FALSE; 1699 return (connp); 1700 } 1701 1702 /* 1703 * Which ICMP options OK to set through T_UNITDATA_REQ... 
1704 */ 1705 /* ARGSUSED */ 1706 static boolean_t 1707 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1708 { 1709 return (B_TRUE); 1710 } 1711 1712 /* 1713 * This routine gets default values of certain options whose default 1714 * values are maintained by protcol specific code 1715 */ 1716 /* ARGSUSED */ 1717 int 1718 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1719 { 1720 icmp_t *icmp = Q_TO_ICMP(q); 1721 icmp_stack_t *is = icmp->icmp_is; 1722 int *i1 = (int *)ptr; 1723 1724 switch (level) { 1725 case IPPROTO_IP: 1726 switch (name) { 1727 case IP_MULTICAST_TTL: 1728 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1729 return (sizeof (uchar_t)); 1730 case IP_MULTICAST_LOOP: 1731 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1732 return (sizeof (uchar_t)); 1733 } 1734 break; 1735 case IPPROTO_IPV6: 1736 switch (name) { 1737 case IPV6_MULTICAST_HOPS: 1738 *i1 = IP_DEFAULT_MULTICAST_TTL; 1739 return (sizeof (int)); 1740 case IPV6_MULTICAST_LOOP: 1741 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1742 return (sizeof (int)); 1743 case IPV6_UNICAST_HOPS: 1744 *i1 = is->is_ipv6_hoplimit; 1745 return (sizeof (int)); 1746 } 1747 break; 1748 case IPPROTO_ICMPV6: 1749 switch (name) { 1750 case ICMP6_FILTER: 1751 /* Make it look like "pass all" */ 1752 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1753 return (sizeof (icmp6_filter_t)); 1754 } 1755 break; 1756 } 1757 return (-1); 1758 } 1759 1760 /* 1761 * This routine retrieves the current status of socket options. 1762 * It returns the size of the option retrieved. 
1763 */ 1764 int 1765 icmp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 1766 { 1767 icmp_t *icmp = connp->conn_icmp; 1768 icmp_stack_t *is = icmp->icmp_is; 1769 int *i1 = (int *)ptr; 1770 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1771 int ret = 0; 1772 1773 ASSERT(RW_READ_HELD(&icmp->icmp_rwlock)); 1774 switch (level) { 1775 case SOL_SOCKET: 1776 switch (name) { 1777 case SO_DEBUG: 1778 *i1 = icmp->icmp_debug; 1779 break; 1780 case SO_TYPE: 1781 *i1 = SOCK_RAW; 1782 break; 1783 case SO_PROTOTYPE: 1784 *i1 = icmp->icmp_proto; 1785 break; 1786 case SO_REUSEADDR: 1787 *i1 = icmp->icmp_reuseaddr; 1788 break; 1789 1790 /* 1791 * The following three items are available here, 1792 * but are only meaningful to IP. 1793 */ 1794 case SO_DONTROUTE: 1795 *i1 = icmp->icmp_dontroute; 1796 break; 1797 case SO_USELOOPBACK: 1798 *i1 = icmp->icmp_useloopback; 1799 break; 1800 case SO_BROADCAST: 1801 *i1 = icmp->icmp_broadcast; 1802 break; 1803 1804 case SO_SNDBUF: 1805 ASSERT(icmp->icmp_xmit_hiwat <= INT_MAX); 1806 *i1 = icmp->icmp_xmit_hiwat; 1807 break; 1808 case SO_RCVBUF: 1809 ASSERT(icmp->icmp_recv_hiwat <= INT_MAX); 1810 *i1 = icmp->icmp_recv_hiwat; 1811 break; 1812 case SO_DGRAM_ERRIND: 1813 *i1 = icmp->icmp_dgram_errind; 1814 break; 1815 case SO_TIMESTAMP: 1816 *i1 = icmp->icmp_timestamp; 1817 break; 1818 case SO_MAC_EXEMPT: 1819 *i1 = connp->conn_mac_exempt; 1820 break; 1821 case SO_DOMAIN: 1822 *i1 = icmp->icmp_family; 1823 break; 1824 1825 /* 1826 * Following four not meaningful for icmp 1827 * Action is same as "default" to which we fallthrough 1828 * so we keep them in comments. 1829 * case SO_LINGER: 1830 * case SO_KEEPALIVE: 1831 * case SO_OOBINLINE: 1832 * case SO_ALLZONES: 1833 */ 1834 default: 1835 ret = -1; 1836 goto done; 1837 } 1838 break; 1839 case IPPROTO_IP: 1840 /* 1841 * Only allow IPv4 option processing on IPv4 sockets. 
1842 */ 1843 if (icmp->icmp_family != AF_INET) { 1844 ret = -1; 1845 goto done; 1846 } 1847 1848 switch (name) { 1849 case IP_OPTIONS: 1850 case T_IP_OPTIONS: 1851 /* Options are passed up with each packet */ 1852 ret = 0; 1853 goto done; 1854 case IP_HDRINCL: 1855 *i1 = (int)icmp->icmp_hdrincl; 1856 break; 1857 case IP_TOS: 1858 case T_IP_TOS: 1859 *i1 = (int)icmp->icmp_type_of_service; 1860 break; 1861 case IP_TTL: 1862 *i1 = (int)icmp->icmp_ttl; 1863 break; 1864 case IP_MULTICAST_IF: 1865 /* 0 address if not set */ 1866 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1867 ret = sizeof (ipaddr_t); 1868 goto done; 1869 case IP_MULTICAST_TTL: 1870 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1871 ret = sizeof (uchar_t); 1872 goto done; 1873 case IP_MULTICAST_LOOP: 1874 *ptr = connp->conn_multicast_loop; 1875 ret = sizeof (uint8_t); 1876 goto done; 1877 case IP_BOUND_IF: 1878 /* Zero if not set */ 1879 *i1 = icmp->icmp_bound_if; 1880 break; /* goto sizeof (int) option return */ 1881 case IP_UNSPEC_SRC: 1882 *ptr = icmp->icmp_unspec_source; 1883 break; /* goto sizeof (int) option return */ 1884 case IP_RECVIF: 1885 *ptr = icmp->icmp_recvif; 1886 break; /* goto sizeof (int) option return */ 1887 case IP_BROADCAST_TTL: 1888 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 1889 return (sizeof (uchar_t)); 1890 case IP_RECVPKTINFO: 1891 /* 1892 * This also handles IP_PKTINFO. 1893 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1894 * Differentiation is based on the size of the argument 1895 * passed in. 1896 * This option is handled in IP which will return an 1897 * error for IP_PKTINFO as it's not supported as a 1898 * sticky option. 1899 */ 1900 ret = -EINVAL; 1901 goto done; 1902 /* 1903 * Cannot "get" the value of following options 1904 * at this level. Action is same as "default" to 1905 * which we fallthrough so we keep them in comments. 
1906 * 1907 * case IP_ADD_MEMBERSHIP: 1908 * case IP_DROP_MEMBERSHIP: 1909 * case IP_BLOCK_SOURCE: 1910 * case IP_UNBLOCK_SOURCE: 1911 * case IP_ADD_SOURCE_MEMBERSHIP: 1912 * case IP_DROP_SOURCE_MEMBERSHIP: 1913 * case MCAST_JOIN_GROUP: 1914 * case MCAST_LEAVE_GROUP: 1915 * case MCAST_BLOCK_SOURCE: 1916 * case MCAST_UNBLOCK_SOURCE: 1917 * case MCAST_JOIN_SOURCE_GROUP: 1918 * case MCAST_LEAVE_SOURCE_GROUP: 1919 * case MRT_INIT: 1920 * case MRT_DONE: 1921 * case MRT_ADD_VIF: 1922 * case MRT_DEL_VIF: 1923 * case MRT_ADD_MFC: 1924 * case MRT_DEL_MFC: 1925 * case MRT_VERSION: 1926 * case MRT_ASSERT: 1927 * case IP_SEC_OPT: 1928 * case IP_NEXTHOP: 1929 */ 1930 default: 1931 ret = -1; 1932 goto done; 1933 } 1934 break; 1935 case IPPROTO_IPV6: 1936 /* 1937 * Only allow IPv6 option processing on native IPv6 sockets. 1938 */ 1939 if (icmp->icmp_family != AF_INET6) { 1940 ret = -1; 1941 goto done; 1942 } 1943 switch (name) { 1944 case IPV6_UNICAST_HOPS: 1945 *i1 = (unsigned int)icmp->icmp_ttl; 1946 break; 1947 case IPV6_MULTICAST_IF: 1948 /* 0 index if not set */ 1949 *i1 = icmp->icmp_multicast_if_index; 1950 break; 1951 case IPV6_MULTICAST_HOPS: 1952 *i1 = icmp->icmp_multicast_ttl; 1953 break; 1954 case IPV6_MULTICAST_LOOP: 1955 *i1 = connp->conn_multicast_loop; 1956 break; 1957 case IPV6_BOUND_IF: 1958 /* Zero if not set */ 1959 *i1 = icmp->icmp_bound_if; 1960 break; 1961 case IPV6_UNSPEC_SRC: 1962 *i1 = icmp->icmp_unspec_source; 1963 break; 1964 case IPV6_CHECKSUM: 1965 /* 1966 * Return offset or -1 if no checksum offset. 
1967 * Does not apply to IPPROTO_ICMPV6 1968 */ 1969 if (icmp->icmp_proto == IPPROTO_ICMPV6) { 1970 ret = -1; 1971 goto done; 1972 } 1973 1974 if (icmp->icmp_raw_checksum) { 1975 *i1 = icmp->icmp_checksum_off; 1976 } else { 1977 *i1 = -1; 1978 } 1979 break; 1980 case IPV6_JOIN_GROUP: 1981 case IPV6_LEAVE_GROUP: 1982 case MCAST_JOIN_GROUP: 1983 case MCAST_LEAVE_GROUP: 1984 case MCAST_BLOCK_SOURCE: 1985 case MCAST_UNBLOCK_SOURCE: 1986 case MCAST_JOIN_SOURCE_GROUP: 1987 case MCAST_LEAVE_SOURCE_GROUP: 1988 /* cannot "get" the value for these */ 1989 ret = -1; 1990 goto done; 1991 case IPV6_RECVPKTINFO: 1992 *i1 = icmp->icmp_ip_recvpktinfo; 1993 break; 1994 case IPV6_RECVTCLASS: 1995 *i1 = icmp->icmp_ipv6_recvtclass; 1996 break; 1997 case IPV6_RECVPATHMTU: 1998 *i1 = icmp->icmp_ipv6_recvpathmtu; 1999 break; 2000 case IPV6_V6ONLY: 2001 *i1 = 1; 2002 break; 2003 case IPV6_RECVHOPLIMIT: 2004 *i1 = icmp->icmp_ipv6_recvhoplimit; 2005 break; 2006 case IPV6_RECVHOPOPTS: 2007 *i1 = icmp->icmp_ipv6_recvhopopts; 2008 break; 2009 case IPV6_RECVDSTOPTS: 2010 *i1 = icmp->icmp_ipv6_recvdstopts; 2011 break; 2012 case _OLD_IPV6_RECVDSTOPTS: 2013 *i1 = icmp->icmp_old_ipv6_recvdstopts; 2014 break; 2015 case IPV6_RECVRTHDRDSTOPTS: 2016 *i1 = icmp->icmp_ipv6_recvrtdstopts; 2017 break; 2018 case IPV6_RECVRTHDR: 2019 *i1 = icmp->icmp_ipv6_recvrthdr; 2020 break; 2021 case IPV6_PKTINFO: { 2022 /* XXX assumes that caller has room for max size! 
*/ 2023 struct in6_pktinfo *pkti; 2024 2025 pkti = (struct in6_pktinfo *)ptr; 2026 if (ipp->ipp_fields & IPPF_IFINDEX) 2027 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2028 else 2029 pkti->ipi6_ifindex = 0; 2030 if (ipp->ipp_fields & IPPF_ADDR) 2031 pkti->ipi6_addr = ipp->ipp_addr; 2032 else 2033 pkti->ipi6_addr = ipv6_all_zeros; 2034 ret = sizeof (struct in6_pktinfo); 2035 goto done; 2036 } 2037 case IPV6_NEXTHOP: { 2038 sin6_t *sin6 = (sin6_t *)ptr; 2039 2040 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2041 return (0); 2042 *sin6 = sin6_null; 2043 sin6->sin6_family = AF_INET6; 2044 sin6->sin6_addr = ipp->ipp_nexthop; 2045 ret = (sizeof (sin6_t)); 2046 goto done; 2047 } 2048 case IPV6_HOPOPTS: 2049 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2050 return (0); 2051 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 2052 return (0); 2053 bcopy((char *)ipp->ipp_hopopts + 2054 icmp->icmp_label_len_v6, ptr, 2055 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 2056 if (icmp->icmp_label_len_v6 > 0) { 2057 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2058 ptr[1] = (ipp->ipp_hopoptslen - 2059 icmp->icmp_label_len_v6 + 7) / 8 - 1; 2060 } 2061 ret = (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 2062 goto done; 2063 case IPV6_RTHDRDSTOPTS: 2064 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2065 return (0); 2066 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2067 ret = ipp->ipp_rtdstoptslen; 2068 goto done; 2069 case IPV6_RTHDR: 2070 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2071 return (0); 2072 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2073 ret = ipp->ipp_rthdrlen; 2074 goto done; 2075 case IPV6_DSTOPTS: 2076 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 2077 ret = 0; 2078 goto done; 2079 } 2080 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2081 ret = ipp->ipp_dstoptslen; 2082 goto done; 2083 case IPV6_PATHMTU: 2084 if (!(ipp->ipp_fields & IPPF_PATHMTU)) { 2085 ret = 0; 2086 } else { 2087 ret = ip_fill_mtuinfo( 2088 &icmp->icmp_v6dst.sin6_addr, 0, 2089 (struct ip6_mtuinfo *)ptr, 2090 
is->is_netstack); 2091 } 2092 goto done; 2093 case IPV6_TCLASS: 2094 if (ipp->ipp_fields & IPPF_TCLASS) 2095 *i1 = ipp->ipp_tclass; 2096 else 2097 *i1 = IPV6_FLOW_TCLASS( 2098 IPV6_DEFAULT_VERS_AND_FLOW); 2099 break; 2100 default: 2101 ret = -1; 2102 goto done; 2103 } 2104 break; 2105 case IPPROTO_ICMPV6: 2106 /* 2107 * Only allow IPv6 option processing on native IPv6 sockets. 2108 */ 2109 if (icmp->icmp_family != AF_INET6) { 2110 ret = -1; 2111 } 2112 2113 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2114 ret = -1; 2115 } 2116 2117 switch (name) { 2118 case ICMP6_FILTER: 2119 if (icmp->icmp_filter == NULL) { 2120 /* Make it look like "pass all" */ 2121 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 2122 } else { 2123 (void) bcopy(icmp->icmp_filter, ptr, 2124 sizeof (icmp6_filter_t)); 2125 } 2126 ret = sizeof (icmp6_filter_t); 2127 goto done; 2128 default: 2129 ret = -1; 2130 goto done; 2131 } 2132 default: 2133 ret = -1; 2134 goto done; 2135 } 2136 ret = sizeof (int); 2137 done: 2138 return (ret); 2139 } 2140 2141 /* 2142 * This routine retrieves the current status of socket options. 2143 * It returns the size of the option retrieved. 2144 */ 2145 int 2146 icmp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 2147 { 2148 conn_t *connp = Q_TO_CONN(q); 2149 icmp_t *icmp = connp->conn_icmp; 2150 int err; 2151 2152 rw_enter(&icmp->icmp_rwlock, RW_READER); 2153 err = icmp_opt_get(connp, level, name, ptr); 2154 rw_exit(&icmp->icmp_rwlock); 2155 return (err); 2156 } 2157 2158 int 2159 icmp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2160 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2161 void *thisdg_attrs, boolean_t checkonly) 2162 { 2163 2164 int *i1 = (int *)invalp; 2165 boolean_t onoff = (*i1 == 0) ? 
0 : 1; 2166 icmp_t *icmp = connp->conn_icmp; 2167 icmp_stack_t *is = icmp->icmp_is; 2168 int error; 2169 2170 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 2171 /* 2172 * For fixed length options, no sanity check 2173 * of passed in length is done. It is assumed *_optcom_req() 2174 * routines do the right thing. 2175 */ 2176 switch (level) { 2177 case SOL_SOCKET: 2178 switch (name) { 2179 case SO_DEBUG: 2180 if (!checkonly) 2181 icmp->icmp_debug = onoff; 2182 break; 2183 case SO_PROTOTYPE: 2184 if ((*i1 & 0xFF) != IPPROTO_ICMP && 2185 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 2186 secpolicy_net_rawaccess(cr) != 0) { 2187 *outlenp = 0; 2188 return (EACCES); 2189 } 2190 /* Can't use IPPROTO_RAW with IPv6 */ 2191 if ((*i1 & 0xFF) == IPPROTO_RAW && 2192 icmp->icmp_family == AF_INET6) { 2193 *outlenp = 0; 2194 return (EPROTONOSUPPORT); 2195 } 2196 if (checkonly) { 2197 /* T_CHECK case */ 2198 *(int *)outvalp = (*i1 & 0xFF); 2199 break; 2200 } 2201 icmp->icmp_proto = *i1 & 0xFF; 2202 if ((icmp->icmp_proto == IPPROTO_RAW || 2203 icmp->icmp_proto == IPPROTO_IGMP) && 2204 icmp->icmp_family == AF_INET) 2205 icmp->icmp_hdrincl = 1; 2206 else 2207 icmp->icmp_hdrincl = 0; 2208 2209 if (icmp->icmp_family == AF_INET6 && 2210 icmp->icmp_proto == IPPROTO_ICMPV6) { 2211 /* Set offset for icmp6_cksum */ 2212 icmp->icmp_raw_checksum = 0; 2213 icmp->icmp_checksum_off = 2; 2214 } 2215 if (icmp->icmp_proto == IPPROTO_UDP || 2216 icmp->icmp_proto == IPPROTO_TCP || 2217 icmp->icmp_proto == IPPROTO_SCTP) { 2218 icmp->icmp_no_tp_cksum = 1; 2219 icmp->icmp_sticky_ipp.ipp_fields |= 2220 IPPF_NO_CKSUM; 2221 } else { 2222 icmp->icmp_no_tp_cksum = 0; 2223 icmp->icmp_sticky_ipp.ipp_fields &= 2224 ~IPPF_NO_CKSUM; 2225 } 2226 2227 if (icmp->icmp_filter != NULL && 2228 icmp->icmp_proto != IPPROTO_ICMPV6) { 2229 kmem_free(icmp->icmp_filter, 2230 sizeof (icmp6_filter_t)); 2231 icmp->icmp_filter = NULL; 2232 } 2233 2234 /* Rebuild the header template */ 2235 error = icmp_build_hdrs(icmp); 2236 if (error != 0) { 
2237 *outlenp = 0; 2238 return (error); 2239 } 2240 2241 /* 2242 * For SCTP, we don't use icmp_bind_proto() for 2243 * raw socket binding. Note that we do not need 2244 * to set *outlenp. 2245 * FIXME: how does SCTP work? 2246 */ 2247 if (icmp->icmp_proto == IPPROTO_SCTP) 2248 return (0); 2249 2250 *outlenp = sizeof (int); 2251 *(int *)outvalp = *i1 & 0xFF; 2252 2253 /* Drop lock across the bind operation */ 2254 rw_exit(&icmp->icmp_rwlock); 2255 (void) icmp_bind_proto(connp); 2256 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2257 return (0); 2258 case SO_REUSEADDR: 2259 if (!checkonly) { 2260 icmp->icmp_reuseaddr = onoff; 2261 PASS_OPT_TO_IP(connp); 2262 } 2263 break; 2264 2265 /* 2266 * The following three items are available here, 2267 * but are only meaningful to IP. 2268 */ 2269 case SO_DONTROUTE: 2270 if (!checkonly) { 2271 icmp->icmp_dontroute = onoff; 2272 PASS_OPT_TO_IP(connp); 2273 } 2274 break; 2275 case SO_USELOOPBACK: 2276 if (!checkonly) { 2277 icmp->icmp_useloopback = onoff; 2278 PASS_OPT_TO_IP(connp); 2279 } 2280 break; 2281 case SO_BROADCAST: 2282 if (!checkonly) { 2283 icmp->icmp_broadcast = onoff; 2284 PASS_OPT_TO_IP(connp); 2285 } 2286 break; 2287 2288 case SO_SNDBUF: 2289 if (*i1 > is->is_max_buf) { 2290 *outlenp = 0; 2291 return (ENOBUFS); 2292 } 2293 if (!checkonly) { 2294 if (!IPCL_IS_NONSTR(connp)) { 2295 connp->conn_wq->q_hiwat = *i1; 2296 } 2297 icmp->icmp_xmit_hiwat = *i1; 2298 } 2299 break; 2300 case SO_RCVBUF: 2301 if (*i1 > is->is_max_buf) { 2302 *outlenp = 0; 2303 return (ENOBUFS); 2304 } 2305 if (!checkonly) { 2306 icmp->icmp_recv_hiwat = *i1; 2307 rw_exit(&icmp->icmp_rwlock); 2308 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2309 *i1); 2310 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2311 } 2312 break; 2313 case SO_DGRAM_ERRIND: 2314 if (!checkonly) 2315 icmp->icmp_dgram_errind = onoff; 2316 break; 2317 case SO_ALLZONES: 2318 /* 2319 * "soft" error (negative) 2320 * option not handled at this level 2321 * Note: Do not modify *outlenp 
2322 */ 2323 return (-EINVAL); 2324 case SO_TIMESTAMP: 2325 if (!checkonly) { 2326 icmp->icmp_timestamp = onoff; 2327 } 2328 break; 2329 case SO_MAC_EXEMPT: 2330 /* 2331 * "soft" error (negative) 2332 * option not handled at this level 2333 * Note: Do not modify *outlenp 2334 */ 2335 return (-EINVAL); 2336 case SO_RCVTIMEO: 2337 case SO_SNDTIMEO: 2338 /* 2339 * Pass these two options in order for third part 2340 * protocol usage. Here just return directly. 2341 */ 2342 return (0); 2343 /* 2344 * Following three not meaningful for icmp 2345 * Action is same as "default" so we keep them 2346 * in comments. 2347 * case SO_LINGER: 2348 * case SO_KEEPALIVE: 2349 * case SO_OOBINLINE: 2350 */ 2351 default: 2352 *outlenp = 0; 2353 return (EINVAL); 2354 } 2355 break; 2356 case IPPROTO_IP: 2357 /* 2358 * Only allow IPv4 option processing on IPv4 sockets. 2359 */ 2360 if (icmp->icmp_family != AF_INET) { 2361 *outlenp = 0; 2362 return (ENOPROTOOPT); 2363 } 2364 switch (name) { 2365 case IP_OPTIONS: 2366 case T_IP_OPTIONS: 2367 /* Save options for use by IP. */ 2368 if ((inlen & 0x3) || 2369 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2370 *outlenp = 0; 2371 return (EINVAL); 2372 } 2373 if (checkonly) 2374 break; 2375 2376 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2377 &icmp->icmp_ip_snd_options_len, 2378 icmp->icmp_label_len, invalp, inlen)) { 2379 *outlenp = 0; 2380 return (ENOMEM); 2381 } 2382 2383 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2384 icmp->icmp_ip_snd_options_len; 2385 rw_exit(&icmp->icmp_rwlock); 2386 (void) proto_set_tx_wroff(connp->conn_rq == NULL ? 
NULL: 2387 RD(connp->conn_rq), connp, 2388 icmp->icmp_max_hdr_len + is->is_wroff_extra); 2389 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2390 break; 2391 case IP_HDRINCL: 2392 if (!checkonly) 2393 icmp->icmp_hdrincl = onoff; 2394 break; 2395 case IP_TOS: 2396 case T_IP_TOS: 2397 if (!checkonly) { 2398 icmp->icmp_type_of_service = (uint8_t)*i1; 2399 } 2400 break; 2401 case IP_TTL: 2402 if (!checkonly) { 2403 icmp->icmp_ttl = (uint8_t)*i1; 2404 } 2405 break; 2406 case IP_MULTICAST_IF: 2407 /* 2408 * TODO should check OPTMGMT reply and undo this if 2409 * there is an error. 2410 */ 2411 if (!checkonly) { 2412 icmp->icmp_multicast_if_addr = *i1; 2413 PASS_OPT_TO_IP(connp); 2414 } 2415 break; 2416 case IP_MULTICAST_TTL: 2417 if (!checkonly) 2418 icmp->icmp_multicast_ttl = *invalp; 2419 break; 2420 case IP_MULTICAST_LOOP: 2421 if (!checkonly) { 2422 connp->conn_multicast_loop = 2423 (*invalp == 0) ? 0 : 1; 2424 PASS_OPT_TO_IP(connp); 2425 } 2426 break; 2427 case IP_BOUND_IF: 2428 if (!checkonly) { 2429 icmp->icmp_bound_if = *i1; 2430 PASS_OPT_TO_IP(connp); 2431 } 2432 break; 2433 case IP_UNSPEC_SRC: 2434 if (!checkonly) { 2435 icmp->icmp_unspec_source = onoff; 2436 PASS_OPT_TO_IP(connp); 2437 } 2438 break; 2439 case IP_BROADCAST_TTL: 2440 if (!checkonly) 2441 connp->conn_broadcast_ttl = *invalp; 2442 break; 2443 case IP_RECVIF: 2444 if (!checkonly) { 2445 icmp->icmp_recvif = onoff; 2446 } 2447 /* 2448 * pass to ip 2449 */ 2450 return (-EINVAL); 2451 case IP_PKTINFO: { 2452 /* 2453 * This also handles IP_RECVPKTINFO. 2454 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2455 * Differentiation is based on the size of the argument 2456 * passed in. 2457 */ 2458 struct in_pktinfo *pktinfop; 2459 ip4_pkt_t *attr_pktinfop; 2460 2461 if (checkonly) 2462 break; 2463 2464 if (inlen == sizeof (int)) { 2465 /* 2466 * This is IP_RECVPKTINFO option. 2467 * Keep a local copy of wether this option is 2468 * set or not and pass it down to IP for 2469 * processing. 
2470 */ 2471 icmp->icmp_ip_recvpktinfo = onoff; 2472 return (-EINVAL); 2473 } 2474 2475 2476 if (inlen != sizeof (struct in_pktinfo)) { 2477 return (EINVAL); 2478 } 2479 2480 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2481 == NULL) { 2482 /* 2483 * sticky option is not supported 2484 */ 2485 return (EINVAL); 2486 } 2487 2488 pktinfop = (struct in_pktinfo *)invalp; 2489 2490 /* 2491 * Atleast one of the values should be specified 2492 */ 2493 if (pktinfop->ipi_ifindex == 0 && 2494 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2495 return (EINVAL); 2496 } 2497 2498 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2499 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2500 } 2501 break; 2502 case IP_ADD_MEMBERSHIP: 2503 case IP_DROP_MEMBERSHIP: 2504 case IP_BLOCK_SOURCE: 2505 case IP_UNBLOCK_SOURCE: 2506 case IP_ADD_SOURCE_MEMBERSHIP: 2507 case IP_DROP_SOURCE_MEMBERSHIP: 2508 case MCAST_JOIN_GROUP: 2509 case MCAST_LEAVE_GROUP: 2510 case MCAST_BLOCK_SOURCE: 2511 case MCAST_UNBLOCK_SOURCE: 2512 case MCAST_JOIN_SOURCE_GROUP: 2513 case MCAST_LEAVE_SOURCE_GROUP: 2514 case MRT_INIT: 2515 case MRT_DONE: 2516 case MRT_ADD_VIF: 2517 case MRT_DEL_VIF: 2518 case MRT_ADD_MFC: 2519 case MRT_DEL_MFC: 2520 case MRT_VERSION: 2521 case MRT_ASSERT: 2522 case IP_SEC_OPT: 2523 case IP_NEXTHOP: 2524 /* 2525 * "soft" error (negative) 2526 * option not handled at this level 2527 * Note: Do not modify *outlenp 2528 */ 2529 return (-EINVAL); 2530 default: 2531 *outlenp = 0; 2532 return (EINVAL); 2533 } 2534 break; 2535 case IPPROTO_IPV6: { 2536 ip6_pkt_t *ipp; 2537 boolean_t sticky; 2538 2539 if (icmp->icmp_family != AF_INET6) { 2540 *outlenp = 0; 2541 return (ENOPROTOOPT); 2542 } 2543 /* 2544 * Deal with both sticky options and ancillary data 2545 */ 2546 if (thisdg_attrs == NULL) { 2547 /* sticky options, or none */ 2548 ipp = &icmp->icmp_sticky_ipp; 2549 sticky = B_TRUE; 2550 } else { 2551 /* ancillary data */ 2552 ipp = (ip6_pkt_t *)thisdg_attrs; 2553 sticky = B_FALSE; 
2554 } 2555 2556 switch (name) { 2557 case IPV6_MULTICAST_IF: 2558 if (!checkonly) { 2559 icmp->icmp_multicast_if_index = *i1; 2560 PASS_OPT_TO_IP(connp); 2561 } 2562 break; 2563 case IPV6_UNICAST_HOPS: 2564 /* -1 means use default */ 2565 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2566 *outlenp = 0; 2567 return (EINVAL); 2568 } 2569 if (!checkonly) { 2570 if (*i1 == -1) { 2571 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2572 is->is_ipv6_hoplimit; 2573 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2574 /* Pass modified value to IP. */ 2575 *i1 = ipp->ipp_hoplimit; 2576 } else { 2577 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2578 (uint8_t)*i1; 2579 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2580 } 2581 /* Rebuild the header template */ 2582 error = icmp_build_hdrs(icmp); 2583 if (error != 0) { 2584 *outlenp = 0; 2585 return (error); 2586 } 2587 } 2588 break; 2589 case IPV6_MULTICAST_HOPS: 2590 /* -1 means use default */ 2591 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2592 *outlenp = 0; 2593 return (EINVAL); 2594 } 2595 if (!checkonly) { 2596 if (*i1 == -1) { 2597 icmp->icmp_multicast_ttl = 2598 ipp->ipp_multicast_hops = 2599 IP_DEFAULT_MULTICAST_TTL; 2600 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2601 /* Pass modified value to IP. */ 2602 *i1 = icmp->icmp_multicast_ttl; 2603 } else { 2604 icmp->icmp_multicast_ttl = 2605 ipp->ipp_multicast_hops = 2606 (uint8_t)*i1; 2607 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2608 } 2609 } 2610 break; 2611 case IPV6_MULTICAST_LOOP: 2612 if (*i1 != 0 && *i1 != 1) { 2613 *outlenp = 0; 2614 return (EINVAL); 2615 } 2616 if (!checkonly) { 2617 connp->conn_multicast_loop = *i1; 2618 PASS_OPT_TO_IP(connp); 2619 } 2620 break; 2621 case IPV6_CHECKSUM: 2622 /* 2623 * Integer offset into the user data of where the 2624 * checksum is located. 2625 * Offset of -1 disables option. 2626 * Does not apply to IPPROTO_ICMPV6. 
2627 */ 2628 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2629 *outlenp = 0; 2630 return (EINVAL); 2631 } 2632 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2633 /* Negative or not 16 bit aligned offset */ 2634 *outlenp = 0; 2635 return (EINVAL); 2636 } 2637 if (checkonly) 2638 break; 2639 2640 if (*i1 == -1) { 2641 icmp->icmp_raw_checksum = 0; 2642 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2643 } else { 2644 icmp->icmp_raw_checksum = 1; 2645 icmp->icmp_checksum_off = *i1; 2646 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2647 } 2648 /* Rebuild the header template */ 2649 error = icmp_build_hdrs(icmp); 2650 if (error != 0) { 2651 *outlenp = 0; 2652 return (error); 2653 } 2654 break; 2655 case IPV6_JOIN_GROUP: 2656 case IPV6_LEAVE_GROUP: 2657 case MCAST_JOIN_GROUP: 2658 case MCAST_LEAVE_GROUP: 2659 case MCAST_BLOCK_SOURCE: 2660 case MCAST_UNBLOCK_SOURCE: 2661 case MCAST_JOIN_SOURCE_GROUP: 2662 case MCAST_LEAVE_SOURCE_GROUP: 2663 /* 2664 * "soft" error (negative) 2665 * option not handled at this level 2666 * Note: Do not modify *outlenp 2667 */ 2668 return (-EINVAL); 2669 case IPV6_BOUND_IF: 2670 if (!checkonly) { 2671 icmp->icmp_bound_if = *i1; 2672 PASS_OPT_TO_IP(connp); 2673 } 2674 break; 2675 case IPV6_UNSPEC_SRC: 2676 if (!checkonly) { 2677 icmp->icmp_unspec_source = onoff; 2678 PASS_OPT_TO_IP(connp); 2679 } 2680 break; 2681 case IPV6_RECVTCLASS: 2682 if (!checkonly) { 2683 icmp->icmp_ipv6_recvtclass = onoff; 2684 PASS_OPT_TO_IP(connp); 2685 } 2686 break; 2687 /* 2688 * Set boolean switches for ancillary data delivery 2689 */ 2690 case IPV6_RECVPKTINFO: 2691 if (!checkonly) { 2692 icmp->icmp_ip_recvpktinfo = onoff; 2693 PASS_OPT_TO_IP(connp); 2694 } 2695 break; 2696 case IPV6_RECVPATHMTU: 2697 if (!checkonly) { 2698 icmp->icmp_ipv6_recvpathmtu = onoff; 2699 PASS_OPT_TO_IP(connp); 2700 } 2701 break; 2702 case IPV6_RECVHOPLIMIT: 2703 if (!checkonly) { 2704 icmp->icmp_ipv6_recvhoplimit = onoff; 2705 PASS_OPT_TO_IP(connp); 2706 } 2707 break; 2708 case 
IPV6_RECVHOPOPTS: 2709 if (!checkonly) { 2710 icmp->icmp_ipv6_recvhopopts = onoff; 2711 PASS_OPT_TO_IP(connp); 2712 } 2713 break; 2714 case IPV6_RECVDSTOPTS: 2715 if (!checkonly) { 2716 icmp->icmp_ipv6_recvdstopts = onoff; 2717 PASS_OPT_TO_IP(connp); 2718 } 2719 break; 2720 case _OLD_IPV6_RECVDSTOPTS: 2721 if (!checkonly) 2722 icmp->icmp_old_ipv6_recvdstopts = onoff; 2723 break; 2724 case IPV6_RECVRTHDRDSTOPTS: 2725 if (!checkonly) { 2726 icmp->icmp_ipv6_recvrtdstopts = onoff; 2727 PASS_OPT_TO_IP(connp); 2728 } 2729 break; 2730 case IPV6_RECVRTHDR: 2731 if (!checkonly) { 2732 icmp->icmp_ipv6_recvrthdr = onoff; 2733 PASS_OPT_TO_IP(connp); 2734 } 2735 break; 2736 /* 2737 * Set sticky options or ancillary data. 2738 * If sticky options, (re)build any extension headers 2739 * that might be needed as a result. 2740 */ 2741 case IPV6_PKTINFO: 2742 /* 2743 * The source address and ifindex are verified 2744 * in ip_opt_set(). For ancillary data the 2745 * source address is checked in ip_wput_v6. 2746 */ 2747 if (inlen != 0 && inlen != 2748 sizeof (struct in6_pktinfo)) { 2749 return (EINVAL); 2750 } 2751 if (checkonly) 2752 break; 2753 2754 if (inlen == 0) { 2755 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2756 ipp->ipp_sticky_ignored |= 2757 (IPPF_IFINDEX|IPPF_ADDR); 2758 } else { 2759 struct in6_pktinfo *pkti; 2760 2761 pkti = (struct in6_pktinfo *)invalp; 2762 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2763 ipp->ipp_addr = pkti->ipi6_addr; 2764 if (ipp->ipp_ifindex != 0) 2765 ipp->ipp_fields |= IPPF_IFINDEX; 2766 else 2767 ipp->ipp_fields &= ~IPPF_IFINDEX; 2768 if (!IN6_IS_ADDR_UNSPECIFIED( 2769 &ipp->ipp_addr)) 2770 ipp->ipp_fields |= IPPF_ADDR; 2771 else 2772 ipp->ipp_fields &= ~IPPF_ADDR; 2773 } 2774 if (sticky) { 2775 error = icmp_build_hdrs(icmp); 2776 if (error != 0) 2777 return (error); 2778 PASS_OPT_TO_IP(connp); 2779 } 2780 break; 2781 case IPV6_HOPLIMIT: 2782 /* This option can only be used as ancillary data. 
*/ 2783 if (sticky) 2784 return (EINVAL); 2785 if (inlen != 0 && inlen != sizeof (int)) 2786 return (EINVAL); 2787 if (checkonly) 2788 break; 2789 2790 if (inlen == 0) { 2791 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2792 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2793 } else { 2794 if (*i1 > 255 || *i1 < -1) 2795 return (EINVAL); 2796 if (*i1 == -1) 2797 ipp->ipp_hoplimit = 2798 is->is_ipv6_hoplimit; 2799 else 2800 ipp->ipp_hoplimit = *i1; 2801 ipp->ipp_fields |= IPPF_HOPLIMIT; 2802 } 2803 break; 2804 case IPV6_TCLASS: 2805 /* 2806 * IPV6_RECVTCLASS accepts -1 as use kernel default 2807 * and [0, 255] as the actualy traffic class. 2808 */ 2809 if (inlen != 0 && inlen != sizeof (int)) { 2810 return (EINVAL); 2811 } 2812 if (checkonly) 2813 break; 2814 2815 if (inlen == 0) { 2816 ipp->ipp_fields &= ~IPPF_TCLASS; 2817 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2818 } else { 2819 if (*i1 >= 256 || *i1 < -1) 2820 return (EINVAL); 2821 if (*i1 == -1) { 2822 ipp->ipp_tclass = 2823 IPV6_FLOW_TCLASS( 2824 IPV6_DEFAULT_VERS_AND_FLOW); 2825 } else { 2826 ipp->ipp_tclass = *i1; 2827 } 2828 ipp->ipp_fields |= IPPF_TCLASS; 2829 } 2830 if (sticky) { 2831 error = icmp_build_hdrs(icmp); 2832 if (error != 0) 2833 return (error); 2834 } 2835 break; 2836 case IPV6_NEXTHOP: 2837 /* 2838 * IP will verify that the nexthop is reachable 2839 * and fail for sticky options. 
2840 */ 2841 if (inlen != 0 && inlen != sizeof (sin6_t)) { 2842 return (EINVAL); 2843 } 2844 if (checkonly) 2845 break; 2846 2847 if (inlen == 0) { 2848 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2849 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2850 } else { 2851 sin6_t *sin6 = (sin6_t *)invalp; 2852 2853 if (sin6->sin6_family != AF_INET6) { 2854 return (EAFNOSUPPORT); 2855 } 2856 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 2857 return (EADDRNOTAVAIL); 2858 } 2859 ipp->ipp_nexthop = sin6->sin6_addr; 2860 if (!IN6_IS_ADDR_UNSPECIFIED( 2861 &ipp->ipp_nexthop)) 2862 ipp->ipp_fields |= IPPF_NEXTHOP; 2863 else 2864 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2865 } 2866 if (sticky) { 2867 error = icmp_build_hdrs(icmp); 2868 if (error != 0) 2869 return (error); 2870 PASS_OPT_TO_IP(connp); 2871 } 2872 break; 2873 case IPV6_HOPOPTS: { 2874 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2875 /* 2876 * Sanity checks - minimum size, size a multiple of 2877 * eight bytes, and matching size passed in. 2878 */ 2879 if (inlen != 0 && 2880 inlen != (8 * (hopts->ip6h_len + 1))) { 2881 return (EINVAL); 2882 } 2883 2884 if (checkonly) 2885 break; 2886 error = optcom_pkt_set(invalp, inlen, sticky, 2887 (uchar_t **)&ipp->ipp_hopopts, 2888 &ipp->ipp_hopoptslen, 2889 sticky ? icmp->icmp_label_len_v6 : 0); 2890 if (error != 0) 2891 return (error); 2892 if (ipp->ipp_hopoptslen == 0) { 2893 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2894 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2895 } else { 2896 ipp->ipp_fields |= IPPF_HOPOPTS; 2897 } 2898 if (sticky) { 2899 error = icmp_build_hdrs(icmp); 2900 if (error != 0) 2901 return (error); 2902 } 2903 break; 2904 } 2905 case IPV6_RTHDRDSTOPTS: { 2906 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2907 2908 /* 2909 * Sanity checks - minimum size, size a multiple of 2910 * eight bytes, and matching size passed in. 
2911 */ 2912 if (inlen != 0 && 2913 inlen != (8 * (dopts->ip6d_len + 1))) 2914 return (EINVAL); 2915 2916 if (checkonly) 2917 break; 2918 2919 if (inlen == 0) { 2920 if (sticky && 2921 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2922 kmem_free(ipp->ipp_rtdstopts, 2923 ipp->ipp_rtdstoptslen); 2924 ipp->ipp_rtdstopts = NULL; 2925 ipp->ipp_rtdstoptslen = 0; 2926 } 2927 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2928 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2929 } else { 2930 error = optcom_pkt_set(invalp, inlen, sticky, 2931 (uchar_t **)&ipp->ipp_rtdstopts, 2932 &ipp->ipp_rtdstoptslen, 0); 2933 if (error != 0) 2934 return (error); 2935 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2936 } 2937 if (sticky) { 2938 error = icmp_build_hdrs(icmp); 2939 if (error != 0) 2940 return (error); 2941 } 2942 break; 2943 } 2944 case IPV6_DSTOPTS: { 2945 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2946 2947 /* 2948 * Sanity checks - minimum size, size a multiple of 2949 * eight bytes, and matching size passed in. 2950 */ 2951 if (inlen != 0 && 2952 inlen != (8 * (dopts->ip6d_len + 1))) 2953 return (EINVAL); 2954 2955 if (checkonly) 2956 break; 2957 2958 if (inlen == 0) { 2959 if (sticky && 2960 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2961 kmem_free(ipp->ipp_dstopts, 2962 ipp->ipp_dstoptslen); 2963 ipp->ipp_dstopts = NULL; 2964 ipp->ipp_dstoptslen = 0; 2965 } 2966 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2967 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2968 } else { 2969 error = optcom_pkt_set(invalp, inlen, sticky, 2970 (uchar_t **)&ipp->ipp_dstopts, 2971 &ipp->ipp_dstoptslen, 0); 2972 if (error != 0) 2973 return (error); 2974 ipp->ipp_fields |= IPPF_DSTOPTS; 2975 } 2976 if (sticky) { 2977 error = icmp_build_hdrs(icmp); 2978 if (error != 0) 2979 return (error); 2980 } 2981 break; 2982 } 2983 case IPV6_RTHDR: { 2984 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2985 2986 /* 2987 * Sanity checks - minimum size, size a multiple of 2988 * eight bytes, and matching size passed in. 
2989 */ 2990 if (inlen != 0 && 2991 inlen != (8 * (rt->ip6r_len + 1))) 2992 return (EINVAL); 2993 2994 if (checkonly) 2995 break; 2996 2997 if (inlen == 0) { 2998 if (sticky && 2999 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3000 kmem_free(ipp->ipp_rthdr, 3001 ipp->ipp_rthdrlen); 3002 ipp->ipp_rthdr = NULL; 3003 ipp->ipp_rthdrlen = 0; 3004 } 3005 ipp->ipp_fields &= ~IPPF_RTHDR; 3006 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3007 } else { 3008 error = optcom_pkt_set(invalp, inlen, sticky, 3009 (uchar_t **)&ipp->ipp_rthdr, 3010 &ipp->ipp_rthdrlen, 0); 3011 if (error != 0) 3012 return (error); 3013 ipp->ipp_fields |= IPPF_RTHDR; 3014 } 3015 if (sticky) { 3016 error = icmp_build_hdrs(icmp); 3017 if (error != 0) 3018 return (error); 3019 } 3020 break; 3021 } 3022 3023 case IPV6_DONTFRAG: 3024 if (checkonly) 3025 break; 3026 3027 if (onoff) { 3028 ipp->ipp_fields |= IPPF_DONTFRAG; 3029 } else { 3030 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3031 } 3032 break; 3033 3034 case IPV6_USE_MIN_MTU: 3035 if (inlen != sizeof (int)) 3036 return (EINVAL); 3037 3038 if (*i1 < -1 || *i1 > 1) 3039 return (EINVAL); 3040 3041 if (checkonly) 3042 break; 3043 3044 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3045 ipp->ipp_use_min_mtu = *i1; 3046 break; 3047 3048 /* 3049 * This option can't be set. Its only returned via 3050 * getsockopt() or ancillary data. 3051 */ 3052 case IPV6_PATHMTU: 3053 return (EINVAL); 3054 3055 case IPV6_SEC_OPT: 3056 case IPV6_SRC_PREFERENCES: 3057 case IPV6_V6ONLY: 3058 /* Handled at IP level */ 3059 return (-EINVAL); 3060 default: 3061 *outlenp = 0; 3062 return (EINVAL); 3063 } 3064 break; 3065 } /* end IPPROTO_IPV6 */ 3066 3067 case IPPROTO_ICMPV6: 3068 /* 3069 * Only allow IPv6 option processing on IPv6 sockets. 
3070 */ 3071 if (icmp->icmp_family != AF_INET6) { 3072 *outlenp = 0; 3073 return (ENOPROTOOPT); 3074 } 3075 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 3076 *outlenp = 0; 3077 return (ENOPROTOOPT); 3078 } 3079 switch (name) { 3080 case ICMP6_FILTER: 3081 if (!checkonly) { 3082 if ((inlen != 0) && 3083 (inlen != sizeof (icmp6_filter_t))) 3084 return (EINVAL); 3085 3086 if (inlen == 0) { 3087 if (icmp->icmp_filter != NULL) { 3088 kmem_free(icmp->icmp_filter, 3089 sizeof (icmp6_filter_t)); 3090 icmp->icmp_filter = NULL; 3091 } 3092 } else { 3093 if (icmp->icmp_filter == NULL) { 3094 icmp->icmp_filter = kmem_alloc( 3095 sizeof (icmp6_filter_t), 3096 KM_NOSLEEP); 3097 if (icmp->icmp_filter == NULL) { 3098 *outlenp = 0; 3099 return (ENOBUFS); 3100 } 3101 } 3102 (void) bcopy(invalp, icmp->icmp_filter, 3103 inlen); 3104 } 3105 } 3106 break; 3107 3108 default: 3109 *outlenp = 0; 3110 return (EINVAL); 3111 } 3112 break; 3113 default: 3114 *outlenp = 0; 3115 return (EINVAL); 3116 } 3117 /* 3118 * Common case of OK return with outval same as inval. 3119 */ 3120 if (invalp != outvalp) { 3121 /* don't trust bcopy for identical src/dst */ 3122 (void) bcopy(invalp, outvalp, inlen); 3123 } 3124 *outlenp = inlen; 3125 return (0); 3126 } 3127 3128 /* This routine sets socket options. */ 3129 /* ARGSUSED */ 3130 int 3131 icmp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3132 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3133 void *thisdg_attrs, cred_t *cr) 3134 { 3135 boolean_t checkonly; 3136 int error; 3137 3138 error = 0; 3139 switch (optset_context) { 3140 case SETFN_OPTCOM_CHECKONLY: 3141 checkonly = B_TRUE; 3142 /* 3143 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3144 * inlen != 0 implies value supplied and 3145 * we have to "pretend" to set it. 3146 * inlen == 0 implies that there is no 3147 * value part in T_CHECK request and just validation 3148 * done elsewhere should be enough, we just return here. 
3149 */ 3150 if (inlen == 0) { 3151 *outlenp = 0; 3152 error = 0; 3153 goto done; 3154 } 3155 break; 3156 case SETFN_OPTCOM_NEGOTIATE: 3157 checkonly = B_FALSE; 3158 break; 3159 case SETFN_UD_NEGOTIATE: 3160 case SETFN_CONN_NEGOTIATE: 3161 checkonly = B_FALSE; 3162 /* 3163 * Negotiating local and "association-related" options 3164 * through T_UNITDATA_REQ. 3165 * 3166 * Following routine can filter out ones we do not 3167 * want to be "set" this way. 3168 */ 3169 if (!icmp_opt_allow_udr_set(level, name)) { 3170 *outlenp = 0; 3171 error = EINVAL; 3172 goto done; 3173 } 3174 break; 3175 default: 3176 /* 3177 * We should never get here 3178 */ 3179 *outlenp = 0; 3180 error = EINVAL; 3181 goto done; 3182 } 3183 3184 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3185 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3186 error = icmp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3187 outvalp, cr, thisdg_attrs, checkonly); 3188 3189 done: 3190 return (error); 3191 } 3192 3193 /* This routine sets socket options. */ 3194 /* ARGSUSED */ 3195 int 3196 icmp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3197 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3198 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3199 { 3200 conn_t *connp = Q_TO_CONN(q); 3201 icmp_t *icmp; 3202 int error; 3203 3204 icmp = connp->conn_icmp; 3205 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3206 error = icmp_opt_set(connp, optset_context, level, name, inlen, invalp, 3207 outlenp, outvalp, thisdg_attrs, cr); 3208 rw_exit(&icmp->icmp_rwlock); 3209 return (error); 3210 } 3211 3212 /* 3213 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 3214 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 3215 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 3216 * headers. 3217 * Returns failure if can't allocate memory. 
3218 */ 3219 static int 3220 icmp_build_hdrs(icmp_t *icmp) 3221 { 3222 icmp_stack_t *is = icmp->icmp_is; 3223 uchar_t *hdrs; 3224 uint_t hdrs_len; 3225 ip6_t *ip6h; 3226 ip6i_t *ip6i; 3227 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 3228 3229 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 3230 hdrs_len = ip_total_hdrs_len_v6(ipp); 3231 ASSERT(hdrs_len != 0); 3232 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 3233 /* Need to reallocate */ 3234 if (hdrs_len != 0) { 3235 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3236 if (hdrs == NULL) 3237 return (ENOMEM); 3238 } else { 3239 hdrs = NULL; 3240 } 3241 if (icmp->icmp_sticky_hdrs_len != 0) { 3242 kmem_free(icmp->icmp_sticky_hdrs, 3243 icmp->icmp_sticky_hdrs_len); 3244 } 3245 icmp->icmp_sticky_hdrs = hdrs; 3246 icmp->icmp_sticky_hdrs_len = hdrs_len; 3247 } 3248 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 3249 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 3250 3251 /* Set header fields not in ipp */ 3252 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3253 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 3254 ip6h = (ip6_t *)&ip6i[1]; 3255 3256 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 3257 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 3258 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 3259 } 3260 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 3261 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 3262 } 3263 } else { 3264 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 3265 } 3266 3267 if (!(ipp->ipp_fields & IPPF_ADDR)) 3268 ip6h->ip6_src = icmp->icmp_v6src; 3269 3270 /* Try to get everything in a single mblk */ 3271 if (hdrs_len > icmp->icmp_max_hdr_len) { 3272 icmp->icmp_max_hdr_len = hdrs_len; 3273 rw_exit(&icmp->icmp_rwlock); 3274 (void) proto_set_tx_wroff(icmp->icmp_connp->conn_rq, 3275 icmp->icmp_connp, 3276 icmp->icmp_max_hdr_len + is->is_wroff_extra); 3277 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3278 } 3279 return (0); 3280 } 3281 3282 /* 3283 * This routine retrieves the value of an ND variable in a icmpparam_t 3284 * structure. 
It is called through nd_getset when a user reads the 3285 * variable. 3286 */ 3287 /* ARGSUSED */ 3288 static int 3289 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3290 { 3291 icmpparam_t *icmppa = (icmpparam_t *)cp; 3292 3293 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 3294 return (0); 3295 } 3296 3297 /* 3298 * Walk through the param array specified registering each element with the 3299 * named dispatch (ND) handler. 3300 */ 3301 static boolean_t 3302 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) 3303 { 3304 for (; cnt-- > 0; icmppa++) { 3305 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 3306 if (!nd_load(ndp, icmppa->icmp_param_name, 3307 icmp_param_get, icmp_param_set, 3308 (caddr_t)icmppa)) { 3309 nd_free(ndp); 3310 return (B_FALSE); 3311 } 3312 } 3313 } 3314 return (B_TRUE); 3315 } 3316 3317 /* This routine sets an ND variable in a icmpparam_t structure. */ 3318 /* ARGSUSED */ 3319 static int 3320 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3321 { 3322 long new_value; 3323 icmpparam_t *icmppa = (icmpparam_t *)cp; 3324 3325 /* 3326 * Fail the request if the new value does not lie within the 3327 * required bounds. 
3328 */ 3329 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3330 new_value < icmppa->icmp_param_min || 3331 new_value > icmppa->icmp_param_max) { 3332 return (EINVAL); 3333 } 3334 /* Set the new value */ 3335 icmppa->icmp_param_value = new_value; 3336 return (0); 3337 } 3338 3339 static mblk_t * 3340 icmp_queue_fallback(icmp_t *icmp, mblk_t *mp) 3341 { 3342 ASSERT(MUTEX_HELD(&icmp->icmp_recv_lock)); 3343 if (IPCL_IS_NONSTR(icmp->icmp_connp)) { 3344 /* 3345 * fallback has started but messages have not been moved yet 3346 */ 3347 if (icmp->icmp_fallback_queue_head == NULL) { 3348 ASSERT(icmp->icmp_fallback_queue_tail == NULL); 3349 icmp->icmp_fallback_queue_head = mp; 3350 icmp->icmp_fallback_queue_tail = mp; 3351 } else { 3352 ASSERT(icmp->icmp_fallback_queue_tail != NULL); 3353 icmp->icmp_fallback_queue_tail->b_next = mp; 3354 icmp->icmp_fallback_queue_tail = mp; 3355 } 3356 return (NULL); 3357 } else { 3358 /* 3359 * Fallback completed, let the caller putnext() the mblk. 3360 */ 3361 return (mp); 3362 } 3363 } 3364 3365 /* 3366 * Deliver data to ULP. In case we have a socket, and it's falling back to 3367 * TPI, then we'll queue the mp for later processing. 
3368 */ 3369 static void 3370 icmp_ulp_recv(conn_t *connp, mblk_t *mp) 3371 { 3372 3373 if (IPCL_IS_NONSTR(connp)) { 3374 icmp_t *icmp = connp->conn_icmp; 3375 int error; 3376 3377 if ((*connp->conn_upcalls->su_recv) 3378 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 3379 NULL) < 0) { 3380 mutex_enter(&icmp->icmp_recv_lock); 3381 if (error == ENOSPC) { 3382 /* 3383 * let's confirm while holding the lock 3384 */ 3385 if ((*connp->conn_upcalls->su_recv) 3386 (connp->conn_upper_handle, NULL, 0, 0, 3387 &error, NULL) < 0) { 3388 ASSERT(error == ENOSPC); 3389 if (error == ENOSPC) { 3390 connp->conn_flow_cntrld = 3391 B_TRUE; 3392 } 3393 } 3394 mutex_exit(&icmp->icmp_recv_lock); 3395 } else { 3396 ASSERT(error == EOPNOTSUPP); 3397 mp = icmp_queue_fallback(icmp, mp); 3398 mutex_exit(&icmp->icmp_recv_lock); 3399 if (mp != NULL) 3400 putnext(connp->conn_rq, mp); 3401 } 3402 } 3403 ASSERT(MUTEX_NOT_HELD(&icmp->icmp_recv_lock)); 3404 } else { 3405 putnext(connp->conn_rq, mp); 3406 } 3407 } 3408 3409 /*ARGSUSED2*/ 3410 static void 3411 icmp_input(void *arg1, mblk_t *mp, void *arg2) 3412 { 3413 conn_t *connp = (conn_t *)arg1; 3414 struct T_unitdata_ind *tudi; 3415 uchar_t *rptr; 3416 icmp_t *icmp; 3417 icmp_stack_t *is; 3418 sin_t *sin; 3419 sin6_t *sin6; 3420 ip6_t *ip6h; 3421 ip6i_t *ip6i; 3422 mblk_t *mp1; 3423 int hdr_len; 3424 ipha_t *ipha; 3425 int udi_size; /* Size of T_unitdata_ind */ 3426 uint_t ipvers; 3427 ip6_pkt_t ipp; 3428 uint8_t nexthdr; 3429 ip_pktinfo_t *pinfo = NULL; 3430 mblk_t *options_mp = NULL; 3431 uint_t icmp_opt = 0; 3432 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 3433 uint_t hopstrip; 3434 3435 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 3436 3437 icmp = connp->conn_icmp; 3438 is = icmp->icmp_is; 3439 rptr = mp->b_rptr; 3440 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3441 ASSERT(OK_32PTR(rptr)); 3442 3443 /* 3444 * IP should have prepended the options data in an M_CTL 3445 * Check M_CTL "type" to make sure are not here bcos of 
3446 * a valid ICMP message 3447 */ 3448 if (DB_TYPE(mp) == M_CTL) { 3449 /* 3450 * FIXME: does IP still do this? 3451 * IP sends up the IPSEC_IN message for handling IPSEC 3452 * policy at the TCP level. We don't need it here. 3453 */ 3454 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 3455 mp1 = mp->b_cont; 3456 freeb(mp); 3457 mp = mp1; 3458 rptr = mp->b_rptr; 3459 } else if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3460 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3461 IN_PKTINFO) { 3462 /* 3463 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3464 * has been prepended to the packet by IP. We need to 3465 * extract the mblk and adjust the rptr 3466 */ 3467 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3468 options_mp = mp; 3469 mp = mp->b_cont; 3470 rptr = mp->b_rptr; 3471 } else { 3472 /* 3473 * ICMP messages. 3474 */ 3475 icmp_icmp_error(connp, mp); 3476 return; 3477 } 3478 } 3479 3480 /* 3481 * Discard message if it is misaligned or smaller than the IP header. 3482 */ 3483 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3484 freemsg(mp); 3485 if (options_mp != NULL) 3486 freeb(options_mp); 3487 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3488 return; 3489 } 3490 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3491 3492 /* Handle M_DATA messages containing IP packets messages */ 3493 if (ipvers == IPV4_VERSION) { 3494 /* 3495 * Special case where IP attaches 3496 * the IRE needs to be handled so that we don't send up 3497 * IRE to the user land. 
3498 */ 3499 ipha = (ipha_t *)rptr; 3500 hdr_len = IPH_HDR_LENGTH(ipha); 3501 3502 if (ipha->ipha_protocol == IPPROTO_TCP) { 3503 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3504 3505 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3506 TH_SYN) && mp->b_cont != NULL) { 3507 mp1 = mp->b_cont; 3508 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3509 freeb(mp1); 3510 mp->b_cont = NULL; 3511 } 3512 } 3513 } 3514 if (is->is_bsd_compat) { 3515 ushort_t len; 3516 len = ntohs(ipha->ipha_length); 3517 3518 if (mp->b_datap->db_ref > 1) { 3519 /* 3520 * Allocate a new IP header so that we can 3521 * modify ipha_length. 3522 */ 3523 mblk_t *mp1; 3524 3525 mp1 = allocb(hdr_len, BPRI_MED); 3526 if (!mp1) { 3527 freemsg(mp); 3528 if (options_mp != NULL) 3529 freeb(options_mp); 3530 BUMP_MIB(&is->is_rawip_mib, 3531 rawipInErrors); 3532 return; 3533 } 3534 bcopy(rptr, mp1->b_rptr, hdr_len); 3535 mp->b_rptr = rptr + hdr_len; 3536 rptr = mp1->b_rptr; 3537 ipha = (ipha_t *)rptr; 3538 mp1->b_cont = mp; 3539 mp1->b_wptr = rptr + hdr_len; 3540 mp = mp1; 3541 } 3542 len -= hdr_len; 3543 ipha->ipha_length = htons(len); 3544 } 3545 } 3546 3547 /* 3548 * This is the inbound data path. Packets are passed upstream as 3549 * T_UNITDATA_IND messages with full IP headers still attached. 3550 */ 3551 if (icmp->icmp_family == AF_INET) { 3552 ASSERT(ipvers == IPV4_VERSION); 3553 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3554 if (icmp->icmp_recvif && (pinfo != NULL) && 3555 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3556 udi_size += sizeof (struct T_opthdr) + 3557 sizeof (uint_t); 3558 } 3559 3560 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3561 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3562 udi_size += sizeof (struct T_opthdr) + 3563 sizeof (struct in_pktinfo); 3564 } 3565 3566 /* 3567 * If SO_TIMESTAMP is set allocate the appropriate sized 3568 * buffer. 
Since gethrestime() expects a pointer aligned 3569 * argument, we allocate space necessary for extra 3570 * alignment (even though it might not be used). 3571 */ 3572 if (icmp->icmp_timestamp) { 3573 udi_size += sizeof (struct T_opthdr) + 3574 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3575 } 3576 mp1 = allocb(udi_size, BPRI_MED); 3577 if (mp1 == NULL) { 3578 freemsg(mp); 3579 if (options_mp != NULL) 3580 freeb(options_mp); 3581 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3582 return; 3583 } 3584 mp1->b_cont = mp; 3585 mp = mp1; 3586 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3587 mp->b_datap->db_type = M_PROTO; 3588 mp->b_wptr = (uchar_t *)tudi + udi_size; 3589 tudi->PRIM_type = T_UNITDATA_IND; 3590 tudi->SRC_length = sizeof (sin_t); 3591 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3592 sin = (sin_t *)&tudi[1]; 3593 *sin = sin_null; 3594 sin->sin_family = AF_INET; 3595 sin->sin_addr.s_addr = ipha->ipha_src; 3596 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3597 sizeof (sin_t); 3598 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3599 tudi->OPT_length = udi_size; 3600 3601 /* 3602 * Add options if IP_RECVIF is set 3603 */ 3604 if (udi_size != 0) { 3605 char *dstopt; 3606 3607 dstopt = (char *)&sin[1]; 3608 if (icmp->icmp_recvif && (pinfo != NULL) && 3609 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3610 3611 struct T_opthdr *toh; 3612 uint_t *dstptr; 3613 3614 toh = (struct T_opthdr *)dstopt; 3615 toh->level = IPPROTO_IP; 3616 toh->name = IP_RECVIF; 3617 toh->len = sizeof (struct T_opthdr) + 3618 sizeof (uint_t); 3619 toh->status = 0; 3620 dstopt += sizeof (struct T_opthdr); 3621 dstptr = (uint_t *)dstopt; 3622 *dstptr = pinfo->ip_pkt_ifindex; 3623 dstopt += sizeof (uint_t); 3624 udi_size -= toh->len; 3625 } 3626 if (icmp->icmp_timestamp) { 3627 struct T_opthdr *toh; 3628 3629 toh = (struct T_opthdr *)dstopt; 3630 toh->level = SOL_SOCKET; 3631 toh->name = SCM_TIMESTAMP; 3632 toh->len = sizeof (struct T_opthdr) + 3633 sizeof (timestruc_t) + 
_POINTER_ALIGNMENT; 3634 toh->status = 0; 3635 dstopt += sizeof (struct T_opthdr); 3636 /* Align for gethrestime() */ 3637 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 3638 sizeof (intptr_t)); 3639 gethrestime((timestruc_t *)dstopt); 3640 dstopt = (char *)toh + toh->len; 3641 udi_size -= toh->len; 3642 } 3643 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3644 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3645 struct T_opthdr *toh; 3646 struct in_pktinfo *pktinfop; 3647 3648 toh = (struct T_opthdr *)dstopt; 3649 toh->level = IPPROTO_IP; 3650 toh->name = IP_PKTINFO; 3651 toh->len = sizeof (struct T_opthdr) + 3652 sizeof (in_pktinfo_t); 3653 toh->status = 0; 3654 dstopt += sizeof (struct T_opthdr); 3655 pktinfop = (struct in_pktinfo *)dstopt; 3656 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3657 pktinfop->ipi_spec_dst = 3658 pinfo->ip_pkt_match_addr; 3659 3660 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3661 3662 dstopt += sizeof (struct in_pktinfo); 3663 udi_size -= toh->len; 3664 } 3665 3666 /* Consumed all of allocated space */ 3667 ASSERT(udi_size == 0); 3668 } 3669 3670 if (options_mp != NULL) 3671 freeb(options_mp); 3672 3673 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3674 goto deliver; 3675 } 3676 3677 /* 3678 * We don't need options_mp in the IPv6 path. 3679 */ 3680 if (options_mp != NULL) { 3681 freeb(options_mp); 3682 options_mp = NULL; 3683 } 3684 3685 /* 3686 * Discard message if it is smaller than the IPv6 header 3687 * or if the header is malformed. 3688 */ 3689 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3690 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3691 icmp->icmp_family != AF_INET6) { 3692 freemsg(mp); 3693 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3694 return; 3695 } 3696 3697 /* Initialize */ 3698 ipp.ipp_fields = 0; 3699 hopstrip = 0; 3700 3701 ip6h = (ip6_t *)rptr; 3702 /* 3703 * Call on ip_find_hdr_v6 which gets the total hdr len 3704 * as well as individual lenghts of ext hdrs (and ptrs to 3705 * them). 
3706 */ 3707 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3708 /* Look for ifindex information */ 3709 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3710 ip6i = (ip6i_t *)ip6h; 3711 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3712 ASSERT(ip6i->ip6i_ifindex != 0); 3713 ipp.ipp_fields |= IPPF_IFINDEX; 3714 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3715 } 3716 rptr = (uchar_t *)&ip6i[1]; 3717 mp->b_rptr = rptr; 3718 if (rptr == mp->b_wptr) { 3719 mp1 = mp->b_cont; 3720 freeb(mp); 3721 mp = mp1; 3722 rptr = mp->b_rptr; 3723 } 3724 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3725 ip6h = (ip6_t *)rptr; 3726 } 3727 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3728 3729 /* 3730 * We need to lie a bit to the user because users inside 3731 * labeled compartments should not see their own labels. We 3732 * assume that in all other respects IP has checked the label, 3733 * and that the label is always first among the options. (If 3734 * it's not first, then this code won't see it, and the option 3735 * will be passed along to the user.) 3736 * 3737 * If we had multilevel ICMP sockets, then the following code 3738 * should be skipped for them to allow the user to see the 3739 * label. 3740 * 3741 * Alignment restrictions in the definition of IP options 3742 * (namely, the requirement that the 4-octet DOI goes on a 3743 * 4-octet boundary) mean that we know exactly where the option 3744 * should start, but we're lenient for other hosts. 3745 * 3746 * Note that there are no multilevel ICMP or raw IP sockets 3747 * yet, thus nobody ever sees the IP6OPT_LS option. 
3748 */ 3749 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3750 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3751 const uchar_t *ucp = 3752 (const uchar_t *)ipp.ipp_hopopts + 2; 3753 int remlen = ipp.ipp_hopoptslen - 2; 3754 3755 while (remlen > 0) { 3756 if (*ucp == IP6OPT_PAD1) { 3757 remlen--; 3758 ucp++; 3759 } else if (*ucp == IP6OPT_PADN) { 3760 remlen -= ucp[1] + 2; 3761 ucp += ucp[1] + 2; 3762 } else if (*ucp == ip6opt_ls) { 3763 hopstrip = (ucp - 3764 (const uchar_t *)ipp.ipp_hopopts) + 3765 ucp[1] + 2; 3766 hopstrip = (hopstrip + 7) & ~7; 3767 break; 3768 } else { 3769 /* label option must be first */ 3770 break; 3771 } 3772 } 3773 } 3774 } else { 3775 hdr_len = IPV6_HDR_LEN; 3776 ip6i = NULL; 3777 nexthdr = ip6h->ip6_nxt; 3778 } 3779 /* 3780 * One special case where IP attaches the IRE needs to 3781 * be handled so that we don't send up IRE to the user land. 3782 */ 3783 if (nexthdr == IPPROTO_TCP) { 3784 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3785 3786 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3787 mp->b_cont != NULL) { 3788 mp1 = mp->b_cont; 3789 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3790 freeb(mp1); 3791 mp->b_cont = NULL; 3792 } 3793 } 3794 } 3795 /* 3796 * Check a filter for ICMPv6 types if needed. 3797 * Verify raw checksums if needed. 
3798 */ 3799 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3800 if (icmp->icmp_filter != NULL) { 3801 int type; 3802 3803 /* Assumes that IP has done the pullupmsg */ 3804 type = mp->b_rptr[hdr_len]; 3805 3806 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3807 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3808 freemsg(mp); 3809 return; 3810 } 3811 } else { 3812 /* Checksum */ 3813 uint16_t *up; 3814 uint32_t sum; 3815 int remlen; 3816 3817 up = (uint16_t *)&ip6h->ip6_src; 3818 3819 remlen = msgdsize(mp) - hdr_len; 3820 sum = htons(icmp->icmp_proto + remlen) 3821 + up[0] + up[1] + up[2] + up[3] 3822 + up[4] + up[5] + up[6] + up[7] 3823 + up[8] + up[9] + up[10] + up[11] 3824 + up[12] + up[13] + up[14] + up[15]; 3825 sum = (sum & 0xffff) + (sum >> 16); 3826 sum = IP_CSUM(mp, hdr_len, sum); 3827 if (sum != 0) { 3828 /* IPv6 RAW checksum failed */ 3829 ip0dbg(("icmp_rput: RAW checksum " 3830 "failed %x\n", sum)); 3831 freemsg(mp); 3832 BUMP_MIB(&is->is_rawip_mib, 3833 rawipInCksumErrs); 3834 return; 3835 } 3836 } 3837 } 3838 /* Skip all the IPv6 headers per API */ 3839 mp->b_rptr += hdr_len; 3840 3841 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3842 3843 /* 3844 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3845 * maintain state information, instead of relying on icmp_t 3846 * structure, since there arent any locks protecting these members 3847 * and there is a window where there might be a race between a 3848 * thread setting options on the write side and a thread reading 3849 * these options on the read size. 
3850 */ 3851 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3852 IPPF_RTHDR|IPPF_IFINDEX)) { 3853 if (icmp->icmp_ipv6_recvhopopts && 3854 (ipp.ipp_fields & IPPF_HOPOPTS) && 3855 ipp.ipp_hopoptslen > hopstrip) { 3856 udi_size += sizeof (struct T_opthdr) + 3857 ipp.ipp_hopoptslen - hopstrip; 3858 icmp_opt |= IPPF_HOPOPTS; 3859 } 3860 if ((icmp->icmp_ipv6_recvdstopts || 3861 icmp->icmp_old_ipv6_recvdstopts) && 3862 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3863 udi_size += sizeof (struct T_opthdr) + 3864 ipp.ipp_dstoptslen; 3865 icmp_opt |= IPPF_DSTOPTS; 3866 } 3867 if (((icmp->icmp_ipv6_recvdstopts && 3868 icmp->icmp_ipv6_recvrthdr && 3869 (ipp.ipp_fields & IPPF_RTHDR)) || 3870 icmp->icmp_ipv6_recvrtdstopts) && 3871 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3872 udi_size += sizeof (struct T_opthdr) + 3873 ipp.ipp_rtdstoptslen; 3874 icmp_opt |= IPPF_RTDSTOPTS; 3875 } 3876 if (icmp->icmp_ipv6_recvrthdr && 3877 (ipp.ipp_fields & IPPF_RTHDR)) { 3878 udi_size += sizeof (struct T_opthdr) + 3879 ipp.ipp_rthdrlen; 3880 icmp_opt |= IPPF_RTHDR; 3881 } 3882 if (icmp->icmp_ip_recvpktinfo && 3883 (ipp.ipp_fields & IPPF_IFINDEX)) { 3884 udi_size += sizeof (struct T_opthdr) + 3885 sizeof (struct in6_pktinfo); 3886 icmp_opt |= IPPF_IFINDEX; 3887 } 3888 } 3889 if (icmp->icmp_ipv6_recvhoplimit) { 3890 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3891 icmp_ipv6_recvhoplimit = B_TRUE; 3892 } 3893 3894 if (icmp->icmp_ipv6_recvtclass) 3895 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3896 3897 /* 3898 * If SO_TIMESTAMP is set allocate the appropriate sized 3899 * buffer. Since gethrestime() expects a pointer aligned 3900 * argument, we allocate space necessary for extra 3901 * alignment (even though it might not be used). 
3902 */ 3903 if (icmp->icmp_timestamp) { 3904 udi_size += sizeof (struct T_opthdr) + 3905 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3906 } 3907 3908 mp1 = allocb(udi_size, BPRI_MED); 3909 if (mp1 == NULL) { 3910 freemsg(mp); 3911 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3912 return; 3913 } 3914 mp1->b_cont = mp; 3915 mp = mp1; 3916 mp->b_datap->db_type = M_PROTO; 3917 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3918 mp->b_wptr = (uchar_t *)tudi + udi_size; 3919 tudi->PRIM_type = T_UNITDATA_IND; 3920 tudi->SRC_length = sizeof (sin6_t); 3921 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3922 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3923 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3924 tudi->OPT_length = udi_size; 3925 sin6 = (sin6_t *)&tudi[1]; 3926 sin6->sin6_port = 0; 3927 sin6->sin6_family = AF_INET6; 3928 3929 sin6->sin6_addr = ip6h->ip6_src; 3930 /* No sin6_flowinfo per API */ 3931 sin6->sin6_flowinfo = 0; 3932 /* For link-scope source pass up scope id */ 3933 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3934 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3935 sin6->sin6_scope_id = ipp.ipp_ifindex; 3936 else 3937 sin6->sin6_scope_id = 0; 3938 3939 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3940 icmp->icmp_zoneid, is->is_netstack); 3941 3942 if (udi_size != 0) { 3943 uchar_t *dstopt; 3944 3945 dstopt = (uchar_t *)&sin6[1]; 3946 if (icmp_opt & IPPF_IFINDEX) { 3947 struct T_opthdr *toh; 3948 struct in6_pktinfo *pkti; 3949 3950 toh = (struct T_opthdr *)dstopt; 3951 toh->level = IPPROTO_IPV6; 3952 toh->name = IPV6_PKTINFO; 3953 toh->len = sizeof (struct T_opthdr) + 3954 sizeof (*pkti); 3955 toh->status = 0; 3956 dstopt += sizeof (struct T_opthdr); 3957 pkti = (struct in6_pktinfo *)dstopt; 3958 pkti->ipi6_addr = ip6h->ip6_dst; 3959 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3960 dstopt += sizeof (*pkti); 3961 udi_size -= toh->len; 3962 } 3963 if (icmp_ipv6_recvhoplimit) { 3964 struct T_opthdr *toh; 3965 3966 toh = 
(struct T_opthdr *)dstopt; 3967 toh->level = IPPROTO_IPV6; 3968 toh->name = IPV6_HOPLIMIT; 3969 toh->len = sizeof (struct T_opthdr) + 3970 sizeof (uint_t); 3971 toh->status = 0; 3972 dstopt += sizeof (struct T_opthdr); 3973 *(uint_t *)dstopt = ip6h->ip6_hops; 3974 dstopt += sizeof (uint_t); 3975 udi_size -= toh->len; 3976 } 3977 if (icmp->icmp_ipv6_recvtclass) { 3978 struct T_opthdr *toh; 3979 3980 toh = (struct T_opthdr *)dstopt; 3981 toh->level = IPPROTO_IPV6; 3982 toh->name = IPV6_TCLASS; 3983 toh->len = sizeof (struct T_opthdr) + 3984 sizeof (uint_t); 3985 toh->status = 0; 3986 dstopt += sizeof (struct T_opthdr); 3987 *(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3988 dstopt += sizeof (uint_t); 3989 udi_size -= toh->len; 3990 } 3991 if (icmp->icmp_timestamp) { 3992 struct T_opthdr *toh; 3993 3994 toh = (struct T_opthdr *)dstopt; 3995 toh->level = SOL_SOCKET; 3996 toh->name = SCM_TIMESTAMP; 3997 toh->len = sizeof (struct T_opthdr) + 3998 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3999 toh->status = 0; 4000 dstopt += sizeof (struct T_opthdr); 4001 /* Align for gethrestime() */ 4002 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4003 sizeof (intptr_t)); 4004 gethrestime((timestruc_t *)dstopt); 4005 dstopt = (uchar_t *)toh + toh->len; 4006 udi_size -= toh->len; 4007 } 4008 4009 if (icmp_opt & IPPF_HOPOPTS) { 4010 struct T_opthdr *toh; 4011 4012 toh = (struct T_opthdr *)dstopt; 4013 toh->level = IPPROTO_IPV6; 4014 toh->name = IPV6_HOPOPTS; 4015 toh->len = sizeof (struct T_opthdr) + 4016 ipp.ipp_hopoptslen - hopstrip; 4017 toh->status = 0; 4018 dstopt += sizeof (struct T_opthdr); 4019 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 4020 ipp.ipp_hopoptslen - hopstrip); 4021 if (hopstrip > 0) { 4022 /* copy next header value and fake length */ 4023 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 4024 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 4025 hopstrip / 8; 4026 } 4027 dstopt += ipp.ipp_hopoptslen - hopstrip; 4028 udi_size -= toh->len; 4029 } 4030 if 
(icmp_opt & IPPF_RTDSTOPTS) { 4031 struct T_opthdr *toh; 4032 4033 toh = (struct T_opthdr *)dstopt; 4034 toh->level = IPPROTO_IPV6; 4035 toh->name = IPV6_DSTOPTS; 4036 toh->len = sizeof (struct T_opthdr) + 4037 ipp.ipp_rtdstoptslen; 4038 toh->status = 0; 4039 dstopt += sizeof (struct T_opthdr); 4040 bcopy(ipp.ipp_rtdstopts, dstopt, 4041 ipp.ipp_rtdstoptslen); 4042 dstopt += ipp.ipp_rtdstoptslen; 4043 udi_size -= toh->len; 4044 } 4045 if (icmp_opt & IPPF_RTHDR) { 4046 struct T_opthdr *toh; 4047 4048 toh = (struct T_opthdr *)dstopt; 4049 toh->level = IPPROTO_IPV6; 4050 toh->name = IPV6_RTHDR; 4051 toh->len = sizeof (struct T_opthdr) + 4052 ipp.ipp_rthdrlen; 4053 toh->status = 0; 4054 dstopt += sizeof (struct T_opthdr); 4055 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4056 dstopt += ipp.ipp_rthdrlen; 4057 udi_size -= toh->len; 4058 } 4059 if (icmp_opt & IPPF_DSTOPTS) { 4060 struct T_opthdr *toh; 4061 4062 toh = (struct T_opthdr *)dstopt; 4063 toh->level = IPPROTO_IPV6; 4064 toh->name = IPV6_DSTOPTS; 4065 toh->len = sizeof (struct T_opthdr) + 4066 ipp.ipp_dstoptslen; 4067 toh->status = 0; 4068 dstopt += sizeof (struct T_opthdr); 4069 bcopy(ipp.ipp_dstopts, dstopt, 4070 ipp.ipp_dstoptslen); 4071 dstopt += ipp.ipp_dstoptslen; 4072 udi_size -= toh->len; 4073 } 4074 /* Consumed all of allocated space */ 4075 ASSERT(udi_size == 0); 4076 } 4077 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 4078 4079 deliver: 4080 icmp_ulp_recv(connp, mp); 4081 4082 } 4083 4084 /* 4085 * return SNMP stuff in buffer in mpdata 4086 */ 4087 mblk_t * 4088 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 4089 { 4090 mblk_t *mpdata; 4091 struct opthdr *optp; 4092 conn_t *connp = Q_TO_CONN(q); 4093 icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 4094 mblk_t *mp2ctl; 4095 4096 /* 4097 * make a copy of the original message 4098 */ 4099 mp2ctl = copymsg(mpctl); 4100 4101 if (mpctl == NULL || 4102 (mpdata = mpctl->b_cont) == NULL) { 4103 freemsg(mpctl); 4104 freemsg(mp2ctl); 4105 return (0); 4106 } 
4107 4108 /* fixed length structure for IPv4 and IPv6 counters */ 4109 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4110 optp->level = EXPER_RAWIP; 4111 optp->name = 0; 4112 (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, 4113 sizeof (is->is_rawip_mib)); 4114 optp->len = msgdsize(mpdata); 4115 qreply(q, mpctl); 4116 4117 return (mp2ctl); 4118 } 4119 4120 /* 4121 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 4122 * TODO: If this ever actually tries to set anything, it needs to be 4123 * to do the appropriate locking. 4124 */ 4125 /* ARGSUSED */ 4126 int 4127 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4128 uchar_t *ptr, int len) 4129 { 4130 switch (level) { 4131 case EXPER_RAWIP: 4132 return (0); 4133 default: 4134 return (1); 4135 } 4136 } 4137 4138 /* 4139 * This routine creates a T_UDERROR_IND message and passes it upstream. 4140 * The address and options are copied from the T_UNITDATA_REQ message 4141 * passed in mp. This message is freed. 4142 */ 4143 static void 4144 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 4145 { 4146 mblk_t *mp1; 4147 uchar_t *rptr = mp->b_rptr; 4148 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 4149 4150 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 4151 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 4152 tudr->OPT_length, err); 4153 if (mp1) 4154 qreply(q, mp1); 4155 freemsg(mp); 4156 } 4157 4158 4159 static int 4160 rawip_do_unbind(conn_t *connp) 4161 { 4162 icmp_t *icmp = connp->conn_icmp; 4163 4164 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4165 /* If a bind has not been done, we can't unbind. 
*/ 4166 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 4167 rw_exit(&icmp->icmp_rwlock); 4168 return (-TOUTSTATE); 4169 } 4170 icmp->icmp_pending_op = T_UNBIND_REQ; 4171 rw_exit(&icmp->icmp_rwlock); 4172 4173 /* 4174 * Call ip to unbind 4175 */ 4176 4177 ip_unbind(connp); 4178 4179 /* 4180 * Once we're unbound from IP, the pending operation may be cleared 4181 * here. 4182 */ 4183 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4184 V6_SET_ZERO(icmp->icmp_v6src); 4185 V6_SET_ZERO(icmp->icmp_bound_v6src); 4186 icmp->icmp_pending_op = -1; 4187 icmp->icmp_state = TS_UNBND; 4188 if (icmp->icmp_family == AF_INET6) 4189 (void) icmp_build_hdrs(icmp); 4190 rw_exit(&icmp->icmp_rwlock); 4191 return (0); 4192 } 4193 4194 /* 4195 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 4196 * After some error checking, the message is passed downstream to ip. 4197 */ 4198 static void 4199 icmp_tpi_unbind(queue_t *q, mblk_t *mp) 4200 { 4201 conn_t *connp = Q_TO_CONN(q); 4202 int error; 4203 4204 ASSERT(mp->b_cont == NULL); 4205 error = rawip_do_unbind(connp); 4206 if (error) { 4207 if (error < 0) { 4208 icmp_err_ack(q, mp, -error, 0); 4209 } else { 4210 icmp_err_ack(q, mp, 0, error); 4211 } 4212 return; 4213 } 4214 4215 /* 4216 * Convert mp into a T_OK_ACK 4217 */ 4218 4219 mp = mi_tpi_ok_ack_alloc(mp); 4220 4221 /* 4222 * should not happen in practice... T_OK_ACK is smaller than the 4223 * original message. 4224 */ 4225 ASSERT(mp != NULL); 4226 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4227 qreply(q, mp); 4228 } 4229 4230 4231 /* 4232 * Process IPv4 packets that already include an IP header. 4233 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 4234 * IPPROTO_IGMP). 
4235 */ 4236 static int 4237 icmp_wput_hdrincl(queue_t *q, conn_t *connp, mblk_t *mp, icmp_t *icmp, 4238 ip4_pkt_t *pktinfop) 4239 { 4240 icmp_stack_t *is = icmp->icmp_is; 4241 ipha_t *ipha; 4242 int ip_hdr_length; 4243 int tp_hdr_len; 4244 int error; 4245 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 4246 uint32_t ip_snd_opt_len = 0; 4247 mblk_t *mp1; 4248 uint_t pkt_len; 4249 ip_opt_info_t optinfo; 4250 pid_t cpid; 4251 cred_t *cr; 4252 4253 rw_enter(&icmp->icmp_rwlock, RW_READER); 4254 4255 optinfo.ip_opt_flags = 0; 4256 optinfo.ip_opt_ill_index = 0; 4257 ipha = (ipha_t *)mp->b_rptr; 4258 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4259 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 4260 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4261 ASSERT(icmp != NULL); 4262 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4263 freemsg(mp); 4264 rw_exit(&icmp->icmp_rwlock); 4265 return (0); 4266 } 4267 ipha = (ipha_t *)mp->b_rptr; 4268 } 4269 ipha->ipha_version_and_hdr_length = 4270 (IP_VERSION<<4) | (ip_hdr_length>>2); 4271 4272 /* 4273 * Check if our saved options are valid; update if not. 4274 * TSOL Note: Since we are not in WRITER mode, ICMP packets 4275 * to different destination may require different labels, 4276 * or worse, ICMP packets to same IP address may require 4277 * different labels due to use of shared all-zones address. 4278 * We use conn_lock to ensure that lastdst, ip_snd_options, 4279 * and ip_snd_options_len are consistent for the current 4280 * destination and are updated atomically. 4281 */ 4282 mutex_enter(&connp->conn_lock); 4283 if (is_system_labeled()) { 4284 /* 4285 * Recompute the Trusted Extensions security label if 4286 * we're not going to the same destination as last 4287 * time or the cred attached to the received mblk 4288 * changed. 
4289 */ 4290 cr = msg_getcred(mp, &cpid); 4291 if (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4292 V4_PART_OF_V6(icmp->icmp_v6lastdst) != ipha->ipha_dst || 4293 cr != icmp->icmp_last_cred) { 4294 error = icmp_update_label(icmp, mp, ipha->ipha_dst); 4295 if (error != 0) { 4296 mutex_exit(&connp->conn_lock); 4297 rw_exit(&icmp->icmp_rwlock); 4298 return (error); 4299 } 4300 } 4301 /* 4302 * Apply credentials with modified security label if they 4303 * exist. icmp_update_label() may have generated these 4304 * credentials for packets to unlabeled remote nodes. 4305 */ 4306 if (icmp->icmp_effective_cred != NULL) 4307 mblk_setcred(mp, icmp->icmp_effective_cred, cpid); 4308 } 4309 4310 if (icmp->icmp_ip_snd_options_len > 0) { 4311 ip_snd_opt_len = icmp->icmp_ip_snd_options_len; 4312 bcopy(icmp->icmp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 4313 } 4314 mutex_exit(&connp->conn_lock); 4315 4316 /* 4317 * For the socket of SOCK_RAW type, the checksum is provided in the 4318 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4319 * tell IP that the application has sent a complete IP header and not 4320 * to compute the transport checksum nor change the DF flag. 4321 */ 4322 ipha->ipha_ident = IP_HDR_INCLUDED; 4323 ipha->ipha_hdr_checksum = 0; 4324 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 4325 /* Insert options if any */ 4326 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4327 /* 4328 * Put the IP header plus any transport header that is 4329 * checksumed by ip_wput into the first mblk. (ip_wput assumes 4330 * that at least the checksum field is in the first mblk.) 4331 */ 4332 switch (ipha->ipha_protocol) { 4333 case IPPROTO_UDP: 4334 tp_hdr_len = 8; 4335 break; 4336 case IPPROTO_TCP: 4337 tp_hdr_len = 20; 4338 break; 4339 default: 4340 tp_hdr_len = 0; 4341 break; 4342 } 4343 /* 4344 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 4345 * tp_hdr_len bytes will be in a single mblk. 
4346 */ 4347 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 4348 tp_hdr_len)) { 4349 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 4350 tp_hdr_len)) { 4351 BUMP_MIB(&is->is_rawip_mib, 4352 rawipOutErrors); 4353 freemsg(mp); 4354 rw_exit(&icmp->icmp_rwlock); 4355 return (0); 4356 } 4357 ipha = (ipha_t *)mp->b_rptr; 4358 } 4359 4360 /* 4361 * if the length is larger then the max allowed IP packet, 4362 * then send an error and abort the processing. 4363 */ 4364 pkt_len = ntohs(ipha->ipha_length) 4365 + ip_snd_opt_len; 4366 if (pkt_len > IP_MAXPACKET) { 4367 rw_exit(&icmp->icmp_rwlock); 4368 return (EMSGSIZE); 4369 } 4370 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra + 4371 tp_hdr_len, BPRI_LO))) { 4372 rw_exit(&icmp->icmp_rwlock); 4373 return (ENOMEM); 4374 } 4375 mp1->b_rptr += is->is_wroff_extra; 4376 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4377 4378 ipha->ipha_length = htons((uint16_t)pkt_len); 4379 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4380 4381 /* Copy transport header if any */ 4382 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4383 mp1->b_wptr += tp_hdr_len; 4384 4385 /* Add options */ 4386 ipha = (ipha_t *)mp1->b_rptr; 4387 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 4388 4389 /* Drop IP header and transport header from original */ 4390 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4391 4392 mp1->b_cont = mp; 4393 mp = mp1; 4394 /* 4395 * Massage source route putting first source 4396 * route in ipha_dst. 
4397 */ 4398 (void) ip_massage_options(ipha, is->is_netstack); 4399 } 4400 4401 if (pktinfop != NULL) { 4402 /* 4403 * Over write the source address provided in the header 4404 */ 4405 if (pktinfop->ip4_addr != INADDR_ANY) { 4406 ipha->ipha_src = pktinfop->ip4_addr; 4407 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4408 } 4409 4410 if (pktinfop->ip4_ill_index != 0) { 4411 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4412 } 4413 } 4414 4415 rw_exit(&icmp->icmp_rwlock); 4416 4417 ip_output_options(connp, mp, q, IP_WPUT, &optinfo); 4418 return (0); 4419 } 4420 4421 static int 4422 icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4423 { 4424 int err; 4425 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4426 icmp_stack_t *is = icmp->icmp_is; 4427 conn_t *connp = icmp->icmp_connp; 4428 cred_t *cred; 4429 cred_t *msg_cred; 4430 cred_t *effective_cred; 4431 4432 /* 4433 * All Solaris components should pass a db_credp 4434 * for this message, hence we ASSERT. 4435 * On production kernels we return an error to be robust against 4436 * random streams modules sitting on top of us. 4437 */ 4438 cred = msg_cred = msg_getcred(mp, NULL); 4439 ASSERT(cred != NULL); 4440 if (cred == NULL) 4441 return (EINVAL); 4442 4443 /* 4444 * Verify the destination is allowed to receive packets at 4445 * the security label of the message data. check_dest() 4446 * may create a new effective cred for this message 4447 * with a modified label or label flags. 4448 */ 4449 if ((err = tsol_check_dest(cred, &dst, IPV4_VERSION, 4450 connp->conn_mac_exempt, &effective_cred)) != 0) 4451 goto done; 4452 if (effective_cred != NULL) 4453 cred = effective_cred; 4454 4455 /* 4456 * Calculate the security label to be placed in the text 4457 * of the message (if any). 4458 */ 4459 if ((err = tsol_compute_label(cred, dst, opt_storage, 4460 is->is_netstack->netstack_ip)) != 0) 4461 goto done; 4462 4463 /* 4464 * Insert the security label in the cached ip options, 4465 * removing any old label that may exist. 
4466 */ 4467 if ((err = tsol_update_options(&icmp->icmp_ip_snd_options, 4468 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4469 opt_storage)) != 0) 4470 goto done; 4471 4472 /* 4473 * Save the destination address and cred we used to generate 4474 * the security label text. 4475 */ 4476 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4477 if (cred != icmp->icmp_effective_cred) { 4478 if (icmp->icmp_effective_cred != NULL) 4479 crfree(icmp->icmp_effective_cred); 4480 crhold(cred); 4481 icmp->icmp_effective_cred = cred; 4482 } 4483 4484 if (msg_cred != icmp->icmp_last_cred) { 4485 if (icmp->icmp_last_cred != NULL) 4486 crfree(icmp->icmp_last_cred); 4487 crhold(msg_cred); 4488 icmp->icmp_last_cred = msg_cred; 4489 } 4490 4491 done: 4492 if (effective_cred != NULL) 4493 crfree(effective_cred); 4494 4495 if (err != 0) { 4496 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4497 DTRACE_PROBE4( 4498 tx__ip__log__drop__updatelabel__icmp, 4499 char *, "icmp(1) failed to update options(2) on mp(3)", 4500 icmp_t *, icmp, char *, opt_storage, mblk_t *, mp); 4501 return (err); 4502 } 4503 return (0); 4504 } 4505 4506 /* 4507 * This routine handles all messages passed downstream. It either 4508 * consumes the message or passes it downstream; it never queues a 4509 * a message. 
4510 */ 4511 static void 4512 icmp_wput(queue_t *q, mblk_t *mp) 4513 { 4514 uchar_t *rptr = mp->b_rptr; 4515 mblk_t *mp1; 4516 #define tudr ((struct T_unitdata_req *)rptr) 4517 size_t ip_len; 4518 conn_t *connp = Q_TO_CONN(q); 4519 icmp_t *icmp = connp->conn_icmp; 4520 icmp_stack_t *is = icmp->icmp_is; 4521 sin6_t *sin6; 4522 sin_t *sin; 4523 ipaddr_t v4dst; 4524 ip4_pkt_t pktinfo; 4525 ip4_pkt_t *pktinfop = &pktinfo; 4526 ip6_pkt_t ipp_s; /* For ancillary data options */ 4527 ip6_pkt_t *ipp = &ipp_s; 4528 int error; 4529 4530 ipp->ipp_fields = 0; 4531 ipp->ipp_sticky_ignored = 0; 4532 4533 switch (mp->b_datap->db_type) { 4534 case M_DATA: 4535 if (icmp->icmp_hdrincl) { 4536 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4537 error = icmp_wput_hdrincl(q, connp, mp, icmp, NULL); 4538 if (error != 0) 4539 icmp_ud_err(q, mp, error); 4540 return; 4541 } 4542 freemsg(mp); 4543 return; 4544 case M_PROTO: 4545 case M_PCPROTO: 4546 ip_len = mp->b_wptr - rptr; 4547 if (ip_len >= sizeof (struct T_unitdata_req)) { 4548 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4549 if (((union T_primitives *)rptr)->type 4550 == T_UNITDATA_REQ) 4551 break; 4552 } 4553 /* FALLTHRU */ 4554 default: 4555 icmp_wput_other(q, mp); 4556 return; 4557 } 4558 4559 /* Handle T_UNITDATA_REQ messages here. 
*/ 4560 4561 mp1 = mp->b_cont; 4562 if (mp1 == NULL) { 4563 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4564 icmp_ud_err(q, mp, EPROTO); 4565 return; 4566 } 4567 4568 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4569 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4570 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4571 return; 4572 } 4573 4574 switch (icmp->icmp_family) { 4575 case AF_INET6: 4576 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4577 if (!OK_32PTR((char *)sin6) || 4578 tudr->DEST_length != sizeof (sin6_t) || 4579 sin6->sin6_family != AF_INET6) { 4580 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4581 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4582 return; 4583 } 4584 4585 /* No support for mapped addresses on raw sockets */ 4586 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4587 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4588 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4589 return; 4590 } 4591 4592 /* 4593 * Destination is a native IPv6 address. 4594 * Send out an IPv6 format packet. 
4595 */ 4596 if (tudr->OPT_length != 0) { 4597 int error; 4598 4599 error = 0; 4600 if (icmp_unitdata_opt_process(q, mp, &error, 4601 (void *)ipp) < 0) { 4602 /* failure */ 4603 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4604 icmp_ud_err(q, mp, error); 4605 return; 4606 } 4607 ASSERT(error == 0); 4608 } 4609 4610 error = raw_ip_send_data_v6(q, connp, mp1, sin6, ipp); 4611 goto done; 4612 4613 case AF_INET: 4614 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4615 if (!OK_32PTR((char *)sin) || 4616 tudr->DEST_length != sizeof (sin_t) || 4617 sin->sin_family != AF_INET) { 4618 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4619 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4620 return; 4621 } 4622 /* Extract and ipaddr */ 4623 v4dst = sin->sin_addr.s_addr; 4624 break; 4625 4626 default: 4627 ASSERT(0); 4628 } 4629 4630 pktinfop->ip4_ill_index = 0; 4631 pktinfop->ip4_addr = INADDR_ANY; 4632 4633 /* 4634 * If options passed in, feed it for verification and handling 4635 */ 4636 if (tudr->OPT_length != 0) { 4637 int error; 4638 4639 error = 0; 4640 if (icmp_unitdata_opt_process(q, mp, &error, 4641 (void *)pktinfop) < 0) { 4642 /* failure */ 4643 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4644 icmp_ud_err(q, mp, error); 4645 return; 4646 } 4647 ASSERT(error == 0); 4648 /* 4649 * Note: Success in processing options. 
4650 * mp option buffer represented by 4651 * OPT_length/offset now potentially modified 4652 * and contain option setting results 4653 */ 4654 } 4655 4656 error = raw_ip_send_data_v4(q, connp, mp1, v4dst, pktinfop); 4657 done: 4658 if (error != 0) { 4659 icmp_ud_err(q, mp, error); 4660 return; 4661 } else { 4662 mp->b_cont = NULL; 4663 freeb(mp); 4664 } 4665 } 4666 4667 4668 /* ARGSUSED */ 4669 static void 4670 icmp_wput_fallback(queue_t *q, mblk_t *mp) 4671 { 4672 #ifdef DEBUG 4673 cmn_err(CE_CONT, "icmp_wput_fallback: Message during fallback \n"); 4674 #endif 4675 freemsg(mp); 4676 } 4677 4678 static int 4679 raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp, ipaddr_t v4dst, 4680 ip4_pkt_t *pktinfop) 4681 { 4682 ipha_t *ipha; 4683 size_t ip_len; 4684 icmp_t *icmp = connp->conn_icmp; 4685 icmp_stack_t *is = icmp->icmp_is; 4686 int ip_hdr_length; 4687 ip_opt_info_t optinfo; 4688 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 4689 uint32_t ip_snd_opt_len = 0; 4690 pid_t cpid; 4691 cred_t *cr; 4692 4693 optinfo.ip_opt_flags = 0; 4694 optinfo.ip_opt_ill_index = 0; 4695 4696 if (icmp->icmp_state == TS_UNBND) { 4697 /* If a port has not been bound to the stream, fail. */ 4698 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4699 return (EPROTO); 4700 } 4701 4702 if (v4dst == INADDR_ANY) 4703 v4dst = htonl(INADDR_LOOPBACK); 4704 4705 /* Protocol 255 contains full IP headers */ 4706 if (icmp->icmp_hdrincl) 4707 return (icmp_wput_hdrincl(q, connp, mp, icmp, pktinfop)); 4708 4709 rw_enter(&icmp->icmp_rwlock, RW_READER); 4710 4711 /* 4712 * Check if our saved options are valid; update if not. 4713 * TSOL Note: Since we are not in WRITER mode, ICMP packets 4714 * to different destination may require different labels, 4715 * or worse, ICMP packets to same IP address may require 4716 * different labels due to use of shared all-zones address. 
4717 * We use conn_lock to ensure that lastdst, ip_snd_options, 4718 * and ip_snd_options_len are consistent for the current 4719 * destination and are updated atomically. 4720 */ 4721 mutex_enter(&connp->conn_lock); 4722 if (is_system_labeled()) { 4723 4724 /* 4725 * Recompute the Trusted Extensions security label if we're not 4726 * going to the same destination as last time or the cred 4727 * attached to the received mblk changed. 4728 */ 4729 cr = msg_getcred(mp, &cpid); 4730 if (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4731 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst || 4732 cr != icmp->icmp_last_cred) { 4733 int error = icmp_update_label(icmp, mp, v4dst); 4734 if (error != 0) { 4735 mutex_exit(&connp->conn_lock); 4736 rw_exit(&icmp->icmp_rwlock); 4737 return (error); 4738 } 4739 } 4740 /* 4741 * Apply credentials with modified security label if they 4742 * exist. icmp_update_label() may have generated these 4743 * credentials for packets to unlabeled remote nodes. 4744 */ 4745 if (icmp->icmp_effective_cred != NULL) 4746 mblk_setcred(mp, icmp->icmp_effective_cred, cpid); 4747 } 4748 4749 if (icmp->icmp_ip_snd_options_len > 0) { 4750 ip_snd_opt_len = icmp->icmp_ip_snd_options_len; 4751 bcopy(icmp->icmp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 4752 } 4753 mutex_exit(&connp->conn_lock); 4754 4755 /* Add an IP header */ 4756 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + ip_snd_opt_len; 4757 ipha = (ipha_t *)&mp->b_rptr[-ip_hdr_length]; 4758 if ((uchar_t *)ipha < mp->b_datap->db_base || 4759 mp->b_datap->db_ref != 1 || 4760 !OK_32PTR(ipha)) { 4761 mblk_t *mp1; 4762 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, 4763 BPRI_LO))) { 4764 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4765 rw_exit(&icmp->icmp_rwlock); 4766 return (ENOMEM); 4767 } 4768 mp1->b_cont = mp; 4769 ipha = (ipha_t *)mp1->b_datap->db_lim; 4770 mp1->b_wptr = (uchar_t *)ipha; 4771 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4772 mp = mp1; 4773 } 4774 #ifdef _BIG_ENDIAN 4775 /* 
Set version, header length, and tos */ 4776 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4777 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4778 icmp->icmp_type_of_service); 4779 /* Set ttl and protocol */ 4780 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4781 #else 4782 /* Set version, header length, and tos */ 4783 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4784 ((icmp->icmp_type_of_service << 8) | 4785 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4786 /* Set ttl and protocol */ 4787 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4788 #endif 4789 if (pktinfop->ip4_addr != INADDR_ANY) { 4790 ipha->ipha_src = pktinfop->ip4_addr; 4791 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4792 } else { 4793 4794 /* 4795 * Copy our address into the packet. If this is zero, 4796 * ip will fill in the real source address. 4797 */ 4798 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4799 } 4800 4801 ipha->ipha_fragment_offset_and_flags = 0; 4802 4803 if (pktinfop->ip4_ill_index != 0) { 4804 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4805 } 4806 4807 4808 /* 4809 * For the socket of SOCK_RAW type, the checksum is provided in the 4810 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4811 * tell IP that the application has sent a complete IP header and not 4812 * to compute the transport checksum nor change the DF flag. 4813 */ 4814 ipha->ipha_ident = IP_HDR_INCLUDED; 4815 4816 /* Finish common formatting of the packet. */ 4817 mp->b_rptr = (uchar_t *)ipha; 4818 4819 ip_len = mp->b_wptr - (uchar_t *)ipha; 4820 if (mp->b_cont != NULL) 4821 ip_len += msgdsize(mp->b_cont); 4822 4823 /* 4824 * Set the length into the IP header. 4825 * If the length is greater than the maximum allowed by IP, 4826 * then free the message and return. Do not try and send it 4827 * as this can cause problems in layers below. 
4828 */ 4829 if (ip_len > IP_MAXPACKET) { 4830 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4831 rw_exit(&icmp->icmp_rwlock); 4832 return (EMSGSIZE); 4833 } 4834 ipha->ipha_length = htons((uint16_t)ip_len); 4835 /* 4836 * Copy in the destination address request 4837 */ 4838 ipha->ipha_dst = v4dst; 4839 4840 /* 4841 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4842 */ 4843 if (CLASSD(v4dst)) 4844 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4845 4846 /* Copy in options if any */ 4847 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4848 bcopy(ip_snd_opt, 4849 &ipha[1], ip_snd_opt_len); 4850 /* 4851 * Massage source route putting first source route in ipha_dst. 4852 * Ignore the destination in the T_unitdata_req. 4853 */ 4854 (void) ip_massage_options(ipha, is->is_netstack); 4855 } 4856 4857 rw_exit(&icmp->icmp_rwlock); 4858 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 4859 4860 ip_output_options(connp, mp, q, IP_WPUT, &optinfo); 4861 return (0); 4862 } 4863 4864 static int 4865 icmp_update_label_v6(icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4866 { 4867 int err; 4868 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4869 icmp_stack_t *is = icmp->icmp_is; 4870 conn_t *connp = icmp->icmp_connp; 4871 cred_t *cred; 4872 cred_t *msg_cred; 4873 cred_t *effective_cred; 4874 4875 /* 4876 * All Solaris components should pass a db_credp 4877 * for this message, hence we ASSERT. 4878 * On production kernels we return an error to be robust against 4879 * random streams modules sitting on top of us. 4880 */ 4881 cred = msg_cred = msg_getcred(mp, NULL); 4882 ASSERT(cred != NULL); 4883 if (cred == NULL) 4884 return (EINVAL); 4885 4886 /* 4887 * Verify the destination is allowed to receive packets at 4888 * the security label of the message data. check_dest() 4889 * may create a new effective cred for this message 4890 * with a modified label or label flags. 
4891 */ 4892 if ((err = tsol_check_dest(cred, dst, IPV6_VERSION, 4893 connp->conn_mac_exempt, &effective_cred)) != 0) 4894 goto done; 4895 if (effective_cred != NULL) 4896 cred = effective_cred; 4897 4898 /* 4899 * Calculate the security label to be placed in the text 4900 * of the message (if any). 4901 */ 4902 if ((err = tsol_compute_label_v6(cred, dst, opt_storage, 4903 is->is_netstack->netstack_ip)) != 0) 4904 goto done; 4905 4906 /* 4907 * Insert the security label in the cached ip options, 4908 * removing any old label that may exist. 4909 */ 4910 if ((err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4911 &icmp->icmp_label_len_v6, opt_storage)) != 0) 4912 goto done; 4913 4914 /* 4915 * Save the destination address and cred we used to generate 4916 * the security label text. 4917 */ 4918 icmp->icmp_v6lastdst = *dst; 4919 if (cred != icmp->icmp_effective_cred) { 4920 if (icmp->icmp_effective_cred != NULL) 4921 crfree(icmp->icmp_effective_cred); 4922 crhold(cred); 4923 icmp->icmp_effective_cred = cred; 4924 } 4925 4926 if (msg_cred != icmp->icmp_last_cred) { 4927 if (icmp->icmp_last_cred != NULL) 4928 crfree(icmp->icmp_last_cred); 4929 crhold(msg_cred); 4930 icmp->icmp_last_cred = msg_cred; 4931 } 4932 4933 done: 4934 if (effective_cred != NULL) 4935 crfree(effective_cred); 4936 4937 if (err != 0) { 4938 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4939 DTRACE_PROBE4( 4940 tx__ip__log__drop__updatelabel__icmp6, 4941 char *, "icmp(1) failed to update options(2) on mp(3)", 4942 icmp_t *, icmp, char *, opt_storage, mblk_t *, mp); 4943 return (err); 4944 } 4945 return (0); 4946 } 4947 4948 /* 4949 * raw_ip_send_data_v6(): 4950 * Assumes that icmp_wput did some sanity checking on the destination 4951 * address, but that the label may not yet be correct. 
4952 */ 4953 static int 4954 raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp, sin6_t *sin6, 4955 ip6_pkt_t *ipp) 4956 { 4957 ip6_t *ip6h; 4958 ip6i_t *ip6i; /* mp->b_rptr even if no ip6i_t */ 4959 int ip_hdr_len = IPV6_HDR_LEN; 4960 size_t ip_len; 4961 icmp_t *icmp = connp->conn_icmp; 4962 icmp_stack_t *is = icmp->icmp_is; 4963 ip6_pkt_t *tipp; 4964 ip6_hbh_t *hopoptsptr = NULL; 4965 uint_t hopoptslen = 0; 4966 uint32_t csum = 0; 4967 uint_t ignore = 0; 4968 uint_t option_exists = 0, is_sticky = 0; 4969 uint8_t *cp; 4970 uint8_t *nxthdr_ptr; 4971 in6_addr_t ip6_dst; 4972 pid_t cpid; 4973 cred_t *cr; 4974 4975 rw_enter(&icmp->icmp_rwlock, RW_READER); 4976 4977 /* 4978 * If the local address is a mapped address return 4979 * an error. 4980 * It would be possible to send an IPv6 packet but the 4981 * response would never make it back to the application 4982 * since it is bound to a mapped address. 4983 */ 4984 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4985 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4986 rw_exit(&icmp->icmp_rwlock); 4987 return (EADDRNOTAVAIL); 4988 } 4989 4990 ignore = ipp->ipp_sticky_ignored; 4991 if (sin6->sin6_scope_id != 0 && 4992 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4993 /* 4994 * IPPF_SCOPE_ID is special. It's neither a sticky 4995 * option nor ancillary data. It needs to be 4996 * explicitly set in options_exists. 4997 */ 4998 option_exists |= IPPF_SCOPE_ID; 4999 } 5000 5001 /* 5002 * Compute the destination address 5003 */ 5004 ip6_dst = sin6->sin6_addr; 5005 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 5006 ip6_dst = ipv6_loopback; 5007 5008 /* 5009 * Check if our saved options are valid; update if not. 5010 * TSOL Note: Since we are not in WRITER mode, ICMP packets 5011 * to different destination may require different labels, 5012 * or worse, ICMP packets to same IP address may require 5013 * different labels due to use of shared all-zones address. 
5014 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 5015 * and sticky ipp_hopoptslen are consistent for the current 5016 * destination and are updated atomically. 5017 */ 5018 mutex_enter(&connp->conn_lock); 5019 if (is_system_labeled()) { 5020 /* 5021 * Recompute the Trusted Extensions security label if we're 5022 * not going to the same destination as last time or the cred 5023 * attached to the received mblk changed. This is done in a 5024 * separate routine to avoid blowing up our stack here. 5025 */ 5026 cr = msg_getcred(mp, &cpid); 5027 if (!IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst) || 5028 cr != icmp->icmp_last_cred) { 5029 int error = 0; 5030 error = icmp_update_label_v6(icmp, mp, &ip6_dst); 5031 if (error != 0) { 5032 mutex_exit(&connp->conn_lock); 5033 rw_exit(&icmp->icmp_rwlock); 5034 return (error); 5035 } 5036 } 5037 5038 /* 5039 * Apply credentials with modified security label if they exist. 5040 * icmp_update_label_v6() may have generated these credentials 5041 * for MAC-Exempt connections. 5042 */ 5043 if (icmp->icmp_effective_cred != NULL) 5044 mblk_setcred(mp, icmp->icmp_effective_cred, cpid); 5045 } 5046 5047 /* 5048 * If there's a security label here, then we ignore any options the 5049 * user may try to set. We keep the peer's label as a hidden sticky 5050 * option. 5051 */ 5052 if (icmp->icmp_label_len_v6 > 0) { 5053 ignore &= ~IPPF_HOPOPTS; 5054 ipp->ipp_fields &= ~IPPF_HOPOPTS; 5055 } 5056 5057 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 5058 (ipp->ipp_fields == 0)) { 5059 /* No sticky options nor ancillary data. */ 5060 mutex_exit(&connp->conn_lock); 5061 goto no_options; 5062 } 5063 5064 /* 5065 * Go through the options figuring out where each is going to 5066 * come from and build two masks. The first mask indicates if 5067 * the option exists at all. The second mask indicates if the 5068 * option is sticky or ancillary. 
5069 */ 5070 if (!(ignore & IPPF_HOPOPTS)) { 5071 if (ipp->ipp_fields & IPPF_HOPOPTS) { 5072 option_exists |= IPPF_HOPOPTS; 5073 ip_hdr_len += ipp->ipp_hopoptslen; 5074 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 5075 option_exists |= IPPF_HOPOPTS; 5076 is_sticky |= IPPF_HOPOPTS; 5077 ASSERT(icmp->icmp_sticky_ipp.ipp_hopoptslen != 0); 5078 hopoptsptr = kmem_alloc( 5079 icmp->icmp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 5080 if (hopoptsptr == NULL) { 5081 mutex_exit(&connp->conn_lock); 5082 rw_exit(&icmp->icmp_rwlock); 5083 return (ENOMEM); 5084 } 5085 hopoptslen = icmp->icmp_sticky_ipp.ipp_hopoptslen; 5086 bcopy(icmp->icmp_sticky_ipp.ipp_hopopts, hopoptsptr, 5087 hopoptslen); 5088 ip_hdr_len += hopoptslen; 5089 } 5090 } 5091 mutex_exit(&connp->conn_lock); 5092 5093 if (!(ignore & IPPF_RTHDR)) { 5094 if (ipp->ipp_fields & IPPF_RTHDR) { 5095 option_exists |= IPPF_RTHDR; 5096 ip_hdr_len += ipp->ipp_rthdrlen; 5097 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 5098 option_exists |= IPPF_RTHDR; 5099 is_sticky |= IPPF_RTHDR; 5100 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 5101 } 5102 } 5103 5104 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 5105 /* 5106 * Need to have a router header to use these. 
5107 */ 5108 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 5109 option_exists |= IPPF_RTDSTOPTS; 5110 ip_hdr_len += ipp->ipp_rtdstoptslen; 5111 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 5112 option_exists |= IPPF_RTDSTOPTS; 5113 is_sticky |= IPPF_RTDSTOPTS; 5114 ip_hdr_len += 5115 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 5116 } 5117 } 5118 5119 if (!(ignore & IPPF_DSTOPTS)) { 5120 if (ipp->ipp_fields & IPPF_DSTOPTS) { 5121 option_exists |= IPPF_DSTOPTS; 5122 ip_hdr_len += ipp->ipp_dstoptslen; 5123 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 5124 option_exists |= IPPF_DSTOPTS; 5125 is_sticky |= IPPF_DSTOPTS; 5126 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 5127 } 5128 } 5129 5130 if (!(ignore & IPPF_IFINDEX)) { 5131 if (ipp->ipp_fields & IPPF_IFINDEX) { 5132 option_exists |= IPPF_IFINDEX; 5133 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 5134 option_exists |= IPPF_IFINDEX; 5135 is_sticky |= IPPF_IFINDEX; 5136 } 5137 } 5138 5139 if (!(ignore & IPPF_ADDR)) { 5140 if (ipp->ipp_fields & IPPF_ADDR) { 5141 option_exists |= IPPF_ADDR; 5142 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 5143 option_exists |= IPPF_ADDR; 5144 is_sticky |= IPPF_ADDR; 5145 } 5146 } 5147 5148 if (!(ignore & IPPF_DONTFRAG)) { 5149 if (ipp->ipp_fields & IPPF_DONTFRAG) { 5150 option_exists |= IPPF_DONTFRAG; 5151 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 5152 option_exists |= IPPF_DONTFRAG; 5153 is_sticky |= IPPF_DONTFRAG; 5154 } 5155 } 5156 5157 if (!(ignore & IPPF_USE_MIN_MTU)) { 5158 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 5159 option_exists |= IPPF_USE_MIN_MTU; 5160 } else if (icmp->icmp_sticky_ipp.ipp_fields & 5161 IPPF_USE_MIN_MTU) { 5162 option_exists |= IPPF_USE_MIN_MTU; 5163 is_sticky |= IPPF_USE_MIN_MTU; 5164 } 5165 } 5166 5167 if (!(ignore & IPPF_NEXTHOP)) { 5168 if (ipp->ipp_fields & IPPF_NEXTHOP) { 5169 option_exists |= IPPF_NEXTHOP; 5170 } else if (icmp->icmp_sticky_ipp.ipp_fields & 
IPPF_NEXTHOP) { 5171 option_exists |= IPPF_NEXTHOP; 5172 is_sticky |= IPPF_NEXTHOP; 5173 } 5174 } 5175 5176 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 5177 option_exists |= IPPF_HOPLIMIT; 5178 /* IPV6_HOPLIMIT can never be sticky */ 5179 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 5180 5181 if (!(ignore & IPPF_UNICAST_HOPS) && 5182 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 5183 option_exists |= IPPF_UNICAST_HOPS; 5184 is_sticky |= IPPF_UNICAST_HOPS; 5185 } 5186 5187 if (!(ignore & IPPF_MULTICAST_HOPS) && 5188 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 5189 option_exists |= IPPF_MULTICAST_HOPS; 5190 is_sticky |= IPPF_MULTICAST_HOPS; 5191 } 5192 5193 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 5194 /* This is a sticky socket option only */ 5195 option_exists |= IPPF_NO_CKSUM; 5196 is_sticky |= IPPF_NO_CKSUM; 5197 } 5198 5199 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 5200 /* This is a sticky socket option only */ 5201 option_exists |= IPPF_RAW_CKSUM; 5202 is_sticky |= IPPF_RAW_CKSUM; 5203 } 5204 5205 if (!(ignore & IPPF_TCLASS)) { 5206 if (ipp->ipp_fields & IPPF_TCLASS) { 5207 option_exists |= IPPF_TCLASS; 5208 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 5209 option_exists |= IPPF_TCLASS; 5210 is_sticky |= IPPF_TCLASS; 5211 } 5212 } 5213 5214 no_options: 5215 5216 /* 5217 * If any options carried in the ip6i_t were specified, we 5218 * need to account for the ip6i_t in the data we'll be sending 5219 * down. 
5220 */ 5221 if (option_exists & IPPF_HAS_IP6I) 5222 ip_hdr_len += sizeof (ip6i_t); 5223 5224 /* check/fix buffer config, setup pointers into it */ 5225 ip6h = (ip6_t *)&mp->b_rptr[-ip_hdr_len]; 5226 if ((mp->b_datap->db_ref != 1) || 5227 ((unsigned char *)ip6h < mp->b_datap->db_base) || 5228 !OK_32PTR(ip6h)) { 5229 mblk_t *mp1; 5230 5231 /* Try to get everything in a single mblk next time */ 5232 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 5233 icmp->icmp_max_hdr_len = ip_hdr_len; 5234 5235 (void) proto_set_tx_wroff(q == NULL ? NULL:RD(q), connp, 5236 icmp->icmp_max_hdr_len + is->is_wroff_extra); 5237 } 5238 mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); 5239 if (!mp1) { 5240 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5241 kmem_free(hopoptsptr, hopoptslen); 5242 rw_exit(&icmp->icmp_rwlock); 5243 return (ENOMEM); 5244 } 5245 mp1->b_cont = mp; 5246 mp1->b_wptr = mp1->b_datap->db_lim; 5247 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 5248 mp = mp1; 5249 } 5250 mp->b_rptr = (unsigned char *)ip6h; 5251 ip6i = (ip6i_t *)ip6h; 5252 5253 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 5254 if (option_exists & IPPF_HAS_IP6I) { 5255 ip6h = (ip6_t *)&ip6i[1]; 5256 ip6i->ip6i_flags = 0; 5257 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5258 5259 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 5260 if (option_exists & IPPF_SCOPE_ID) { 5261 ip6i->ip6i_flags |= IP6I_IFINDEX; 5262 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 5263 } else if (option_exists & IPPF_IFINDEX) { 5264 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 5265 ASSERT(tipp->ipp_ifindex != 0); 5266 ip6i->ip6i_flags |= IP6I_IFINDEX; 5267 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 5268 } 5269 5270 if (option_exists & IPPF_RAW_CKSUM) { 5271 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 5272 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 5273 } 5274 5275 if (option_exists & IPPF_NO_CKSUM) { 5276 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 5277 } 5278 5279 if (option_exists & IPPF_ADDR) { 5280 /* 5281 * Enable per-packet source address verification if 5282 * IPV6_PKTINFO specified the source address. 5283 * ip6_src is set in the transport's _wput function. 5284 */ 5285 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 5286 } 5287 5288 if (option_exists & IPPF_DONTFRAG) { 5289 ip6i->ip6i_flags |= IP6I_DONTFRAG; 5290 } 5291 5292 if (option_exists & IPPF_USE_MIN_MTU) { 5293 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 5294 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 5295 } 5296 5297 if (option_exists & IPPF_NEXTHOP) { 5298 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 5299 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 5300 ip6i->ip6i_flags |= IP6I_NEXTHOP; 5301 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 5302 } 5303 5304 /* 5305 * tell IP this is an ip6i_t private header 5306 */ 5307 ip6i->ip6i_nxt = IPPROTO_RAW; 5308 } 5309 5310 /* Initialize IPv6 header */ 5311 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5312 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 5313 5314 /* Set the hoplimit of the outgoing packet. 
*/ 5315 if (option_exists & IPPF_HOPLIMIT) { 5316 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 5317 ip6h->ip6_hops = ipp->ipp_hoplimit; 5318 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5319 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 5320 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 5321 if (option_exists & IPPF_MULTICAST_HOPS) 5322 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5323 } else { 5324 ip6h->ip6_hops = icmp->icmp_ttl; 5325 if (option_exists & IPPF_UNICAST_HOPS) 5326 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5327 } 5328 5329 if (option_exists & IPPF_ADDR) { 5330 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 5331 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 5332 ip6h->ip6_src = tipp->ipp_addr; 5333 } else { 5334 /* 5335 * The source address was not set using IPV6_PKTINFO. 5336 * First look at the bound source. 5337 * If unspecified fallback to __sin6_src_id. 5338 */ 5339 ip6h->ip6_src = icmp->icmp_v6src; 5340 if (sin6->__sin6_src_id != 0 && 5341 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 5342 ip_srcid_find_id(sin6->__sin6_src_id, 5343 &ip6h->ip6_src, icmp->icmp_zoneid, 5344 is->is_netstack); 5345 } 5346 } 5347 5348 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 5349 cp = (uint8_t *)&ip6h[1]; 5350 5351 /* 5352 * Here's where we have to start stringing together 5353 * any extension headers in the right order: 5354 * Hop-by-hop, destination, routing, and final destination opts. 
5355 */ 5356 if (option_exists & IPPF_HOPOPTS) { 5357 /* Hop-by-hop options */ 5358 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 5359 5360 *nxthdr_ptr = IPPROTO_HOPOPTS; 5361 nxthdr_ptr = &hbh->ip6h_nxt; 5362 5363 if (hopoptslen == 0) { 5364 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 5365 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 5366 cp += tipp->ipp_hopoptslen; 5367 } else { 5368 bcopy(hopoptsptr, cp, hopoptslen); 5369 cp += hopoptslen; 5370 kmem_free(hopoptsptr, hopoptslen); 5371 } 5372 } 5373 /* 5374 * En-route destination options 5375 * Only do them if there's a routing header as well 5376 */ 5377 if (option_exists & IPPF_RTDSTOPTS) { 5378 ip6_dest_t *dst = (ip6_dest_t *)cp; 5379 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 5380 5381 *nxthdr_ptr = IPPROTO_DSTOPTS; 5382 nxthdr_ptr = &dst->ip6d_nxt; 5383 5384 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 5385 cp += tipp->ipp_rtdstoptslen; 5386 } 5387 /* 5388 * Routing header next 5389 */ 5390 if (option_exists & IPPF_RTHDR) { 5391 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 5392 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 5393 5394 *nxthdr_ptr = IPPROTO_ROUTING; 5395 nxthdr_ptr = &rt->ip6r_nxt; 5396 5397 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 5398 cp += tipp->ipp_rthdrlen; 5399 } 5400 /* 5401 * Do ultimate destination options 5402 */ 5403 if (option_exists & IPPF_DSTOPTS) { 5404 ip6_dest_t *dest = (ip6_dest_t *)cp; 5405 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 5406 5407 *nxthdr_ptr = IPPROTO_DSTOPTS; 5408 nxthdr_ptr = &dest->ip6d_nxt; 5409 5410 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 5411 cp += tipp->ipp_dstoptslen; 5412 } 5413 5414 /* 5415 * Now set the last header pointer to the proto passed in 5416 */ 5417 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 5418 *nxthdr_ptr = icmp->icmp_proto; 5419 5420 /* 5421 * Copy in the destination address 5422 */ 5423 ip6h->ip6_dst = ip6_dst; 5424 5425 ip6h->ip6_vcf = 5426 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 5427 (sin6->sin6_flowinfo 
& ~IPV6_VERS_AND_FLOW_MASK); 5428 5429 if (option_exists & IPPF_TCLASS) { 5430 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 5431 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 5432 tipp->ipp_tclass); 5433 } 5434 if (option_exists & IPPF_RTHDR) { 5435 ip6_rthdr_t *rth; 5436 5437 /* 5438 * Perform any processing needed for source routing. 5439 * We know that all extension headers will be in the same mblk 5440 * as the IPv6 header. 5441 */ 5442 rth = ip_find_rthdr_v6(ip6h, mp->b_wptr); 5443 if (rth != NULL && rth->ip6r_segleft != 0) { 5444 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 5445 /* 5446 * Drop packet - only support Type 0 routing. 5447 * Notify the application as well. 5448 */ 5449 BUMP_MIB(&is->is_rawip_mib, 5450 rawipOutErrors); 5451 rw_exit(&icmp->icmp_rwlock); 5452 return (EPROTO); 5453 } 5454 /* 5455 * rth->ip6r_len is twice the number of 5456 * addresses in the header 5457 */ 5458 if (rth->ip6r_len & 0x1) { 5459 BUMP_MIB(&is->is_rawip_mib, 5460 rawipOutErrors); 5461 rw_exit(&icmp->icmp_rwlock); 5462 return (EPROTO); 5463 } 5464 /* 5465 * Shuffle the routing header and ip6_dst 5466 * addresses, and get the checksum difference 5467 * between the first hop (in ip6_dst) and 5468 * the destination (in the last routing hdr entry). 5469 */ 5470 csum = ip_massage_options_v6(ip6h, rth, 5471 is->is_netstack); 5472 /* 5473 * Verify that the first hop isn't a mapped address. 5474 * Routers along the path need to do this verification 5475 * for subsequent hops. 5476 */ 5477 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 5478 BUMP_MIB(&is->is_rawip_mib, 5479 rawipOutErrors); 5480 rw_exit(&icmp->icmp_rwlock); 5481 return (EADDRNOTAVAIL); 5482 } 5483 } 5484 } 5485 5486 ip_len = mp->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 5487 if (mp->b_cont != NULL) 5488 ip_len += msgdsize(mp->b_cont); 5489 5490 /* 5491 * Set the length into the IP header. 5492 * If the length is greater than the maximum allowed by IP, 5493 * then free the message and return. 
Do not try and send it 5494 * as this can cause problems in layers below. 5495 */ 5496 if (ip_len > IP_MAXPACKET) { 5497 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5498 rw_exit(&icmp->icmp_rwlock); 5499 return (EMSGSIZE); 5500 } 5501 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 5502 uint_t cksum_off; /* From ip6i == mp->b_rptr */ 5503 uint16_t *cksum_ptr; 5504 uint_t ext_hdrs_len; 5505 5506 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 5507 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 5508 icmp->icmp_checksum_off == 2); 5509 5510 /* 5511 * We make it easy for IP to include our pseudo header 5512 * by putting our length in uh_checksum, modified (if 5513 * we have a routing header) by the checksum difference 5514 * between the ultimate destination and first hop addresses. 5515 * Note: ICMPv6 must always checksum the packet. 5516 */ 5517 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 5518 if (cksum_off + sizeof (uint16_t) > mp->b_wptr - mp->b_rptr) { 5519 if (!pullupmsg(mp, cksum_off + sizeof (uint16_t))) { 5520 BUMP_MIB(&is->is_rawip_mib, 5521 rawipOutErrors); 5522 freemsg(mp); 5523 rw_exit(&icmp->icmp_rwlock); 5524 return (0); 5525 } 5526 ip6i = (ip6i_t *)mp->b_rptr; 5527 if (ip6i->ip6i_nxt == IPPROTO_RAW) 5528 ip6h = (ip6_t *)&ip6i[1]; 5529 else 5530 ip6h = (ip6_t *)ip6i; 5531 } 5532 /* Add payload length to checksum */ 5533 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 5534 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 5535 csum += htons(ip_len - ext_hdrs_len); 5536 5537 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 5538 csum = (csum & 0xFFFF) + (csum >> 16); 5539 *cksum_ptr = (uint16_t)csum; 5540 } 5541 5542 #ifdef _LITTLE_ENDIAN 5543 ip_len = htons(ip_len); 5544 #endif 5545 ip6h->ip6_plen = (uint16_t)ip_len; 5546 5547 /* We're done. 
Pass the packet to IP */ 5548 rw_exit(&icmp->icmp_rwlock); 5549 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 5550 ip_output_v6(icmp->icmp_connp, mp, q, IP_WPUT); 5551 return (0); 5552 } 5553 5554 static void 5555 icmp_wput_other(queue_t *q, mblk_t *mp) 5556 { 5557 uchar_t *rptr = mp->b_rptr; 5558 struct iocblk *iocp; 5559 #define tudr ((struct T_unitdata_req *)rptr) 5560 conn_t *connp = Q_TO_CONN(q); 5561 icmp_t *icmp = connp->conn_icmp; 5562 icmp_stack_t *is = icmp->icmp_is; 5563 cred_t *cr; 5564 5565 switch (mp->b_datap->db_type) { 5566 case M_PROTO: 5567 case M_PCPROTO: 5568 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5569 /* 5570 * If the message does not contain a PRIM_type, 5571 * throw it away. 5572 */ 5573 freemsg(mp); 5574 return; 5575 } 5576 switch (((union T_primitives *)rptr)->type) { 5577 case T_ADDR_REQ: 5578 icmp_addr_req(q, mp); 5579 return; 5580 case O_T_BIND_REQ: 5581 case T_BIND_REQ: 5582 icmp_tpi_bind(q, mp); 5583 return; 5584 case T_CONN_REQ: 5585 icmp_tpi_connect(q, mp); 5586 return; 5587 case T_CAPABILITY_REQ: 5588 icmp_capability_req(q, mp); 5589 return; 5590 case T_INFO_REQ: 5591 icmp_info_req(q, mp); 5592 return; 5593 case T_UNITDATA_REQ: 5594 /* 5595 * If a T_UNITDATA_REQ gets here, the address must 5596 * be bad. Valid T_UNITDATA_REQs are found above 5597 * and break to below this switch. 5598 */ 5599 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5600 return; 5601 case T_UNBIND_REQ: 5602 icmp_tpi_unbind(q, mp); 5603 return; 5604 5605 case T_SVR4_OPTMGMT_REQ: 5606 /* 5607 * All Solaris components should pass a db_credp 5608 * for this TPI message, hence we ASSERT. 5609 * But in case there is some other M_PROTO that looks 5610 * like a TPI message sent by some other kernel 5611 * component, we check and return an error. 
5612 */ 5613 cr = msg_getcred(mp, NULL); 5614 ASSERT(cr != NULL); 5615 if (cr == NULL) { 5616 icmp_err_ack(q, mp, TSYSERR, EINVAL); 5617 return; 5618 } 5619 5620 if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get, 5621 cr)) { 5622 /* Only IP can return anything meaningful */ 5623 (void) svr4_optcom_req(q, mp, cr, 5624 &icmp_opt_obj, B_TRUE); 5625 } 5626 return; 5627 5628 case T_OPTMGMT_REQ: 5629 /* 5630 * All Solaris components should pass a db_credp 5631 * for this TPI message, hence we ASSERT. 5632 * But in case there is some other M_PROTO that looks 5633 * like a TPI message sent by some other kernel 5634 * component, we check and return an error. 5635 */ 5636 cr = msg_getcred(mp, NULL); 5637 ASSERT(cr != NULL); 5638 if (cr == NULL) { 5639 icmp_err_ack(q, mp, TSYSERR, EINVAL); 5640 return; 5641 } 5642 /* Only IP can return anything meaningful */ 5643 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE); 5644 return; 5645 5646 case T_DISCON_REQ: 5647 icmp_tpi_disconnect(q, mp); 5648 return; 5649 5650 /* The following TPI message is not supported by icmp. */ 5651 case O_T_CONN_RES: 5652 case T_CONN_RES: 5653 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5654 return; 5655 5656 /* The following 3 TPI requests are illegal for icmp. */ 5657 case T_DATA_REQ: 5658 case T_EXDATA_REQ: 5659 case T_ORDREL_REQ: 5660 freemsg(mp); 5661 (void) putctl1(RD(q), M_ERROR, EPROTO); 5662 return; 5663 default: 5664 break; 5665 } 5666 break; 5667 case M_IOCTL: 5668 iocp = (struct iocblk *)mp->b_rptr; 5669 switch (iocp->ioc_cmd) { 5670 case TI_GETPEERNAME: 5671 if (icmp->icmp_state != TS_DATA_XFER) { 5672 /* 5673 * If a default destination address has not 5674 * been associated with the stream, then we 5675 * don't know the peer's name. 
5676 */ 5677 iocp->ioc_error = ENOTCONN; 5678 err_ret:; 5679 iocp->ioc_count = 0; 5680 mp->b_datap->db_type = M_IOCACK; 5681 qreply(q, mp); 5682 return; 5683 } 5684 /* FALLTHRU */ 5685 case TI_GETMYNAME: 5686 /* 5687 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5688 * need to copyin the user's strbuf structure. 5689 * Processing will continue in the M_IOCDATA case 5690 * below. 5691 */ 5692 mi_copyin(q, mp, NULL, 5693 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5694 return; 5695 case ND_SET: 5696 /* nd_getset performs the necessary error checking */ 5697 case ND_GET: 5698 if (nd_getset(q, is->is_nd, mp)) { 5699 qreply(q, mp); 5700 return; 5701 } 5702 break; 5703 case _SIOCSOCKFALLBACK: 5704 /* 5705 * socket is falling back to be a 5706 * streams socket. Nothing to do 5707 */ 5708 iocp->ioc_count = 0; 5709 iocp->ioc_rval = 0; 5710 qreply(q, mp); 5711 return; 5712 default: 5713 break; 5714 } 5715 break; 5716 case M_IOCDATA: 5717 icmp_wput_iocdata(q, mp); 5718 return; 5719 default: 5720 break; 5721 } 5722 ip_wput(q, mp); 5723 } 5724 5725 /* 5726 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5727 * messages. 5728 */ 5729 static void 5730 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5731 { 5732 mblk_t *mp1; 5733 STRUCT_HANDLE(strbuf, sb); 5734 icmp_t *icmp; 5735 uint_t addrlen; 5736 uint_t error; 5737 5738 /* Make sure it is one of ours. */ 5739 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5740 case TI_GETMYNAME: 5741 case TI_GETPEERNAME: 5742 break; 5743 default: 5744 icmp = Q_TO_ICMP(q); 5745 ip_output(icmp->icmp_connp, mp, q, IP_WPUT); 5746 return; 5747 } 5748 switch (mi_copy_state(q, mp, &mp1)) { 5749 case -1: 5750 return; 5751 case MI_COPY_CASE(MI_COPY_IN, 1): 5752 break; 5753 case MI_COPY_CASE(MI_COPY_OUT, 1): 5754 /* 5755 * The address has been copied out, so now 5756 * copyout the strbuf. 5757 */ 5758 mi_copyout(q, mp); 5759 return; 5760 case MI_COPY_CASE(MI_COPY_OUT, 2): 5761 /* 5762 * The address and strbuf have been copied out. 
5763 * We're done, so just acknowledge the original 5764 * M_IOCTL. 5765 */ 5766 mi_copy_done(q, mp, 0); 5767 return; 5768 default: 5769 /* 5770 * Something strange has happened, so acknowledge 5771 * the original M_IOCTL with an EPROTO error. 5772 */ 5773 mi_copy_done(q, mp, EPROTO); 5774 return; 5775 } 5776 /* 5777 * Now we have the strbuf structure for TI_GETMYNAME 5778 * and TI_GETPEERNAME. Next we copyout the requested 5779 * address and then we'll copyout the strbuf. 5780 */ 5781 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5782 (void *)mp1->b_rptr); 5783 icmp = Q_TO_ICMP(q); 5784 if (icmp->icmp_family == AF_INET) 5785 addrlen = sizeof (sin_t); 5786 else 5787 addrlen = sizeof (sin6_t); 5788 5789 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5790 mi_copy_done(q, mp, EINVAL); 5791 return; 5792 } 5793 5794 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5795 5796 if (mp1 == NULL) 5797 return; 5798 5799 rw_enter(&icmp->icmp_rwlock, RW_READER); 5800 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5801 case TI_GETMYNAME: 5802 error = rawip_do_getsockname(icmp, (void *)mp1->b_rptr, 5803 &addrlen); 5804 break; 5805 case TI_GETPEERNAME: 5806 error = rawip_do_getpeername(icmp, (void *)mp1->b_rptr, 5807 &addrlen); 5808 break; 5809 } 5810 rw_exit(&icmp->icmp_rwlock); 5811 5812 if (error != 0) { 5813 mi_copy_done(q, mp, error); 5814 } else { 5815 mp1->b_wptr += addrlen; 5816 STRUCT_FSET(sb, len, addrlen); 5817 5818 /* Copy out the address */ 5819 mi_copyout(q, mp); 5820 } 5821 } 5822 5823 static int 5824 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5825 void *thisdg_attrs) 5826 { 5827 struct T_unitdata_req *udreqp; 5828 int is_absreq_failure; 5829 cred_t *cr; 5830 5831 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5832 *errorp = 0; 5833 5834 /* 5835 * All Solaris components should pass a db_credp 5836 * for this TPI message, hence we ASSERT. 
5837 * But in case there is some other M_PROTO that looks 5838 * like a TPI message sent by some other kernel 5839 * component, we check and return an error. 5840 */ 5841 cr = msg_getcred(mp, NULL); 5842 ASSERT(cr != NULL); 5843 if (cr == NULL) 5844 return (-1); 5845 5846 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5847 udreqp->OPT_offset, cr, &icmp_opt_obj, 5848 thisdg_attrs, &is_absreq_failure); 5849 5850 if (*errorp != 0) { 5851 /* 5852 * Note: No special action needed in this 5853 * module for "is_absreq_failure" 5854 */ 5855 return (-1); /* failure */ 5856 } 5857 ASSERT(is_absreq_failure == 0); 5858 return (0); /* success */ 5859 } 5860 5861 void 5862 icmp_ddi_g_init(void) 5863 { 5864 icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5865 icmp_opt_obj.odb_opt_arr_cnt); 5866 5867 /* 5868 * We want to be informed each time a stack is created or 5869 * destroyed in the kernel, so we can maintain the 5870 * set of icmp_stack_t's. 5871 */ 5872 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); 5873 } 5874 5875 void 5876 icmp_ddi_g_destroy(void) 5877 { 5878 netstack_unregister(NS_ICMP); 5879 } 5880 5881 #define INET_NAME "ip" 5882 5883 /* 5884 * Initialize the ICMP stack instance. 
5885 */ 5886 static void * 5887 rawip_stack_init(netstackid_t stackid, netstack_t *ns) 5888 { 5889 icmp_stack_t *is; 5890 icmpparam_t *pa; 5891 int error = 0; 5892 major_t major; 5893 5894 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); 5895 is->is_netstack = ns; 5896 5897 pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); 5898 is->is_param_arr = pa; 5899 bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); 5900 5901 (void) icmp_param_register(&is->is_nd, 5902 is->is_param_arr, A_CNT(icmp_param_arr)); 5903 is->is_ksp = rawip_kstat_init(stackid); 5904 5905 major = mod_name_to_major(INET_NAME); 5906 error = ldi_ident_from_major(major, &is->is_ldi_ident); 5907 ASSERT(error == 0); 5908 return (is); 5909 } 5910 5911 /* 5912 * Free the ICMP stack instance. 5913 */ 5914 static void 5915 rawip_stack_fini(netstackid_t stackid, void *arg) 5916 { 5917 icmp_stack_t *is = (icmp_stack_t *)arg; 5918 5919 nd_free(&is->is_nd); 5920 kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); 5921 is->is_param_arr = NULL; 5922 5923 rawip_kstat_fini(stackid, is->is_ksp); 5924 is->is_ksp = NULL; 5925 ldi_ident_release(is->is_ldi_ident); 5926 kmem_free(is, sizeof (*is)); 5927 } 5928 5929 static void * 5930 rawip_kstat_init(netstackid_t stackid) { 5931 kstat_t *ksp; 5932 5933 rawip_named_kstat_t template = { 5934 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5935 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5936 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5937 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5938 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5939 }; 5940 5941 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", 5942 KSTAT_TYPE_NAMED, 5943 NUM_OF_FIELDS(rawip_named_kstat_t), 5944 0, stackid); 5945 if (ksp == NULL || ksp->ks_data == NULL) 5946 return (NULL); 5947 5948 bcopy(&template, ksp->ks_data, sizeof (template)); 5949 ksp->ks_update = rawip_kstat_update; 5950 ksp->ks_private = (void *)(uintptr_t)stackid; 5951 5952 kstat_install(ksp); 5953 return (ksp); 5954 } 
5955 5956 static void 5957 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) 5958 { 5959 if (ksp != NULL) { 5960 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 5961 kstat_delete_netstack(ksp, stackid); 5962 } 5963 } 5964 5965 static int 5966 rawip_kstat_update(kstat_t *ksp, int rw) 5967 { 5968 rawip_named_kstat_t *rawipkp; 5969 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; 5970 netstack_t *ns; 5971 icmp_stack_t *is; 5972 5973 if ((ksp == NULL) || (ksp->ks_data == NULL)) 5974 return (EIO); 5975 5976 if (rw == KSTAT_WRITE) 5977 return (EACCES); 5978 5979 rawipkp = (rawip_named_kstat_t *)ksp->ks_data; 5980 5981 ns = netstack_find_by_stackid(stackid); 5982 if (ns == NULL) 5983 return (-1); 5984 is = ns->netstack_icmp; 5985 if (is == NULL) { 5986 netstack_rele(ns); 5987 return (-1); 5988 } 5989 rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; 5990 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; 5991 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; 5992 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; 5993 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; 5994 netstack_rele(ns); 5995 return (0); 5996 } 5997 5998 /* ARGSUSED */ 5999 int 6000 rawip_accept(sock_lower_handle_t lproto_handle, 6001 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 6002 cred_t *cr) 6003 { 6004 return (EOPNOTSUPP); 6005 } 6006 6007 /* ARGSUSED */ 6008 int 6009 rawip_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6010 socklen_t len, cred_t *cr) 6011 { 6012 conn_t *connp = (conn_t *)proto_handle; 6013 int error; 6014 6015 /* All Solaris components should pass a cred for this operation. 
*/ 6016 ASSERT(cr != NULL); 6017 6018 /* Binding to a NULL address really means unbind */ 6019 if (sa == NULL) 6020 error = rawip_do_unbind(connp); 6021 else 6022 error = rawip_do_bind(connp, sa, len); 6023 6024 if (error < 0) { 6025 if (error == -TOUTSTATE) 6026 error = EINVAL; 6027 else 6028 error = proto_tlitosyserr(-error); 6029 } 6030 return (error); 6031 } 6032 6033 static int 6034 rawip_implicit_bind(conn_t *connp) 6035 { 6036 sin6_t sin6addr; 6037 sin_t *sin; 6038 sin6_t *sin6; 6039 socklen_t len; 6040 int error; 6041 6042 if (connp->conn_icmp->icmp_family == AF_INET) { 6043 len = sizeof (struct sockaddr_in); 6044 sin = (sin_t *)&sin6addr; 6045 *sin = sin_null; 6046 sin->sin_family = AF_INET; 6047 sin->sin_addr.s_addr = INADDR_ANY; 6048 } else { 6049 ASSERT(connp->conn_icmp->icmp_family == AF_INET6); 6050 len = sizeof (sin6_t); 6051 sin6 = (sin6_t *)&sin6addr; 6052 *sin6 = sin6_null; 6053 sin6->sin6_family = AF_INET6; 6054 V6_SET_ZERO(sin6->sin6_addr); 6055 } 6056 6057 error = rawip_do_bind(connp, (struct sockaddr *)&sin6addr, len); 6058 6059 return ((error < 0) ? proto_tlitosyserr(-error) : error); 6060 } 6061 6062 static int 6063 rawip_unbind(conn_t *connp) 6064 { 6065 int error; 6066 6067 error = rawip_do_unbind(connp); 6068 if (error < 0) { 6069 error = proto_tlitosyserr(-error); 6070 } 6071 return (error); 6072 } 6073 6074 /* ARGSUSED */ 6075 int 6076 rawip_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 6077 { 6078 return (EOPNOTSUPP); 6079 } 6080 6081 /* ARGSUSED */ 6082 int 6083 rawip_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 6084 socklen_t len, sock_connid_t *id, cred_t *cr) 6085 { 6086 conn_t *connp = (conn_t *)proto_handle; 6087 icmp_t *icmp = connp->conn_icmp; 6088 int error; 6089 boolean_t did_bind = B_FALSE; 6090 6091 /* All Solaris components should pass a cred for this operation. 
*/ 6092 ASSERT(cr != NULL); 6093 6094 if (sa == NULL) { 6095 /* 6096 * Disconnect 6097 * Make sure we are connected 6098 */ 6099 if (icmp->icmp_state != TS_DATA_XFER) 6100 return (EINVAL); 6101 6102 error = icmp_disconnect(connp); 6103 return (error); 6104 } 6105 6106 error = proto_verify_ip_addr(icmp->icmp_family, sa, len); 6107 if (error != 0) 6108 return (error); 6109 6110 /* do an implicit bind if necessary */ 6111 if (icmp->icmp_state == TS_UNBND) { 6112 error = rawip_implicit_bind(connp); 6113 /* 6114 * We could be racing with an actual bind, in which case 6115 * we would see EPROTO. We cross our fingers and try 6116 * to connect. 6117 */ 6118 if (!(error == 0 || error == EPROTO)) 6119 return (error); 6120 did_bind = B_TRUE; 6121 } 6122 6123 /* 6124 * set SO_DGRAM_ERRIND 6125 */ 6126 icmp->icmp_dgram_errind = B_TRUE; 6127 6128 error = rawip_do_connect(connp, sa, len, cr); 6129 6130 if (error != 0 && did_bind) { 6131 int unbind_err; 6132 6133 unbind_err = rawip_unbind(connp); 6134 ASSERT(unbind_err == 0); 6135 } 6136 6137 if (error == 0) { 6138 *id = 0; 6139 (*connp->conn_upcalls->su_connected) 6140 (connp->conn_upper_handle, 0, NULL, -1); 6141 } else if (error < 0) { 6142 error = proto_tlitosyserr(-error); 6143 } 6144 return (error); 6145 } 6146 6147 /* ARGSUSED */ 6148 int 6149 rawip_fallback(sock_lower_handle_t proto_handle, queue_t *q, 6150 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb) 6151 { 6152 conn_t *connp = (conn_t *)proto_handle; 6153 icmp_t *icmp; 6154 struct T_capability_ack tca; 6155 struct sockaddr_in6 laddr, faddr; 6156 socklen_t laddrlen, faddrlen; 6157 short opts; 6158 struct stroptions *stropt; 6159 mblk_t *stropt_mp; 6160 int error; 6161 6162 icmp = connp->conn_icmp; 6163 6164 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 6165 6166 /* 6167 * setup the fallback stream that was allocated 6168 */ 6169 connp->conn_dev = (dev_t)RD(q)->q_ptr; 6170 connp->conn_minor_arena = WR(q)->q_ptr; 6171 6172 RD(q)->q_ptr 
= WR(q)->q_ptr = connp; 6173 6174 WR(q)->q_qinfo = &icmpwinit; 6175 6176 connp->conn_rq = RD(q); 6177 connp->conn_wq = WR(q); 6178 6179 /* Notify stream head about options before sending up data */ 6180 stropt_mp->b_datap->db_type = M_SETOPTS; 6181 stropt_mp->b_wptr += sizeof (*stropt); 6182 stropt = (struct stroptions *)stropt_mp->b_rptr; 6183 stropt->so_flags = SO_WROFF | SO_HIWAT; 6184 stropt->so_wroff = 6185 (ushort_t)(icmp->icmp_max_hdr_len + icmp->icmp_is->is_wroff_extra); 6186 stropt->so_hiwat = icmp->icmp_recv_hiwat; 6187 putnext(RD(q), stropt_mp); 6188 6189 /* 6190 * free helper stream 6191 */ 6192 ip_free_helper_stream(connp); 6193 6194 /* 6195 * Collect the information needed to sync with the sonode 6196 */ 6197 icmp_do_capability_ack(icmp, &tca, TC1_INFO); 6198 6199 laddrlen = faddrlen = sizeof (sin6_t); 6200 (void) rawip_getsockname((sock_lower_handle_t)connp, 6201 (struct sockaddr *)&laddr, &laddrlen, CRED()); 6202 error = rawip_getpeername((sock_lower_handle_t)connp, 6203 (struct sockaddr *)&faddr, &faddrlen, CRED()); 6204 if (error != 0) 6205 faddrlen = 0; 6206 opts = 0; 6207 if (icmp->icmp_dgram_errind) 6208 opts |= SO_DGRAM_ERRIND; 6209 if (icmp->icmp_dontroute) 6210 opts |= SO_DONTROUTE; 6211 6212 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 6213 (struct sockaddr *)&laddr, laddrlen, 6214 (struct sockaddr *)&faddr, faddrlen, opts); 6215 6216 /* 6217 * Attempts to send data up during fallback will result in it being 6218 * queued in udp_t. Now we push up any queued packets. 
6219 */ 6220 mutex_enter(&icmp->icmp_recv_lock); 6221 while (icmp->icmp_fallback_queue_head != NULL) { 6222 mblk_t *mp; 6223 6224 mp = icmp->icmp_fallback_queue_head; 6225 icmp->icmp_fallback_queue_head = mp->b_next; 6226 mp->b_next = NULL; 6227 mutex_exit(&icmp->icmp_recv_lock); 6228 putnext(RD(q), mp); 6229 mutex_enter(&icmp->icmp_recv_lock); 6230 } 6231 icmp->icmp_fallback_queue_tail = icmp->icmp_fallback_queue_head; 6232 6233 /* 6234 * No longer a streams less socket 6235 */ 6236 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6237 connp->conn_flags &= ~IPCL_NONSTR; 6238 rw_exit(&icmp->icmp_rwlock); 6239 6240 mutex_exit(&icmp->icmp_recv_lock); 6241 6242 ASSERT(icmp->icmp_fallback_queue_head == NULL && 6243 icmp->icmp_fallback_queue_tail == NULL); 6244 6245 ASSERT(connp->conn_ref >= 1); 6246 6247 return (0); 6248 } 6249 6250 /* ARGSUSED */ 6251 sock_lower_handle_t 6252 rawip_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 6253 uint_t *smodep, int *errorp, int flags, cred_t *credp) 6254 { 6255 conn_t *connp; 6256 6257 if (type != SOCK_RAW || (family != AF_INET && family != AF_INET6)) { 6258 *errorp = EPROTONOSUPPORT; 6259 return (NULL); 6260 } 6261 6262 connp = icmp_open(family, credp, errorp, flags); 6263 if (connp != NULL) { 6264 icmp_stack_t *is; 6265 6266 is = connp->conn_icmp->icmp_is; 6267 connp->conn_flags |= IPCL_NONSTR; 6268 6269 if (connp->conn_icmp->icmp_family == AF_INET6) { 6270 /* Build initial header template for transmit */ 6271 rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER); 6272 if ((*errorp = 6273 icmp_build_hdrs(connp->conn_icmp)) != 0) { 6274 rw_exit(&connp->conn_icmp->icmp_rwlock); 6275 ipcl_conn_destroy(connp); 6276 return (NULL); 6277 } 6278 rw_exit(&connp->conn_icmp->icmp_rwlock); 6279 } 6280 6281 connp->conn_icmp->icmp_recv_hiwat = is->is_recv_hiwat; 6282 connp->conn_icmp->icmp_xmit_hiwat = is->is_xmit_hiwat; 6283 6284 if ((*errorp = ip_create_helper_stream(connp, 6285 is->is_ldi_ident)) != 0) { 6286 
cmn_err(CE_CONT, "create of IP helper stream failed\n"); 6287 (void) rawip_do_close(connp); 6288 return (NULL); 6289 } 6290 6291 mutex_enter(&connp->conn_lock); 6292 connp->conn_state_flags &= ~CONN_INCIPIENT; 6293 mutex_exit(&connp->conn_lock); 6294 *sock_downcalls = &sock_rawip_downcalls; 6295 *smodep = SM_ATOMIC; 6296 } else { 6297 ASSERT(*errorp != 0); 6298 } 6299 6300 return ((sock_lower_handle_t)connp); 6301 } 6302 6303 /* ARGSUSED */ 6304 void 6305 rawip_activate(sock_lower_handle_t proto_handle, 6306 sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, int flags, 6307 cred_t *cr) 6308 { 6309 conn_t *connp = (conn_t *)proto_handle; 6310 icmp_stack_t *is = connp->conn_icmp->icmp_is; 6311 struct sock_proto_props sopp; 6312 6313 /* All Solaris components should pass a cred for this operation. */ 6314 ASSERT(cr != NULL); 6315 6316 connp->conn_upcalls = sock_upcalls; 6317 connp->conn_upper_handle = sock_handle; 6318 6319 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 6320 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 6321 sopp.sopp_wroff = connp->conn_icmp->icmp_max_hdr_len + 6322 is->is_wroff_extra; 6323 sopp.sopp_rxhiwat = is->is_recv_hiwat; 6324 sopp.sopp_rxlowat = icmp_mod_info.mi_lowat; 6325 sopp.sopp_maxblk = INFPSZ; 6326 sopp.sopp_maxpsz = IP_MAXPACKET; 6327 sopp.sopp_minpsz = (icmp_mod_info.mi_minpsz == 1) ? 
0 : 6328 icmp_mod_info.mi_minpsz; 6329 6330 (*connp->conn_upcalls->su_set_proto_props) 6331 (connp->conn_upper_handle, &sopp); 6332 } 6333 6334 static int 6335 rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp) 6336 { 6337 sin_t *sin = (sin_t *)sa; 6338 sin6_t *sin6 = (sin6_t *)sa; 6339 6340 ASSERT(icmp != NULL); 6341 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 6342 6343 switch (icmp->icmp_family) { 6344 case AF_INET: 6345 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 6346 if (*salenp < sizeof (sin_t)) 6347 return (EINVAL); 6348 6349 *salenp = sizeof (sin_t); 6350 *sin = sin_null; 6351 sin->sin_family = AF_INET; 6352 if (icmp->icmp_state == TS_UNBND) { 6353 break; 6354 } 6355 6356 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 6357 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 6358 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_v6src); 6359 } else { 6360 /* 6361 * INADDR_ANY 6362 * icmp_v6src is not set, we might be bound to 6363 * broadcast/multicast. Use icmp_bound_v6src as 6364 * local address instead (that could 6365 * also still be INADDR_ANY) 6366 */ 6367 sin->sin_addr.s_addr = 6368 V4_PART_OF_V6(icmp->icmp_bound_v6src); 6369 } 6370 break; 6371 case AF_INET6: 6372 6373 if (*salenp < sizeof (sin6_t)) 6374 return (EINVAL); 6375 6376 *salenp = sizeof (sin6_t); 6377 *sin6 = sin6_null; 6378 sin6->sin6_family = AF_INET6; 6379 if (icmp->icmp_state == TS_UNBND) { 6380 break; 6381 } 6382 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 6383 sin6->sin6_addr = icmp->icmp_v6src; 6384 } else { 6385 /* 6386 * UNSPECIFIED 6387 * icmp_v6src is not set, we might be bound to 6388 * broadcast/multicast. 
Use icmp_bound_v6src as 6389 * local address instead (that could 6390 * also still be UNSPECIFIED) 6391 */ 6392 6393 sin6->sin6_addr = icmp->icmp_bound_v6src; 6394 } 6395 break; 6396 } 6397 return (0); 6398 } 6399 6400 static int 6401 rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp) 6402 { 6403 sin_t *sin = (sin_t *)sa; 6404 sin6_t *sin6 = (sin6_t *)sa; 6405 6406 ASSERT(icmp != NULL); 6407 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 6408 6409 if (icmp->icmp_state != TS_DATA_XFER) 6410 return (ENOTCONN); 6411 6412 sa->sa_family = icmp->icmp_family; 6413 switch (icmp->icmp_family) { 6414 case AF_INET: 6415 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 6416 6417 if (*salenp < sizeof (sin_t)) 6418 return (EINVAL); 6419 6420 *salenp = sizeof (sin_t); 6421 *sin = sin_null; 6422 sin->sin_family = AF_INET; 6423 sin->sin_addr.s_addr = 6424 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr); 6425 break; 6426 case AF_INET6: 6427 if (*salenp < sizeof (sin6_t)) 6428 return (EINVAL); 6429 6430 *salenp = sizeof (sin6_t); 6431 *sin6 = sin6_null; 6432 *sin6 = icmp->icmp_v6dst; 6433 break; 6434 } 6435 return (0); 6436 } 6437 6438 /* ARGSUSED */ 6439 int 6440 rawip_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6441 socklen_t *salenp, cred_t *cr) 6442 { 6443 conn_t *connp = (conn_t *)proto_handle; 6444 icmp_t *icmp = connp->conn_icmp; 6445 int error; 6446 6447 /* All Solaris components should pass a cred for this operation. 
*/ 6448 ASSERT(cr != NULL); 6449 6450 ASSERT(icmp != NULL); 6451 6452 rw_enter(&icmp->icmp_rwlock, RW_READER); 6453 6454 error = rawip_do_getpeername(icmp, sa, salenp); 6455 6456 rw_exit(&icmp->icmp_rwlock); 6457 6458 return (error); 6459 } 6460 6461 /* ARGSUSED */ 6462 int 6463 rawip_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6464 socklen_t *salenp, cred_t *cr) 6465 { 6466 conn_t *connp = (conn_t *)proto_handle; 6467 icmp_t *icmp = connp->conn_icmp; 6468 int error; 6469 6470 /* All Solaris components should pass a cred for this operation. */ 6471 ASSERT(cr != NULL); 6472 6473 ASSERT(icmp != NULL); 6474 rw_enter(&icmp->icmp_rwlock, RW_READER); 6475 6476 error = rawip_do_getsockname(icmp, sa, salenp); 6477 6478 rw_exit(&icmp->icmp_rwlock); 6479 6480 return (error); 6481 } 6482 6483 int 6484 rawip_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6485 const void *optvalp, socklen_t optlen, cred_t *cr) 6486 { 6487 conn_t *connp = (conn_t *)proto_handle; 6488 icmp_t *icmp = connp->conn_icmp; 6489 int error; 6490 6491 /* All Solaris components should pass a cred for this operation. 
*/ 6492 ASSERT(cr != NULL); 6493 6494 error = proto_opt_check(level, option_name, optlen, NULL, 6495 icmp_opt_obj.odb_opt_des_arr, 6496 icmp_opt_obj.odb_opt_arr_cnt, 6497 icmp_opt_obj.odb_topmost_tpiprovider, 6498 B_TRUE, B_FALSE, cr); 6499 6500 if (error != 0) { 6501 /* 6502 * option not recognized 6503 */ 6504 if (error < 0) { 6505 error = proto_tlitosyserr(-error); 6506 } 6507 return (error); 6508 } 6509 6510 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6511 error = icmp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, 6512 option_name, optlen, (uchar_t *)optvalp, (uint_t *)&optlen, 6513 (uchar_t *)optvalp, NULL, cr); 6514 rw_exit(&icmp->icmp_rwlock); 6515 6516 if (error < 0) { 6517 /* 6518 * Pass on to ip 6519 */ 6520 error = ip_set_options(connp, level, option_name, optvalp, 6521 optlen, cr); 6522 } 6523 6524 ASSERT(error >= 0); 6525 6526 return (error); 6527 } 6528 6529 int 6530 rawip_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6531 void *optvalp, socklen_t *optlen, cred_t *cr) 6532 { 6533 int error; 6534 conn_t *connp = (conn_t *)proto_handle; 6535 icmp_t *icmp = connp->conn_icmp; 6536 t_uscalar_t max_optbuf_len; 6537 void *optvalp_buf; 6538 int len; 6539 6540 /* All Solaris components should pass a cred for this operation. 
*/ 6541 ASSERT(cr != NULL); 6542 6543 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6544 icmp_opt_obj.odb_opt_des_arr, 6545 icmp_opt_obj.odb_opt_arr_cnt, 6546 icmp_opt_obj.odb_topmost_tpiprovider, 6547 B_FALSE, B_TRUE, cr); 6548 6549 if (error != 0) { 6550 if (error < 0) { 6551 error = proto_tlitosyserr(-error); 6552 } 6553 return (error); 6554 } 6555 6556 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6557 rw_enter(&icmp->icmp_rwlock, RW_READER); 6558 len = icmp_opt_get(connp, level, option_name, optvalp_buf); 6559 rw_exit(&icmp->icmp_rwlock); 6560 6561 if (len < 0) { 6562 /* 6563 * Pass on to IP 6564 */ 6565 kmem_free(optvalp_buf, max_optbuf_len); 6566 return (ip_get_options(connp, level, option_name, optvalp, 6567 optlen, cr)); 6568 } else { 6569 /* 6570 * update optlen and copy option value 6571 */ 6572 t_uscalar_t size = MIN(len, *optlen); 6573 bcopy(optvalp_buf, optvalp, size); 6574 bcopy(&size, optlen, sizeof (size)); 6575 6576 kmem_free(optvalp_buf, max_optbuf_len); 6577 return (0); 6578 } 6579 } 6580 6581 /* ARGSUSED */ 6582 int 6583 rawip_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 6584 { 6585 conn_t *connp = (conn_t *)proto_handle; 6586 6587 /* All Solaris components should pass a cred for this operation. */ 6588 ASSERT(cr != NULL); 6589 6590 (void) rawip_do_close(connp); 6591 return (0); 6592 } 6593 6594 /* ARGSUSED */ 6595 int 6596 rawip_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6597 { 6598 conn_t *connp = (conn_t *)proto_handle; 6599 6600 /* All Solaris components should pass a cred for this operation. 
*/ 6601 ASSERT(cr != NULL); 6602 6603 /* shut down the send side */ 6604 if (how != SHUT_RD) 6605 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6606 SOCK_OPCTL_SHUT_SEND, 0); 6607 /* shut down the recv side */ 6608 if (how != SHUT_WR) 6609 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6610 SOCK_OPCTL_SHUT_RECV, 0); 6611 return (0); 6612 } 6613 6614 void 6615 rawip_clr_flowctrl(sock_lower_handle_t proto_handle) 6616 { 6617 conn_t *connp = (conn_t *)proto_handle; 6618 icmp_t *icmp = connp->conn_icmp; 6619 6620 mutex_enter(&icmp->icmp_recv_lock); 6621 connp->conn_flow_cntrld = B_FALSE; 6622 mutex_exit(&icmp->icmp_recv_lock); 6623 } 6624 6625 int 6626 rawip_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6627 int mode, int32_t *rvalp, cred_t *cr) 6628 { 6629 conn_t *connp = (conn_t *)proto_handle; 6630 int error; 6631 6632 /* All Solaris components should pass a cred for this operation. */ 6633 ASSERT(cr != NULL); 6634 6635 switch (cmd) { 6636 case ND_SET: 6637 case ND_GET: 6638 case _SIOCSOCKFALLBACK: 6639 case TI_GETPEERNAME: 6640 case TI_GETMYNAME: 6641 #ifdef DEBUG 6642 cmn_err(CE_CONT, "icmp_ioctl cmd 0x%x on non streams" 6643 " socket", cmd); 6644 #endif 6645 error = EINVAL; 6646 break; 6647 default: 6648 /* 6649 * Pass on to IP using helper stream 6650 */ 6651 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6652 cmd, arg, mode, cr, rvalp); 6653 break; 6654 } 6655 return (error); 6656 } 6657 6658 /* ARGSUSED */ 6659 int 6660 rawip_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 6661 cred_t *cr) 6662 { 6663 conn_t *connp = (conn_t *)proto_handle; 6664 icmp_t *icmp = connp->conn_icmp; 6665 icmp_stack_t *is = icmp->icmp_is; 6666 int error = 0; 6667 boolean_t bypass_dgram_errind = B_FALSE; 6668 6669 ASSERT(DB_TYPE(mp) == M_DATA); 6670 6671 /* All Solaris components should pass a cred for this operation. 
*/ 6672 ASSERT(cr != NULL); 6673 6674 /* If labeled then sockfs should have already set db_credp */ 6675 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 6676 6677 /* do an implicit bind if necessary */ 6678 if (icmp->icmp_state == TS_UNBND) { 6679 error = rawip_implicit_bind(connp); 6680 /* 6681 * We could be racing with an actual bind, in which case 6682 * we would see EPROTO. We cross our fingers and try 6683 * to connect. 6684 */ 6685 if (!(error == 0 || error == EPROTO)) { 6686 freemsg(mp); 6687 return (error); 6688 } 6689 } 6690 6691 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6692 6693 if (msg->msg_name != NULL && icmp->icmp_state == TS_DATA_XFER) { 6694 error = EISCONN; 6695 goto done_lock; 6696 } 6697 6698 switch (icmp->icmp_family) { 6699 case AF_INET6: { 6700 sin6_t *sin6; 6701 ip6_pkt_t ipp_s; /* For ancillary data options */ 6702 ip6_pkt_t *ipp = &ipp_s; 6703 6704 sin6 = (sin6_t *)msg->msg_name; 6705 if (sin6 != NULL) { 6706 error = proto_verify_ip_addr(icmp->icmp_family, 6707 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6708 if (error != 0) { 6709 bypass_dgram_errind = B_TRUE; 6710 goto done_lock; 6711 } 6712 if (icmp->icmp_delayed_error != 0) { 6713 sin6_t *sin1 = (sin6_t *)msg->msg_name; 6714 sin6_t *sin2 = (sin6_t *) 6715 &icmp->icmp_delayed_addr; 6716 6717 error = icmp->icmp_delayed_error; 6718 icmp->icmp_delayed_error = 0; 6719 6720 /* Compare IP address and port */ 6721 6722 if (sin1->sin6_port == sin2->sin6_port && 6723 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 6724 &sin2->sin6_addr)) { 6725 goto done_lock; 6726 } 6727 } 6728 } else { 6729 /* 6730 * Use connected address 6731 */ 6732 if (icmp->icmp_state != TS_DATA_XFER) { 6733 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6734 error = EDESTADDRREQ; 6735 bypass_dgram_errind = B_TRUE; 6736 goto done_lock; 6737 } 6738 sin6 = &icmp->icmp_v6dst; 6739 } 6740 6741 /* No support for mapped addresses on raw sockets */ 6742 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6743 
BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6744 error = EADDRNOTAVAIL; 6745 goto done_lock; 6746 } 6747 6748 ipp->ipp_fields = 0; 6749 ipp->ipp_sticky_ignored = 0; 6750 6751 /* 6752 * If options passed in, feed it for verification and handling 6753 */ 6754 if (msg->msg_controllen != 0) { 6755 error = process_auxiliary_options(connp, 6756 msg->msg_control, msg->msg_controllen, 6757 ipp, &icmp_opt_obj, icmp_opt_set, cr); 6758 if (error != 0) { 6759 goto done_lock; 6760 } 6761 } 6762 6763 rw_exit(&icmp->icmp_rwlock); 6764 6765 /* 6766 * Destination is a native IPv6 address. 6767 * Send out an IPv6 format packet. 6768 */ 6769 6770 error = raw_ip_send_data_v6(connp->conn_wq, connp, mp, sin6, 6771 ipp); 6772 } 6773 break; 6774 case AF_INET: { 6775 sin_t *sin; 6776 ip4_pkt_t pktinfo; 6777 ip4_pkt_t *pktinfop = &pktinfo; 6778 ipaddr_t v4dst; 6779 6780 sin = (sin_t *)msg->msg_name; 6781 if (sin != NULL) { 6782 error = proto_verify_ip_addr(icmp->icmp_family, 6783 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6784 if (error != 0) { 6785 bypass_dgram_errind = B_TRUE; 6786 goto done_lock; 6787 } 6788 v4dst = sin->sin_addr.s_addr; 6789 if (icmp->icmp_delayed_error != 0) { 6790 sin_t *sin1 = (sin_t *)msg->msg_name; 6791 sin_t *sin2 = (sin_t *)&icmp->icmp_delayed_addr; 6792 6793 error = icmp->icmp_delayed_error; 6794 icmp->icmp_delayed_error = 0; 6795 6796 /* Compare IP address and port */ 6797 if (sin1->sin_port == sin2->sin_port && 6798 sin1->sin_addr.s_addr == 6799 sin2->sin_addr.s_addr) { 6800 goto done_lock; 6801 } 6802 6803 } 6804 } else { 6805 /* 6806 * Use connected address 6807 */ 6808 if (icmp->icmp_state != TS_DATA_XFER) { 6809 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6810 error = EDESTADDRREQ; 6811 bypass_dgram_errind = B_TRUE; 6812 goto done_lock; 6813 } 6814 v4dst = V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr); 6815 } 6816 6817 6818 pktinfop->ip4_ill_index = 0; 6819 pktinfop->ip4_addr = INADDR_ANY; 6820 6821 /* 6822 * If options passed in, feed it for 
verification and handling 6823 */ 6824 if (msg->msg_controllen != 0) { 6825 error = process_auxiliary_options(connp, 6826 msg->msg_control, msg->msg_controllen, 6827 pktinfop, &icmp_opt_obj, icmp_opt_set, cr); 6828 if (error != 0) { 6829 goto done_lock; 6830 } 6831 } 6832 rw_exit(&icmp->icmp_rwlock); 6833 6834 error = raw_ip_send_data_v4(connp->conn_wq, connp, mp, 6835 v4dst, pktinfop); 6836 break; 6837 } 6838 6839 default: 6840 ASSERT(0); 6841 } 6842 6843 goto done; 6844 6845 done_lock: 6846 rw_exit(&icmp->icmp_rwlock); 6847 if (error != 0) { 6848 ASSERT(mp != NULL); 6849 freemsg(mp); 6850 } 6851 done: 6852 if (bypass_dgram_errind) 6853 return (error); 6854 return (icmp->icmp_dgram_errind ? error : 0); 6855 } 6856 6857 sock_downcalls_t sock_rawip_downcalls = { 6858 rawip_activate, 6859 rawip_accept, 6860 rawip_bind, 6861 rawip_listen, 6862 rawip_connect, 6863 rawip_getpeername, 6864 rawip_getsockname, 6865 rawip_getsockopt, 6866 rawip_setsockopt, 6867 rawip_send, 6868 NULL, 6869 NULL, 6870 NULL, 6871 rawip_shutdown, 6872 rawip_clr_flowctrl, 6873 rawip_ioctl, 6874 rawip_close 6875 }; 6876