1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#define	_SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/timod.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/policy.h>
#include <sys/priv.h>
#include <sys/zone.h>
#include <sys/time.h>

#include <sys/sockio.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/isa_defs.h>
#include <sys/suntpi.h>
#include <sys/xti_inet.h>
#include <sys/netstack.h>

#include <net/route.h>
#include <net/if.h>

#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/proto_set.h>
#include <inet/nd.h>
#include <inet/optcom.h>
#include <inet/snmpcom.h>
#include <inet/kstatcom.h>
#include <inet/rawip_impl.h>

#include <netinet/ip_mroute.h>
#include <inet/tcp.h>
#include <net/pfkeyv2.h>
#include <inet/ipsec_info.h>
#include <inet/ipclassifier.h>

#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>

#include <inet/ip_ire.h>
#include <inet/ip_if.h>

#include <inet/ip_impl.h>
#include <sys/disp.h>

/*
 * Synchronization notes:
 *
 * RAWIP is MT and uses the usual kernel synchronization primitives. There is
 * one lock, icmp_rwlock. We also use conn_lock when updating things
 * which affect the IP classifier lookup.
 * The lock order is icmp_rwlock -> conn_lock.
 *
 * The icmp_rwlock:
 * This protects most of the other fields in the icmp_t. The exact list of
 * fields which are protected by each of the above locks is documented in
 * the icmp_t structure definition.
 *
 * Plumbing notes:
 * ICMP is always a device driver. For compatibility with mibopen() code
 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough
 * dummy module.
 */

static void	icmp_addr_req(queue_t *q, mblk_t *mp);
static void	icmp_tpi_bind(queue_t *q, mblk_t *mp);
static int	icmp_bind_proto(conn_t *connp);
static int	icmp_build_hdrs(icmp_t *icmp);
static void	icmp_capability_req(queue_t *q, mblk_t *mp);
static int	icmp_close(queue_t *q, int flags);
static void	icmp_tpi_connect(queue_t *q, mblk_t *mp);
static void	icmp_tpi_disconnect(queue_t *q, mblk_t *mp);
static void	icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
		    t_scalar_t t_error, int sys_error);
static void	icmp_icmp_error(conn_t *connp, mblk_t *mp);
static void	icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp);
static void	icmp_info_req(queue_t *q, mblk_t *mp);
static void	icmp_input(void *, mblk_t *, void *);
static conn_t	*icmp_open(int family, cred_t *credp, int *err, int flags);
static int	icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	icmp_unitdata_opt_process(queue_t *q, mblk_t *mp,
		    int *errorp, void *thisdg_attrs);
static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
int		icmp_opt_set(conn_t *connp, uint_t optset_context,
		    int level, int name, uint_t inlen,
		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
		    void *thisdg_attrs, cred_t *cr);
int		icmp_opt_get(conn_t *connp, int level, int name,
		    uchar_t *ptr);
static int	icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt);
static int	icmp_param_set(queue_t *q, mblk_t *mp, char *value,
		    caddr_t cp, cred_t *cr);
static int	icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
		    uchar_t *ptr, int len);
static int	icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static void	icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
static void	icmp_tpi_unbind(queue_t *q, mblk_t *mp);
static void	icmp_wput(queue_t *q, mblk_t *mp);
static void	icmp_wput_fallback(queue_t *q, mblk_t *mp);
static int	raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp,
		    sin6_t *sin6, ip6_pkt_t *ipp);
static int	raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp,
		    ipaddr_t v4dst, ip4_pkt_t *pktinfop);
static void	icmp_wput_other(queue_t *q, mblk_t *mp);
static void	icmp_wput_iocdata(queue_t *q, mblk_t *mp);
static void	icmp_wput_restricted(queue_t *q, mblk_t *mp);

static void	*rawip_stack_init(netstackid_t stackid, netstack_t *ns);
static void	rawip_stack_fini(netstackid_t stackid, void *arg);

static void	*rawip_kstat_init(netstackid_t stackid);
static void	rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp);
static int	rawip_kstat_update(kstat_t *kp, int rw);
static void	rawip_stack_shutdown(netstackid_t stackid, void *arg);
static int	rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa,
		    uint_t *salenp);
static int	rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa,
		    uint_t *salenp);

int		rawip_getsockname(sock_lower_handle_t, struct sockaddr *,
		    socklen_t *, cred_t *);
int		rawip_getpeername(sock_lower_handle_t, struct sockaddr *,
		    socklen_t *, cred_t *);

/* STREAMS module information shared by every qinit structure below. */
static struct module_info icmp_mod_info = {
	5707, "icmp", 1, INFPSZ, 512, 128
};

/*
 * Entry points for ICMP as a device.
 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices.
 */
static struct qinit icmprinitv4 = {
	NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info
};

static struct qinit icmprinitv6 = {
	NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info
};

/* Write side; shared by the IPv4 and IPv6 devices. */
static struct qinit icmpwinit = {
	(pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &icmp_mod_info
};

/* ICMP entry point during fallback */
static struct qinit icmp_fallback_sock_winit = {
	(pfi_t)icmp_wput_fallback, NULL, NULL, NULL, NULL, &icmp_mod_info
};

/* For AF_INET aka /dev/icmp */
struct streamtab icmpinfov4 = {
	&icmprinitv4, &icmpwinit
};

/* For AF_INET6 aka /dev/icmp6 */
struct streamtab icmpinfov6 = {
	&icmprinitv6, &icmpwinit
};

static sin_t	sin_null;	/* Zero address for quick clears */
static sin6_t	sin6_null;	/* Zero address for quick clears */

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack icmp_g_t_info_ack = {
	T_INFO_ACK,
	IP_MAXPACKET,	/* TSDU_size.  icmp allows maximum size messages. */
	T_INVALID,	/* ETSDU_size.  icmp does not support expedited data. */
	T_INVALID,	/* CDATA_size.  icmp does not support connect data. */
	T_INVALID,	/* DDATA_size.  icmp does not support disconnect data. */
	0,		/* ADDR_size - filled in later. */
	0,		/* OPT_size - not initialized here */
	IP_MAXPACKET,	/* TIDU_size.  icmp allows maximum size messages. */
	T_CLTS,		/* SERV_type.  icmp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from icmp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/*
 * Table of ND variables supported by icmp.  These are loaded into is_nd
 * when the stack instance is created.
 * All of these are alterable, within the min/max values given, at run time.
 *
 * The is_* macros that follow index this table by position, so the order
 * of the entries and the macro indices must be kept in step.
 */
static icmpparam_t	icmp_param_arr[] = {
	/* min	max	value	name */
	{ 0,	128,	32,	"icmp_wroff_extra" },
	{ 1,	255,	255,	"icmp_ipv4_ttl" },
	{ 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"},
	{ 0,	1,	1,	"icmp_bsd_compat" },
	{ 4096,	65536,	8192,	"icmp_xmit_hiwat"},
	{ 0,	65536,	1024,	"icmp_xmit_lowat"},
	{ 4096,	65536,	8192,	"icmp_recv_hiwat"},
	{ 65536, 1024*1024*1024, 256*1024,	"icmp_max_buf"},
};
#define	is_wroff_extra		is_param_arr[0].icmp_param_value
#define	is_ipv4_ttl		is_param_arr[1].icmp_param_value
#define	is_ipv6_hoplimit	is_param_arr[2].icmp_param_value
#define	is_bsd_compat		is_param_arr[3].icmp_param_value
#define	is_xmit_hiwat		is_param_arr[4].icmp_param_value
#define	is_xmit_lowat		is_param_arr[5].icmp_param_value
#define	is_recv_hiwat		is_param_arr[6].icmp_param_value
#define	is_max_buf		is_param_arr[7].icmp_param_value

static int rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len);
static int rawip_do_connect(conn_t *connp, const struct sockaddr *sa,
    socklen_t len);
static void rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error);

/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to icmp_wput.
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP
 * protocol type placed in the message following the address. A T_BIND_ACK
 * message is returned by ip_bind_v4/v6.
260 */ 261 static void 262 icmp_tpi_bind(queue_t *q, mblk_t *mp) 263 { 264 int error; 265 struct sockaddr *sa; 266 struct T_bind_req *tbr; 267 socklen_t len; 268 sin_t *sin; 269 sin6_t *sin6; 270 icmp_t *icmp; 271 conn_t *connp = Q_TO_CONN(q); 272 mblk_t *mp1; 273 274 icmp = connp->conn_icmp; 275 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 276 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 277 "icmp_bind: bad req, len %u", 278 (uint_t)(mp->b_wptr - mp->b_rptr)); 279 icmp_err_ack(q, mp, TPROTO, 0); 280 return; 281 } 282 283 if (icmp->icmp_state != TS_UNBND) { 284 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 285 "icmp_bind: bad state, %d", icmp->icmp_state); 286 icmp_err_ack(q, mp, TOUTSTATE, 0); 287 return; 288 } 289 290 /* 291 * Reallocate the message to make sure we have enough room for an 292 * address and the protocol type. 293 */ 294 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 295 if (!mp1) { 296 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 297 return; 298 } 299 mp = mp1; 300 301 /* Reset the message type in preparation for shipping it back. 
*/ 302 DB_TYPE(mp) = M_PCPROTO; 303 tbr = (struct T_bind_req *)mp->b_rptr; 304 len = tbr->ADDR_length; 305 switch (len) { 306 case 0: /* request for a generic port */ 307 tbr->ADDR_offset = sizeof (struct T_bind_req); 308 if (icmp->icmp_family == AF_INET) { 309 tbr->ADDR_length = sizeof (sin_t); 310 sin = (sin_t *)&tbr[1]; 311 *sin = sin_null; 312 sin->sin_family = AF_INET; 313 mp->b_wptr = (uchar_t *)&sin[1]; 314 sa = (struct sockaddr *)sin; 315 len = sizeof (sin_t); 316 } else { 317 ASSERT(icmp->icmp_family == AF_INET6); 318 tbr->ADDR_length = sizeof (sin6_t); 319 sin6 = (sin6_t *)&tbr[1]; 320 *sin6 = sin6_null; 321 sin6->sin6_family = AF_INET6; 322 mp->b_wptr = (uchar_t *)&sin6[1]; 323 sa = (struct sockaddr *)sin6; 324 len = sizeof (sin6_t); 325 } 326 break; 327 328 case sizeof (sin_t): /* Complete IPv4 address */ 329 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 330 sizeof (sin_t)); 331 break; 332 333 case sizeof (sin6_t): /* Complete IPv6 address */ 334 sa = (struct sockaddr *)mi_offset_param(mp, 335 tbr->ADDR_offset, sizeof (sin6_t)); 336 break; 337 338 default: 339 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 340 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 341 icmp_err_ack(q, mp, TBADADDR, 0); 342 return; 343 } 344 345 error = rawip_do_bind(connp, sa, len); 346 done: 347 ASSERT(mp->b_cont == NULL); 348 if (error != 0) { 349 if (error > 0) { 350 icmp_err_ack(q, mp, TSYSERR, error); 351 } else { 352 icmp_err_ack(q, mp, -error, 0); 353 } 354 } else { 355 tbr->PRIM_type = T_BIND_ACK; 356 qreply(q, mp); 357 } 358 } 359 360 static int 361 rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len) 362 { 363 sin_t *sin; 364 sin6_t *sin6; 365 icmp_t *icmp; 366 int error = 0; 367 mblk_t *ire_mp; 368 369 370 icmp = connp->conn_icmp; 371 372 if (sa == NULL || !OK_32PTR((char *)sa)) { 373 return (EINVAL); 374 } 375 376 /* 377 * The state must be TS_UNBND. 
TPI mandates that users must send 378 * TPI primitives only 1 at a time and wait for the response before 379 * sending the next primitive. 380 */ 381 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 382 if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) { 383 error = -TOUTSTATE; 384 goto done; 385 } 386 387 ASSERT(len != 0); 388 switch (len) { 389 case sizeof (sin_t): /* Complete IPv4 address */ 390 sin = (sin_t *)sa; 391 if (sin->sin_family != AF_INET || 392 icmp->icmp_family != AF_INET) { 393 /* TSYSERR, EAFNOSUPPORT */ 394 error = EAFNOSUPPORT; 395 goto done; 396 } 397 break; 398 case sizeof (sin6_t): /* Complete IPv6 address */ 399 sin6 = (sin6_t *)sa; 400 if (sin6->sin6_family != AF_INET6 || 401 icmp->icmp_family != AF_INET6) { 402 /* TSYSERR, EAFNOSUPPORT */ 403 error = EAFNOSUPPORT; 404 goto done; 405 } 406 /* No support for mapped addresses on raw sockets */ 407 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 408 /* TSYSERR, EADDRNOTAVAIL */ 409 error = EADDRNOTAVAIL; 410 goto done; 411 } 412 break; 413 414 default: 415 /* TBADADDR */ 416 error = EADDRNOTAVAIL; 417 goto done; 418 } 419 420 icmp->icmp_pending_op = T_BIND_REQ; 421 icmp->icmp_state = TS_IDLE; 422 423 /* 424 * Copy the source address into our icmp structure. This address 425 * may still be zero; if so, ip will fill in the correct address 426 * each time an outbound packet is passed to it. 427 * If we are binding to a broadcast or multicast address then 428 * rawip_post_ip_bind_connect will clear the source address. 
429 */ 430 431 if (icmp->icmp_family == AF_INET) { 432 ASSERT(sin != NULL); 433 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 434 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 435 &icmp->icmp_v6src); 436 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 437 icmp->icmp_ip_snd_options_len; 438 icmp->icmp_bound_v6src = icmp->icmp_v6src; 439 } else { 440 int error; 441 442 ASSERT(sin6 != NULL); 443 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 444 icmp->icmp_v6src = sin6->sin6_addr; 445 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 446 icmp->icmp_bound_v6src = icmp->icmp_v6src; 447 448 /* Rebuild the header template */ 449 error = icmp_build_hdrs(icmp); 450 if (error != 0) { 451 icmp->icmp_pending_op = -1; 452 /* 453 * TSYSERR 454 */ 455 goto done; 456 } 457 } 458 459 ire_mp = NULL; 460 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 461 /* 462 * request an IRE if src not 0 (INADDR_ANY) 463 */ 464 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 465 if (ire_mp == NULL) { 466 icmp->icmp_pending_op = -1; 467 error = ENOMEM; 468 goto done; 469 } 470 DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE; 471 } 472 done: 473 rw_exit(&icmp->icmp_rwlock); 474 if (error != 0) 475 return (error); 476 477 if (icmp->icmp_family == AF_INET6) { 478 error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto, 479 &sin6->sin6_addr, sin6->sin6_port, B_TRUE); 480 } else { 481 error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto, 482 sin->sin_addr.s_addr, sin->sin_port, B_TRUE); 483 } 484 rawip_post_ip_bind_connect(icmp, ire_mp, error); 485 return (error); 486 } 487 488 static void 489 rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error) 490 { 491 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 492 if (icmp->icmp_state == TS_UNBND) { 493 /* 494 * not yet bound - bind sent by icmp_bind_proto. 
495 */ 496 rw_exit(&icmp->icmp_rwlock); 497 return; 498 } 499 ASSERT(icmp->icmp_pending_op != -1); 500 icmp->icmp_pending_op = -1; 501 502 if (error != 0) { 503 if (icmp->icmp_state == TS_DATA_XFER) { 504 /* Connect failed */ 505 /* Revert back to the bound source */ 506 icmp->icmp_v6src = icmp->icmp_bound_v6src; 507 icmp->icmp_state = TS_IDLE; 508 if (icmp->icmp_family == AF_INET6) 509 (void) icmp_build_hdrs(icmp); 510 } else { 511 V6_SET_ZERO(icmp->icmp_v6src); 512 V6_SET_ZERO(icmp->icmp_bound_v6src); 513 icmp->icmp_state = TS_UNBND; 514 if (icmp->icmp_family == AF_INET6) 515 (void) icmp_build_hdrs(icmp); 516 } 517 } else { 518 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 519 ire_t *ire; 520 521 ire = (ire_t *)ire_mp->b_rptr; 522 /* 523 * If a broadcast/multicast address was bound set 524 * the source address to 0. 525 * This ensures no datagrams with broadcast address 526 * as source address are emitted (which would violate 527 * RFC1122 - Hosts requirements) 528 * Note: we get IRE_BROADCAST for IPv6 529 * to "mark" a multicast local address. 530 */ 531 532 533 if (ire->ire_type == IRE_BROADCAST && 534 icmp->icmp_state != TS_DATA_XFER) { 535 /* 536 * This was just a local bind to a 537 * MC/broadcast addr 538 */ 539 V6_SET_ZERO(icmp->icmp_v6src); 540 if (icmp->icmp_family == AF_INET6) 541 (void) icmp_build_hdrs(icmp); 542 } 543 } 544 545 } 546 rw_exit(&icmp->icmp_rwlock); 547 if (ire_mp != NULL) 548 freeb(ire_mp); 549 } 550 551 /* 552 * Send message to IP to just bind to the protocol. 
553 */ 554 static int 555 icmp_bind_proto(conn_t *connp) 556 { 557 icmp_t *icmp; 558 int error; 559 560 icmp = connp->conn_icmp; 561 562 if (icmp->icmp_family == AF_INET6) 563 error = ip_proto_bind_laddr_v6(connp, NULL, icmp->icmp_proto, 564 &sin6_null.sin6_addr, 0, B_TRUE); 565 else 566 error = ip_proto_bind_laddr_v4(connp, NULL, icmp->icmp_proto, 567 sin_null.sin_addr.s_addr, 0, B_TRUE); 568 569 rawip_post_ip_bind_connect(icmp, NULL, error); 570 return (error); 571 } 572 573 static void 574 icmp_tpi_connect(queue_t *q, mblk_t *mp) 575 { 576 conn_t *connp = Q_TO_CONN(q); 577 struct T_conn_req *tcr; 578 icmp_t *icmp; 579 struct sockaddr *sa; 580 socklen_t len; 581 int error; 582 583 icmp = connp->conn_icmp; 584 tcr = (struct T_conn_req *)mp->b_rptr; 585 /* Sanity checks */ 586 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 587 icmp_err_ack(q, mp, TPROTO, 0); 588 return; 589 } 590 591 if (tcr->OPT_length != 0) { 592 icmp_err_ack(q, mp, TBADOPT, 0); 593 return; 594 } 595 596 len = tcr->DEST_length; 597 598 switch (len) { 599 default: 600 icmp_err_ack(q, mp, TBADADDR, 0); 601 return; 602 case sizeof (sin_t): 603 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 604 sizeof (sin_t)); 605 break; 606 case sizeof (sin6_t): 607 sa = (struct sockaddr *)mi_offset_param(mp, 608 tcr->DEST_offset, sizeof (sin6_t)); 609 break; 610 } 611 612 error = proto_verify_ip_addr(icmp->icmp_family, sa, len); 613 if (error != 0) { 614 icmp_err_ack(q, mp, TSYSERR, error); 615 return; 616 } 617 618 error = rawip_do_connect(connp, sa, len); 619 if (error != 0) { 620 if (error < 0) { 621 icmp_err_ack(q, mp, -error, 0); 622 } else { 623 icmp_err_ack(q, mp, 0, error); 624 } 625 } else { 626 mblk_t *mp1; 627 628 /* 629 * We have to send a connection confirmation to 630 * keep TLI happy. 
631 */ 632 if (icmp->icmp_family == AF_INET) { 633 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 634 sizeof (sin_t), NULL, 0); 635 } else { 636 ASSERT(icmp->icmp_family == AF_INET6); 637 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 638 sizeof (sin6_t), NULL, 0); 639 } 640 if (mp1 == NULL) { 641 rw_exit(&icmp->icmp_rwlock); 642 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 643 return; 644 } 645 646 /* 647 * Send ok_ack for T_CONN_REQ 648 */ 649 mp = mi_tpi_ok_ack_alloc(mp); 650 if (mp == NULL) { 651 /* Unable to reuse the T_CONN_REQ for the ack. */ 652 freemsg(mp1); 653 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 654 return; 655 } 656 putnext(connp->conn_rq, mp); 657 putnext(connp->conn_rq, mp1); 658 } 659 } 660 661 static int 662 rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len) 663 { 664 icmp_t *icmp; 665 sin_t *sin; 666 sin6_t *sin6; 667 mblk_t *ire_mp; 668 int error; 669 ipaddr_t v4dst; 670 in6_addr_t v6dst; 671 672 icmp = connp->conn_icmp; 673 674 if (sa == NULL || !OK_32PTR((char *)sa)) { 675 return (EINVAL); 676 } 677 678 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 679 if (ire_mp == NULL) 680 return (ENOMEM); 681 DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE; 682 683 684 ASSERT(sa != NULL && len != 0); 685 686 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 687 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 688 rw_exit(&icmp->icmp_rwlock); 689 freeb(ire_mp); 690 return (-TOUTSTATE); 691 } 692 693 switch (len) { 694 case sizeof (sin_t): 695 sin = (sin_t *)sa; 696 697 ASSERT(icmp->icmp_family == AF_INET); 698 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 699 700 v4dst = sin->sin_addr.s_addr; 701 /* 702 * Interpret a zero destination to mean loopback. 703 * Update the T_CONN_REQ (sin/sin6) since it is used to 704 * generate the T_CONN_CON. 
705 */ 706 if (v4dst == INADDR_ANY) { 707 v4dst = htonl(INADDR_LOOPBACK); 708 } 709 710 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 711 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 712 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 713 icmp->icmp_ip_snd_options_len; 714 icmp->icmp_v6dst.sin6_addr = v6dst; 715 icmp->icmp_v6dst.sin6_family = AF_INET6; 716 icmp->icmp_v6dst.sin6_flowinfo = 0; 717 icmp->icmp_v6dst.sin6_port = 0; 718 719 /* 720 * If the destination address is multicast and 721 * an outgoing multicast interface has been set, 722 * use the address of that interface as our 723 * source address if no source address has been set. 724 */ 725 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 726 CLASSD(v4dst) && 727 icmp->icmp_multicast_if_addr != INADDR_ANY) { 728 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 729 &icmp->icmp_v6src); 730 } 731 break; 732 case sizeof (sin6_t): 733 sin6 = (sin6_t *)sa; 734 735 /* No support for mapped addresses on raw sockets */ 736 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 737 rw_exit(&icmp->icmp_rwlock); 738 freeb(ire_mp); 739 return (EADDRNOTAVAIL); 740 } 741 742 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 743 ASSERT(icmp->icmp_family == AF_INET6); 744 745 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 746 747 icmp->icmp_v6dst = *sin6; 748 icmp->icmp_v6dst.sin6_port = 0; 749 750 /* 751 * Interpret a zero destination to mean loopback. 752 * Update the T_CONN_REQ (sin/sin6) since it is used to 753 * generate the T_CONN_CON. 754 */ 755 if (IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6dst.sin6_addr)) { 756 icmp->icmp_v6dst.sin6_addr = ipv6_loopback; 757 } 758 /* 759 * If the destination address is multicast and 760 * an outgoing multicast interface has been set, 761 * then the ip bind logic will pick the correct source 762 * address (i.e. matching the outgoing multicast interface). 
763 */ 764 break; 765 } 766 767 icmp->icmp_pending_op = T_CONN_REQ; 768 769 if (icmp->icmp_state == TS_DATA_XFER) { 770 /* Already connected - clear out state */ 771 icmp->icmp_v6src = icmp->icmp_bound_v6src; 772 icmp->icmp_state = TS_IDLE; 773 } 774 775 icmp->icmp_state = TS_DATA_XFER; 776 rw_exit(&icmp->icmp_rwlock); 777 778 if (icmp->icmp_family == AF_INET6) { 779 error = ip_proto_bind_connected_v6(connp, &ire_mp, 780 icmp->icmp_proto, &icmp->icmp_v6src, 0, 781 &icmp->icmp_v6dst.sin6_addr, 782 NULL, sin6->sin6_port, B_TRUE, B_TRUE); 783 } else { 784 error = ip_proto_bind_connected_v4(connp, &ire_mp, 785 icmp->icmp_proto, &V4_PART_OF_V6(icmp->icmp_v6src), 0, 786 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr), sin->sin_port, 787 B_TRUE, B_TRUE); 788 } 789 rawip_post_ip_bind_connect(icmp, ire_mp, error); 790 return (error); 791 } 792 793 static void 794 icmp_close_free(conn_t *connp) 795 { 796 icmp_t *icmp = connp->conn_icmp; 797 798 /* If there are any options associated with the stream, free them. */ 799 if (icmp->icmp_ip_snd_options != NULL) { 800 mi_free((char *)icmp->icmp_ip_snd_options); 801 icmp->icmp_ip_snd_options = NULL; 802 icmp->icmp_ip_snd_options_len = 0; 803 } 804 805 if (icmp->icmp_filter != NULL) { 806 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 807 icmp->icmp_filter = NULL; 808 } 809 810 /* Free memory associated with sticky options */ 811 if (icmp->icmp_sticky_hdrs_len != 0) { 812 kmem_free(icmp->icmp_sticky_hdrs, 813 icmp->icmp_sticky_hdrs_len); 814 icmp->icmp_sticky_hdrs = NULL; 815 icmp->icmp_sticky_hdrs_len = 0; 816 } 817 ip6_pkt_free(&icmp->icmp_sticky_ipp); 818 819 /* 820 * Clear any fields which the kmem_cache constructor clears. 821 * Only icmp_connp needs to be preserved. 822 * TBD: We should make this more efficient to avoid clearing 823 * everything. 
824 */ 825 ASSERT(icmp->icmp_connp == connp); 826 bzero(icmp, sizeof (icmp_t)); 827 icmp->icmp_connp = connp; 828 } 829 830 static int 831 rawip_do_close(conn_t *connp) 832 { 833 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 834 835 ip_quiesce_conn(connp); 836 837 if (!IPCL_IS_NONSTR(connp)) { 838 qprocsoff(connp->conn_rq); 839 } 840 841 ASSERT(connp->conn_icmp->icmp_fallback_queue_head == NULL && 842 connp->conn_icmp->icmp_fallback_queue_tail == NULL); 843 icmp_close_free(connp); 844 845 /* 846 * Now we are truly single threaded on this stream, and can 847 * delete the things hanging off the connp, and finally the connp. 848 * We removed this connp from the fanout list, it cannot be 849 * accessed thru the fanouts, and we already waited for the 850 * conn_ref to drop to 0. We are already in close, so 851 * there cannot be any other thread from the top. qprocsoff 852 * has completed, and service has completed or won't run in 853 * future. 854 */ 855 ASSERT(connp->conn_ref == 1); 856 857 if (!IPCL_IS_NONSTR(connp)) { 858 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 859 } else { 860 ip_free_helper_stream(connp); 861 } 862 863 connp->conn_ref--; 864 ipcl_conn_destroy(connp); 865 866 return (0); 867 } 868 869 static int 870 icmp_close(queue_t *q, int flags) 871 { 872 conn_t *connp; 873 874 if (flags & SO_FALLBACK) { 875 /* 876 * stream is being closed while in fallback 877 * simply free the resources that were allocated 878 */ 879 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 880 qprocsoff(q); 881 goto done; 882 } 883 884 connp = Q_TO_CONN(q); 885 (void) rawip_do_close(connp); 886 done: 887 q->q_ptr = WR(q)->q_ptr = NULL; 888 return (0); 889 } 890 891 /* 892 * This routine handles each T_DISCON_REQ message passed to icmp 893 * as an indicating that ICMP is no longer connected. This results 894 * in sending a T_BIND_REQ to IP to restore the binding to just 895 * the local address. 896 * 897 * The disconnect completes in rawip_post_ip_bind_connect. 
898 */ 899 static int 900 icmp_do_disconnect(conn_t *connp) 901 { 902 icmp_t *icmp; 903 mblk_t *ire_mp; 904 int error; 905 906 icmp = connp->conn_icmp; 907 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 908 if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) { 909 rw_exit(&icmp->icmp_rwlock); 910 return (-TOUTSTATE); 911 } 912 icmp->icmp_pending_op = T_DISCON_REQ; 913 icmp->icmp_v6src = icmp->icmp_bound_v6src; 914 icmp->icmp_state = TS_IDLE; 915 916 917 if (icmp->icmp_family == AF_INET6) { 918 /* Rebuild the header template */ 919 error = icmp_build_hdrs(icmp); 920 if (error != 0) { 921 icmp->icmp_pending_op = -1; 922 rw_exit(&icmp->icmp_rwlock); 923 return (error); 924 } 925 } 926 927 rw_exit(&icmp->icmp_rwlock); 928 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 929 if (ire_mp == NULL) { 930 return (ENOMEM); 931 } 932 933 if (icmp->icmp_family == AF_INET6) { 934 error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto, 935 &icmp->icmp_bound_v6src, 0, B_TRUE); 936 } else { 937 938 error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto, 939 V4_PART_OF_V6(icmp->icmp_bound_v6src), 0, B_TRUE); 940 } 941 942 rawip_post_ip_bind_connect(icmp, ire_mp, error); 943 944 return (error); 945 } 946 947 static void 948 icmp_tpi_disconnect(queue_t *q, mblk_t *mp) 949 { 950 conn_t *connp = Q_TO_CONN(q); 951 int error; 952 953 /* 954 * Allocate the largest primitive we need to send back 955 * T_error_ack is > than T_ok_ack 956 */ 957 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 958 if (mp == NULL) { 959 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 960 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 961 return; 962 } 963 964 error = icmp_do_disconnect(connp); 965 966 if (error != 0) { 967 if (error > 0) { 968 icmp_err_ack(q, mp, 0, error); 969 } else { 970 icmp_err_ack(q, mp, -error, 0); 971 } 972 } else { 973 mp = mi_tpi_ok_ack_alloc(mp); 974 ASSERT(mp != NULL); 975 qreply(q, mp); 976 } 977 978 } 979 980 static int 981 icmp_disconnect(conn_t *connp) 982 { 983 int error; 984 icmp_t *icmp = connp->conn_icmp; 985 986 icmp->icmp_dgram_errind = B_FALSE; 987 988 error = icmp_do_disconnect(connp); 989 990 if (error < 0) 991 error = proto_tlitosyserr(-error); 992 return (error); 993 } 994 995 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 996 static void 997 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 998 { 999 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1000 qreply(q, mp); 1001 } 1002 1003 /* Shorthand to generate and send TPI error acks to our client */ 1004 static void 1005 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 1006 t_scalar_t t_error, int sys_error) 1007 { 1008 struct T_error_ack *teackp; 1009 1010 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1011 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1012 teackp = (struct T_error_ack *)mp->b_rptr; 1013 teackp->ERROR_prim = primitive; 1014 teackp->TLI_error = t_error; 1015 teackp->UNIX_error = sys_error; 1016 qreply(q, mp); 1017 } 1018 } 1019 1020 /* 1021 * icmp_icmp_error is called by icmp_input to process ICMP 1022 * messages passed up by IP. 1023 * Generates the appropriate permanent (non-transient) errors. 1024 * Assumes that IP has pulled up everything up to and including 1025 * the ICMP header. 
1026 */ 1027 static void 1028 icmp_icmp_error(conn_t *connp, mblk_t *mp) 1029 { 1030 icmph_t *icmph; 1031 ipha_t *ipha; 1032 int iph_hdr_length; 1033 sin_t sin; 1034 mblk_t *mp1; 1035 int error = 0; 1036 icmp_t *icmp = connp->conn_icmp; 1037 1038 ipha = (ipha_t *)mp->b_rptr; 1039 1040 ASSERT(OK_32PTR(mp->b_rptr)); 1041 1042 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1043 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1044 icmp_icmp_error_ipv6(connp, mp); 1045 return; 1046 } 1047 1048 /* 1049 * icmp does not support v4 mapped addresses 1050 * so we can never be here for a V6 socket 1051 * i.e. icmp_family == AF_INET6 1052 */ 1053 ASSERT((IPH_HDR_VERSION(ipha) == IPV4_VERSION) && 1054 (icmp->icmp_family == AF_INET)); 1055 1056 ASSERT(icmp->icmp_family == AF_INET); 1057 1058 /* Skip past the outer IP and ICMP headers */ 1059 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1060 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 1061 ipha = (ipha_t *)&icmph[1]; 1062 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1063 1064 switch (icmph->icmph_type) { 1065 case ICMP_DEST_UNREACHABLE: 1066 switch (icmph->icmph_code) { 1067 case ICMP_FRAGMENTATION_NEEDED: 1068 /* 1069 * IP has already adjusted the path MTU. 1070 */ 1071 break; 1072 case ICMP_PORT_UNREACHABLE: 1073 case ICMP_PROTOCOL_UNREACHABLE: 1074 error = ECONNREFUSED; 1075 break; 1076 default: 1077 /* Transient errors */ 1078 break; 1079 } 1080 break; 1081 default: 1082 /* Transient errors */ 1083 break; 1084 } 1085 if (error == 0) { 1086 freemsg(mp); 1087 return; 1088 } 1089 1090 /* 1091 * Deliver T_UDERROR_IND when the application has asked for it. 1092 * The socket layer enables this automatically when connected. 
1093 */ 1094 if (!icmp->icmp_dgram_errind) { 1095 freemsg(mp); 1096 return; 1097 } 1098 1099 sin = sin_null; 1100 sin.sin_family = AF_INET; 1101 sin.sin_addr.s_addr = ipha->ipha_dst; 1102 if (IPCL_IS_NONSTR(connp)) { 1103 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1104 if (icmp->icmp_state == TS_DATA_XFER) { 1105 if (sin.sin_addr.s_addr == 1106 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr)) { 1107 rw_exit(&icmp->icmp_rwlock); 1108 (*connp->conn_upcalls->su_set_error) 1109 (connp->conn_upper_handle, error); 1110 goto done; 1111 } 1112 } else { 1113 icmp->icmp_delayed_error = error; 1114 *((sin_t *)&icmp->icmp_delayed_addr) = sin; 1115 } 1116 rw_exit(&icmp->icmp_rwlock); 1117 } else { 1118 1119 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 1120 0, error); 1121 if (mp1 != NULL) 1122 putnext(connp->conn_rq, mp1); 1123 } 1124 done: 1125 freemsg(mp); 1126 } 1127 1128 /* 1129 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 1130 * for IPv6 packets. 1131 * Send permanent (non-transient) errors upstream. 1132 * Assumes that IP has pulled up all the extension headers as well 1133 * as the ICMPv6 header. 
1134 */ 1135 static void 1136 icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1137 { 1138 icmp6_t *icmp6; 1139 ip6_t *ip6h, *outer_ip6h; 1140 uint16_t iph_hdr_length; 1141 uint8_t *nexthdrp; 1142 sin6_t sin6; 1143 mblk_t *mp1; 1144 int error = 0; 1145 icmp_t *icmp = connp->conn_icmp; 1146 1147 outer_ip6h = (ip6_t *)mp->b_rptr; 1148 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1149 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1150 else 1151 iph_hdr_length = IPV6_HDR_LEN; 1152 1153 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1154 ip6h = (ip6_t *)&icmp6[1]; 1155 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1156 freemsg(mp); 1157 return; 1158 } 1159 1160 switch (icmp6->icmp6_type) { 1161 case ICMP6_DST_UNREACH: 1162 switch (icmp6->icmp6_code) { 1163 case ICMP6_DST_UNREACH_NOPORT: 1164 error = ECONNREFUSED; 1165 break; 1166 case ICMP6_DST_UNREACH_ADMIN: 1167 case ICMP6_DST_UNREACH_NOROUTE: 1168 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1169 case ICMP6_DST_UNREACH_ADDR: 1170 /* Transient errors */ 1171 break; 1172 default: 1173 break; 1174 } 1175 break; 1176 case ICMP6_PACKET_TOO_BIG: { 1177 struct T_unitdata_ind *tudi; 1178 struct T_opthdr *toh; 1179 size_t udi_size; 1180 mblk_t *newmp; 1181 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1182 sizeof (struct ip6_mtuinfo); 1183 sin6_t *sin6; 1184 struct ip6_mtuinfo *mtuinfo; 1185 1186 /* 1187 * If the application has requested to receive path mtu 1188 * information, send up an empty message containing an 1189 * IPV6_PATHMTU ancillary data item. 1190 */ 1191 if (!icmp->icmp_ipv6_recvpathmtu) 1192 break; 1193 1194 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1195 opt_length; 1196 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1197 BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors); 1198 break; 1199 } 1200 1201 /* 1202 * newmp->b_cont is left to NULL on purpose. This is an 1203 * empty message containing only ancillary data. 
1204 */ 1205 newmp->b_datap->db_type = M_PROTO; 1206 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1207 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1208 tudi->PRIM_type = T_UNITDATA_IND; 1209 tudi->SRC_length = sizeof (sin6_t); 1210 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1211 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1212 tudi->OPT_length = opt_length; 1213 1214 sin6 = (sin6_t *)&tudi[1]; 1215 bzero(sin6, sizeof (sin6_t)); 1216 sin6->sin6_family = AF_INET6; 1217 sin6->sin6_addr = icmp->icmp_v6dst.sin6_addr; 1218 1219 toh = (struct T_opthdr *)&sin6[1]; 1220 toh->level = IPPROTO_IPV6; 1221 toh->name = IPV6_PATHMTU; 1222 toh->len = opt_length; 1223 toh->status = 0; 1224 1225 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1226 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1227 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1228 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1229 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1230 /* 1231 * We've consumed everything we need from the original 1232 * message. Free it, then send our empty message. 1233 */ 1234 freemsg(mp); 1235 if (!IPCL_IS_NONSTR(connp)) { 1236 putnext(connp->conn_rq, newmp); 1237 } else { 1238 (*connp->conn_upcalls->su_recv) 1239 (connp->conn_upper_handle, newmp, 0, 0, &error, 1240 NULL); 1241 ASSERT(error == 0); 1242 } 1243 return; 1244 } 1245 case ICMP6_TIME_EXCEEDED: 1246 /* Transient errors */ 1247 break; 1248 case ICMP6_PARAM_PROB: 1249 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1250 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1251 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1252 (uchar_t *)nexthdrp) { 1253 error = ECONNREFUSED; 1254 break; 1255 } 1256 break; 1257 } 1258 if (error == 0) { 1259 freemsg(mp); 1260 return; 1261 } 1262 1263 /* 1264 * Deliver T_UDERROR_IND when the application has asked for it. 1265 * The socket layer enables this automatically when connected. 
1266 */ 1267 if (!icmp->icmp_dgram_errind) { 1268 freemsg(mp); 1269 return; 1270 } 1271 1272 sin6 = sin6_null; 1273 sin6.sin6_family = AF_INET6; 1274 sin6.sin6_addr = ip6h->ip6_dst; 1275 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1276 1277 if (IPCL_IS_NONSTR(connp)) { 1278 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1279 if (icmp->icmp_state == TS_DATA_XFER) { 1280 if (IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1281 &icmp->icmp_v6dst.sin6_addr)) { 1282 rw_exit(&icmp->icmp_rwlock); 1283 (*connp->conn_upcalls->su_set_error) 1284 (connp->conn_upper_handle, error); 1285 goto done; 1286 } 1287 } else { 1288 icmp->icmp_delayed_error = error; 1289 *((sin6_t *)&icmp->icmp_delayed_addr) = sin6; 1290 } 1291 rw_exit(&icmp->icmp_rwlock); 1292 } else { 1293 1294 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1295 NULL, 0, error); 1296 if (mp1 != NULL) 1297 putnext(connp->conn_rq, mp1); 1298 } 1299 done: 1300 freemsg(mp); 1301 } 1302 1303 /* 1304 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1305 * The local address is filled in if endpoint is bound. The remote address 1306 * is filled in if remote address has been precified ("connected endpoint") 1307 * (The concept of connected CLTS sockets is alien to published TPI 1308 * but we support it anyway). 
1309 */ 1310 static void 1311 icmp_addr_req(queue_t *q, mblk_t *mp) 1312 { 1313 icmp_t *icmp = Q_TO_ICMP(q); 1314 mblk_t *ackmp; 1315 struct T_addr_ack *taa; 1316 1317 /* Make it large enough for worst case */ 1318 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1319 2 * sizeof (sin6_t), 1); 1320 if (ackmp == NULL) { 1321 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1322 return; 1323 } 1324 taa = (struct T_addr_ack *)ackmp->b_rptr; 1325 1326 bzero(taa, sizeof (struct T_addr_ack)); 1327 ackmp->b_wptr = (uchar_t *)&taa[1]; 1328 1329 taa->PRIM_type = T_ADDR_ACK; 1330 ackmp->b_datap->db_type = M_PCPROTO; 1331 rw_enter(&icmp->icmp_rwlock, RW_READER); 1332 /* 1333 * Note: Following code assumes 32 bit alignment of basic 1334 * data structures like sin_t and struct T_addr_ack. 1335 */ 1336 if (icmp->icmp_state != TS_UNBND) { 1337 /* 1338 * Fill in local address 1339 */ 1340 taa->LOCADDR_offset = sizeof (*taa); 1341 if (icmp->icmp_family == AF_INET) { 1342 sin_t *sin; 1343 1344 taa->LOCADDR_length = sizeof (sin_t); 1345 sin = (sin_t *)&taa[1]; 1346 /* Fill zeroes and then intialize non-zero fields */ 1347 *sin = sin_null; 1348 sin->sin_family = AF_INET; 1349 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1350 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1351 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1352 sin->sin_addr.s_addr); 1353 } else { 1354 /* 1355 * INADDR_ANY 1356 * icmp_v6src is not set, we might be bound to 1357 * broadcast/multicast. 
Use icmp_bound_v6src as 1358 * local address instead (that could 1359 * also still be INADDR_ANY) 1360 */ 1361 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1362 sin->sin_addr.s_addr); 1363 } 1364 ackmp->b_wptr = (uchar_t *)&sin[1]; 1365 } else { 1366 sin6_t *sin6; 1367 1368 ASSERT(icmp->icmp_family == AF_INET6); 1369 taa->LOCADDR_length = sizeof (sin6_t); 1370 sin6 = (sin6_t *)&taa[1]; 1371 /* Fill zeroes and then intialize non-zero fields */ 1372 *sin6 = sin6_null; 1373 sin6->sin6_family = AF_INET6; 1374 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1375 sin6->sin6_addr = icmp->icmp_v6src; 1376 } else { 1377 /* 1378 * UNSPECIFIED 1379 * icmp_v6src is not set, we might be bound to 1380 * broadcast/multicast. Use icmp_bound_v6src as 1381 * local address instead (that could 1382 * also still be UNSPECIFIED) 1383 */ 1384 sin6->sin6_addr = icmp->icmp_bound_v6src; 1385 } 1386 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1387 } 1388 } 1389 rw_exit(&icmp->icmp_rwlock); 1390 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1391 qreply(q, ackmp); 1392 } 1393 1394 static void 1395 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1396 { 1397 *tap = icmp_g_t_info_ack; 1398 1399 if (icmp->icmp_family == AF_INET6) 1400 tap->ADDR_size = sizeof (sin6_t); 1401 else 1402 tap->ADDR_size = sizeof (sin_t); 1403 tap->CURRENT_state = icmp->icmp_state; 1404 tap->OPT_size = icmp_max_optsize; 1405 } 1406 1407 static void 1408 icmp_do_capability_ack(icmp_t *icmp, struct T_capability_ack *tcap, 1409 t_uscalar_t cap_bits1) 1410 { 1411 tcap->CAP_bits1 = 0; 1412 1413 if (cap_bits1 & TC1_INFO) { 1414 icmp_copy_info(&tcap->INFO_ack, icmp); 1415 tcap->CAP_bits1 |= TC1_INFO; 1416 } 1417 } 1418 1419 /* 1420 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1421 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1422 * icmp_g_t_info_ack. The current state of the stream is copied from 1423 * icmp_state. 
1424 */ 1425 static void 1426 icmp_capability_req(queue_t *q, mblk_t *mp) 1427 { 1428 icmp_t *icmp = Q_TO_ICMP(q); 1429 t_uscalar_t cap_bits1; 1430 struct T_capability_ack *tcap; 1431 1432 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1433 1434 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1435 mp->b_datap->db_type, T_CAPABILITY_ACK); 1436 if (!mp) 1437 return; 1438 1439 tcap = (struct T_capability_ack *)mp->b_rptr; 1440 1441 icmp_do_capability_ack(icmp, tcap, cap_bits1); 1442 1443 qreply(q, mp); 1444 } 1445 1446 /* 1447 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1448 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1449 * The current state of the stream is copied from icmp_state. 1450 */ 1451 static void 1452 icmp_info_req(queue_t *q, mblk_t *mp) 1453 { 1454 icmp_t *icmp = Q_TO_ICMP(q); 1455 1456 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1457 T_INFO_ACK); 1458 if (!mp) 1459 return; 1460 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1461 qreply(q, mp); 1462 } 1463 1464 /* For /dev/icmp aka AF_INET open */ 1465 static int 1466 icmp_tpi_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1467 int family) 1468 { 1469 conn_t *connp; 1470 dev_t conn_dev; 1471 icmp_stack_t *is; 1472 int error; 1473 1474 conn_dev = NULL; 1475 1476 /* If the stream is already open, return immediately. */ 1477 if (q->q_ptr != NULL) 1478 return (0); 1479 1480 if (sflag == MODOPEN) 1481 return (EINVAL); 1482 1483 /* 1484 * Since ICMP is not used so heavily, allocating from the small 1485 * arena should be sufficient. 
1486 */ 1487 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 1488 return (EBUSY); 1489 } 1490 1491 if (flag & SO_FALLBACK) { 1492 /* 1493 * Non streams socket needs a stream to fallback to 1494 */ 1495 RD(q)->q_ptr = (void *)conn_dev; 1496 WR(q)->q_qinfo = &icmp_fallback_sock_winit; 1497 WR(q)->q_ptr = (void *)ip_minor_arena_sa; 1498 qprocson(q); 1499 return (0); 1500 } 1501 1502 connp = icmp_open(family, credp, &error, KM_SLEEP); 1503 if (connp == NULL) { 1504 ASSERT(error != NULL); 1505 inet_minor_free(ip_minor_arena_sa, connp->conn_dev); 1506 return (error); 1507 } 1508 1509 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1510 connp->conn_dev = conn_dev; 1511 connp->conn_minor_arena = ip_minor_arena_sa; 1512 1513 is = connp->conn_icmp->icmp_is; 1514 1515 /* 1516 * Initialize the icmp_t structure for this stream. 1517 */ 1518 q->q_ptr = connp; 1519 WR(q)->q_ptr = connp; 1520 connp->conn_rq = q; 1521 connp->conn_wq = WR(q); 1522 1523 if (connp->conn_icmp->icmp_family == AF_INET6) { 1524 /* Build initial header template for transmit */ 1525 rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER); 1526 if ((error = icmp_build_hdrs(connp->conn_icmp)) != 0) { 1527 rw_exit(&connp->conn_icmp->icmp_rwlock); 1528 inet_minor_free(ip_minor_arena_sa, connp->conn_dev); 1529 ipcl_conn_destroy(connp); 1530 return (error); 1531 } 1532 rw_exit(&connp->conn_icmp->icmp_rwlock); 1533 } 1534 1535 1536 q->q_hiwat = is->is_recv_hiwat; 1537 WR(q)->q_hiwat = is->is_xmit_hiwat; 1538 WR(q)->q_lowat = is->is_xmit_lowat; 1539 1540 qprocson(q); 1541 1542 /* Set the Stream head write offset. 
*/ 1543 (void) proto_set_tx_wroff(q, connp, 1544 connp->conn_icmp->icmp_max_hdr_len + is->is_wroff_extra); 1545 (void) proto_set_rx_hiwat(connp->conn_rq, connp, q->q_hiwat); 1546 1547 mutex_enter(&connp->conn_lock); 1548 connp->conn_state_flags &= ~CONN_INCIPIENT; 1549 mutex_exit(&connp->conn_lock); 1550 1551 return (0); 1552 } 1553 1554 /* For /dev/icmp4 aka AF_INET open */ 1555 static int 1556 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1557 { 1558 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET)); 1559 } 1560 1561 /* For /dev/icmp6 aka AF_INET6 open */ 1562 static int 1563 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1564 { 1565 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET6)); 1566 } 1567 1568 /* 1569 * This is the open routine for icmp. It allocates a icmp_t structure for 1570 * the stream and, on the first open of the module, creates an ND table. 1571 */ 1572 /* ARGSUSED */ 1573 static conn_t * 1574 icmp_open(int family, cred_t *credp, int *err, int flags) 1575 { 1576 icmp_t *icmp; 1577 conn_t *connp; 1578 zoneid_t zoneid; 1579 netstack_t *ns; 1580 icmp_stack_t *is; 1581 boolean_t isv6 = B_FALSE; 1582 1583 *err = secpolicy_net_icmpaccess(credp); 1584 if (*err != 0) 1585 return (NULL); 1586 1587 if (family == AF_INET6) 1588 isv6 = B_TRUE; 1589 ns = netstack_find_by_cred(credp); 1590 ASSERT(ns != NULL); 1591 is = ns->netstack_icmp; 1592 ASSERT(is != NULL); 1593 1594 /* 1595 * For exclusive stacks we set the zoneid to zero 1596 * to make ICMP operate as if in the global zone. 1597 */ 1598 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 1599 zoneid = GLOBAL_ZONEID; 1600 else 1601 zoneid = crgetzoneid(credp); 1602 1603 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 1604 1605 connp = ipcl_conn_create(IPCL_RAWIPCONN, flags, ns); 1606 icmp = connp->conn_icmp; 1607 icmp->icmp_v6dst = sin6_null; 1608 1609 /* 1610 * ipcl_conn_create did a netstack_hold. 
Undo the hold that was 1611 * done by netstack_find_by_cred() 1612 */ 1613 netstack_rele(ns); 1614 1615 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1616 ASSERT(connp->conn_ulp == IPPROTO_ICMP); 1617 ASSERT(connp->conn_icmp == icmp); 1618 ASSERT(icmp->icmp_connp == connp); 1619 1620 /* Set the initial state of the stream and the privilege status. */ 1621 icmp->icmp_state = TS_UNBND; 1622 if (isv6) { 1623 icmp->icmp_ipversion = IPV6_VERSION; 1624 icmp->icmp_family = AF_INET6; 1625 connp->conn_ulp = IPPROTO_ICMPV6; 1626 /* May be changed by a SO_PROTOTYPE socket option. */ 1627 icmp->icmp_proto = IPPROTO_ICMPV6; 1628 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1629 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1630 icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; 1631 connp->conn_af_isv6 = B_TRUE; 1632 connp->conn_flags |= IPCL_ISV6; 1633 } else { 1634 icmp->icmp_ipversion = IPV4_VERSION; 1635 icmp->icmp_family = AF_INET; 1636 /* May be changed by a SO_PROTOTYPE socket option. */ 1637 icmp->icmp_proto = IPPROTO_ICMP; 1638 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1639 icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; 1640 connp->conn_af_isv6 = B_FALSE; 1641 connp->conn_flags &= ~IPCL_ISV6; 1642 } 1643 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1644 icmp->icmp_pending_op = -1; 1645 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1646 connp->conn_zoneid = zoneid; 1647 1648 /* 1649 * If the caller has the process-wide flag set, then default to MAC 1650 * exempt mode. This allows read-down to unlabeled hosts. 
1651 */ 1652 if (getpflags(NET_MAC_AWARE, credp) != 0) 1653 connp->conn_mac_exempt = B_TRUE; 1654 1655 connp->conn_ulp_labeled = is_system_labeled(); 1656 1657 icmp->icmp_is = is; 1658 1659 connp->conn_recv = icmp_input; 1660 crhold(credp); 1661 connp->conn_cred = credp; 1662 1663 rw_exit(&icmp->icmp_rwlock); 1664 1665 connp->conn_flow_cntrld = B_FALSE; 1666 return (connp); 1667 } 1668 1669 /* 1670 * Which ICMP options OK to set through T_UNITDATA_REQ... 1671 */ 1672 /* ARGSUSED */ 1673 static boolean_t 1674 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1675 { 1676 return (B_TRUE); 1677 } 1678 1679 /* 1680 * This routine gets default values of certain options whose default 1681 * values are maintained by protcol specific code 1682 */ 1683 /* ARGSUSED */ 1684 int 1685 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1686 { 1687 icmp_t *icmp = Q_TO_ICMP(q); 1688 icmp_stack_t *is = icmp->icmp_is; 1689 int *i1 = (int *)ptr; 1690 1691 switch (level) { 1692 case IPPROTO_IP: 1693 switch (name) { 1694 case IP_MULTICAST_TTL: 1695 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1696 return (sizeof (uchar_t)); 1697 case IP_MULTICAST_LOOP: 1698 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1699 return (sizeof (uchar_t)); 1700 } 1701 break; 1702 case IPPROTO_IPV6: 1703 switch (name) { 1704 case IPV6_MULTICAST_HOPS: 1705 *i1 = IP_DEFAULT_MULTICAST_TTL; 1706 return (sizeof (int)); 1707 case IPV6_MULTICAST_LOOP: 1708 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1709 return (sizeof (int)); 1710 case IPV6_UNICAST_HOPS: 1711 *i1 = is->is_ipv6_hoplimit; 1712 return (sizeof (int)); 1713 } 1714 break; 1715 case IPPROTO_ICMPV6: 1716 switch (name) { 1717 case ICMP6_FILTER: 1718 /* Make it look like "pass all" */ 1719 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1720 return (sizeof (icmp6_filter_t)); 1721 } 1722 break; 1723 } 1724 return (-1); 1725 } 1726 1727 /* 1728 * This routine retrieves the current status of socket options. 1729 * It returns the size of the option retrieved. 
1730 */ 1731 int 1732 icmp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 1733 { 1734 icmp_t *icmp = connp->conn_icmp; 1735 icmp_stack_t *is = icmp->icmp_is; 1736 int *i1 = (int *)ptr; 1737 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1738 int ret = 0; 1739 1740 ASSERT(RW_READ_HELD(&icmp->icmp_rwlock)); 1741 switch (level) { 1742 case SOL_SOCKET: 1743 switch (name) { 1744 case SO_DEBUG: 1745 *i1 = icmp->icmp_debug; 1746 break; 1747 case SO_TYPE: 1748 *i1 = SOCK_RAW; 1749 break; 1750 case SO_PROTOTYPE: 1751 *i1 = icmp->icmp_proto; 1752 break; 1753 case SO_REUSEADDR: 1754 *i1 = icmp->icmp_reuseaddr; 1755 break; 1756 1757 /* 1758 * The following three items are available here, 1759 * but are only meaningful to IP. 1760 */ 1761 case SO_DONTROUTE: 1762 *i1 = icmp->icmp_dontroute; 1763 break; 1764 case SO_USELOOPBACK: 1765 *i1 = icmp->icmp_useloopback; 1766 break; 1767 case SO_BROADCAST: 1768 *i1 = icmp->icmp_broadcast; 1769 break; 1770 1771 case SO_SNDBUF: 1772 ASSERT(icmp->icmp_xmit_hiwat <= INT_MAX); 1773 *i1 = icmp->icmp_xmit_hiwat; 1774 break; 1775 case SO_RCVBUF: 1776 ASSERT(icmp->icmp_recv_hiwat <= INT_MAX); 1777 *i1 = icmp->icmp_recv_hiwat; 1778 break; 1779 case SO_DGRAM_ERRIND: 1780 *i1 = icmp->icmp_dgram_errind; 1781 break; 1782 case SO_TIMESTAMP: 1783 *i1 = icmp->icmp_timestamp; 1784 break; 1785 case SO_MAC_EXEMPT: 1786 *i1 = connp->conn_mac_exempt; 1787 break; 1788 case SO_DOMAIN: 1789 *i1 = icmp->icmp_family; 1790 break; 1791 1792 /* 1793 * Following four not meaningful for icmp 1794 * Action is same as "default" to which we fallthrough 1795 * so we keep them in comments. 1796 * case SO_LINGER: 1797 * case SO_KEEPALIVE: 1798 * case SO_OOBINLINE: 1799 * case SO_ALLZONES: 1800 */ 1801 default: 1802 ret = -1; 1803 goto done; 1804 } 1805 break; 1806 case IPPROTO_IP: 1807 /* 1808 * Only allow IPv4 option processing on IPv4 sockets. 
1809 */ 1810 if (icmp->icmp_family != AF_INET) { 1811 ret = -1; 1812 goto done; 1813 } 1814 1815 switch (name) { 1816 case IP_OPTIONS: 1817 case T_IP_OPTIONS: 1818 /* Options are passed up with each packet */ 1819 ret = 0; 1820 goto done; 1821 case IP_HDRINCL: 1822 *i1 = (int)icmp->icmp_hdrincl; 1823 break; 1824 case IP_TOS: 1825 case T_IP_TOS: 1826 *i1 = (int)icmp->icmp_type_of_service; 1827 break; 1828 case IP_TTL: 1829 *i1 = (int)icmp->icmp_ttl; 1830 break; 1831 case IP_MULTICAST_IF: 1832 /* 0 address if not set */ 1833 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1834 ret = sizeof (ipaddr_t); 1835 goto done; 1836 case IP_MULTICAST_TTL: 1837 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1838 ret = sizeof (uchar_t); 1839 goto done; 1840 case IP_MULTICAST_LOOP: 1841 *ptr = connp->conn_multicast_loop; 1842 ret = sizeof (uint8_t); 1843 goto done; 1844 case IP_BOUND_IF: 1845 /* Zero if not set */ 1846 *i1 = icmp->icmp_bound_if; 1847 break; /* goto sizeof (int) option return */ 1848 case IP_UNSPEC_SRC: 1849 *ptr = icmp->icmp_unspec_source; 1850 break; /* goto sizeof (int) option return */ 1851 case IP_RECVIF: 1852 *ptr = icmp->icmp_recvif; 1853 break; /* goto sizeof (int) option return */ 1854 case IP_BROADCAST_TTL: 1855 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 1856 return (sizeof (uchar_t)); 1857 case IP_RECVPKTINFO: 1858 /* 1859 * This also handles IP_PKTINFO. 1860 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1861 * Differentiation is based on the size of the argument 1862 * passed in. 1863 * This option is handled in IP which will return an 1864 * error for IP_PKTINFO as it's not supported as a 1865 * sticky option. 1866 */ 1867 ret = -EINVAL; 1868 goto done; 1869 /* 1870 * Cannot "get" the value of following options 1871 * at this level. Action is same as "default" to 1872 * which we fallthrough so we keep them in comments. 
1873 * 1874 * case IP_ADD_MEMBERSHIP: 1875 * case IP_DROP_MEMBERSHIP: 1876 * case IP_BLOCK_SOURCE: 1877 * case IP_UNBLOCK_SOURCE: 1878 * case IP_ADD_SOURCE_MEMBERSHIP: 1879 * case IP_DROP_SOURCE_MEMBERSHIP: 1880 * case MCAST_JOIN_GROUP: 1881 * case MCAST_LEAVE_GROUP: 1882 * case MCAST_BLOCK_SOURCE: 1883 * case MCAST_UNBLOCK_SOURCE: 1884 * case MCAST_JOIN_SOURCE_GROUP: 1885 * case MCAST_LEAVE_SOURCE_GROUP: 1886 * case MRT_INIT: 1887 * case MRT_DONE: 1888 * case MRT_ADD_VIF: 1889 * case MRT_DEL_VIF: 1890 * case MRT_ADD_MFC: 1891 * case MRT_DEL_MFC: 1892 * case MRT_VERSION: 1893 * case MRT_ASSERT: 1894 * case IP_SEC_OPT: 1895 * case IP_NEXTHOP: 1896 */ 1897 default: 1898 ret = -1; 1899 goto done; 1900 } 1901 break; 1902 case IPPROTO_IPV6: 1903 /* 1904 * Only allow IPv6 option processing on native IPv6 sockets. 1905 */ 1906 if (icmp->icmp_family != AF_INET6) { 1907 ret = -1; 1908 goto done; 1909 } 1910 switch (name) { 1911 case IPV6_UNICAST_HOPS: 1912 *i1 = (unsigned int)icmp->icmp_ttl; 1913 break; 1914 case IPV6_MULTICAST_IF: 1915 /* 0 index if not set */ 1916 *i1 = icmp->icmp_multicast_if_index; 1917 break; 1918 case IPV6_MULTICAST_HOPS: 1919 *i1 = icmp->icmp_multicast_ttl; 1920 break; 1921 case IPV6_MULTICAST_LOOP: 1922 *i1 = connp->conn_multicast_loop; 1923 break; 1924 case IPV6_BOUND_IF: 1925 /* Zero if not set */ 1926 *i1 = icmp->icmp_bound_if; 1927 break; 1928 case IPV6_UNSPEC_SRC: 1929 *i1 = icmp->icmp_unspec_source; 1930 break; 1931 case IPV6_CHECKSUM: 1932 /* 1933 * Return offset or -1 if no checksum offset. 
1934 * Does not apply to IPPROTO_ICMPV6 1935 */ 1936 if (icmp->icmp_proto == IPPROTO_ICMPV6) { 1937 ret = -1; 1938 goto done; 1939 } 1940 1941 if (icmp->icmp_raw_checksum) { 1942 *i1 = icmp->icmp_checksum_off; 1943 } else { 1944 *i1 = -1; 1945 } 1946 break; 1947 case IPV6_JOIN_GROUP: 1948 case IPV6_LEAVE_GROUP: 1949 case MCAST_JOIN_GROUP: 1950 case MCAST_LEAVE_GROUP: 1951 case MCAST_BLOCK_SOURCE: 1952 case MCAST_UNBLOCK_SOURCE: 1953 case MCAST_JOIN_SOURCE_GROUP: 1954 case MCAST_LEAVE_SOURCE_GROUP: 1955 /* cannot "get" the value for these */ 1956 ret = -1; 1957 goto done; 1958 case IPV6_RECVPKTINFO: 1959 *i1 = icmp->icmp_ip_recvpktinfo; 1960 break; 1961 case IPV6_RECVTCLASS: 1962 *i1 = icmp->icmp_ipv6_recvtclass; 1963 break; 1964 case IPV6_RECVPATHMTU: 1965 *i1 = icmp->icmp_ipv6_recvpathmtu; 1966 break; 1967 case IPV6_V6ONLY: 1968 *i1 = 1; 1969 break; 1970 case IPV6_RECVHOPLIMIT: 1971 *i1 = icmp->icmp_ipv6_recvhoplimit; 1972 break; 1973 case IPV6_RECVHOPOPTS: 1974 *i1 = icmp->icmp_ipv6_recvhopopts; 1975 break; 1976 case IPV6_RECVDSTOPTS: 1977 *i1 = icmp->icmp_ipv6_recvdstopts; 1978 break; 1979 case _OLD_IPV6_RECVDSTOPTS: 1980 *i1 = icmp->icmp_old_ipv6_recvdstopts; 1981 break; 1982 case IPV6_RECVRTHDRDSTOPTS: 1983 *i1 = icmp->icmp_ipv6_recvrtdstopts; 1984 break; 1985 case IPV6_RECVRTHDR: 1986 *i1 = icmp->icmp_ipv6_recvrthdr; 1987 break; 1988 case IPV6_PKTINFO: { 1989 /* XXX assumes that caller has room for max size! 
*/ 1990 struct in6_pktinfo *pkti; 1991 1992 pkti = (struct in6_pktinfo *)ptr; 1993 if (ipp->ipp_fields & IPPF_IFINDEX) 1994 pkti->ipi6_ifindex = ipp->ipp_ifindex; 1995 else 1996 pkti->ipi6_ifindex = 0; 1997 if (ipp->ipp_fields & IPPF_ADDR) 1998 pkti->ipi6_addr = ipp->ipp_addr; 1999 else 2000 pkti->ipi6_addr = ipv6_all_zeros; 2001 ret = sizeof (struct in6_pktinfo); 2002 goto done; 2003 } 2004 case IPV6_NEXTHOP: { 2005 sin6_t *sin6 = (sin6_t *)ptr; 2006 2007 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2008 return (0); 2009 *sin6 = sin6_null; 2010 sin6->sin6_family = AF_INET6; 2011 sin6->sin6_addr = ipp->ipp_nexthop; 2012 ret = (sizeof (sin6_t)); 2013 goto done; 2014 } 2015 case IPV6_HOPOPTS: 2016 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2017 return (0); 2018 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 2019 return (0); 2020 bcopy((char *)ipp->ipp_hopopts + 2021 icmp->icmp_label_len_v6, ptr, 2022 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 2023 if (icmp->icmp_label_len_v6 > 0) { 2024 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2025 ptr[1] = (ipp->ipp_hopoptslen - 2026 icmp->icmp_label_len_v6 + 7) / 8 - 1; 2027 } 2028 ret = (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 2029 goto done; 2030 case IPV6_RTHDRDSTOPTS: 2031 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2032 return (0); 2033 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2034 ret = ipp->ipp_rtdstoptslen; 2035 goto done; 2036 case IPV6_RTHDR: 2037 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2038 return (0); 2039 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2040 ret = ipp->ipp_rthdrlen; 2041 goto done; 2042 case IPV6_DSTOPTS: 2043 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 2044 ret = 0; 2045 goto done; 2046 } 2047 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2048 ret = ipp->ipp_dstoptslen; 2049 goto done; 2050 case IPV6_PATHMTU: 2051 if (!(ipp->ipp_fields & IPPF_PATHMTU)) { 2052 ret = 0; 2053 } else { 2054 ret = ip_fill_mtuinfo( 2055 &icmp->icmp_v6dst.sin6_addr, 0, 2056 (struct ip6_mtuinfo *)ptr, 2057 
is->is_netstack); 2058 } 2059 goto done; 2060 case IPV6_TCLASS: 2061 if (ipp->ipp_fields & IPPF_TCLASS) 2062 *i1 = ipp->ipp_tclass; 2063 else 2064 *i1 = IPV6_FLOW_TCLASS( 2065 IPV6_DEFAULT_VERS_AND_FLOW); 2066 break; 2067 default: 2068 ret = -1; 2069 goto done; 2070 } 2071 break; 2072 case IPPROTO_ICMPV6: 2073 /* 2074 * Only allow IPv6 option processing on native IPv6 sockets. 2075 */ 2076 if (icmp->icmp_family != AF_INET6) { 2077 ret = -1; 2078 } 2079 2080 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2081 ret = -1; 2082 } 2083 2084 switch (name) { 2085 case ICMP6_FILTER: 2086 if (icmp->icmp_filter == NULL) { 2087 /* Make it look like "pass all" */ 2088 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 2089 } else { 2090 (void) bcopy(icmp->icmp_filter, ptr, 2091 sizeof (icmp6_filter_t)); 2092 } 2093 ret = sizeof (icmp6_filter_t); 2094 goto done; 2095 default: 2096 ret = -1; 2097 goto done; 2098 } 2099 default: 2100 ret = -1; 2101 goto done; 2102 } 2103 ret = sizeof (int); 2104 done: 2105 return (ret); 2106 } 2107 2108 /* 2109 * This routine retrieves the current status of socket options. 2110 * It returns the size of the option retrieved. 2111 */ 2112 int 2113 icmp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 2114 { 2115 conn_t *connp = Q_TO_CONN(q); 2116 icmp_t *icmp = connp->conn_icmp; 2117 int err; 2118 2119 rw_enter(&icmp->icmp_rwlock, RW_READER); 2120 err = icmp_opt_get(connp, level, name, ptr); 2121 rw_exit(&icmp->icmp_rwlock); 2122 return (err); 2123 } 2124 2125 int 2126 icmp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2127 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2128 void *thisdg_attrs, boolean_t checkonly) 2129 { 2130 2131 int *i1 = (int *)invalp; 2132 boolean_t onoff = (*i1 == 0) ? 
0 : 1; 2133 icmp_t *icmp = connp->conn_icmp; 2134 icmp_stack_t *is = icmp->icmp_is; 2135 int error; 2136 2137 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 2138 /* 2139 * For fixed length options, no sanity check 2140 * of passed in length is done. It is assumed *_optcom_req() 2141 * routines do the right thing. 2142 */ 2143 switch (level) { 2144 case SOL_SOCKET: 2145 switch (name) { 2146 case SO_DEBUG: 2147 if (!checkonly) 2148 icmp->icmp_debug = onoff; 2149 break; 2150 case SO_PROTOTYPE: 2151 if ((*i1 & 0xFF) != IPPROTO_ICMP && 2152 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 2153 secpolicy_net_rawaccess(cr) != 0) { 2154 *outlenp = 0; 2155 return (EACCES); 2156 } 2157 /* Can't use IPPROTO_RAW with IPv6 */ 2158 if ((*i1 & 0xFF) == IPPROTO_RAW && 2159 icmp->icmp_family == AF_INET6) { 2160 *outlenp = 0; 2161 return (EPROTONOSUPPORT); 2162 } 2163 if (checkonly) { 2164 /* T_CHECK case */ 2165 *(int *)outvalp = (*i1 & 0xFF); 2166 break; 2167 } 2168 icmp->icmp_proto = *i1 & 0xFF; 2169 if ((icmp->icmp_proto == IPPROTO_RAW || 2170 icmp->icmp_proto == IPPROTO_IGMP) && 2171 icmp->icmp_family == AF_INET) 2172 icmp->icmp_hdrincl = 1; 2173 else 2174 icmp->icmp_hdrincl = 0; 2175 2176 if (icmp->icmp_family == AF_INET6 && 2177 icmp->icmp_proto == IPPROTO_ICMPV6) { 2178 /* Set offset for icmp6_cksum */ 2179 icmp->icmp_raw_checksum = 0; 2180 icmp->icmp_checksum_off = 2; 2181 } 2182 if (icmp->icmp_proto == IPPROTO_UDP || 2183 icmp->icmp_proto == IPPROTO_TCP || 2184 icmp->icmp_proto == IPPROTO_SCTP) { 2185 icmp->icmp_no_tp_cksum = 1; 2186 icmp->icmp_sticky_ipp.ipp_fields |= 2187 IPPF_NO_CKSUM; 2188 } else { 2189 icmp->icmp_no_tp_cksum = 0; 2190 icmp->icmp_sticky_ipp.ipp_fields &= 2191 ~IPPF_NO_CKSUM; 2192 } 2193 2194 if (icmp->icmp_filter != NULL && 2195 icmp->icmp_proto != IPPROTO_ICMPV6) { 2196 kmem_free(icmp->icmp_filter, 2197 sizeof (icmp6_filter_t)); 2198 icmp->icmp_filter = NULL; 2199 } 2200 2201 /* Rebuild the header template */ 2202 error = icmp_build_hdrs(icmp); 2203 if (error != 0) { 
2204 *outlenp = 0; 2205 return (error); 2206 } 2207 2208 /* 2209 * For SCTP, we don't use icmp_bind_proto() for 2210 * raw socket binding. Note that we do not need 2211 * to set *outlenp. 2212 * FIXME: how does SCTP work? 2213 */ 2214 if (icmp->icmp_proto == IPPROTO_SCTP) 2215 return (0); 2216 2217 *outlenp = sizeof (int); 2218 *(int *)outvalp = *i1 & 0xFF; 2219 2220 /* Drop lock across the bind operation */ 2221 rw_exit(&icmp->icmp_rwlock); 2222 (void) icmp_bind_proto(connp); 2223 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2224 return (0); 2225 case SO_REUSEADDR: 2226 if (!checkonly) { 2227 icmp->icmp_reuseaddr = onoff; 2228 PASS_OPT_TO_IP(connp); 2229 } 2230 break; 2231 2232 /* 2233 * The following three items are available here, 2234 * but are only meaningful to IP. 2235 */ 2236 case SO_DONTROUTE: 2237 if (!checkonly) { 2238 icmp->icmp_dontroute = onoff; 2239 PASS_OPT_TO_IP(connp); 2240 } 2241 break; 2242 case SO_USELOOPBACK: 2243 if (!checkonly) { 2244 icmp->icmp_useloopback = onoff; 2245 PASS_OPT_TO_IP(connp); 2246 } 2247 break; 2248 case SO_BROADCAST: 2249 if (!checkonly) { 2250 icmp->icmp_broadcast = onoff; 2251 PASS_OPT_TO_IP(connp); 2252 } 2253 break; 2254 2255 case SO_SNDBUF: 2256 if (*i1 > is->is_max_buf) { 2257 *outlenp = 0; 2258 return (ENOBUFS); 2259 } 2260 if (!checkonly) { 2261 if (!IPCL_IS_NONSTR(connp)) { 2262 connp->conn_wq->q_hiwat = *i1; 2263 } 2264 icmp->icmp_xmit_hiwat = *i1; 2265 } 2266 break; 2267 case SO_RCVBUF: 2268 if (*i1 > is->is_max_buf) { 2269 *outlenp = 0; 2270 return (ENOBUFS); 2271 } 2272 if (!checkonly) { 2273 icmp->icmp_recv_hiwat = *i1; 2274 rw_exit(&icmp->icmp_rwlock); 2275 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2276 *i1); 2277 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2278 } 2279 break; 2280 case SO_DGRAM_ERRIND: 2281 if (!checkonly) 2282 icmp->icmp_dgram_errind = onoff; 2283 break; 2284 case SO_ALLZONES: 2285 /* 2286 * "soft" error (negative) 2287 * option not handled at this level 2288 * Note: Do not modify *outlenp 
2289 */ 2290 return (-EINVAL); 2291 case SO_TIMESTAMP: 2292 if (!checkonly) { 2293 icmp->icmp_timestamp = onoff; 2294 } 2295 break; 2296 case SO_MAC_EXEMPT: 2297 /* 2298 * "soft" error (negative) 2299 * option not handled at this level 2300 * Note: Do not modify *outlenp 2301 */ 2302 return (-EINVAL); 2303 case SO_RCVTIMEO: 2304 case SO_SNDTIMEO: 2305 /* 2306 * Pass these two options in order for third part 2307 * protocol usage. Here just return directly. 2308 */ 2309 return (0); 2310 /* 2311 * Following three not meaningful for icmp 2312 * Action is same as "default" so we keep them 2313 * in comments. 2314 * case SO_LINGER: 2315 * case SO_KEEPALIVE: 2316 * case SO_OOBINLINE: 2317 */ 2318 default: 2319 *outlenp = 0; 2320 return (EINVAL); 2321 } 2322 break; 2323 case IPPROTO_IP: 2324 /* 2325 * Only allow IPv4 option processing on IPv4 sockets. 2326 */ 2327 if (icmp->icmp_family != AF_INET) { 2328 *outlenp = 0; 2329 return (ENOPROTOOPT); 2330 } 2331 switch (name) { 2332 case IP_OPTIONS: 2333 case T_IP_OPTIONS: 2334 /* Save options for use by IP. */ 2335 if ((inlen & 0x3) || 2336 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2337 *outlenp = 0; 2338 return (EINVAL); 2339 } 2340 if (checkonly) 2341 break; 2342 2343 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2344 &icmp->icmp_ip_snd_options_len, 2345 icmp->icmp_label_len, invalp, inlen)) { 2346 *outlenp = 0; 2347 return (ENOMEM); 2348 } 2349 2350 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2351 icmp->icmp_ip_snd_options_len; 2352 rw_exit(&icmp->icmp_rwlock); 2353 (void) proto_set_tx_wroff(connp->conn_rq == NULL ? 
NULL: 2354 RD(connp->conn_rq), connp, 2355 icmp->icmp_max_hdr_len + is->is_wroff_extra); 2356 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2357 break; 2358 case IP_HDRINCL: 2359 if (!checkonly) 2360 icmp->icmp_hdrincl = onoff; 2361 break; 2362 case IP_TOS: 2363 case T_IP_TOS: 2364 if (!checkonly) { 2365 icmp->icmp_type_of_service = (uint8_t)*i1; 2366 } 2367 break; 2368 case IP_TTL: 2369 if (!checkonly) { 2370 icmp->icmp_ttl = (uint8_t)*i1; 2371 } 2372 break; 2373 case IP_MULTICAST_IF: 2374 /* 2375 * TODO should check OPTMGMT reply and undo this if 2376 * there is an error. 2377 */ 2378 if (!checkonly) { 2379 icmp->icmp_multicast_if_addr = *i1; 2380 PASS_OPT_TO_IP(connp); 2381 } 2382 break; 2383 case IP_MULTICAST_TTL: 2384 if (!checkonly) 2385 icmp->icmp_multicast_ttl = *invalp; 2386 break; 2387 case IP_MULTICAST_LOOP: 2388 if (!checkonly) { 2389 connp->conn_multicast_loop = 2390 (*invalp == 0) ? 0 : 1; 2391 PASS_OPT_TO_IP(connp); 2392 } 2393 break; 2394 case IP_BOUND_IF: 2395 if (!checkonly) { 2396 icmp->icmp_bound_if = *i1; 2397 PASS_OPT_TO_IP(connp); 2398 } 2399 break; 2400 case IP_UNSPEC_SRC: 2401 if (!checkonly) { 2402 icmp->icmp_unspec_source = onoff; 2403 PASS_OPT_TO_IP(connp); 2404 } 2405 break; 2406 case IP_BROADCAST_TTL: 2407 if (!checkonly) 2408 connp->conn_broadcast_ttl = *invalp; 2409 break; 2410 case IP_RECVIF: 2411 if (!checkonly) { 2412 icmp->icmp_recvif = onoff; 2413 } 2414 /* 2415 * pass to ip 2416 */ 2417 return (-EINVAL); 2418 case IP_PKTINFO: { 2419 /* 2420 * This also handles IP_RECVPKTINFO. 2421 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2422 * Differentiation is based on the size of the argument 2423 * passed in. 2424 */ 2425 struct in_pktinfo *pktinfop; 2426 ip4_pkt_t *attr_pktinfop; 2427 2428 if (checkonly) 2429 break; 2430 2431 if (inlen == sizeof (int)) { 2432 /* 2433 * This is IP_RECVPKTINFO option. 2434 * Keep a local copy of wether this option is 2435 * set or not and pass it down to IP for 2436 * processing. 
2437 */ 2438 icmp->icmp_ip_recvpktinfo = onoff; 2439 return (-EINVAL); 2440 } 2441 2442 2443 if (inlen != sizeof (struct in_pktinfo)) { 2444 return (EINVAL); 2445 } 2446 2447 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2448 == NULL) { 2449 /* 2450 * sticky option is not supported 2451 */ 2452 return (EINVAL); 2453 } 2454 2455 pktinfop = (struct in_pktinfo *)invalp; 2456 2457 /* 2458 * Atleast one of the values should be specified 2459 */ 2460 if (pktinfop->ipi_ifindex == 0 && 2461 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2462 return (EINVAL); 2463 } 2464 2465 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2466 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2467 } 2468 break; 2469 case IP_ADD_MEMBERSHIP: 2470 case IP_DROP_MEMBERSHIP: 2471 case IP_BLOCK_SOURCE: 2472 case IP_UNBLOCK_SOURCE: 2473 case IP_ADD_SOURCE_MEMBERSHIP: 2474 case IP_DROP_SOURCE_MEMBERSHIP: 2475 case MCAST_JOIN_GROUP: 2476 case MCAST_LEAVE_GROUP: 2477 case MCAST_BLOCK_SOURCE: 2478 case MCAST_UNBLOCK_SOURCE: 2479 case MCAST_JOIN_SOURCE_GROUP: 2480 case MCAST_LEAVE_SOURCE_GROUP: 2481 case MRT_INIT: 2482 case MRT_DONE: 2483 case MRT_ADD_VIF: 2484 case MRT_DEL_VIF: 2485 case MRT_ADD_MFC: 2486 case MRT_DEL_MFC: 2487 case MRT_VERSION: 2488 case MRT_ASSERT: 2489 case IP_SEC_OPT: 2490 case IP_NEXTHOP: 2491 /* 2492 * "soft" error (negative) 2493 * option not handled at this level 2494 * Note: Do not modify *outlenp 2495 */ 2496 return (-EINVAL); 2497 default: 2498 *outlenp = 0; 2499 return (EINVAL); 2500 } 2501 break; 2502 case IPPROTO_IPV6: { 2503 ip6_pkt_t *ipp; 2504 boolean_t sticky; 2505 2506 if (icmp->icmp_family != AF_INET6) { 2507 *outlenp = 0; 2508 return (ENOPROTOOPT); 2509 } 2510 /* 2511 * Deal with both sticky options and ancillary data 2512 */ 2513 if (thisdg_attrs == NULL) { 2514 /* sticky options, or none */ 2515 ipp = &icmp->icmp_sticky_ipp; 2516 sticky = B_TRUE; 2517 } else { 2518 /* ancillary data */ 2519 ipp = (ip6_pkt_t *)thisdg_attrs; 2520 sticky = B_FALSE; 
2521 } 2522 2523 switch (name) { 2524 case IPV6_MULTICAST_IF: 2525 if (!checkonly) { 2526 icmp->icmp_multicast_if_index = *i1; 2527 PASS_OPT_TO_IP(connp); 2528 } 2529 break; 2530 case IPV6_UNICAST_HOPS: 2531 /* -1 means use default */ 2532 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2533 *outlenp = 0; 2534 return (EINVAL); 2535 } 2536 if (!checkonly) { 2537 if (*i1 == -1) { 2538 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2539 is->is_ipv6_hoplimit; 2540 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2541 /* Pass modified value to IP. */ 2542 *i1 = ipp->ipp_hoplimit; 2543 } else { 2544 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2545 (uint8_t)*i1; 2546 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2547 } 2548 /* Rebuild the header template */ 2549 error = icmp_build_hdrs(icmp); 2550 if (error != 0) { 2551 *outlenp = 0; 2552 return (error); 2553 } 2554 } 2555 break; 2556 case IPV6_MULTICAST_HOPS: 2557 /* -1 means use default */ 2558 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2559 *outlenp = 0; 2560 return (EINVAL); 2561 } 2562 if (!checkonly) { 2563 if (*i1 == -1) { 2564 icmp->icmp_multicast_ttl = 2565 ipp->ipp_multicast_hops = 2566 IP_DEFAULT_MULTICAST_TTL; 2567 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2568 /* Pass modified value to IP. */ 2569 *i1 = icmp->icmp_multicast_ttl; 2570 } else { 2571 icmp->icmp_multicast_ttl = 2572 ipp->ipp_multicast_hops = 2573 (uint8_t)*i1; 2574 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2575 } 2576 } 2577 break; 2578 case IPV6_MULTICAST_LOOP: 2579 if (*i1 != 0 && *i1 != 1) { 2580 *outlenp = 0; 2581 return (EINVAL); 2582 } 2583 if (!checkonly) { 2584 connp->conn_multicast_loop = *i1; 2585 PASS_OPT_TO_IP(connp); 2586 } 2587 break; 2588 case IPV6_CHECKSUM: 2589 /* 2590 * Integer offset into the user data of where the 2591 * checksum is located. 2592 * Offset of -1 disables option. 2593 * Does not apply to IPPROTO_ICMPV6. 
2594 */ 2595 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2596 *outlenp = 0; 2597 return (EINVAL); 2598 } 2599 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2600 /* Negative or not 16 bit aligned offset */ 2601 *outlenp = 0; 2602 return (EINVAL); 2603 } 2604 if (checkonly) 2605 break; 2606 2607 if (*i1 == -1) { 2608 icmp->icmp_raw_checksum = 0; 2609 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2610 } else { 2611 icmp->icmp_raw_checksum = 1; 2612 icmp->icmp_checksum_off = *i1; 2613 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2614 } 2615 /* Rebuild the header template */ 2616 error = icmp_build_hdrs(icmp); 2617 if (error != 0) { 2618 *outlenp = 0; 2619 return (error); 2620 } 2621 break; 2622 case IPV6_JOIN_GROUP: 2623 case IPV6_LEAVE_GROUP: 2624 case MCAST_JOIN_GROUP: 2625 case MCAST_LEAVE_GROUP: 2626 case MCAST_BLOCK_SOURCE: 2627 case MCAST_UNBLOCK_SOURCE: 2628 case MCAST_JOIN_SOURCE_GROUP: 2629 case MCAST_LEAVE_SOURCE_GROUP: 2630 /* 2631 * "soft" error (negative) 2632 * option not handled at this level 2633 * Note: Do not modify *outlenp 2634 */ 2635 return (-EINVAL); 2636 case IPV6_BOUND_IF: 2637 if (!checkonly) { 2638 icmp->icmp_bound_if = *i1; 2639 PASS_OPT_TO_IP(connp); 2640 } 2641 break; 2642 case IPV6_UNSPEC_SRC: 2643 if (!checkonly) { 2644 icmp->icmp_unspec_source = onoff; 2645 PASS_OPT_TO_IP(connp); 2646 } 2647 break; 2648 case IPV6_RECVTCLASS: 2649 if (!checkonly) { 2650 icmp->icmp_ipv6_recvtclass = onoff; 2651 PASS_OPT_TO_IP(connp); 2652 } 2653 break; 2654 /* 2655 * Set boolean switches for ancillary data delivery 2656 */ 2657 case IPV6_RECVPKTINFO: 2658 if (!checkonly) { 2659 icmp->icmp_ip_recvpktinfo = onoff; 2660 PASS_OPT_TO_IP(connp); 2661 } 2662 break; 2663 case IPV6_RECVPATHMTU: 2664 if (!checkonly) { 2665 icmp->icmp_ipv6_recvpathmtu = onoff; 2666 PASS_OPT_TO_IP(connp); 2667 } 2668 break; 2669 case IPV6_RECVHOPLIMIT: 2670 if (!checkonly) { 2671 icmp->icmp_ipv6_recvhoplimit = onoff; 2672 PASS_OPT_TO_IP(connp); 2673 } 2674 break; 2675 case 
IPV6_RECVHOPOPTS: 2676 if (!checkonly) { 2677 icmp->icmp_ipv6_recvhopopts = onoff; 2678 PASS_OPT_TO_IP(connp); 2679 } 2680 break; 2681 case IPV6_RECVDSTOPTS: 2682 if (!checkonly) { 2683 icmp->icmp_ipv6_recvdstopts = onoff; 2684 PASS_OPT_TO_IP(connp); 2685 } 2686 break; 2687 case _OLD_IPV6_RECVDSTOPTS: 2688 if (!checkonly) 2689 icmp->icmp_old_ipv6_recvdstopts = onoff; 2690 break; 2691 case IPV6_RECVRTHDRDSTOPTS: 2692 if (!checkonly) { 2693 icmp->icmp_ipv6_recvrtdstopts = onoff; 2694 PASS_OPT_TO_IP(connp); 2695 } 2696 break; 2697 case IPV6_RECVRTHDR: 2698 if (!checkonly) { 2699 icmp->icmp_ipv6_recvrthdr = onoff; 2700 PASS_OPT_TO_IP(connp); 2701 } 2702 break; 2703 /* 2704 * Set sticky options or ancillary data. 2705 * If sticky options, (re)build any extension headers 2706 * that might be needed as a result. 2707 */ 2708 case IPV6_PKTINFO: 2709 /* 2710 * The source address and ifindex are verified 2711 * in ip_opt_set(). For ancillary data the 2712 * source address is checked in ip_wput_v6. 2713 */ 2714 if (inlen != 0 && inlen != 2715 sizeof (struct in6_pktinfo)) { 2716 return (EINVAL); 2717 } 2718 if (checkonly) 2719 break; 2720 2721 if (inlen == 0) { 2722 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2723 ipp->ipp_sticky_ignored |= 2724 (IPPF_IFINDEX|IPPF_ADDR); 2725 } else { 2726 struct in6_pktinfo *pkti; 2727 2728 pkti = (struct in6_pktinfo *)invalp; 2729 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2730 ipp->ipp_addr = pkti->ipi6_addr; 2731 if (ipp->ipp_ifindex != 0) 2732 ipp->ipp_fields |= IPPF_IFINDEX; 2733 else 2734 ipp->ipp_fields &= ~IPPF_IFINDEX; 2735 if (!IN6_IS_ADDR_UNSPECIFIED( 2736 &ipp->ipp_addr)) 2737 ipp->ipp_fields |= IPPF_ADDR; 2738 else 2739 ipp->ipp_fields &= ~IPPF_ADDR; 2740 } 2741 if (sticky) { 2742 error = icmp_build_hdrs(icmp); 2743 if (error != 0) 2744 return (error); 2745 PASS_OPT_TO_IP(connp); 2746 } 2747 break; 2748 case IPV6_HOPLIMIT: 2749 /* This option can only be used as ancillary data. 
*/ 2750 if (sticky) 2751 return (EINVAL); 2752 if (inlen != 0 && inlen != sizeof (int)) 2753 return (EINVAL); 2754 if (checkonly) 2755 break; 2756 2757 if (inlen == 0) { 2758 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2759 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2760 } else { 2761 if (*i1 > 255 || *i1 < -1) 2762 return (EINVAL); 2763 if (*i1 == -1) 2764 ipp->ipp_hoplimit = 2765 is->is_ipv6_hoplimit; 2766 else 2767 ipp->ipp_hoplimit = *i1; 2768 ipp->ipp_fields |= IPPF_HOPLIMIT; 2769 } 2770 break; 2771 case IPV6_TCLASS: 2772 /* 2773 * IPV6_RECVTCLASS accepts -1 as use kernel default 2774 * and [0, 255] as the actualy traffic class. 2775 */ 2776 if (inlen != 0 && inlen != sizeof (int)) { 2777 return (EINVAL); 2778 } 2779 if (checkonly) 2780 break; 2781 2782 if (inlen == 0) { 2783 ipp->ipp_fields &= ~IPPF_TCLASS; 2784 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2785 } else { 2786 if (*i1 >= 256 || *i1 < -1) 2787 return (EINVAL); 2788 if (*i1 == -1) { 2789 ipp->ipp_tclass = 2790 IPV6_FLOW_TCLASS( 2791 IPV6_DEFAULT_VERS_AND_FLOW); 2792 } else { 2793 ipp->ipp_tclass = *i1; 2794 } 2795 ipp->ipp_fields |= IPPF_TCLASS; 2796 } 2797 if (sticky) { 2798 error = icmp_build_hdrs(icmp); 2799 if (error != 0) 2800 return (error); 2801 } 2802 break; 2803 case IPV6_NEXTHOP: 2804 /* 2805 * IP will verify that the nexthop is reachable 2806 * and fail for sticky options. 
2807 */ 2808 if (inlen != 0 && inlen != sizeof (sin6_t)) { 2809 return (EINVAL); 2810 } 2811 if (checkonly) 2812 break; 2813 2814 if (inlen == 0) { 2815 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2816 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2817 } else { 2818 sin6_t *sin6 = (sin6_t *)invalp; 2819 2820 if (sin6->sin6_family != AF_INET6) { 2821 return (EAFNOSUPPORT); 2822 } 2823 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 2824 return (EADDRNOTAVAIL); 2825 } 2826 ipp->ipp_nexthop = sin6->sin6_addr; 2827 if (!IN6_IS_ADDR_UNSPECIFIED( 2828 &ipp->ipp_nexthop)) 2829 ipp->ipp_fields |= IPPF_NEXTHOP; 2830 else 2831 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2832 } 2833 if (sticky) { 2834 error = icmp_build_hdrs(icmp); 2835 if (error != 0) 2836 return (error); 2837 PASS_OPT_TO_IP(connp); 2838 } 2839 break; 2840 case IPV6_HOPOPTS: { 2841 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2842 /* 2843 * Sanity checks - minimum size, size a multiple of 2844 * eight bytes, and matching size passed in. 2845 */ 2846 if (inlen != 0 && 2847 inlen != (8 * (hopts->ip6h_len + 1))) { 2848 return (EINVAL); 2849 } 2850 2851 if (checkonly) 2852 break; 2853 error = optcom_pkt_set(invalp, inlen, sticky, 2854 (uchar_t **)&ipp->ipp_hopopts, 2855 &ipp->ipp_hopoptslen, 2856 sticky ? icmp->icmp_label_len_v6 : 0); 2857 if (error != 0) 2858 return (error); 2859 if (ipp->ipp_hopoptslen == 0) { 2860 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2861 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2862 } else { 2863 ipp->ipp_fields |= IPPF_HOPOPTS; 2864 } 2865 if (sticky) { 2866 error = icmp_build_hdrs(icmp); 2867 if (error != 0) 2868 return (error); 2869 } 2870 break; 2871 } 2872 case IPV6_RTHDRDSTOPTS: { 2873 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2874 2875 /* 2876 * Sanity checks - minimum size, size a multiple of 2877 * eight bytes, and matching size passed in. 
2878 */ 2879 if (inlen != 0 && 2880 inlen != (8 * (dopts->ip6d_len + 1))) 2881 return (EINVAL); 2882 2883 if (checkonly) 2884 break; 2885 2886 if (inlen == 0) { 2887 if (sticky && 2888 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2889 kmem_free(ipp->ipp_rtdstopts, 2890 ipp->ipp_rtdstoptslen); 2891 ipp->ipp_rtdstopts = NULL; 2892 ipp->ipp_rtdstoptslen = 0; 2893 } 2894 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2895 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2896 } else { 2897 error = optcom_pkt_set(invalp, inlen, sticky, 2898 (uchar_t **)&ipp->ipp_rtdstopts, 2899 &ipp->ipp_rtdstoptslen, 0); 2900 if (error != 0) 2901 return (error); 2902 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2903 } 2904 if (sticky) { 2905 error = icmp_build_hdrs(icmp); 2906 if (error != 0) 2907 return (error); 2908 } 2909 break; 2910 } 2911 case IPV6_DSTOPTS: { 2912 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2913 2914 /* 2915 * Sanity checks - minimum size, size a multiple of 2916 * eight bytes, and matching size passed in. 2917 */ 2918 if (inlen != 0 && 2919 inlen != (8 * (dopts->ip6d_len + 1))) 2920 return (EINVAL); 2921 2922 if (checkonly) 2923 break; 2924 2925 if (inlen == 0) { 2926 if (sticky && 2927 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2928 kmem_free(ipp->ipp_dstopts, 2929 ipp->ipp_dstoptslen); 2930 ipp->ipp_dstopts = NULL; 2931 ipp->ipp_dstoptslen = 0; 2932 } 2933 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2934 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2935 } else { 2936 error = optcom_pkt_set(invalp, inlen, sticky, 2937 (uchar_t **)&ipp->ipp_dstopts, 2938 &ipp->ipp_dstoptslen, 0); 2939 if (error != 0) 2940 return (error); 2941 ipp->ipp_fields |= IPPF_DSTOPTS; 2942 } 2943 if (sticky) { 2944 error = icmp_build_hdrs(icmp); 2945 if (error != 0) 2946 return (error); 2947 } 2948 break; 2949 } 2950 case IPV6_RTHDR: { 2951 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2952 2953 /* 2954 * Sanity checks - minimum size, size a multiple of 2955 * eight bytes, and matching size passed in. 
2956 */ 2957 if (inlen != 0 && 2958 inlen != (8 * (rt->ip6r_len + 1))) 2959 return (EINVAL); 2960 2961 if (checkonly) 2962 break; 2963 2964 if (inlen == 0) { 2965 if (sticky && 2966 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2967 kmem_free(ipp->ipp_rthdr, 2968 ipp->ipp_rthdrlen); 2969 ipp->ipp_rthdr = NULL; 2970 ipp->ipp_rthdrlen = 0; 2971 } 2972 ipp->ipp_fields &= ~IPPF_RTHDR; 2973 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2974 } else { 2975 error = optcom_pkt_set(invalp, inlen, sticky, 2976 (uchar_t **)&ipp->ipp_rthdr, 2977 &ipp->ipp_rthdrlen, 0); 2978 if (error != 0) 2979 return (error); 2980 ipp->ipp_fields |= IPPF_RTHDR; 2981 } 2982 if (sticky) { 2983 error = icmp_build_hdrs(icmp); 2984 if (error != 0) 2985 return (error); 2986 } 2987 break; 2988 } 2989 2990 case IPV6_DONTFRAG: 2991 if (checkonly) 2992 break; 2993 2994 if (onoff) { 2995 ipp->ipp_fields |= IPPF_DONTFRAG; 2996 } else { 2997 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2998 } 2999 break; 3000 3001 case IPV6_USE_MIN_MTU: 3002 if (inlen != sizeof (int)) 3003 return (EINVAL); 3004 3005 if (*i1 < -1 || *i1 > 1) 3006 return (EINVAL); 3007 3008 if (checkonly) 3009 break; 3010 3011 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3012 ipp->ipp_use_min_mtu = *i1; 3013 break; 3014 3015 /* 3016 * This option can't be set. Its only returned via 3017 * getsockopt() or ancillary data. 3018 */ 3019 case IPV6_PATHMTU: 3020 return (EINVAL); 3021 3022 case IPV6_SEC_OPT: 3023 case IPV6_SRC_PREFERENCES: 3024 case IPV6_V6ONLY: 3025 /* Handled at IP level */ 3026 return (-EINVAL); 3027 default: 3028 *outlenp = 0; 3029 return (EINVAL); 3030 } 3031 break; 3032 } /* end IPPROTO_IPV6 */ 3033 3034 case IPPROTO_ICMPV6: 3035 /* 3036 * Only allow IPv6 option processing on IPv6 sockets. 
3037 */ 3038 if (icmp->icmp_family != AF_INET6) { 3039 *outlenp = 0; 3040 return (ENOPROTOOPT); 3041 } 3042 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 3043 *outlenp = 0; 3044 return (ENOPROTOOPT); 3045 } 3046 switch (name) { 3047 case ICMP6_FILTER: 3048 if (!checkonly) { 3049 if ((inlen != 0) && 3050 (inlen != sizeof (icmp6_filter_t))) 3051 return (EINVAL); 3052 3053 if (inlen == 0) { 3054 if (icmp->icmp_filter != NULL) { 3055 kmem_free(icmp->icmp_filter, 3056 sizeof (icmp6_filter_t)); 3057 icmp->icmp_filter = NULL; 3058 } 3059 } else { 3060 if (icmp->icmp_filter == NULL) { 3061 icmp->icmp_filter = kmem_alloc( 3062 sizeof (icmp6_filter_t), 3063 KM_NOSLEEP); 3064 if (icmp->icmp_filter == NULL) { 3065 *outlenp = 0; 3066 return (ENOBUFS); 3067 } 3068 } 3069 (void) bcopy(invalp, icmp->icmp_filter, 3070 inlen); 3071 } 3072 } 3073 break; 3074 3075 default: 3076 *outlenp = 0; 3077 return (EINVAL); 3078 } 3079 break; 3080 default: 3081 *outlenp = 0; 3082 return (EINVAL); 3083 } 3084 /* 3085 * Common case of OK return with outval same as inval. 3086 */ 3087 if (invalp != outvalp) { 3088 /* don't trust bcopy for identical src/dst */ 3089 (void) bcopy(invalp, outvalp, inlen); 3090 } 3091 *outlenp = inlen; 3092 return (0); 3093 } 3094 3095 /* This routine sets socket options. */ 3096 /* ARGSUSED */ 3097 int 3098 icmp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3099 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3100 void *thisdg_attrs, cred_t *cr) 3101 { 3102 boolean_t checkonly; 3103 int error; 3104 3105 error = 0; 3106 switch (optset_context) { 3107 case SETFN_OPTCOM_CHECKONLY: 3108 checkonly = B_TRUE; 3109 /* 3110 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3111 * inlen != 0 implies value supplied and 3112 * we have to "pretend" to set it. 3113 * inlen == 0 implies that there is no 3114 * value part in T_CHECK request and just validation 3115 * done elsewhere should be enough, we just return here. 
3116 */ 3117 if (inlen == 0) { 3118 *outlenp = 0; 3119 error = 0; 3120 goto done; 3121 } 3122 break; 3123 case SETFN_OPTCOM_NEGOTIATE: 3124 checkonly = B_FALSE; 3125 break; 3126 case SETFN_UD_NEGOTIATE: 3127 case SETFN_CONN_NEGOTIATE: 3128 checkonly = B_FALSE; 3129 /* 3130 * Negotiating local and "association-related" options 3131 * through T_UNITDATA_REQ. 3132 * 3133 * Following routine can filter out ones we do not 3134 * want to be "set" this way. 3135 */ 3136 if (!icmp_opt_allow_udr_set(level, name)) { 3137 *outlenp = 0; 3138 error = EINVAL; 3139 goto done; 3140 } 3141 break; 3142 default: 3143 /* 3144 * We should never get here 3145 */ 3146 *outlenp = 0; 3147 error = EINVAL; 3148 goto done; 3149 } 3150 3151 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3152 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3153 error = icmp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3154 outvalp, cr, thisdg_attrs, checkonly); 3155 3156 done: 3157 return (error); 3158 } 3159 3160 /* This routine sets socket options. */ 3161 /* ARGSUSED */ 3162 int 3163 icmp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3164 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3165 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3166 { 3167 conn_t *connp = Q_TO_CONN(q); 3168 icmp_t *icmp; 3169 int error; 3170 3171 icmp = connp->conn_icmp; 3172 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3173 error = icmp_opt_set(connp, optset_context, level, name, inlen, invalp, 3174 outlenp, outvalp, thisdg_attrs, cr); 3175 rw_exit(&icmp->icmp_rwlock); 3176 return (error); 3177 } 3178 3179 /* 3180 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 3181 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 3182 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 3183 * headers. 3184 * Returns failure if can't allocate memory. 
3185 */ 3186 static int 3187 icmp_build_hdrs(icmp_t *icmp) 3188 { 3189 icmp_stack_t *is = icmp->icmp_is; 3190 uchar_t *hdrs; 3191 uint_t hdrs_len; 3192 ip6_t *ip6h; 3193 ip6i_t *ip6i; 3194 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 3195 3196 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 3197 hdrs_len = ip_total_hdrs_len_v6(ipp); 3198 ASSERT(hdrs_len != 0); 3199 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 3200 /* Need to reallocate */ 3201 if (hdrs_len != 0) { 3202 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3203 if (hdrs == NULL) 3204 return (ENOMEM); 3205 } else { 3206 hdrs = NULL; 3207 } 3208 if (icmp->icmp_sticky_hdrs_len != 0) { 3209 kmem_free(icmp->icmp_sticky_hdrs, 3210 icmp->icmp_sticky_hdrs_len); 3211 } 3212 icmp->icmp_sticky_hdrs = hdrs; 3213 icmp->icmp_sticky_hdrs_len = hdrs_len; 3214 } 3215 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 3216 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 3217 3218 /* Set header fields not in ipp */ 3219 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3220 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 3221 ip6h = (ip6_t *)&ip6i[1]; 3222 3223 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 3224 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 3225 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 3226 } 3227 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 3228 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 3229 } 3230 } else { 3231 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 3232 } 3233 3234 if (!(ipp->ipp_fields & IPPF_ADDR)) 3235 ip6h->ip6_src = icmp->icmp_v6src; 3236 3237 /* Try to get everything in a single mblk */ 3238 if (hdrs_len > icmp->icmp_max_hdr_len) { 3239 icmp->icmp_max_hdr_len = hdrs_len; 3240 rw_exit(&icmp->icmp_rwlock); 3241 (void) proto_set_tx_wroff(icmp->icmp_connp->conn_rq, 3242 icmp->icmp_connp, 3243 icmp->icmp_max_hdr_len + is->is_wroff_extra); 3244 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3245 } 3246 return (0); 3247 } 3248 3249 /* 3250 * This routine retrieves the value of an ND variable in a icmpparam_t 3251 * structure. 
It is called through nd_getset when a user reads the 3252 * variable. 3253 */ 3254 /* ARGSUSED */ 3255 static int 3256 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3257 { 3258 icmpparam_t *icmppa = (icmpparam_t *)cp; 3259 3260 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 3261 return (0); 3262 } 3263 3264 /* 3265 * Walk through the param array specified registering each element with the 3266 * named dispatch (ND) handler. 3267 */ 3268 static boolean_t 3269 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) 3270 { 3271 for (; cnt-- > 0; icmppa++) { 3272 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 3273 if (!nd_load(ndp, icmppa->icmp_param_name, 3274 icmp_param_get, icmp_param_set, 3275 (caddr_t)icmppa)) { 3276 nd_free(ndp); 3277 return (B_FALSE); 3278 } 3279 } 3280 } 3281 if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL, 3282 NULL)) { 3283 nd_free(ndp); 3284 return (B_FALSE); 3285 } 3286 return (B_TRUE); 3287 } 3288 3289 /* This routine sets an ND variable in a icmpparam_t structure. */ 3290 /* ARGSUSED */ 3291 static int 3292 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3293 { 3294 long new_value; 3295 icmpparam_t *icmppa = (icmpparam_t *)cp; 3296 3297 /* 3298 * Fail the request if the new value does not lie within the 3299 * required bounds. 
3300 */ 3301 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3302 new_value < icmppa->icmp_param_min || 3303 new_value > icmppa->icmp_param_max) { 3304 return (EINVAL); 3305 } 3306 /* Set the new value */ 3307 icmppa->icmp_param_value = new_value; 3308 return (0); 3309 } 3310 static void 3311 icmp_queue_fallback(icmp_t *icmp, mblk_t *mp) 3312 { 3313 ASSERT(MUTEX_HELD(&icmp->icmp_recv_lock)); 3314 if (IPCL_IS_NONSTR(icmp->icmp_connp)) { 3315 /* 3316 * fallback has started but messages have not been moved yet 3317 */ 3318 if (icmp->icmp_fallback_queue_head == NULL) { 3319 ASSERT(icmp->icmp_fallback_queue_tail == NULL); 3320 icmp->icmp_fallback_queue_head = mp; 3321 icmp->icmp_fallback_queue_tail = mp; 3322 } else { 3323 ASSERT(icmp->icmp_fallback_queue_tail != NULL); 3324 icmp->icmp_fallback_queue_tail->b_next = mp; 3325 icmp->icmp_fallback_queue_tail = mp; 3326 } 3327 mutex_exit(&icmp->icmp_recv_lock); 3328 } else { 3329 /* 3330 * no more fallbacks possible, ok to drop lock. 3331 */ 3332 mutex_exit(&icmp->icmp_recv_lock); 3333 putnext(icmp->icmp_connp->conn_rq, mp); 3334 } 3335 } 3336 3337 /*ARGSUSED2*/ 3338 static void 3339 icmp_input(void *arg1, mblk_t *mp, void *arg2) 3340 { 3341 conn_t *connp = (conn_t *)arg1; 3342 struct T_unitdata_ind *tudi; 3343 uchar_t *rptr; 3344 icmp_t *icmp; 3345 icmp_stack_t *is; 3346 sin_t *sin; 3347 sin6_t *sin6; 3348 ip6_t *ip6h; 3349 ip6i_t *ip6i; 3350 mblk_t *mp1; 3351 int hdr_len; 3352 ipha_t *ipha; 3353 int udi_size; /* Size of T_unitdata_ind */ 3354 uint_t ipvers; 3355 ip6_pkt_t ipp; 3356 uint8_t nexthdr; 3357 ip_pktinfo_t *pinfo = NULL; 3358 mblk_t *options_mp = NULL; 3359 uint_t icmp_opt = 0; 3360 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 3361 uint_t hopstrip; 3362 int error; 3363 3364 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 3365 3366 icmp = connp->conn_icmp; 3367 is = icmp->icmp_is; 3368 rptr = mp->b_rptr; 3369 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3370 ASSERT(OK_32PTR(rptr)); 3371 3372 /* 3373 * IP 
should have prepended the options data in an M_CTL 3374 * Check M_CTL "type" to make sure are not here bcos of 3375 * a valid ICMP message 3376 */ 3377 if (DB_TYPE(mp) == M_CTL) { 3378 /* 3379 * FIXME: does IP still do this? 3380 * IP sends up the IPSEC_IN message for handling IPSEC 3381 * policy at the TCP level. We don't need it here. 3382 */ 3383 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 3384 mp1 = mp->b_cont; 3385 freeb(mp); 3386 mp = mp1; 3387 rptr = mp->b_rptr; 3388 } else if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3389 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3390 IN_PKTINFO) { 3391 /* 3392 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3393 * has been prepended to the packet by IP. We need to 3394 * extract the mblk and adjust the rptr 3395 */ 3396 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3397 options_mp = mp; 3398 mp = mp->b_cont; 3399 rptr = mp->b_rptr; 3400 } else { 3401 /* 3402 * ICMP messages. 3403 */ 3404 icmp_icmp_error(connp, mp); 3405 return; 3406 } 3407 } 3408 3409 /* 3410 * Discard message if it is misaligned or smaller than the IP header. 3411 */ 3412 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3413 freemsg(mp); 3414 if (options_mp != NULL) 3415 freeb(options_mp); 3416 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3417 return; 3418 } 3419 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3420 3421 /* Handle M_DATA messages containing IP packets messages */ 3422 if (ipvers == IPV4_VERSION) { 3423 /* 3424 * Special case where IP attaches 3425 * the IRE needs to be handled so that we don't send up 3426 * IRE to the user land. 
3427 */ 3428 ipha = (ipha_t *)rptr; 3429 hdr_len = IPH_HDR_LENGTH(ipha); 3430 3431 if (ipha->ipha_protocol == IPPROTO_TCP) { 3432 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3433 3434 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3435 TH_SYN) && mp->b_cont != NULL) { 3436 mp1 = mp->b_cont; 3437 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3438 freeb(mp1); 3439 mp->b_cont = NULL; 3440 } 3441 } 3442 } 3443 if (is->is_bsd_compat) { 3444 ushort_t len; 3445 len = ntohs(ipha->ipha_length); 3446 3447 if (mp->b_datap->db_ref > 1) { 3448 /* 3449 * Allocate a new IP header so that we can 3450 * modify ipha_length. 3451 */ 3452 mblk_t *mp1; 3453 3454 mp1 = allocb(hdr_len, BPRI_MED); 3455 if (!mp1) { 3456 freemsg(mp); 3457 if (options_mp != NULL) 3458 freeb(options_mp); 3459 BUMP_MIB(&is->is_rawip_mib, 3460 rawipInErrors); 3461 return; 3462 } 3463 bcopy(rptr, mp1->b_rptr, hdr_len); 3464 mp->b_rptr = rptr + hdr_len; 3465 rptr = mp1->b_rptr; 3466 ipha = (ipha_t *)rptr; 3467 mp1->b_cont = mp; 3468 mp1->b_wptr = rptr + hdr_len; 3469 mp = mp1; 3470 } 3471 len -= hdr_len; 3472 ipha->ipha_length = htons(len); 3473 } 3474 } 3475 3476 /* 3477 * This is the inbound data path. Packets are passed upstream as 3478 * T_UNITDATA_IND messages with full IP headers still attached. 3479 */ 3480 if (icmp->icmp_family == AF_INET) { 3481 ASSERT(ipvers == IPV4_VERSION); 3482 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3483 if (icmp->icmp_recvif && (pinfo != NULL) && 3484 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3485 udi_size += sizeof (struct T_opthdr) + 3486 sizeof (uint_t); 3487 } 3488 3489 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3490 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3491 udi_size += sizeof (struct T_opthdr) + 3492 sizeof (struct in_pktinfo); 3493 } 3494 3495 /* 3496 * If SO_TIMESTAMP is set allocate the appropriate sized 3497 * buffer. 
Since gethrestime() expects a pointer aligned 3498 * argument, we allocate space necessary for extra 3499 * alignment (even though it might not be used). 3500 */ 3501 if (icmp->icmp_timestamp) { 3502 udi_size += sizeof (struct T_opthdr) + 3503 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3504 } 3505 mp1 = allocb(udi_size, BPRI_MED); 3506 if (mp1 == NULL) { 3507 freemsg(mp); 3508 if (options_mp != NULL) 3509 freeb(options_mp); 3510 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3511 return; 3512 } 3513 mp1->b_cont = mp; 3514 mp = mp1; 3515 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3516 mp->b_datap->db_type = M_PROTO; 3517 mp->b_wptr = (uchar_t *)tudi + udi_size; 3518 tudi->PRIM_type = T_UNITDATA_IND; 3519 tudi->SRC_length = sizeof (sin_t); 3520 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3521 sin = (sin_t *)&tudi[1]; 3522 *sin = sin_null; 3523 sin->sin_family = AF_INET; 3524 sin->sin_addr.s_addr = ipha->ipha_src; 3525 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3526 sizeof (sin_t); 3527 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3528 tudi->OPT_length = udi_size; 3529 3530 /* 3531 * Add options if IP_RECVIF is set 3532 */ 3533 if (udi_size != 0) { 3534 char *dstopt; 3535 3536 dstopt = (char *)&sin[1]; 3537 if (icmp->icmp_recvif && (pinfo != NULL) && 3538 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3539 3540 struct T_opthdr *toh; 3541 uint_t *dstptr; 3542 3543 toh = (struct T_opthdr *)dstopt; 3544 toh->level = IPPROTO_IP; 3545 toh->name = IP_RECVIF; 3546 toh->len = sizeof (struct T_opthdr) + 3547 sizeof (uint_t); 3548 toh->status = 0; 3549 dstopt += sizeof (struct T_opthdr); 3550 dstptr = (uint_t *)dstopt; 3551 *dstptr = pinfo->ip_pkt_ifindex; 3552 dstopt += sizeof (uint_t); 3553 udi_size -= toh->len; 3554 } 3555 if (icmp->icmp_timestamp) { 3556 struct T_opthdr *toh; 3557 3558 toh = (struct T_opthdr *)dstopt; 3559 toh->level = SOL_SOCKET; 3560 toh->name = SCM_TIMESTAMP; 3561 toh->len = sizeof (struct T_opthdr) + 3562 sizeof (timestruc_t) + 
_POINTER_ALIGNMENT; 3563 toh->status = 0; 3564 dstopt += sizeof (struct T_opthdr); 3565 /* Align for gethrestime() */ 3566 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 3567 sizeof (intptr_t)); 3568 gethrestime((timestruc_t *)dstopt); 3569 dstopt = (char *)toh + toh->len; 3570 udi_size -= toh->len; 3571 } 3572 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3573 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3574 struct T_opthdr *toh; 3575 struct in_pktinfo *pktinfop; 3576 3577 toh = (struct T_opthdr *)dstopt; 3578 toh->level = IPPROTO_IP; 3579 toh->name = IP_PKTINFO; 3580 toh->len = sizeof (struct T_opthdr) + 3581 sizeof (in_pktinfo_t); 3582 toh->status = 0; 3583 dstopt += sizeof (struct T_opthdr); 3584 pktinfop = (struct in_pktinfo *)dstopt; 3585 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3586 pktinfop->ipi_spec_dst = 3587 pinfo->ip_pkt_match_addr; 3588 3589 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3590 3591 dstopt += sizeof (struct in_pktinfo); 3592 udi_size -= toh->len; 3593 } 3594 3595 /* Consumed all of allocated space */ 3596 ASSERT(udi_size == 0); 3597 } 3598 3599 if (options_mp != NULL) 3600 freeb(options_mp); 3601 3602 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3603 goto deliver; 3604 } 3605 3606 /* 3607 * We don't need options_mp in the IPv6 path. 3608 */ 3609 if (options_mp != NULL) { 3610 freeb(options_mp); 3611 options_mp = NULL; 3612 } 3613 3614 /* 3615 * Discard message if it is smaller than the IPv6 header 3616 * or if the header is malformed. 3617 */ 3618 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3619 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3620 icmp->icmp_family != AF_INET6) { 3621 freemsg(mp); 3622 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3623 return; 3624 } 3625 3626 /* Initialize */ 3627 ipp.ipp_fields = 0; 3628 hopstrip = 0; 3629 3630 ip6h = (ip6_t *)rptr; 3631 /* 3632 * Call on ip_find_hdr_v6 which gets the total hdr len 3633 * as well as individual lenghts of ext hdrs (and ptrs to 3634 * them). 
3635 */ 3636 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3637 /* Look for ifindex information */ 3638 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3639 ip6i = (ip6i_t *)ip6h; 3640 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3641 ASSERT(ip6i->ip6i_ifindex != 0); 3642 ipp.ipp_fields |= IPPF_IFINDEX; 3643 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3644 } 3645 rptr = (uchar_t *)&ip6i[1]; 3646 mp->b_rptr = rptr; 3647 if (rptr == mp->b_wptr) { 3648 mp1 = mp->b_cont; 3649 freeb(mp); 3650 mp = mp1; 3651 rptr = mp->b_rptr; 3652 } 3653 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3654 ip6h = (ip6_t *)rptr; 3655 } 3656 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3657 3658 /* 3659 * We need to lie a bit to the user because users inside 3660 * labeled compartments should not see their own labels. We 3661 * assume that in all other respects IP has checked the label, 3662 * and that the label is always first among the options. (If 3663 * it's not first, then this code won't see it, and the option 3664 * will be passed along to the user.) 3665 * 3666 * If we had multilevel ICMP sockets, then the following code 3667 * should be skipped for them to allow the user to see the 3668 * label. 3669 * 3670 * Alignment restrictions in the definition of IP options 3671 * (namely, the requirement that the 4-octet DOI goes on a 3672 * 4-octet boundary) mean that we know exactly where the option 3673 * should start, but we're lenient for other hosts. 3674 * 3675 * Note that there are no multilevel ICMP or raw IP sockets 3676 * yet, thus nobody ever sees the IP6OPT_LS option. 
3677 */ 3678 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3679 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3680 const uchar_t *ucp = 3681 (const uchar_t *)ipp.ipp_hopopts + 2; 3682 int remlen = ipp.ipp_hopoptslen - 2; 3683 3684 while (remlen > 0) { 3685 if (*ucp == IP6OPT_PAD1) { 3686 remlen--; 3687 ucp++; 3688 } else if (*ucp == IP6OPT_PADN) { 3689 remlen -= ucp[1] + 2; 3690 ucp += ucp[1] + 2; 3691 } else if (*ucp == ip6opt_ls) { 3692 hopstrip = (ucp - 3693 (const uchar_t *)ipp.ipp_hopopts) + 3694 ucp[1] + 2; 3695 hopstrip = (hopstrip + 7) & ~7; 3696 break; 3697 } else { 3698 /* label option must be first */ 3699 break; 3700 } 3701 } 3702 } 3703 } else { 3704 hdr_len = IPV6_HDR_LEN; 3705 ip6i = NULL; 3706 nexthdr = ip6h->ip6_nxt; 3707 } 3708 /* 3709 * One special case where IP attaches the IRE needs to 3710 * be handled so that we don't send up IRE to the user land. 3711 */ 3712 if (nexthdr == IPPROTO_TCP) { 3713 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3714 3715 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3716 mp->b_cont != NULL) { 3717 mp1 = mp->b_cont; 3718 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3719 freeb(mp1); 3720 mp->b_cont = NULL; 3721 } 3722 } 3723 } 3724 /* 3725 * Check a filter for ICMPv6 types if needed. 3726 * Verify raw checksums if needed. 
3727 */ 3728 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3729 if (icmp->icmp_filter != NULL) { 3730 int type; 3731 3732 /* Assumes that IP has done the pullupmsg */ 3733 type = mp->b_rptr[hdr_len]; 3734 3735 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3736 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3737 freemsg(mp); 3738 return; 3739 } 3740 } else { 3741 /* Checksum */ 3742 uint16_t *up; 3743 uint32_t sum; 3744 int remlen; 3745 3746 up = (uint16_t *)&ip6h->ip6_src; 3747 3748 remlen = msgdsize(mp) - hdr_len; 3749 sum = htons(icmp->icmp_proto + remlen) 3750 + up[0] + up[1] + up[2] + up[3] 3751 + up[4] + up[5] + up[6] + up[7] 3752 + up[8] + up[9] + up[10] + up[11] 3753 + up[12] + up[13] + up[14] + up[15]; 3754 sum = (sum & 0xffff) + (sum >> 16); 3755 sum = IP_CSUM(mp, hdr_len, sum); 3756 if (sum != 0) { 3757 /* IPv6 RAW checksum failed */ 3758 ip0dbg(("icmp_rput: RAW checksum " 3759 "failed %x\n", sum)); 3760 freemsg(mp); 3761 BUMP_MIB(&is->is_rawip_mib, 3762 rawipInCksumErrs); 3763 return; 3764 } 3765 } 3766 } 3767 /* Skip all the IPv6 headers per API */ 3768 mp->b_rptr += hdr_len; 3769 3770 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3771 3772 /* 3773 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3774 * maintain state information, instead of relying on icmp_t 3775 * structure, since there arent any locks protecting these members 3776 * and there is a window where there might be a race between a 3777 * thread setting options on the write side and a thread reading 3778 * these options on the read size. 
3779 */ 3780 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3781 IPPF_RTHDR|IPPF_IFINDEX)) { 3782 if (icmp->icmp_ipv6_recvhopopts && 3783 (ipp.ipp_fields & IPPF_HOPOPTS) && 3784 ipp.ipp_hopoptslen > hopstrip) { 3785 udi_size += sizeof (struct T_opthdr) + 3786 ipp.ipp_hopoptslen - hopstrip; 3787 icmp_opt |= IPPF_HOPOPTS; 3788 } 3789 if ((icmp->icmp_ipv6_recvdstopts || 3790 icmp->icmp_old_ipv6_recvdstopts) && 3791 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3792 udi_size += sizeof (struct T_opthdr) + 3793 ipp.ipp_dstoptslen; 3794 icmp_opt |= IPPF_DSTOPTS; 3795 } 3796 if (((icmp->icmp_ipv6_recvdstopts && 3797 icmp->icmp_ipv6_recvrthdr && 3798 (ipp.ipp_fields & IPPF_RTHDR)) || 3799 icmp->icmp_ipv6_recvrtdstopts) && 3800 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3801 udi_size += sizeof (struct T_opthdr) + 3802 ipp.ipp_rtdstoptslen; 3803 icmp_opt |= IPPF_RTDSTOPTS; 3804 } 3805 if (icmp->icmp_ipv6_recvrthdr && 3806 (ipp.ipp_fields & IPPF_RTHDR)) { 3807 udi_size += sizeof (struct T_opthdr) + 3808 ipp.ipp_rthdrlen; 3809 icmp_opt |= IPPF_RTHDR; 3810 } 3811 if (icmp->icmp_ip_recvpktinfo && 3812 (ipp.ipp_fields & IPPF_IFINDEX)) { 3813 udi_size += sizeof (struct T_opthdr) + 3814 sizeof (struct in6_pktinfo); 3815 icmp_opt |= IPPF_IFINDEX; 3816 } 3817 } 3818 if (icmp->icmp_ipv6_recvhoplimit) { 3819 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3820 icmp_ipv6_recvhoplimit = B_TRUE; 3821 } 3822 3823 if (icmp->icmp_ipv6_recvtclass) 3824 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3825 3826 /* 3827 * If SO_TIMESTAMP is set allocate the appropriate sized 3828 * buffer. Since gethrestime() expects a pointer aligned 3829 * argument, we allocate space necessary for extra 3830 * alignment (even though it might not be used). 
3831 */ 3832 if (icmp->icmp_timestamp) { 3833 udi_size += sizeof (struct T_opthdr) + 3834 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3835 } 3836 3837 mp1 = allocb(udi_size, BPRI_MED); 3838 if (mp1 == NULL) { 3839 freemsg(mp); 3840 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3841 return; 3842 } 3843 mp1->b_cont = mp; 3844 mp = mp1; 3845 mp->b_datap->db_type = M_PROTO; 3846 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3847 mp->b_wptr = (uchar_t *)tudi + udi_size; 3848 tudi->PRIM_type = T_UNITDATA_IND; 3849 tudi->SRC_length = sizeof (sin6_t); 3850 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3851 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3852 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3853 tudi->OPT_length = udi_size; 3854 sin6 = (sin6_t *)&tudi[1]; 3855 sin6->sin6_port = 0; 3856 sin6->sin6_family = AF_INET6; 3857 3858 sin6->sin6_addr = ip6h->ip6_src; 3859 /* No sin6_flowinfo per API */ 3860 sin6->sin6_flowinfo = 0; 3861 /* For link-scope source pass up scope id */ 3862 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3863 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3864 sin6->sin6_scope_id = ipp.ipp_ifindex; 3865 else 3866 sin6->sin6_scope_id = 0; 3867 3868 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3869 icmp->icmp_zoneid, is->is_netstack); 3870 3871 if (udi_size != 0) { 3872 uchar_t *dstopt; 3873 3874 dstopt = (uchar_t *)&sin6[1]; 3875 if (icmp_opt & IPPF_IFINDEX) { 3876 struct T_opthdr *toh; 3877 struct in6_pktinfo *pkti; 3878 3879 toh = (struct T_opthdr *)dstopt; 3880 toh->level = IPPROTO_IPV6; 3881 toh->name = IPV6_PKTINFO; 3882 toh->len = sizeof (struct T_opthdr) + 3883 sizeof (*pkti); 3884 toh->status = 0; 3885 dstopt += sizeof (struct T_opthdr); 3886 pkti = (struct in6_pktinfo *)dstopt; 3887 pkti->ipi6_addr = ip6h->ip6_dst; 3888 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3889 dstopt += sizeof (*pkti); 3890 udi_size -= toh->len; 3891 } 3892 if (icmp_ipv6_recvhoplimit) { 3893 struct T_opthdr *toh; 3894 3895 toh = 
(struct T_opthdr *)dstopt; 3896 toh->level = IPPROTO_IPV6; 3897 toh->name = IPV6_HOPLIMIT; 3898 toh->len = sizeof (struct T_opthdr) + 3899 sizeof (uint_t); 3900 toh->status = 0; 3901 dstopt += sizeof (struct T_opthdr); 3902 *(uint_t *)dstopt = ip6h->ip6_hops; 3903 dstopt += sizeof (uint_t); 3904 udi_size -= toh->len; 3905 } 3906 if (icmp->icmp_ipv6_recvtclass) { 3907 struct T_opthdr *toh; 3908 3909 toh = (struct T_opthdr *)dstopt; 3910 toh->level = IPPROTO_IPV6; 3911 toh->name = IPV6_TCLASS; 3912 toh->len = sizeof (struct T_opthdr) + 3913 sizeof (uint_t); 3914 toh->status = 0; 3915 dstopt += sizeof (struct T_opthdr); 3916 *(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3917 dstopt += sizeof (uint_t); 3918 udi_size -= toh->len; 3919 } 3920 if (icmp->icmp_timestamp) { 3921 struct T_opthdr *toh; 3922 3923 toh = (struct T_opthdr *)dstopt; 3924 toh->level = SOL_SOCKET; 3925 toh->name = SCM_TIMESTAMP; 3926 toh->len = sizeof (struct T_opthdr) + 3927 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3928 toh->status = 0; 3929 dstopt += sizeof (struct T_opthdr); 3930 /* Align for gethrestime() */ 3931 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 3932 sizeof (intptr_t)); 3933 gethrestime((timestruc_t *)dstopt); 3934 dstopt = (uchar_t *)toh + toh->len; 3935 udi_size -= toh->len; 3936 } 3937 3938 if (icmp_opt & IPPF_HOPOPTS) { 3939 struct T_opthdr *toh; 3940 3941 toh = (struct T_opthdr *)dstopt; 3942 toh->level = IPPROTO_IPV6; 3943 toh->name = IPV6_HOPOPTS; 3944 toh->len = sizeof (struct T_opthdr) + 3945 ipp.ipp_hopoptslen - hopstrip; 3946 toh->status = 0; 3947 dstopt += sizeof (struct T_opthdr); 3948 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 3949 ipp.ipp_hopoptslen - hopstrip); 3950 if (hopstrip > 0) { 3951 /* copy next header value and fake length */ 3952 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 3953 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 3954 hopstrip / 8; 3955 } 3956 dstopt += ipp.ipp_hopoptslen - hopstrip; 3957 udi_size -= toh->len; 3958 } 3959 if 
(icmp_opt & IPPF_RTDSTOPTS) { 3960 struct T_opthdr *toh; 3961 3962 toh = (struct T_opthdr *)dstopt; 3963 toh->level = IPPROTO_IPV6; 3964 toh->name = IPV6_DSTOPTS; 3965 toh->len = sizeof (struct T_opthdr) + 3966 ipp.ipp_rtdstoptslen; 3967 toh->status = 0; 3968 dstopt += sizeof (struct T_opthdr); 3969 bcopy(ipp.ipp_rtdstopts, dstopt, 3970 ipp.ipp_rtdstoptslen); 3971 dstopt += ipp.ipp_rtdstoptslen; 3972 udi_size -= toh->len; 3973 } 3974 if (icmp_opt & IPPF_RTHDR) { 3975 struct T_opthdr *toh; 3976 3977 toh = (struct T_opthdr *)dstopt; 3978 toh->level = IPPROTO_IPV6; 3979 toh->name = IPV6_RTHDR; 3980 toh->len = sizeof (struct T_opthdr) + 3981 ipp.ipp_rthdrlen; 3982 toh->status = 0; 3983 dstopt += sizeof (struct T_opthdr); 3984 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 3985 dstopt += ipp.ipp_rthdrlen; 3986 udi_size -= toh->len; 3987 } 3988 if (icmp_opt & IPPF_DSTOPTS) { 3989 struct T_opthdr *toh; 3990 3991 toh = (struct T_opthdr *)dstopt; 3992 toh->level = IPPROTO_IPV6; 3993 toh->name = IPV6_DSTOPTS; 3994 toh->len = sizeof (struct T_opthdr) + 3995 ipp.ipp_dstoptslen; 3996 toh->status = 0; 3997 dstopt += sizeof (struct T_opthdr); 3998 bcopy(ipp.ipp_dstopts, dstopt, 3999 ipp.ipp_dstoptslen); 4000 dstopt += ipp.ipp_dstoptslen; 4001 udi_size -= toh->len; 4002 } 4003 /* Consumed all of allocated space */ 4004 ASSERT(udi_size == 0); 4005 } 4006 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 4007 4008 deliver: 4009 if (IPCL_IS_NONSTR(connp)) { 4010 if ((*connp->conn_upcalls->su_recv) 4011 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 4012 NULL) < 0) { 4013 mutex_enter(&icmp->icmp_recv_lock); 4014 if (error == ENOSPC) { 4015 /* 4016 * let's confirm while holding the lock 4017 */ 4018 if ((*connp->conn_upcalls->su_recv) 4019 (connp->conn_upper_handle, NULL, 0, 0, 4020 &error, NULL) < 0) { 4021 if (error == ENOSPC) { 4022 connp->conn_flow_cntrld = 4023 B_TRUE; 4024 } else { 4025 ASSERT(error == EOPNOTSUPP); 4026 } 4027 } 4028 mutex_exit(&icmp->icmp_recv_lock); 4029 
} else { 4030 ASSERT(error == EOPNOTSUPP); 4031 icmp_queue_fallback(icmp, mp); 4032 } 4033 } 4034 } else { 4035 putnext(connp->conn_rq, mp); 4036 } 4037 ASSERT(MUTEX_NOT_HELD(&icmp->icmp_recv_lock)); 4038 } 4039 4040 /* 4041 * return SNMP stuff in buffer in mpdata 4042 */ 4043 mblk_t * 4044 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 4045 { 4046 mblk_t *mpdata; 4047 struct opthdr *optp; 4048 conn_t *connp = Q_TO_CONN(q); 4049 icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 4050 mblk_t *mp2ctl; 4051 4052 /* 4053 * make a copy of the original message 4054 */ 4055 mp2ctl = copymsg(mpctl); 4056 4057 if (mpctl == NULL || 4058 (mpdata = mpctl->b_cont) == NULL) { 4059 freemsg(mpctl); 4060 freemsg(mp2ctl); 4061 return (0); 4062 } 4063 4064 /* fixed length structure for IPv4 and IPv6 counters */ 4065 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4066 optp->level = EXPER_RAWIP; 4067 optp->name = 0; 4068 (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, 4069 sizeof (is->is_rawip_mib)); 4070 optp->len = msgdsize(mpdata); 4071 qreply(q, mpctl); 4072 4073 return (mp2ctl); 4074 } 4075 4076 /* 4077 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 4078 * TODO: If this ever actually tries to set anything, it needs to be 4079 * to do the appropriate locking. 
4080 */ 4081 /* ARGSUSED */ 4082 int 4083 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4084 uchar_t *ptr, int len) 4085 { 4086 switch (level) { 4087 case EXPER_RAWIP: 4088 return (0); 4089 default: 4090 return (1); 4091 } 4092 } 4093 4094 /* Report for ndd "icmp_status" */ 4095 /* ARGSUSED */ 4096 static int 4097 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4098 { 4099 conn_t *connp; 4100 ip_stack_t *ipst; 4101 char laddrbuf[INET6_ADDRSTRLEN]; 4102 char faddrbuf[INET6_ADDRSTRLEN]; 4103 int i; 4104 4105 (void) mi_mpprintf(mp, 4106 "RAWIP " MI_COL_HDRPAD_STR 4107 /* 01234567[89ABCDEF] */ 4108 " src addr dest addr state"); 4109 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 4110 4111 connp = Q_TO_CONN(q); 4112 ipst = connp->conn_netstack->netstack_ip; 4113 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4114 connf_t *connfp; 4115 char *state; 4116 4117 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4118 connp = NULL; 4119 4120 while ((connp = ipcl_get_next_conn(connfp, connp, 4121 IPCL_RAWIPCONN)) != NULL) { 4122 icmp_t *icmp; 4123 4124 mutex_enter(&(connp)->conn_lock); 4125 icmp = connp->conn_icmp; 4126 4127 if (icmp->icmp_state == TS_UNBND) 4128 state = "UNBOUND"; 4129 else if (icmp->icmp_state == TS_IDLE) 4130 state = "IDLE"; 4131 else if (icmp->icmp_state == TS_DATA_XFER) 4132 state = "CONNECTED"; 4133 else 4134 state = "UnkState"; 4135 4136 (void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s", 4137 (void *)icmp, 4138 inet_ntop(AF_INET6, &icmp->icmp_v6dst.sin6_addr, 4139 faddrbuf, 4140 sizeof (faddrbuf)), 4141 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 4142 sizeof (laddrbuf)), 4143 state); 4144 mutex_exit(&(connp)->conn_lock); 4145 } 4146 } 4147 return (0); 4148 } 4149 4150 /* 4151 * This routine creates a T_UDERROR_IND message and passes it upstream. 4152 * The address and options are copied from the T_UNITDATA_REQ message 4153 * passed in mp. This message is freed. 
4154 */ 4155 static void 4156 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 4157 { 4158 mblk_t *mp1; 4159 uchar_t *rptr = mp->b_rptr; 4160 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 4161 4162 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 4163 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 4164 tudr->OPT_length, err); 4165 if (mp1) 4166 qreply(q, mp1); 4167 freemsg(mp); 4168 } 4169 4170 4171 static int 4172 rawip_do_unbind(conn_t *connp) 4173 { 4174 icmp_t *icmp = connp->conn_icmp; 4175 4176 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4177 /* If a bind has not been done, we can't unbind. */ 4178 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 4179 rw_exit(&icmp->icmp_rwlock); 4180 return (-TOUTSTATE); 4181 } 4182 icmp->icmp_pending_op = T_UNBIND_REQ; 4183 rw_exit(&icmp->icmp_rwlock); 4184 4185 /* 4186 * Call ip to unbind 4187 */ 4188 4189 ip_unbind(connp); 4190 4191 /* 4192 * Once we're unbound from IP, the pending operation may be cleared 4193 * here. 4194 */ 4195 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4196 V6_SET_ZERO(icmp->icmp_v6src); 4197 V6_SET_ZERO(icmp->icmp_bound_v6src); 4198 icmp->icmp_pending_op = -1; 4199 icmp->icmp_state = TS_UNBND; 4200 if (icmp->icmp_family == AF_INET6) 4201 (void) icmp_build_hdrs(icmp); 4202 rw_exit(&icmp->icmp_rwlock); 4203 return (0); 4204 } 4205 4206 /* 4207 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 4208 * After some error checking, the message is passed downstream to ip. 
4209 */ 4210 static void 4211 icmp_tpi_unbind(queue_t *q, mblk_t *mp) 4212 { 4213 conn_t *connp = Q_TO_CONN(q); 4214 int error; 4215 4216 ASSERT(mp->b_cont == NULL); 4217 error = rawip_do_unbind(connp); 4218 if (error) { 4219 if (error < 0) { 4220 icmp_err_ack(q, mp, -error, 0); 4221 } else { 4222 icmp_err_ack(q, mp, 0, error); 4223 } 4224 return; 4225 } 4226 4227 /* 4228 * Convert mp into a T_OK_ACK 4229 */ 4230 4231 mp = mi_tpi_ok_ack_alloc(mp); 4232 4233 /* 4234 * should not happen in practice... T_OK_ACK is smaller than the 4235 * original message. 4236 */ 4237 ASSERT(mp != NULL); 4238 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4239 qreply(q, mp); 4240 } 4241 4242 4243 /* 4244 * Process IPv4 packets that already include an IP header. 4245 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 4246 * IPPROTO_IGMP). 4247 */ 4248 static int 4249 icmp_wput_hdrincl(queue_t *q, conn_t *connp, mblk_t *mp, icmp_t *icmp, 4250 ip4_pkt_t *pktinfop) 4251 { 4252 icmp_stack_t *is = icmp->icmp_is; 4253 ipha_t *ipha; 4254 int ip_hdr_length; 4255 int tp_hdr_len; 4256 mblk_t *mp1; 4257 uint_t pkt_len; 4258 ip_opt_info_t optinfo; 4259 4260 optinfo.ip_opt_flags = 0; 4261 optinfo.ip_opt_ill_index = 0; 4262 ipha = (ipha_t *)mp->b_rptr; 4263 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4264 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 4265 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4266 ASSERT(icmp != NULL); 4267 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4268 freemsg(mp); 4269 return (0); 4270 } 4271 ipha = (ipha_t *)mp->b_rptr; 4272 } 4273 ipha->ipha_version_and_hdr_length = 4274 (IP_VERSION<<4) | (ip_hdr_length>>2); 4275 4276 /* 4277 * For the socket of SOCK_RAW type, the checksum is provided in the 4278 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4279 * tell IP that the application has sent a complete IP header and not 4280 * to compute the transport checksum nor change the DF flag. 
4281 */ 4282 ipha->ipha_ident = IP_HDR_INCLUDED; 4283 ipha->ipha_hdr_checksum = 0; 4284 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 4285 /* Insert options if any */ 4286 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4287 /* 4288 * Put the IP header plus any transport header that is 4289 * checksumed by ip_wput into the first mblk. (ip_wput assumes 4290 * that at least the checksum field is in the first mblk.) 4291 */ 4292 switch (ipha->ipha_protocol) { 4293 case IPPROTO_UDP: 4294 tp_hdr_len = 8; 4295 break; 4296 case IPPROTO_TCP: 4297 tp_hdr_len = 20; 4298 break; 4299 default: 4300 tp_hdr_len = 0; 4301 break; 4302 } 4303 /* 4304 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 4305 * tp_hdr_len bytes will be in a single mblk. 4306 */ 4307 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 4308 tp_hdr_len)) { 4309 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 4310 tp_hdr_len)) { 4311 BUMP_MIB(&is->is_rawip_mib, 4312 rawipOutErrors); 4313 freemsg(mp); 4314 return (0); 4315 } 4316 ipha = (ipha_t *)mp->b_rptr; 4317 } 4318 4319 /* 4320 * if the length is larger then the max allowed IP packet, 4321 * then send an error and abort the processing. 
4322 */ 4323 pkt_len = ntohs(ipha->ipha_length) 4324 + icmp->icmp_ip_snd_options_len; 4325 if (pkt_len > IP_MAXPACKET) { 4326 return (EMSGSIZE); 4327 } 4328 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra + 4329 tp_hdr_len, BPRI_LO))) { 4330 return (ENOMEM); 4331 } 4332 mp1->b_rptr += is->is_wroff_extra; 4333 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4334 4335 ipha->ipha_length = htons((uint16_t)pkt_len); 4336 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4337 4338 /* Copy transport header if any */ 4339 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4340 mp1->b_wptr += tp_hdr_len; 4341 4342 /* Add options */ 4343 ipha = (ipha_t *)mp1->b_rptr; 4344 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 4345 icmp->icmp_ip_snd_options_len); 4346 4347 /* Drop IP header and transport header from original */ 4348 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4349 4350 mp1->b_cont = mp; 4351 mp = mp1; 4352 /* 4353 * Massage source route putting first source 4354 * route in ipha_dst. 4355 */ 4356 (void) ip_massage_options(ipha, is->is_netstack); 4357 } 4358 4359 if (pktinfop != NULL) { 4360 /* 4361 * Over write the source address provided in the header 4362 */ 4363 if (pktinfop->ip4_addr != INADDR_ANY) { 4364 ipha->ipha_src = pktinfop->ip4_addr; 4365 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4366 } 4367 4368 if (pktinfop->ip4_ill_index != 0) { 4369 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4370 } 4371 } 4372 4373 mblk_setcred(mp, connp->conn_cred); 4374 ip_output_options(connp, mp, q, IP_WPUT, &optinfo); 4375 return (0); 4376 } 4377 4378 static int 4379 icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4380 { 4381 int err; 4382 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4383 icmp_stack_t *is = icmp->icmp_is; 4384 conn_t *connp = icmp->icmp_connp; 4385 4386 err = tsol_compute_label(DB_CREDDEF(mp, connp->conn_cred), dst, 4387 opt_storage, connp->conn_mac_exempt, 4388 is->is_netstack->netstack_ip); 4389 if (err == 0) { 4390 err = 
tsol_update_options(&icmp->icmp_ip_snd_options, 4391 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4392 opt_storage); 4393 } 4394 if (err != 0) { 4395 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4396 DTRACE_PROBE4( 4397 tx__ip__log__drop__updatelabel__icmp, 4398 char *, "icmp(1) failed to update options(2) on mp(3)", 4399 icmp_t *, icmp, char *, opt_storage, mblk_t *, mp); 4400 return (err); 4401 } 4402 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4403 return (0); 4404 } 4405 4406 /* 4407 * This routine handles all messages passed downstream. It either 4408 * consumes the message or passes it downstream; it never queues a 4409 * a message. 4410 */ 4411 static void 4412 icmp_wput(queue_t *q, mblk_t *mp) 4413 { 4414 uchar_t *rptr = mp->b_rptr; 4415 ipha_t *ipha; 4416 mblk_t *mp1; 4417 #define tudr ((struct T_unitdata_req *)rptr) 4418 size_t ip_len; 4419 conn_t *connp = Q_TO_CONN(q); 4420 icmp_t *icmp = connp->conn_icmp; 4421 icmp_stack_t *is = icmp->icmp_is; 4422 sin6_t *sin6; 4423 sin_t *sin; 4424 ipaddr_t v4dst; 4425 ip4_pkt_t pktinfo; 4426 ip4_pkt_t *pktinfop = &pktinfo; 4427 ip6_pkt_t ipp_s; /* For ancillary data options */ 4428 ip6_pkt_t *ipp = &ipp_s; 4429 int error; 4430 4431 ipp->ipp_fields = 0; 4432 ipp->ipp_sticky_ignored = 0; 4433 4434 switch (mp->b_datap->db_type) { 4435 case M_DATA: 4436 if (icmp->icmp_hdrincl) { 4437 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4438 ipha = (ipha_t *)mp->b_rptr; 4439 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4440 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4441 BUMP_MIB(&is->is_rawip_mib, 4442 rawipOutErrors); 4443 freemsg(mp); 4444 return; 4445 } 4446 ipha = (ipha_t *)mp->b_rptr; 4447 } 4448 /* 4449 * If this connection was used for v6 (inconceivable!) 4450 * or if we have a new destination, then it's time to 4451 * figure a new label. 
4452 */ 4453 if (is_system_labeled() && 4454 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4455 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4456 ipha->ipha_dst)) { 4457 error = icmp_update_label(icmp, mp, 4458 ipha->ipha_dst); 4459 if (error != 0) { 4460 icmp_ud_err(q, mp, error); 4461 return; 4462 } 4463 } 4464 error = icmp_wput_hdrincl(q, connp, mp, icmp, NULL); 4465 if (error != 0) 4466 icmp_ud_err(q, mp, error); 4467 return; 4468 } 4469 freemsg(mp); 4470 return; 4471 case M_PROTO: 4472 case M_PCPROTO: 4473 ip_len = mp->b_wptr - rptr; 4474 if (ip_len >= sizeof (struct T_unitdata_req)) { 4475 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4476 if (((union T_primitives *)rptr)->type 4477 == T_UNITDATA_REQ) 4478 break; 4479 } 4480 /* FALLTHRU */ 4481 default: 4482 icmp_wput_other(q, mp); 4483 return; 4484 } 4485 4486 /* Handle T_UNITDATA_REQ messages here. */ 4487 4488 mp1 = mp->b_cont; 4489 if (mp1 == NULL) { 4490 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4491 icmp_ud_err(q, mp, EPROTO); 4492 return; 4493 } 4494 4495 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4496 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4497 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4498 return; 4499 } 4500 4501 switch (icmp->icmp_family) { 4502 case AF_INET6: 4503 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4504 if (!OK_32PTR((char *)sin6) || 4505 tudr->DEST_length != sizeof (sin6_t) || 4506 sin6->sin6_family != AF_INET6) { 4507 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4508 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4509 return; 4510 } 4511 4512 /* No support for mapped addresses on raw sockets */ 4513 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4514 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4515 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4516 return; 4517 } 4518 4519 /* 4520 * Destination is a native IPv6 address. 4521 * Send out an IPv6 format packet. 
4522 */ 4523 if (tudr->OPT_length != 0) { 4524 int error; 4525 4526 error = 0; 4527 if (icmp_unitdata_opt_process(q, mp, &error, 4528 (void *)ipp) < 0) { 4529 /* failure */ 4530 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4531 icmp_ud_err(q, mp, error); 4532 return; 4533 } 4534 ASSERT(error == 0); 4535 } 4536 4537 error = raw_ip_send_data_v6(q, connp, mp1, sin6, ipp); 4538 goto done; 4539 4540 case AF_INET: 4541 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4542 if (!OK_32PTR((char *)sin) || 4543 tudr->DEST_length != sizeof (sin_t) || 4544 sin->sin_family != AF_INET) { 4545 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4546 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4547 return; 4548 } 4549 /* Extract and ipaddr */ 4550 v4dst = sin->sin_addr.s_addr; 4551 break; 4552 4553 default: 4554 ASSERT(0); 4555 } 4556 4557 pktinfop->ip4_ill_index = 0; 4558 pktinfop->ip4_addr = INADDR_ANY; 4559 4560 /* 4561 * If options passed in, feed it for verification and handling 4562 */ 4563 if (tudr->OPT_length != 0) { 4564 int error; 4565 4566 error = 0; 4567 if (icmp_unitdata_opt_process(q, mp, &error, 4568 (void *)pktinfop) < 0) { 4569 /* failure */ 4570 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4571 icmp_ud_err(q, mp, error); 4572 return; 4573 } 4574 ASSERT(error == 0); 4575 /* 4576 * Note: Success in processing options. 
4577 * mp option buffer represented by 4578 * OPT_length/offset now potentially modified 4579 * and contain option setting results 4580 */ 4581 } 4582 4583 error = raw_ip_send_data_v4(q, connp, mp1, v4dst, pktinfop); 4584 done: 4585 if (error != 0) { 4586 icmp_ud_err(q, mp, error); 4587 return; 4588 } else { 4589 mp->b_cont = NULL; 4590 freeb(mp); 4591 } 4592 } 4593 4594 4595 /* ARGSUSED */ 4596 static void 4597 icmp_wput_fallback(queue_t *q, mblk_t *mp) 4598 { 4599 #ifdef DEBUG 4600 cmn_err(CE_CONT, "icmp_wput_fallback: Message during fallback \n"); 4601 #endif 4602 freemsg(mp); 4603 } 4604 4605 static int 4606 raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp, ipaddr_t v4dst, 4607 ip4_pkt_t *pktinfop) 4608 { 4609 ipha_t *ipha; 4610 size_t ip_len; 4611 icmp_t *icmp = connp->conn_icmp; 4612 icmp_stack_t *is = icmp->icmp_is; 4613 int ip_hdr_length; 4614 ip_opt_info_t optinfo; 4615 4616 optinfo.ip_opt_flags = 0; 4617 optinfo.ip_opt_ill_index = 0; 4618 4619 if (icmp->icmp_state == TS_UNBND) { 4620 /* If a port has not been bound to the stream, fail. 
*/ 4621 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4622 return (EPROTO); 4623 } 4624 4625 if (v4dst == INADDR_ANY) 4626 v4dst = htonl(INADDR_LOOPBACK); 4627 4628 /* Check if our saved options are valid; update if not */ 4629 if (is_system_labeled() && 4630 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4631 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst)) { 4632 int error = icmp_update_label(icmp, mp, v4dst); 4633 4634 if (error != 0) 4635 return (error); 4636 } 4637 4638 /* Protocol 255 contains full IP headers */ 4639 if (icmp->icmp_hdrincl) 4640 return (icmp_wput_hdrincl(q, connp, mp, icmp, pktinfop)); 4641 4642 /* Add an IP header */ 4643 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4644 ipha = (ipha_t *)&mp->b_rptr[-ip_hdr_length]; 4645 if ((uchar_t *)ipha < mp->b_datap->db_base || 4646 mp->b_datap->db_ref != 1 || 4647 !OK_32PTR(ipha)) { 4648 mblk_t *mp1; 4649 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, 4650 BPRI_LO))) { 4651 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4652 return (ENOMEM); 4653 } 4654 mp1->b_cont = mp; 4655 ipha = (ipha_t *)mp1->b_datap->db_lim; 4656 mp1->b_wptr = (uchar_t *)ipha; 4657 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4658 mp = mp1; 4659 } 4660 #ifdef _BIG_ENDIAN 4661 /* Set version, header length, and tos */ 4662 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4663 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4664 icmp->icmp_type_of_service); 4665 /* Set ttl and protocol */ 4666 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4667 #else 4668 /* Set version, header length, and tos */ 4669 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4670 ((icmp->icmp_type_of_service << 8) | 4671 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4672 /* Set ttl and protocol */ 4673 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4674 #endif 4675 if (pktinfop->ip4_addr != INADDR_ANY) { 4676 ipha->ipha_src = pktinfop->ip4_addr; 4677 
optinfo.ip_opt_flags = IP_VERIFY_SRC; 4678 } else { 4679 4680 /* 4681 * Copy our address into the packet. If this is zero, 4682 * ip will fill in the real source address. 4683 */ 4684 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4685 } 4686 4687 ipha->ipha_fragment_offset_and_flags = 0; 4688 4689 if (pktinfop->ip4_ill_index != 0) { 4690 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4691 } 4692 4693 4694 /* 4695 * For the socket of SOCK_RAW type, the checksum is provided in the 4696 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4697 * tell IP that the application has sent a complete IP header and not 4698 * to compute the transport checksum nor change the DF flag. 4699 */ 4700 ipha->ipha_ident = IP_HDR_INCLUDED; 4701 4702 /* Finish common formatting of the packet. */ 4703 mp->b_rptr = (uchar_t *)ipha; 4704 4705 ip_len = mp->b_wptr - (uchar_t *)ipha; 4706 if (mp->b_cont != NULL) 4707 ip_len += msgdsize(mp->b_cont); 4708 4709 /* 4710 * Set the length into the IP header. 4711 * If the length is greater than the maximum allowed by IP, 4712 * then free the message and return. Do not try and send it 4713 * as this can cause problems in layers below. 4714 */ 4715 if (ip_len > IP_MAXPACKET) { 4716 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4717 return (EMSGSIZE); 4718 } 4719 ipha->ipha_length = htons((uint16_t)ip_len); 4720 /* 4721 * Copy in the destination address request 4722 */ 4723 ipha->ipha_dst = v4dst; 4724 4725 /* 4726 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4727 */ 4728 if (CLASSD(v4dst)) 4729 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4730 4731 /* Copy in options if any */ 4732 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4733 bcopy(icmp->icmp_ip_snd_options, 4734 &ipha[1], icmp->icmp_ip_snd_options_len); 4735 /* 4736 * Massage source route putting first source route in ipha_dst. 4737 * Ignore the destination in the T_unitdata_req. 
4738 */ 4739 (void) ip_massage_options(ipha, is->is_netstack); 4740 } 4741 4742 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 4743 mblk_setcred(mp, connp->conn_cred); 4744 ip_output_options(connp, mp, q, IP_WPUT, &optinfo); 4745 return (0); 4746 } 4747 4748 static int 4749 icmp_update_label_v6(icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4750 { 4751 int err; 4752 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4753 icmp_stack_t *is = icmp->icmp_is; 4754 conn_t *connp = icmp->icmp_connp; 4755 4756 err = tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), dst, 4757 opt_storage, connp->conn_mac_exempt, 4758 is->is_netstack->netstack_ip); 4759 if (err == 0) { 4760 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4761 &icmp->icmp_label_len_v6, opt_storage); 4762 } 4763 if (err != 0) { 4764 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4765 DTRACE_PROBE4( 4766 tx__ip__log__drop__updatelabel__icmp6, 4767 char *, "icmp(1) failed to update options(2) on mp(3)", 4768 icmp_t *, icmp, char *, opt_storage, mblk_t *, mp); 4769 return (err); 4770 } 4771 4772 icmp->icmp_v6lastdst = *dst; 4773 return (0); 4774 } 4775 4776 /* 4777 * raw_ip_send_data_v6(): 4778 * Assumes that icmp_wput did some sanity checking on the destination 4779 * address, but that the label may not yet be correct. 4780 */ 4781 static int 4782 raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp, sin6_t *sin6, 4783 ip6_pkt_t *ipp) 4784 { 4785 ip6_t *ip6h; 4786 ip6i_t *ip6i; /* mp->b_rptr even if no ip6i_t */ 4787 int ip_hdr_len = IPV6_HDR_LEN; 4788 size_t ip_len; 4789 icmp_t *icmp = connp->conn_icmp; 4790 icmp_stack_t *is = icmp->icmp_is; 4791 ip6_pkt_t *tipp; 4792 uint32_t csum = 0; 4793 uint_t ignore = 0; 4794 uint_t option_exists = 0, is_sticky = 0; 4795 uint8_t *cp; 4796 uint8_t *nxthdr_ptr; 4797 in6_addr_t ip6_dst; 4798 4799 /* 4800 * If the local address is a mapped address return 4801 * an error. 
4802 * It would be possible to send an IPv6 packet but the 4803 * response would never make it back to the application 4804 * since it is bound to a mapped address. 4805 */ 4806 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4807 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4808 return (EADDRNOTAVAIL); 4809 } 4810 4811 ignore = ipp->ipp_sticky_ignored; 4812 if (sin6->sin6_scope_id != 0 && 4813 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4814 /* 4815 * IPPF_SCOPE_ID is special. It's neither a sticky 4816 * option nor ancillary data. It needs to be 4817 * explicitly set in options_exists. 4818 */ 4819 option_exists |= IPPF_SCOPE_ID; 4820 } 4821 4822 /* 4823 * Compute the destination address 4824 */ 4825 ip6_dst = sin6->sin6_addr; 4826 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4827 ip6_dst = ipv6_loopback; 4828 4829 /* 4830 * If we're not going to the same destination as last time, then 4831 * recompute the label required. This is done in a separate routine to 4832 * avoid blowing up our stack here. 4833 */ 4834 if (is_system_labeled() && 4835 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst)) { 4836 int error = 0; 4837 4838 error = icmp_update_label_v6(icmp, mp, &ip6_dst); 4839 if (error != 0) 4840 return (error); 4841 } 4842 4843 /* 4844 * If there's a security label here, then we ignore any options the 4845 * user may try to set. We keep the peer's label as a hidden sticky 4846 * option. 4847 */ 4848 if (icmp->icmp_label_len_v6 > 0) { 4849 ignore &= ~IPPF_HOPOPTS; 4850 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4851 } 4852 4853 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4854 (ipp->ipp_fields == 0)) { 4855 /* No sticky options nor ancillary data. */ 4856 goto no_options; 4857 } 4858 4859 /* 4860 * Go through the options figuring out where each is going to 4861 * come from and build two masks. The first mask indicates if 4862 * the option exists at all. The second mask indicates if the 4863 * option is sticky or ancillary. 
4864 */ 4865 if (!(ignore & IPPF_HOPOPTS)) { 4866 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4867 option_exists |= IPPF_HOPOPTS; 4868 ip_hdr_len += ipp->ipp_hopoptslen; 4869 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4870 option_exists |= IPPF_HOPOPTS; 4871 is_sticky |= IPPF_HOPOPTS; 4872 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4873 } 4874 } 4875 4876 if (!(ignore & IPPF_RTHDR)) { 4877 if (ipp->ipp_fields & IPPF_RTHDR) { 4878 option_exists |= IPPF_RTHDR; 4879 ip_hdr_len += ipp->ipp_rthdrlen; 4880 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4881 option_exists |= IPPF_RTHDR; 4882 is_sticky |= IPPF_RTHDR; 4883 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4884 } 4885 } 4886 4887 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4888 /* 4889 * Need to have a router header to use these. 4890 */ 4891 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4892 option_exists |= IPPF_RTDSTOPTS; 4893 ip_hdr_len += ipp->ipp_rtdstoptslen; 4894 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4895 option_exists |= IPPF_RTDSTOPTS; 4896 is_sticky |= IPPF_RTDSTOPTS; 4897 ip_hdr_len += 4898 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4899 } 4900 } 4901 4902 if (!(ignore & IPPF_DSTOPTS)) { 4903 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4904 option_exists |= IPPF_DSTOPTS; 4905 ip_hdr_len += ipp->ipp_dstoptslen; 4906 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4907 option_exists |= IPPF_DSTOPTS; 4908 is_sticky |= IPPF_DSTOPTS; 4909 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4910 } 4911 } 4912 4913 if (!(ignore & IPPF_IFINDEX)) { 4914 if (ipp->ipp_fields & IPPF_IFINDEX) { 4915 option_exists |= IPPF_IFINDEX; 4916 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4917 option_exists |= IPPF_IFINDEX; 4918 is_sticky |= IPPF_IFINDEX; 4919 } 4920 } 4921 4922 if (!(ignore & IPPF_ADDR)) { 4923 if (ipp->ipp_fields & IPPF_ADDR) { 4924 option_exists |= IPPF_ADDR; 4925 } else if 
(icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4926 option_exists |= IPPF_ADDR; 4927 is_sticky |= IPPF_ADDR; 4928 } 4929 } 4930 4931 if (!(ignore & IPPF_DONTFRAG)) { 4932 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4933 option_exists |= IPPF_DONTFRAG; 4934 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4935 option_exists |= IPPF_DONTFRAG; 4936 is_sticky |= IPPF_DONTFRAG; 4937 } 4938 } 4939 4940 if (!(ignore & IPPF_USE_MIN_MTU)) { 4941 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4942 option_exists |= IPPF_USE_MIN_MTU; 4943 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4944 IPPF_USE_MIN_MTU) { 4945 option_exists |= IPPF_USE_MIN_MTU; 4946 is_sticky |= IPPF_USE_MIN_MTU; 4947 } 4948 } 4949 4950 if (!(ignore & IPPF_NEXTHOP)) { 4951 if (ipp->ipp_fields & IPPF_NEXTHOP) { 4952 option_exists |= IPPF_NEXTHOP; 4953 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) { 4954 option_exists |= IPPF_NEXTHOP; 4955 is_sticky |= IPPF_NEXTHOP; 4956 } 4957 } 4958 4959 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 4960 option_exists |= IPPF_HOPLIMIT; 4961 /* IPV6_HOPLIMIT can never be sticky */ 4962 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 4963 4964 if (!(ignore & IPPF_UNICAST_HOPS) && 4965 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 4966 option_exists |= IPPF_UNICAST_HOPS; 4967 is_sticky |= IPPF_UNICAST_HOPS; 4968 } 4969 4970 if (!(ignore & IPPF_MULTICAST_HOPS) && 4971 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 4972 option_exists |= IPPF_MULTICAST_HOPS; 4973 is_sticky |= IPPF_MULTICAST_HOPS; 4974 } 4975 4976 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 4977 /* This is a sticky socket option only */ 4978 option_exists |= IPPF_NO_CKSUM; 4979 is_sticky |= IPPF_NO_CKSUM; 4980 } 4981 4982 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 4983 /* This is a sticky socket option only */ 4984 option_exists |= IPPF_RAW_CKSUM; 4985 is_sticky |= IPPF_RAW_CKSUM; 4986 } 4987 4988 if (!(ignore 
& IPPF_TCLASS)) { 4989 if (ipp->ipp_fields & IPPF_TCLASS) { 4990 option_exists |= IPPF_TCLASS; 4991 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 4992 option_exists |= IPPF_TCLASS; 4993 is_sticky |= IPPF_TCLASS; 4994 } 4995 } 4996 4997 no_options: 4998 4999 /* 5000 * If any options carried in the ip6i_t were specified, we 5001 * need to account for the ip6i_t in the data we'll be sending 5002 * down. 5003 */ 5004 if (option_exists & IPPF_HAS_IP6I) 5005 ip_hdr_len += sizeof (ip6i_t); 5006 5007 /* check/fix buffer config, setup pointers into it */ 5008 ip6h = (ip6_t *)&mp->b_rptr[-ip_hdr_len]; 5009 if ((mp->b_datap->db_ref != 1) || 5010 ((unsigned char *)ip6h < mp->b_datap->db_base) || 5011 !OK_32PTR(ip6h)) { 5012 mblk_t *mp1; 5013 5014 /* Try to get everything in a single mblk next time */ 5015 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 5016 icmp->icmp_max_hdr_len = ip_hdr_len; 5017 5018 (void) proto_set_tx_wroff(q == NULL ? NULL:RD(q), connp, 5019 icmp->icmp_max_hdr_len + is->is_wroff_extra); 5020 } 5021 mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); 5022 if (!mp1) { 5023 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5024 return (ENOMEM); 5025 } 5026 mp1->b_cont = mp; 5027 mp1->b_wptr = mp1->b_datap->db_lim; 5028 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 5029 mp = mp1; 5030 } 5031 mp->b_rptr = (unsigned char *)ip6h; 5032 ip6i = (ip6i_t *)ip6h; 5033 5034 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 5035 if (option_exists & IPPF_HAS_IP6I) { 5036 ip6h = (ip6_t *)&ip6i[1]; 5037 ip6i->ip6i_flags = 0; 5038 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5039 5040 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 5041 if (option_exists & IPPF_SCOPE_ID) { 5042 ip6i->ip6i_flags |= IP6I_IFINDEX; 5043 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 5044 } else if (option_exists & IPPF_IFINDEX) { 5045 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 5046 ASSERT(tipp->ipp_ifindex != 0); 5047 ip6i->ip6i_flags |= IP6I_IFINDEX; 5048 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 5049 } 5050 5051 if (option_exists & IPPF_RAW_CKSUM) { 5052 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 5053 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 5054 } 5055 5056 if (option_exists & IPPF_NO_CKSUM) { 5057 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 5058 } 5059 5060 if (option_exists & IPPF_ADDR) { 5061 /* 5062 * Enable per-packet source address verification if 5063 * IPV6_PKTINFO specified the source address. 5064 * ip6_src is set in the transport's _wput function. 5065 */ 5066 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 5067 } 5068 5069 if (option_exists & IPPF_DONTFRAG) { 5070 ip6i->ip6i_flags |= IP6I_DONTFRAG; 5071 } 5072 5073 if (option_exists & IPPF_USE_MIN_MTU) { 5074 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 5075 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 5076 } 5077 5078 if (option_exists & IPPF_NEXTHOP) { 5079 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 5080 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 5081 ip6i->ip6i_flags |= IP6I_NEXTHOP; 5082 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 5083 } 5084 5085 /* 5086 * tell IP this is an ip6i_t private header 5087 */ 5088 ip6i->ip6i_nxt = IPPROTO_RAW; 5089 } 5090 5091 /* Initialize IPv6 header */ 5092 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5093 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 5094 5095 /* Set the hoplimit of the outgoing packet. 
*/ 5096 if (option_exists & IPPF_HOPLIMIT) { 5097 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 5098 ip6h->ip6_hops = ipp->ipp_hoplimit; 5099 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5100 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 5101 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 5102 if (option_exists & IPPF_MULTICAST_HOPS) 5103 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5104 } else { 5105 ip6h->ip6_hops = icmp->icmp_ttl; 5106 if (option_exists & IPPF_UNICAST_HOPS) 5107 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5108 } 5109 5110 if (option_exists & IPPF_ADDR) { 5111 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 5112 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 5113 ip6h->ip6_src = tipp->ipp_addr; 5114 } else { 5115 /* 5116 * The source address was not set using IPV6_PKTINFO. 5117 * First look at the bound source. 5118 * If unspecified fallback to __sin6_src_id. 5119 */ 5120 ip6h->ip6_src = icmp->icmp_v6src; 5121 if (sin6->__sin6_src_id != 0 && 5122 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 5123 ip_srcid_find_id(sin6->__sin6_src_id, 5124 &ip6h->ip6_src, icmp->icmp_zoneid, 5125 is->is_netstack); 5126 } 5127 } 5128 5129 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 5130 cp = (uint8_t *)&ip6h[1]; 5131 5132 /* 5133 * Here's where we have to start stringing together 5134 * any extension headers in the right order: 5135 * Hop-by-hop, destination, routing, and final destination opts. 
5136 */ 5137 if (option_exists & IPPF_HOPOPTS) { 5138 /* Hop-by-hop options */ 5139 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 5140 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 5141 5142 *nxthdr_ptr = IPPROTO_HOPOPTS; 5143 nxthdr_ptr = &hbh->ip6h_nxt; 5144 5145 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 5146 cp += tipp->ipp_hopoptslen; 5147 } 5148 /* 5149 * En-route destination options 5150 * Only do them if there's a routing header as well 5151 */ 5152 if (option_exists & IPPF_RTDSTOPTS) { 5153 ip6_dest_t *dst = (ip6_dest_t *)cp; 5154 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 5155 5156 *nxthdr_ptr = IPPROTO_DSTOPTS; 5157 nxthdr_ptr = &dst->ip6d_nxt; 5158 5159 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 5160 cp += tipp->ipp_rtdstoptslen; 5161 } 5162 /* 5163 * Routing header next 5164 */ 5165 if (option_exists & IPPF_RTHDR) { 5166 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 5167 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 5168 5169 *nxthdr_ptr = IPPROTO_ROUTING; 5170 nxthdr_ptr = &rt->ip6r_nxt; 5171 5172 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 5173 cp += tipp->ipp_rthdrlen; 5174 } 5175 /* 5176 * Do ultimate destination options 5177 */ 5178 if (option_exists & IPPF_DSTOPTS) { 5179 ip6_dest_t *dest = (ip6_dest_t *)cp; 5180 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 5181 5182 *nxthdr_ptr = IPPROTO_DSTOPTS; 5183 nxthdr_ptr = &dest->ip6d_nxt; 5184 5185 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 5186 cp += tipp->ipp_dstoptslen; 5187 } 5188 5189 /* 5190 * Now set the last header pointer to the proto passed in 5191 */ 5192 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 5193 *nxthdr_ptr = icmp->icmp_proto; 5194 5195 /* 5196 * Copy in the destination address 5197 */ 5198 ip6h->ip6_dst = ip6_dst; 5199 5200 ip6h->ip6_vcf = 5201 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 5202 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 5203 5204 if (option_exists & IPPF_TCLASS) { 5205 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 5206 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 5207 tipp->ipp_tclass); 5208 } 5209 if (option_exists & IPPF_RTHDR) { 5210 ip6_rthdr_t *rth; 5211 5212 /* 5213 * Perform any processing needed for source routing. 5214 * We know that all extension headers will be in the same mblk 5215 * as the IPv6 header. 5216 */ 5217 rth = ip_find_rthdr_v6(ip6h, mp->b_wptr); 5218 if (rth != NULL && rth->ip6r_segleft != 0) { 5219 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 5220 /* 5221 * Drop packet - only support Type 0 routing. 5222 * Notify the application as well. 5223 */ 5224 BUMP_MIB(&is->is_rawip_mib, 5225 rawipOutErrors); 5226 return (EPROTO); 5227 } 5228 /* 5229 * rth->ip6r_len is twice the number of 5230 * addresses in the header 5231 */ 5232 if (rth->ip6r_len & 0x1) { 5233 BUMP_MIB(&is->is_rawip_mib, 5234 rawipOutErrors); 5235 return (EPROTO); 5236 } 5237 /* 5238 * Shuffle the routing header and ip6_dst 5239 * addresses, and get the checksum difference 5240 * between the first hop (in ip6_dst) and 5241 * the destination (in the last routing hdr entry). 5242 */ 5243 csum = ip_massage_options_v6(ip6h, rth, 5244 is->is_netstack); 5245 /* 5246 * Verify that the first hop isn't a mapped address. 5247 * Routers along the path need to do this verification 5248 * for subsequent hops. 5249 */ 5250 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 5251 BUMP_MIB(&is->is_rawip_mib, 5252 rawipOutErrors); 5253 return (EADDRNOTAVAIL); 5254 } 5255 } 5256 } 5257 5258 ip_len = mp->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 5259 if (mp->b_cont != NULL) 5260 ip_len += msgdsize(mp->b_cont); 5261 5262 /* 5263 * Set the length into the IP header. 5264 * If the length is greater than the maximum allowed by IP, 5265 * then free the message and return. Do not try and send it 5266 * as this can cause problems in layers below. 
5267 */ 5268 if (ip_len > IP_MAXPACKET) { 5269 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5270 return (EMSGSIZE); 5271 } 5272 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 5273 uint_t cksum_off; /* From ip6i == mp->b_rptr */ 5274 uint16_t *cksum_ptr; 5275 uint_t ext_hdrs_len; 5276 5277 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 5278 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 5279 icmp->icmp_checksum_off == 2); 5280 5281 /* 5282 * We make it easy for IP to include our pseudo header 5283 * by putting our length in uh_checksum, modified (if 5284 * we have a routing header) by the checksum difference 5285 * between the ultimate destination and first hop addresses. 5286 * Note: ICMPv6 must always checksum the packet. 5287 */ 5288 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 5289 if (cksum_off + sizeof (uint16_t) > mp->b_wptr - mp->b_rptr) { 5290 if (!pullupmsg(mp, cksum_off + sizeof (uint16_t))) { 5291 BUMP_MIB(&is->is_rawip_mib, 5292 rawipOutErrors); 5293 freemsg(mp); 5294 return (0); 5295 } 5296 ip6i = (ip6i_t *)mp->b_rptr; 5297 if (ip6i->ip6i_nxt == IPPROTO_RAW) 5298 ip6h = (ip6_t *)&ip6i[1]; 5299 else 5300 ip6h = (ip6_t *)ip6i; 5301 } 5302 /* Add payload length to checksum */ 5303 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 5304 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 5305 csum += htons(ip_len - ext_hdrs_len); 5306 5307 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 5308 csum = (csum & 0xFFFF) + (csum >> 16); 5309 *cksum_ptr = (uint16_t)csum; 5310 } 5311 5312 #ifdef _LITTLE_ENDIAN 5313 ip_len = htons(ip_len); 5314 #endif 5315 ip6h->ip6_plen = (uint16_t)ip_len; 5316 5317 /* We're done. 
Pass the packet to IP */ 5318 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 5319 ip_output_v6(icmp->icmp_connp, mp, q, IP_WPUT); 5320 return (0); 5321 } 5322 5323 static void 5324 icmp_wput_other(queue_t *q, mblk_t *mp) 5325 { 5326 uchar_t *rptr = mp->b_rptr; 5327 struct iocblk *iocp; 5328 #define tudr ((struct T_unitdata_req *)rptr) 5329 conn_t *connp = Q_TO_CONN(q); 5330 icmp_t *icmp = connp->conn_icmp; 5331 icmp_stack_t *is = icmp->icmp_is; 5332 cred_t *cr; 5333 5334 cr = DB_CREDDEF(mp, connp->conn_cred); 5335 5336 switch (mp->b_datap->db_type) { 5337 case M_PROTO: 5338 case M_PCPROTO: 5339 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5340 /* 5341 * If the message does not contain a PRIM_type, 5342 * throw it away. 5343 */ 5344 freemsg(mp); 5345 return; 5346 } 5347 switch (((union T_primitives *)rptr)->type) { 5348 case T_ADDR_REQ: 5349 icmp_addr_req(q, mp); 5350 return; 5351 case O_T_BIND_REQ: 5352 case T_BIND_REQ: 5353 icmp_tpi_bind(q, mp); 5354 return; 5355 case T_CONN_REQ: 5356 icmp_tpi_connect(q, mp); 5357 return; 5358 case T_CAPABILITY_REQ: 5359 icmp_capability_req(q, mp); 5360 return; 5361 case T_INFO_REQ: 5362 icmp_info_req(q, mp); 5363 return; 5364 case T_UNITDATA_REQ: 5365 /* 5366 * If a T_UNITDATA_REQ gets here, the address must 5367 * be bad. Valid T_UNITDATA_REQs are found above 5368 * and break to below this switch. 
5369 */ 5370 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5371 return; 5372 case T_UNBIND_REQ: 5373 icmp_tpi_unbind(q, mp); 5374 return; 5375 5376 case T_SVR4_OPTMGMT_REQ: 5377 if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get, 5378 cr)) { 5379 /* Only IP can return anything meaningful */ 5380 (void) svr4_optcom_req(q, mp, cr, 5381 &icmp_opt_obj, B_TRUE); 5382 } 5383 return; 5384 5385 case T_OPTMGMT_REQ: 5386 /* Only IP can return anything meaningful */ 5387 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE); 5388 return; 5389 5390 case T_DISCON_REQ: 5391 icmp_tpi_disconnect(q, mp); 5392 return; 5393 5394 /* The following TPI message is not supported by icmp. */ 5395 case O_T_CONN_RES: 5396 case T_CONN_RES: 5397 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5398 return; 5399 5400 /* The following 3 TPI requests are illegal for icmp. */ 5401 case T_DATA_REQ: 5402 case T_EXDATA_REQ: 5403 case T_ORDREL_REQ: 5404 freemsg(mp); 5405 (void) putctl1(RD(q), M_ERROR, EPROTO); 5406 return; 5407 default: 5408 break; 5409 } 5410 break; 5411 case M_IOCTL: 5412 iocp = (struct iocblk *)mp->b_rptr; 5413 switch (iocp->ioc_cmd) { 5414 case TI_GETPEERNAME: 5415 if (icmp->icmp_state != TS_DATA_XFER) { 5416 /* 5417 * If a default destination address has not 5418 * been associated with the stream, then we 5419 * don't know the peer's name. 5420 */ 5421 iocp->ioc_error = ENOTCONN; 5422 err_ret:; 5423 iocp->ioc_count = 0; 5424 mp->b_datap->db_type = M_IOCACK; 5425 qreply(q, mp); 5426 return; 5427 } 5428 /* FALLTHRU */ 5429 case TI_GETMYNAME: 5430 /* 5431 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5432 * need to copyin the user's strbuf structure. 5433 * Processing will continue in the M_IOCDATA case 5434 * below. 
5435 */ 5436 mi_copyin(q, mp, NULL, 5437 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5438 return; 5439 case ND_SET: 5440 /* nd_getset performs the necessary error checking */ 5441 case ND_GET: 5442 if (nd_getset(q, is->is_nd, mp)) { 5443 qreply(q, mp); 5444 return; 5445 } 5446 break; 5447 case _SIOCSOCKFALLBACK: 5448 /* 5449 * socket is falling back to be a 5450 * streams socket. Nothing to do 5451 */ 5452 iocp->ioc_count = 0; 5453 iocp->ioc_rval = 0; 5454 qreply(q, mp); 5455 return; 5456 default: 5457 break; 5458 } 5459 break; 5460 case M_IOCDATA: 5461 icmp_wput_iocdata(q, mp); 5462 return; 5463 default: 5464 break; 5465 } 5466 ip_wput(q, mp); 5467 } 5468 5469 /* 5470 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5471 * messages. 5472 */ 5473 static void 5474 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5475 { 5476 mblk_t *mp1; 5477 STRUCT_HANDLE(strbuf, sb); 5478 icmp_t *icmp; 5479 uint_t addrlen; 5480 uint_t error; 5481 5482 /* Make sure it is one of ours. */ 5483 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5484 case TI_GETMYNAME: 5485 case TI_GETPEERNAME: 5486 break; 5487 default: 5488 icmp = Q_TO_ICMP(q); 5489 ip_output(icmp->icmp_connp, mp, q, IP_WPUT); 5490 return; 5491 } 5492 switch (mi_copy_state(q, mp, &mp1)) { 5493 case -1: 5494 return; 5495 case MI_COPY_CASE(MI_COPY_IN, 1): 5496 break; 5497 case MI_COPY_CASE(MI_COPY_OUT, 1): 5498 /* 5499 * The address has been copied out, so now 5500 * copyout the strbuf. 5501 */ 5502 mi_copyout(q, mp); 5503 return; 5504 case MI_COPY_CASE(MI_COPY_OUT, 2): 5505 /* 5506 * The address and strbuf have been copied out. 5507 * We're done, so just acknowledge the original 5508 * M_IOCTL. 5509 */ 5510 mi_copy_done(q, mp, 0); 5511 return; 5512 default: 5513 /* 5514 * Something strange has happened, so acknowledge 5515 * the original M_IOCTL with an EPROTO error. 
5516 */ 5517 mi_copy_done(q, mp, EPROTO); 5518 return; 5519 } 5520 /* 5521 * Now we have the strbuf structure for TI_GETMYNAME 5522 * and TI_GETPEERNAME. Next we copyout the requested 5523 * address and then we'll copyout the strbuf. 5524 */ 5525 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5526 (void *)mp1->b_rptr); 5527 icmp = Q_TO_ICMP(q); 5528 if (icmp->icmp_family == AF_INET) 5529 addrlen = sizeof (sin_t); 5530 else 5531 addrlen = sizeof (sin6_t); 5532 5533 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5534 mi_copy_done(q, mp, EINVAL); 5535 return; 5536 } 5537 5538 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5539 5540 if (mp1 == NULL) 5541 return; 5542 5543 rw_enter(&icmp->icmp_rwlock, RW_READER); 5544 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5545 case TI_GETMYNAME: 5546 error = rawip_do_getsockname(icmp, (void *)mp1->b_rptr, 5547 &addrlen); 5548 break; 5549 case TI_GETPEERNAME: 5550 error = rawip_do_getpeername(icmp, (void *)mp1->b_rptr, 5551 &addrlen); 5552 break; 5553 } 5554 rw_exit(&icmp->icmp_rwlock); 5555 5556 if (error != 0) { 5557 mi_copy_done(q, mp, error); 5558 } else { 5559 mp1->b_wptr += addrlen; 5560 STRUCT_FSET(sb, len, addrlen); 5561 5562 /* Copy out the address */ 5563 mi_copyout(q, mp); 5564 } 5565 } 5566 5567 static int 5568 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5569 void *thisdg_attrs) 5570 { 5571 conn_t *connp = Q_TO_CONN(q); 5572 struct T_unitdata_req *udreqp; 5573 int is_absreq_failure; 5574 cred_t *cr; 5575 5576 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5577 *errorp = 0; 5578 5579 cr = DB_CREDDEF(mp, connp->conn_cred); 5580 5581 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5582 udreqp->OPT_offset, cr, &icmp_opt_obj, 5583 thisdg_attrs, &is_absreq_failure); 5584 5585 if (*errorp != 0) { 5586 /* 5587 * Note: No special action needed in this 5588 * module for "is_absreq_failure" 5589 */ 5590 return (-1); /* failure */ 5591 } 5592 ASSERT(is_absreq_failure == 
0); 5593 return (0); /* success */ 5594 } 5595 5596 void 5597 icmp_ddi_g_init(void) 5598 { 5599 icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5600 icmp_opt_obj.odb_opt_arr_cnt); 5601 5602 /* 5603 * We want to be informed each time a stack is created or 5604 * destroyed in the kernel, so we can maintain the 5605 * set of icmp_stack_t's. 5606 */ 5607 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); 5608 } 5609 5610 void 5611 icmp_ddi_g_destroy(void) 5612 { 5613 netstack_unregister(NS_ICMP); 5614 } 5615 5616 #define INET_NAME "ip" 5617 5618 /* 5619 * Initialize the ICMP stack instance. 5620 */ 5621 static void * 5622 rawip_stack_init(netstackid_t stackid, netstack_t *ns) 5623 { 5624 icmp_stack_t *is; 5625 icmpparam_t *pa; 5626 int error = 0; 5627 major_t major; 5628 5629 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); 5630 is->is_netstack = ns; 5631 5632 pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); 5633 is->is_param_arr = pa; 5634 bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); 5635 5636 (void) icmp_param_register(&is->is_nd, 5637 is->is_param_arr, A_CNT(icmp_param_arr)); 5638 is->is_ksp = rawip_kstat_init(stackid); 5639 5640 major = mod_name_to_major(INET_NAME); 5641 error = ldi_ident_from_major(major, &is->is_ldi_ident); 5642 ASSERT(error == 0); 5643 return (is); 5644 } 5645 5646 /* 5647 * Free the ICMP stack instance. 
5648 */ 5649 static void 5650 rawip_stack_fini(netstackid_t stackid, void *arg) 5651 { 5652 icmp_stack_t *is = (icmp_stack_t *)arg; 5653 5654 nd_free(&is->is_nd); 5655 kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); 5656 is->is_param_arr = NULL; 5657 5658 rawip_kstat_fini(stackid, is->is_ksp); 5659 is->is_ksp = NULL; 5660 ldi_ident_release(is->is_ldi_ident); 5661 kmem_free(is, sizeof (*is)); 5662 } 5663 5664 static void * 5665 rawip_kstat_init(netstackid_t stackid) { 5666 kstat_t *ksp; 5667 5668 rawip_named_kstat_t template = { 5669 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5670 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5671 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5672 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5673 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5674 }; 5675 5676 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", 5677 KSTAT_TYPE_NAMED, 5678 NUM_OF_FIELDS(rawip_named_kstat_t), 5679 0, stackid); 5680 if (ksp == NULL || ksp->ks_data == NULL) 5681 return (NULL); 5682 5683 bcopy(&template, ksp->ks_data, sizeof (template)); 5684 ksp->ks_update = rawip_kstat_update; 5685 ksp->ks_private = (void *)(uintptr_t)stackid; 5686 5687 kstat_install(ksp); 5688 return (ksp); 5689 } 5690 5691 static void 5692 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) 5693 { 5694 if (ksp != NULL) { 5695 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 5696 kstat_delete_netstack(ksp, stackid); 5697 } 5698 } 5699 5700 static int 5701 rawip_kstat_update(kstat_t *ksp, int rw) 5702 { 5703 rawip_named_kstat_t *rawipkp; 5704 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; 5705 netstack_t *ns; 5706 icmp_stack_t *is; 5707 5708 if ((ksp == NULL) || (ksp->ks_data == NULL)) 5709 return (EIO); 5710 5711 if (rw == KSTAT_WRITE) 5712 return (EACCES); 5713 5714 rawipkp = (rawip_named_kstat_t *)ksp->ks_data; 5715 5716 ns = netstack_find_by_stackid(stackid); 5717 if (ns == NULL) 5718 return (-1); 5719 is = ns->netstack_icmp; 5720 if (is == NULL) { 5721 
netstack_rele(ns); 5722 return (-1); 5723 } 5724 rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; 5725 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; 5726 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; 5727 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; 5728 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; 5729 netstack_rele(ns); 5730 return (0); 5731 } 5732 5733 /* ARGSUSED */ 5734 int 5735 rawip_accept(sock_lower_handle_t lproto_handle, 5736 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 5737 cred_t *cr) 5738 { 5739 return (EOPNOTSUPP); 5740 } 5741 5742 /* ARGSUSED */ 5743 int 5744 rawip_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 5745 socklen_t len, cred_t *cr) 5746 { 5747 conn_t *connp = (conn_t *)proto_handle; 5748 int error; 5749 5750 /* Binding to a NULL address really means unbind */ 5751 if (sa == NULL) 5752 error = rawip_do_unbind(connp); 5753 else 5754 error = rawip_do_bind(connp, sa, len); 5755 5756 if (error < 0) { 5757 if (error == -TOUTSTATE) 5758 error = EINVAL; 5759 else 5760 error = proto_tlitosyserr(-error); 5761 } 5762 return (error); 5763 } 5764 5765 static int 5766 rawip_implicit_bind(conn_t *connp) 5767 { 5768 sin6_t sin6addr; 5769 sin_t *sin; 5770 sin6_t *sin6; 5771 socklen_t len; 5772 int error; 5773 5774 if (connp->conn_icmp->icmp_family == AF_INET) { 5775 len = sizeof (struct sockaddr_in); 5776 sin = (sin_t *)&sin6addr; 5777 *sin = sin_null; 5778 sin->sin_family = AF_INET; 5779 sin->sin_addr.s_addr = INADDR_ANY; 5780 } else { 5781 ASSERT(connp->conn_icmp->icmp_family == AF_INET6); 5782 len = sizeof (sin6_t); 5783 sin6 = (sin6_t *)&sin6addr; 5784 *sin6 = sin6_null; 5785 sin6->sin6_family = AF_INET6; 5786 V6_SET_ZERO(sin6->sin6_addr); 5787 } 5788 5789 error = rawip_do_bind(connp, (struct sockaddr *)&sin6addr, len); 5790 5791 return ((error < 0) ? 
proto_tlitosyserr(-error) : error); 5792 } 5793 5794 static int 5795 rawip_unbind(conn_t *connp) 5796 { 5797 int error; 5798 5799 error = rawip_do_unbind(connp); 5800 if (error < 0) { 5801 error = proto_tlitosyserr(-error); 5802 } 5803 return (error); 5804 } 5805 5806 /* ARGSUSED */ 5807 int 5808 rawip_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 5809 { 5810 return (EOPNOTSUPP); 5811 } 5812 5813 /* ARGSUSED */ 5814 int 5815 rawip_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 5816 socklen_t len, sock_connid_t *id, cred_t *cr) 5817 { 5818 conn_t *connp = (conn_t *)proto_handle; 5819 icmp_t *icmp = connp->conn_icmp; 5820 int error; 5821 boolean_t did_bind = B_FALSE; 5822 5823 if (sa == NULL) { 5824 /* 5825 * Disconnect 5826 * Make sure we are connected 5827 */ 5828 if (icmp->icmp_state != TS_DATA_XFER) 5829 return (EINVAL); 5830 5831 error = icmp_disconnect(connp); 5832 return (error); 5833 } 5834 5835 error = proto_verify_ip_addr(icmp->icmp_family, sa, len); 5836 if (error != 0) 5837 return (error); 5838 5839 /* do an implicit bind if necessary */ 5840 if (icmp->icmp_state == TS_UNBND) { 5841 error = rawip_implicit_bind(connp); 5842 /* 5843 * We could be racing with an actual bind, in which case 5844 * we would see EPROTO. We cross our fingers and try 5845 * to connect. 
5846 */ 5847 if (!(error == 0 || error == EPROTO)) 5848 return (error); 5849 did_bind = B_TRUE; 5850 } 5851 5852 /* 5853 * set SO_DGRAM_ERRIND 5854 */ 5855 icmp->icmp_dgram_errind = B_TRUE; 5856 5857 error = rawip_do_connect(connp, sa, len); 5858 5859 if (error != 0 && did_bind) { 5860 int unbind_err; 5861 5862 unbind_err = rawip_unbind(connp); 5863 ASSERT(unbind_err == 0); 5864 } 5865 5866 if (error == 0) { 5867 *id = 0; 5868 (*connp->conn_upcalls->su_connected) 5869 (connp->conn_upper_handle, 0, NULL, -1); 5870 } else if (error < 0) { 5871 error = proto_tlitosyserr(-error); 5872 } 5873 return (error); 5874 } 5875 5876 /* ARGSUSED */ 5877 void 5878 rawip_fallback(sock_lower_handle_t proto_handle, queue_t *q, 5879 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb) 5880 { 5881 conn_t *connp = (conn_t *)proto_handle; 5882 icmp_t *icmp; 5883 struct T_capability_ack tca; 5884 struct sockaddr_in6 laddr, faddr; 5885 socklen_t laddrlen, faddrlen; 5886 short opts; 5887 struct stroptions *stropt; 5888 mblk_t *stropt_mp; 5889 int error; 5890 5891 icmp = connp->conn_icmp; 5892 5893 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 5894 5895 /* 5896 * setup the fallback stream that was allocated 5897 */ 5898 connp->conn_dev = (dev_t)RD(q)->q_ptr; 5899 connp->conn_minor_arena = WR(q)->q_ptr; 5900 5901 RD(q)->q_ptr = WR(q)->q_ptr = connp; 5902 5903 WR(q)->q_qinfo = &icmpwinit; 5904 5905 connp->conn_rq = RD(q); 5906 connp->conn_wq = WR(q); 5907 5908 /* Notify stream head about options before sending up data */ 5909 stropt_mp->b_datap->db_type = M_SETOPTS; 5910 stropt_mp->b_wptr += sizeof (*stropt); 5911 stropt = (struct stroptions *)stropt_mp->b_rptr; 5912 stropt->so_flags = SO_WROFF | SO_HIWAT; 5913 stropt->so_wroff = 5914 (ushort_t)(icmp->icmp_max_hdr_len + icmp->icmp_is->is_wroff_extra); 5915 stropt->so_hiwat = icmp->icmp_recv_hiwat; 5916 putnext(RD(q), stropt_mp); 5917 5918 /* 5919 * free helper stream 5920 */ 5921 ip_free_helper_stream(connp); 
5922 5923 /* 5924 * Collect the information needed to sync with the sonode 5925 */ 5926 icmp_do_capability_ack(icmp, &tca, TC1_INFO); 5927 5928 laddrlen = faddrlen = sizeof (sin6_t); 5929 (void) rawip_getsockname((sock_lower_handle_t)connp, 5930 (struct sockaddr *)&laddr, &laddrlen, NULL); 5931 error = rawip_getpeername((sock_lower_handle_t)connp, 5932 (struct sockaddr *)&faddr, &faddrlen, NULL); 5933 if (error != 0) 5934 faddrlen = 0; 5935 opts = 0; 5936 if (icmp->icmp_dgram_errind) 5937 opts |= SO_DGRAM_ERRIND; 5938 if (icmp->icmp_dontroute) 5939 opts |= SO_DONTROUTE; 5940 5941 /* 5942 * Once we grab the drain lock, no data will be send up 5943 * to the socket. So we notify the socket that the endpoint 5944 * is quiescent and it's therefore safe move data from 5945 * the socket to the stream head. 5946 */ 5947 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 5948 (struct sockaddr *)&laddr, laddrlen, 5949 (struct sockaddr *)&faddr, faddrlen, opts); 5950 5951 /* 5952 * push up any packets that were queued in icmp_t 5953 */ 5954 5955 mutex_enter(&icmp->icmp_recv_lock); 5956 while (icmp->icmp_fallback_queue_head != NULL) { 5957 mblk_t *mp; 5958 5959 mp = icmp->icmp_fallback_queue_head; 5960 icmp->icmp_fallback_queue_head = mp->b_next; 5961 mp->b_next = NULL; 5962 mutex_exit(&icmp->icmp_recv_lock); 5963 putnext(RD(q), mp); 5964 mutex_enter(&icmp->icmp_recv_lock); 5965 } 5966 icmp->icmp_fallback_queue_tail = icmp->icmp_fallback_queue_head; 5967 /* 5968 * No longer a streams less socket 5969 */ 5970 connp->conn_flags &= ~IPCL_NONSTR; 5971 mutex_exit(&icmp->icmp_recv_lock); 5972 ASSERT(icmp->icmp_fallback_queue_head == NULL && 5973 icmp->icmp_fallback_queue_tail == NULL); 5974 5975 ASSERT(connp->conn_ref >= 1); 5976 } 5977 5978 /* ARGSUSED */ 5979 sock_lower_handle_t 5980 rawip_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 5981 uint_t *smodep, int *errorp, int flags, cred_t *credp) 5982 { 5983 conn_t *connp; 5984 5985 if (type != SOCK_RAW || 
(family != AF_INET && family != AF_INET6)) { 5986 *errorp = EPROTONOSUPPORT; 5987 return (NULL); 5988 } 5989 5990 connp = icmp_open(family, credp, errorp, flags); 5991 if (connp != NULL) { 5992 icmp_stack_t *is; 5993 5994 is = connp->conn_icmp->icmp_is; 5995 connp->conn_flags |= IPCL_NONSTR; 5996 5997 if (connp->conn_icmp->icmp_family == AF_INET6) { 5998 /* Build initial header template for transmit */ 5999 rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER); 6000 if ((*errorp = 6001 icmp_build_hdrs(connp->conn_icmp)) != 0) { 6002 rw_exit(&connp->conn_icmp->icmp_rwlock); 6003 ipcl_conn_destroy(connp); 6004 return (NULL); 6005 } 6006 rw_exit(&connp->conn_icmp->icmp_rwlock); 6007 } 6008 6009 connp->conn_icmp->icmp_recv_hiwat = is->is_recv_hiwat; 6010 connp->conn_icmp->icmp_xmit_hiwat = is->is_xmit_hiwat; 6011 6012 if ((*errorp = ip_create_helper_stream(connp, 6013 is->is_ldi_ident)) != 0) { 6014 cmn_err(CE_CONT, "create of IP helper stream failed\n"); 6015 (void) rawip_do_close(connp); 6016 return (NULL); 6017 } 6018 6019 mutex_enter(&connp->conn_lock); 6020 connp->conn_state_flags &= ~CONN_INCIPIENT; 6021 mutex_exit(&connp->conn_lock); 6022 *sock_downcalls = &sock_rawip_downcalls; 6023 *smodep = SM_ATOMIC; 6024 } else { 6025 ASSERT(*errorp != 0); 6026 } 6027 6028 return ((sock_lower_handle_t)connp); 6029 } 6030 6031 /* ARGSUSED */ 6032 void 6033 rawip_activate(sock_lower_handle_t proto_handle, 6034 sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, int flags, 6035 cred_t *cr) 6036 { 6037 conn_t *connp = (conn_t *)proto_handle; 6038 icmp_stack_t *is = connp->conn_icmp->icmp_is; 6039 struct sock_proto_props sopp; 6040 6041 connp->conn_upcalls = sock_upcalls; 6042 connp->conn_upper_handle = sock_handle; 6043 6044 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 6045 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 6046 sopp.sopp_wroff = connp->conn_icmp->icmp_max_hdr_len + 6047 is->is_wroff_extra; 6048 sopp.sopp_rxhiwat = 
is->is_recv_hiwat; 6049 sopp.sopp_rxlowat = icmp_mod_info.mi_lowat; 6050 sopp.sopp_maxblk = INFPSZ; 6051 sopp.sopp_maxpsz = IP_MAXPACKET; 6052 sopp.sopp_minpsz = (icmp_mod_info.mi_minpsz == 1) ? 0 : 6053 icmp_mod_info.mi_minpsz; 6054 6055 (*connp->conn_upcalls->su_set_proto_props) 6056 (connp->conn_upper_handle, &sopp); 6057 } 6058 6059 static int 6060 rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp) 6061 { 6062 sin_t *sin = (sin_t *)sa; 6063 sin6_t *sin6 = (sin6_t *)sa; 6064 6065 ASSERT(icmp != NULL); 6066 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 6067 6068 switch (icmp->icmp_family) { 6069 case AF_INET: 6070 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 6071 if (*salenp < sizeof (sin_t)) 6072 return (EINVAL); 6073 6074 *salenp = sizeof (sin_t); 6075 *sin = sin_null; 6076 sin->sin_family = AF_INET; 6077 if (icmp->icmp_state == TS_UNBND) { 6078 break; 6079 } 6080 6081 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 6082 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 6083 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_v6src); 6084 } else { 6085 /* 6086 * INADDR_ANY 6087 * icmp_v6src is not set, we might be bound to 6088 * broadcast/multicast. Use icmp_bound_v6src as 6089 * local address instead (that could 6090 * also still be INADDR_ANY) 6091 */ 6092 sin->sin_addr.s_addr = 6093 V4_PART_OF_V6(icmp->icmp_bound_v6src); 6094 } 6095 break; 6096 case AF_INET6: 6097 6098 if (*salenp < sizeof (sin6_t)) 6099 return (EINVAL); 6100 6101 *salenp = sizeof (sin6_t); 6102 *sin6 = sin6_null; 6103 sin6->sin6_family = AF_INET6; 6104 if (icmp->icmp_state == TS_UNBND) { 6105 break; 6106 } 6107 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 6108 sin6->sin6_addr = icmp->icmp_v6src; 6109 } else { 6110 /* 6111 * UNSPECIFIED 6112 * icmp_v6src is not set, we might be bound to 6113 * broadcast/multicast. 
Use icmp_bound_v6src as 6114 * local address instead (that could 6115 * also still be UNSPECIFIED) 6116 */ 6117 6118 sin6->sin6_addr = icmp->icmp_bound_v6src; 6119 } 6120 break; 6121 } 6122 return (0); 6123 } 6124 6125 static int 6126 rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp) 6127 { 6128 sin_t *sin = (sin_t *)sa; 6129 sin6_t *sin6 = (sin6_t *)sa; 6130 6131 ASSERT(icmp != NULL); 6132 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 6133 6134 if (icmp->icmp_state != TS_DATA_XFER) 6135 return (ENOTCONN); 6136 6137 sa->sa_family = icmp->icmp_family; 6138 switch (icmp->icmp_family) { 6139 case AF_INET: 6140 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 6141 6142 if (*salenp < sizeof (sin_t)) 6143 return (EINVAL); 6144 6145 *salenp = sizeof (sin_t); 6146 *sin = sin_null; 6147 sin->sin_family = AF_INET; 6148 sin->sin_addr.s_addr = 6149 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr); 6150 break; 6151 case AF_INET6: 6152 if (*salenp < sizeof (sin6_t)) 6153 return (EINVAL); 6154 6155 *salenp = sizeof (sin6_t); 6156 *sin6 = sin6_null; 6157 *sin6 = icmp->icmp_v6dst; 6158 break; 6159 } 6160 return (0); 6161 } 6162 6163 /* ARGSUSED */ 6164 int 6165 rawip_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6166 socklen_t *salenp, cred_t *cr) 6167 { 6168 conn_t *connp = (conn_t *)proto_handle; 6169 icmp_t *icmp = connp->conn_icmp; 6170 int error; 6171 6172 ASSERT(icmp != NULL); 6173 6174 rw_enter(&icmp->icmp_rwlock, RW_READER); 6175 6176 error = rawip_do_getpeername(icmp, sa, salenp); 6177 6178 rw_exit(&icmp->icmp_rwlock); 6179 6180 return (error); 6181 } 6182 6183 /* ARGSUSED */ 6184 int 6185 rawip_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6186 socklen_t *salenp, cred_t *cr) 6187 { 6188 conn_t *connp = (conn_t *)proto_handle; 6189 icmp_t *icmp = connp->conn_icmp; 6190 int error; 6191 6192 ASSERT(icmp != NULL); 6193 rw_enter(&icmp->icmp_rwlock, RW_READER); 6194 6195 error = rawip_do_getsockname(icmp, sa, salenp); 6196 6197 
rw_exit(&icmp->icmp_rwlock); 6198 6199 return (error); 6200 } 6201 6202 int 6203 rawip_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6204 const void *optvalp, socklen_t optlen, cred_t *cr) 6205 { 6206 conn_t *connp = (conn_t *)proto_handle; 6207 icmp_t *icmp = connp->conn_icmp; 6208 int error; 6209 6210 error = proto_opt_check(level, option_name, optlen, NULL, 6211 icmp_opt_obj.odb_opt_des_arr, 6212 icmp_opt_obj.odb_opt_arr_cnt, 6213 icmp_opt_obj.odb_topmost_tpiprovider, 6214 B_TRUE, B_FALSE, cr); 6215 6216 if (error != 0) { 6217 /* 6218 * option not recognized 6219 */ 6220 if (error < 0) { 6221 error = proto_tlitosyserr(-error); 6222 } 6223 return (error); 6224 } 6225 6226 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6227 error = icmp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, 6228 option_name, optlen, (uchar_t *)optvalp, (uint_t *)&optlen, 6229 (uchar_t *)optvalp, NULL, cr); 6230 rw_exit(&icmp->icmp_rwlock); 6231 6232 if (error < 0) { 6233 /* 6234 * Pass on to ip 6235 */ 6236 error = ip_set_options(connp, level, option_name, optvalp, 6237 optlen, cr); 6238 } 6239 6240 ASSERT(error >= 0); 6241 6242 return (error); 6243 } 6244 6245 int 6246 rawip_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6247 void *optvalp, socklen_t *optlen, cred_t *cr) 6248 { 6249 int error; 6250 conn_t *connp = (conn_t *)proto_handle; 6251 icmp_t *icmp = connp->conn_icmp; 6252 t_uscalar_t max_optbuf_len; 6253 void *optvalp_buf; 6254 int len; 6255 6256 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6257 icmp_opt_obj.odb_opt_des_arr, 6258 icmp_opt_obj.odb_opt_arr_cnt, 6259 icmp_opt_obj.odb_topmost_tpiprovider, 6260 B_FALSE, B_TRUE, cr); 6261 6262 if (error != 0) { 6263 if (error < 0) { 6264 error = proto_tlitosyserr(-error); 6265 } 6266 return (error); 6267 } 6268 6269 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6270 rw_enter(&icmp->icmp_rwlock, RW_READER); 6271 len = icmp_opt_get(connp, level, option_name, 
optvalp_buf); 6272 rw_exit(&icmp->icmp_rwlock); 6273 6274 if (len < 0) { 6275 /* 6276 * Pass on to IP 6277 */ 6278 kmem_free(optvalp_buf, max_optbuf_len); 6279 return (ip_get_options(connp, level, option_name, optvalp, 6280 optlen, cr)); 6281 } else { 6282 /* 6283 * update optlen and copy option value 6284 */ 6285 t_uscalar_t size = MIN(len, *optlen); 6286 bcopy(optvalp_buf, optvalp, size); 6287 bcopy(&size, optlen, sizeof (size)); 6288 6289 kmem_free(optvalp_buf, max_optbuf_len); 6290 return (0); 6291 } 6292 } 6293 6294 /* ARGSUSED */ 6295 int 6296 rawip_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 6297 { 6298 conn_t *connp = (conn_t *)proto_handle; 6299 (void) rawip_do_close(connp); 6300 return (0); 6301 } 6302 6303 /* ARGSUSED */ 6304 int 6305 rawip_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6306 { 6307 conn_t *connp = (conn_t *)proto_handle; 6308 6309 /* shut down the send side */ 6310 if (how != SHUT_RD) 6311 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6312 SOCK_OPCTL_SHUT_SEND, 0); 6313 /* shut down the recv side */ 6314 if (how != SHUT_WR) 6315 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6316 SOCK_OPCTL_SHUT_RECV, 0); 6317 return (0); 6318 } 6319 6320 void 6321 rawip_clr_flowctrl(sock_lower_handle_t proto_handle) 6322 { 6323 conn_t *connp = (conn_t *)proto_handle; 6324 icmp_t *icmp = connp->conn_icmp; 6325 6326 mutex_enter(&icmp->icmp_recv_lock); 6327 connp->conn_flow_cntrld = B_FALSE; 6328 mutex_exit(&icmp->icmp_recv_lock); 6329 } 6330 6331 int 6332 rawip_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6333 int mode, int32_t *rvalp, cred_t *cr) 6334 { 6335 conn_t *connp = (conn_t *)proto_handle; 6336 int error; 6337 6338 switch (cmd) { 6339 case ND_SET: 6340 case ND_GET: 6341 case _SIOCSOCKFALLBACK: 6342 case TI_GETPEERNAME: 6343 case TI_GETMYNAME: 6344 #ifdef DEBUG 6345 cmn_err(CE_CONT, "icmp_ioctl cmd 0x%x on non streams" 6346 " socket", cmd); 6347 #endif 6348 error = 
EINVAL; 6349 break; 6350 default: 6351 /* 6352 * Pass on to IP using helper stream 6353 */ 6354 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6355 cmd, arg, mode, cr, rvalp); 6356 break; 6357 } 6358 return (error); 6359 } 6360 6361 /* ARGSUSED */ 6362 int 6363 rawip_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 6364 cred_t *cr) 6365 { 6366 conn_t *connp = (conn_t *)proto_handle; 6367 icmp_t *icmp = connp->conn_icmp; 6368 icmp_stack_t *is = icmp->icmp_is; 6369 int error = 0; 6370 boolean_t bypass_dgram_errind = B_FALSE; 6371 6372 ASSERT(DB_TYPE(mp) == M_DATA); 6373 6374 if (is_system_labeled()) 6375 msg_setcredpid(mp, cr, curproc->p_pid); 6376 6377 /* do an implicit bind if necessary */ 6378 if (icmp->icmp_state == TS_UNBND) { 6379 error = rawip_implicit_bind(connp); 6380 /* 6381 * We could be racing with an actual bind, in which case 6382 * we would see EPROTO. We cross our fingers and try 6383 * to connect. 6384 */ 6385 if (!(error == 0 || error == EPROTO)) { 6386 freemsg(mp); 6387 return (error); 6388 } 6389 } 6390 6391 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6392 6393 if (msg->msg_name != NULL && icmp->icmp_state == TS_DATA_XFER) { 6394 error = EISCONN; 6395 goto done_lock; 6396 } 6397 6398 switch (icmp->icmp_family) { 6399 case AF_INET6: { 6400 sin6_t *sin6; 6401 ip6_pkt_t ipp_s; /* For ancillary data options */ 6402 ip6_pkt_t *ipp = &ipp_s; 6403 6404 sin6 = (sin6_t *)msg->msg_name; 6405 if (sin6 != NULL) { 6406 error = proto_verify_ip_addr(icmp->icmp_family, 6407 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6408 if (error != 0) { 6409 bypass_dgram_errind = B_TRUE; 6410 goto done_lock; 6411 } 6412 if (icmp->icmp_delayed_error != 0) { 6413 sin6_t *sin1 = (sin6_t *)msg->msg_name; 6414 sin6_t *sin2 = (sin6_t *) 6415 &icmp->icmp_delayed_addr; 6416 6417 error = icmp->icmp_delayed_error; 6418 icmp->icmp_delayed_error = 0; 6419 6420 /* Compare IP address and port */ 6421 6422 if (sin1->sin6_port == sin2->sin6_port && 6423 
IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 6424 &sin2->sin6_addr)) { 6425 goto done_lock; 6426 } 6427 } 6428 } else { 6429 /* 6430 * Use connected address 6431 */ 6432 if (icmp->icmp_state != TS_DATA_XFER) { 6433 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6434 error = EDESTADDRREQ; 6435 bypass_dgram_errind = B_TRUE; 6436 goto done_lock; 6437 } 6438 sin6 = &icmp->icmp_v6dst; 6439 } 6440 6441 /* No support for mapped addresses on raw sockets */ 6442 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6443 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6444 error = EADDRNOTAVAIL; 6445 goto done_lock; 6446 } 6447 6448 ipp->ipp_fields = 0; 6449 ipp->ipp_sticky_ignored = 0; 6450 6451 /* 6452 * If options passed in, feed it for verification and handling 6453 */ 6454 if (msg->msg_controllen != 0) { 6455 error = process_auxiliary_options(connp, 6456 msg->msg_control, msg->msg_controllen, 6457 ipp, &icmp_opt_obj, icmp_opt_set); 6458 if (error != 0) { 6459 goto done_lock; 6460 } 6461 } 6462 6463 rw_exit(&icmp->icmp_rwlock); 6464 6465 /* 6466 * Destination is a native IPv6 address. 6467 * Send out an IPv6 format packet. 
6468 */ 6469 6470 error = raw_ip_send_data_v6(connp->conn_wq, connp, mp, sin6, 6471 ipp); 6472 } 6473 break; 6474 case AF_INET: { 6475 sin_t *sin; 6476 ip4_pkt_t pktinfo; 6477 ip4_pkt_t *pktinfop = &pktinfo; 6478 ipaddr_t v4dst; 6479 6480 sin = (sin_t *)msg->msg_name; 6481 if (sin != NULL) { 6482 error = proto_verify_ip_addr(icmp->icmp_family, 6483 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6484 if (error != 0) { 6485 bypass_dgram_errind = B_TRUE; 6486 goto done_lock; 6487 } 6488 v4dst = sin->sin_addr.s_addr; 6489 if (icmp->icmp_delayed_error != 0) { 6490 sin_t *sin1 = (sin_t *)msg->msg_name; 6491 sin_t *sin2 = (sin_t *)&icmp->icmp_delayed_addr; 6492 6493 error = icmp->icmp_delayed_error; 6494 icmp->icmp_delayed_error = 0; 6495 6496 /* Compare IP address and port */ 6497 if (sin1->sin_port == sin2->sin_port && 6498 sin1->sin_addr.s_addr == 6499 sin2->sin_addr.s_addr) { 6500 goto done_lock; 6501 } 6502 6503 } 6504 } else { 6505 /* 6506 * Use connected address 6507 */ 6508 if (icmp->icmp_state != TS_DATA_XFER) { 6509 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6510 error = EDESTADDRREQ; 6511 bypass_dgram_errind = B_TRUE; 6512 goto done_lock; 6513 } 6514 v4dst = V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr); 6515 } 6516 6517 6518 pktinfop->ip4_ill_index = 0; 6519 pktinfop->ip4_addr = INADDR_ANY; 6520 6521 /* 6522 * If options passed in, feed it for verification and handling 6523 */ 6524 if (msg->msg_controllen != 0) { 6525 error = process_auxiliary_options(connp, 6526 msg->msg_control, msg->msg_controllen, 6527 pktinfop, &icmp_opt_obj, icmp_opt_set); 6528 if (error != 0) { 6529 goto done_lock; 6530 } 6531 } 6532 rw_exit(&icmp->icmp_rwlock); 6533 6534 error = raw_ip_send_data_v4(connp->conn_wq, connp, mp, 6535 v4dst, pktinfop); 6536 break; 6537 } 6538 6539 default: 6540 ASSERT(0); 6541 } 6542 6543 goto done; 6544 6545 done_lock: 6546 rw_exit(&icmp->icmp_rwlock); 6547 if (error != 0) { 6548 ASSERT(mp != NULL); 6549 freemsg(mp); 6550 } 6551 done: 6552 if 
(bypass_dgram_errind) 6553 return (error); 6554 return (icmp->icmp_dgram_errind ? error : 0); 6555 } 6556 6557 sock_downcalls_t sock_rawip_downcalls = { 6558 rawip_activate, 6559 rawip_accept, 6560 rawip_bind, 6561 rawip_listen, 6562 rawip_connect, 6563 rawip_getpeername, 6564 rawip_getsockname, 6565 rawip_getsockopt, 6566 rawip_setsockopt, 6567 rawip_send, 6568 NULL, 6569 NULL, 6570 NULL, 6571 rawip_shutdown, 6572 rawip_clr_flowctrl, 6573 rawip_ioctl, 6574 rawip_close 6575 }; 6576