1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/stropts.h> 30 #include <sys/strlog.h> 31 #include <sys/strsun.h> 32 #define _SUN_TPI_VERSION 2 33 #include <sys/tihdr.h> 34 #include <sys/timod.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/kmem.h> 41 #include <sys/policy.h> 42 #include <sys/priv.h> 43 #include <sys/zone.h> 44 #include <sys/time.h> 45 46 #include <sys/sockio.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/isa_defs.h> 50 #include <sys/suntpi.h> 51 #include <sys/xti_inet.h> 52 #include <sys/netstack.h> 53 54 #include <net/route.h> 55 #include <net/if.h> 56 57 #include <netinet/in.h> 58 #include <netinet/ip6.h> 59 #include <netinet/icmp6.h> 60 #include <inet/common.h> 61 #include <inet/ip.h> 62 #include <inet/ip6.h> 63 #include <inet/proto_set.h> 64 #include <inet/nd.h> 65 #include <inet/optcom.h> 66 #include <inet/snmpcom.h> 67 #include <inet/kstatcom.h> 68 #include <inet/rawip_impl.h> 69 70 #include <netinet/ip_mroute.h> 71 #include <inet/tcp.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 #include <inet/ipclassifier.h> 75 76 #include <sys/tsol/label.h> 77 #include <sys/tsol/tnet.h> 78 79 #include <inet/ip_ire.h> 80 #include <inet/ip_if.h> 81 82 #include <inet/ip_impl.h> 83 #include <sys/disp.h> 84 85 /* 86 * Synchronization notes: 87 * 88 * RAWIP is MT and uses the usual kernel synchronization primitives. There is 89 * one RAWIP lock, icmp_rwlock. We also use conn_lock when updating things 90 * which affect the IP classifier lookup. 91 * The lock order is icmp_rwlock -> conn_lock. 92 * 93 * The icmp_rwlock: 94 * This protects most of the other fields in the icmp_t. The exact list of 95 * fields which are protected by each of the above locks is documented in 96 * the icmp_t structure definition. 97 * 98 * Plumbing notes: 99 * ICMP is always a device driver. 
For compatibility with mibopen() code
 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough
 * dummy module.
 */

/* Forward declarations for routines defined later in this file. */
static void	icmp_addr_req(queue_t *q, mblk_t *mp);
static void	icmp_tpi_bind(queue_t *q, mblk_t *mp);
static int	icmp_bind_proto(conn_t *connp);
static int	icmp_build_hdrs(icmp_t *icmp);
static void	icmp_capability_req(queue_t *q, mblk_t *mp);
static int	icmp_close(queue_t *q, int flags);
static void	icmp_tpi_connect(queue_t *q, mblk_t *mp);
static void	icmp_tpi_disconnect(queue_t *q, mblk_t *mp);
static void	icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
		    t_scalar_t t_error, int sys_error);
static void	icmp_icmp_error(conn_t *connp, mblk_t *mp);
static void	icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp);
static void	icmp_info_req(queue_t *q, mblk_t *mp);
static void	icmp_input(void *, mblk_t *, void *);
static conn_t	*icmp_open(int family, cred_t *credp, int *err, int flags);
static int	icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	icmp_unitdata_opt_process(queue_t *q, mblk_t *mp,
		    int *errorp, void *thisdg_attrs);
static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
/* Not static: the option get/set routines have external linkage. */
int		icmp_opt_set(conn_t *connp, uint_t optset_context,
		    int level, int name, uint_t inlen,
		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
		    void *thisdg_attrs, cred_t *cr);
int		icmp_opt_get(conn_t *connp, int level, int name,
		    uchar_t *ptr);
static int	icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt);
static int	icmp_param_set(queue_t *q, mblk_t *mp, char *value,
		    caddr_t cp, cred_t *cr);
static int	icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
		    uchar_t *ptr, int len);
static int	icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static void	icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
static void	icmp_tpi_unbind(queue_t *q, mblk_t *mp);
static void	icmp_wput(queue_t *q, mblk_t *mp);
static void	icmp_wput_fallback(queue_t *q, mblk_t *mp);
static int	raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp,
		    sin6_t *sin6, ip6_pkt_t *ipp);
static int	raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp,
		    ipaddr_t v4dst, ip4_pkt_t *pktinfop);
static void	icmp_wput_other(queue_t *q, mblk_t *mp);
static void	icmp_wput_iocdata(queue_t *q, mblk_t *mp);
static void	icmp_wput_restricted(queue_t *q, mblk_t *mp);

/* Per-netstack instance setup and teardown. */
static void	*rawip_stack_init(netstackid_t stackid, netstack_t *ns);
static void	rawip_stack_fini(netstackid_t stackid, void *arg);

/* Per-netstack kstat handling. */
static void	*rawip_kstat_init(netstackid_t stackid);
static void	rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp);
static int	rawip_kstat_update(kstat_t *kp, int rw);
static void	rawip_stack_shutdown(netstackid_t stackid, void *arg);
static int	rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa,
		    uint_t *salenp);
static int	rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa,
		    uint_t *salenp);

/* Not static: these two have external linkage. */
int		rawip_getsockname(sock_lower_handle_t, struct sockaddr *,
		    socklen_t *, cred_t *);
int		rawip_getpeername(sock_lower_handle_t, struct sockaddr *,
		    socklen_t *, cred_t *);

/* Module information shared by every qinit structure below. */
static struct module_info icmp_mod_info =  {
	5707, "icmp", 1, INFPSZ, 512, 128
};

/*
 * Entry points for ICMP as a device.
 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices.
 */
static struct qinit icmprinitv4 = {
	NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info
};

static struct qinit icmprinitv6 = {
	NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info
};

/* Write side; shared by the IPv4 and IPv6 streamtabs below. */
static struct qinit icmpwinit = {
	(pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &icmp_mod_info
};

/* ICMP entry point during fallback */
static struct qinit icmp_fallback_sock_winit = {
	(pfi_t)icmp_wput_fallback, NULL, NULL, NULL, NULL, &icmp_mod_info
};

/* For AF_INET aka /dev/icmp */
struct streamtab icmpinfov4 = {
	&icmprinitv4, &icmpwinit
};

/* For AF_INET6 aka /dev/icmp6 */
struct streamtab icmpinfov6 = {
	&icmprinitv6, &icmpwinit
};

/*
 * Never written in the code visible here; being static they start out
 * zero-filled and are copied to clear or default an address.
 */
static sin_t	sin_null;	/* Zero address for quick clears */
static sin6_t	sin6_null;	/* Zero address for quick clears */

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack icmp_g_t_info_ack = {
	T_INFO_ACK,
	IP_MAXPACKET,	 /* TSDU_size.  icmp allows maximum size messages. */
	T_INVALID,	/* ETSDU_size.  icmp does not support expedited data. */
	T_INVALID,	/* CDATA_size. icmp does not support connect data. */
	T_INVALID,	/* DDATA_size. icmp does not support disconnect data. */
	0,		/* ADDR_size - filled in later. */
	0,		/* OPT_size - not initialized here */
	IP_MAXPACKET,	/* TIDU_size.  icmp allows maximum size messages. */
	T_CLTS,		/* SERV_type.  icmp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from icmp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/*
 * Table of ND variables supported by icmp.  These are loaded into is_nd
 * when the stack instance is created.
 * All of these are alterable, within the min/max values given, at run time.
 *
 * The is_* accessor macros that follow index is_param_arr by position, in
 * the same order as the entries of this table; keep the two in step when
 * adding, removing or reordering entries.
 */
static icmpparam_t	icmp_param_arr[] = {
	/* min	max	value	name */
	{ 0,	128,	32,	"icmp_wroff_extra" },
	{ 1,	255,	255,	"icmp_ipv4_ttl" },
	{ 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS,	"icmp_ipv6_hoplimit"},
	{ 0,	1,	1,	"icmp_bsd_compat" },
	{ 4096,	65536,	8192,	"icmp_xmit_hiwat"},
	{ 0,	65536,	1024,	"icmp_xmit_lowat"},
	{ 4096,	65536,	8192,	"icmp_recv_hiwat"},
	{ 65536, 1024*1024*1024, 256*1024,	"icmp_max_buf"},
};
#define	is_wroff_extra			is_param_arr[0].icmp_param_value
#define	is_ipv4_ttl			is_param_arr[1].icmp_param_value
#define	is_ipv6_hoplimit		is_param_arr[2].icmp_param_value
#define	is_bsd_compat			is_param_arr[3].icmp_param_value
#define	is_xmit_hiwat			is_param_arr[4].icmp_param_value
#define	is_xmit_lowat			is_param_arr[5].icmp_param_value
#define	is_recv_hiwat			is_param_arr[6].icmp_param_value
#define	is_max_buf			is_param_arr[7].icmp_param_value

/* Bind/connect workers used by the TPI entry points in this file. */
static int rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len);
static int rawip_do_connect(conn_t *connp, const struct sockaddr *sa,
    socklen_t len);
static void rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error);

/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to icmp_wput.
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP
 * protocol type placed in the message following the address. A T_BIND_ACK
 * message is returned by ip_bind_v4/v6.
260 */ 261 static void 262 icmp_tpi_bind(queue_t *q, mblk_t *mp) 263 { 264 int error; 265 struct sockaddr *sa; 266 struct T_bind_req *tbr; 267 socklen_t len; 268 sin_t *sin; 269 sin6_t *sin6; 270 icmp_t *icmp; 271 conn_t *connp = Q_TO_CONN(q); 272 mblk_t *mp1; 273 274 icmp = connp->conn_icmp; 275 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 276 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 277 "icmp_bind: bad req, len %u", 278 (uint_t)(mp->b_wptr - mp->b_rptr)); 279 icmp_err_ack(q, mp, TPROTO, 0); 280 return; 281 } 282 283 if (icmp->icmp_state != TS_UNBND) { 284 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 285 "icmp_bind: bad state, %d", icmp->icmp_state); 286 icmp_err_ack(q, mp, TOUTSTATE, 0); 287 return; 288 } 289 290 /* 291 * Reallocate the message to make sure we have enough room for an 292 * address and the protocol type. 293 */ 294 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 295 if (!mp1) { 296 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 297 return; 298 } 299 mp = mp1; 300 301 /* Reset the message type in preparation for shipping it back. 
*/ 302 DB_TYPE(mp) = M_PCPROTO; 303 tbr = (struct T_bind_req *)mp->b_rptr; 304 len = tbr->ADDR_length; 305 switch (len) { 306 case 0: /* request for a generic port */ 307 tbr->ADDR_offset = sizeof (struct T_bind_req); 308 if (icmp->icmp_family == AF_INET) { 309 tbr->ADDR_length = sizeof (sin_t); 310 sin = (sin_t *)&tbr[1]; 311 *sin = sin_null; 312 sin->sin_family = AF_INET; 313 mp->b_wptr = (uchar_t *)&sin[1]; 314 sa = (struct sockaddr *)sin; 315 len = sizeof (sin_t); 316 } else { 317 ASSERT(icmp->icmp_family == AF_INET6); 318 tbr->ADDR_length = sizeof (sin6_t); 319 sin6 = (sin6_t *)&tbr[1]; 320 *sin6 = sin6_null; 321 sin6->sin6_family = AF_INET6; 322 mp->b_wptr = (uchar_t *)&sin6[1]; 323 sa = (struct sockaddr *)sin6; 324 len = sizeof (sin6_t); 325 } 326 break; 327 328 case sizeof (sin_t): /* Complete IPv4 address */ 329 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 330 sizeof (sin_t)); 331 break; 332 333 case sizeof (sin6_t): /* Complete IPv6 address */ 334 sa = (struct sockaddr *)mi_offset_param(mp, 335 tbr->ADDR_offset, sizeof (sin6_t)); 336 break; 337 338 default: 339 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 340 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 341 icmp_err_ack(q, mp, TBADADDR, 0); 342 return; 343 } 344 345 error = rawip_do_bind(connp, sa, len); 346 done: 347 ASSERT(mp->b_cont == NULL); 348 if (error != 0) { 349 if (error > 0) { 350 icmp_err_ack(q, mp, TSYSERR, error); 351 } else { 352 icmp_err_ack(q, mp, -error, 0); 353 } 354 } else { 355 tbr->PRIM_type = T_BIND_ACK; 356 qreply(q, mp); 357 } 358 } 359 360 static int 361 rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len) 362 { 363 sin_t *sin; 364 sin6_t *sin6; 365 icmp_t *icmp; 366 int error = 0; 367 mblk_t *ire_mp; 368 369 370 icmp = connp->conn_icmp; 371 372 if (sa == NULL || !OK_32PTR((char *)sa)) { 373 return (EINVAL); 374 } 375 376 /* 377 * The state must be TS_UNBND. 
TPI mandates that users must send 378 * TPI primitives only 1 at a time and wait for the response before 379 * sending the next primitive. 380 */ 381 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 382 if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) { 383 error = -TOUTSTATE; 384 goto done; 385 } 386 387 ASSERT(len != 0); 388 switch (len) { 389 case sizeof (sin_t): /* Complete IPv4 address */ 390 sin = (sin_t *)sa; 391 if (sin->sin_family != AF_INET || 392 icmp->icmp_family != AF_INET) { 393 /* TSYSERR, EAFNOSUPPORT */ 394 error = EAFNOSUPPORT; 395 goto done; 396 } 397 break; 398 case sizeof (sin6_t): /* Complete IPv6 address */ 399 sin6 = (sin6_t *)sa; 400 if (sin6->sin6_family != AF_INET6 || 401 icmp->icmp_family != AF_INET6) { 402 /* TSYSERR, EAFNOSUPPORT */ 403 error = EAFNOSUPPORT; 404 goto done; 405 } 406 /* No support for mapped addresses on raw sockets */ 407 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 408 /* TSYSERR, EADDRNOTAVAIL */ 409 error = EADDRNOTAVAIL; 410 goto done; 411 } 412 break; 413 414 default: 415 /* TBADADDR */ 416 error = EADDRNOTAVAIL; 417 goto done; 418 } 419 420 icmp->icmp_pending_op = T_BIND_REQ; 421 icmp->icmp_state = TS_IDLE; 422 423 /* 424 * Copy the source address into our icmp structure. This address 425 * may still be zero; if so, ip will fill in the correct address 426 * each time an outbound packet is passed to it. 427 * If we are binding to a broadcast or multicast address then 428 * rawip_post_ip_bind_connect will clear the source address. 
429 */ 430 431 if (icmp->icmp_family == AF_INET) { 432 ASSERT(sin != NULL); 433 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 434 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 435 &icmp->icmp_v6src); 436 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 437 icmp->icmp_ip_snd_options_len; 438 icmp->icmp_bound_v6src = icmp->icmp_v6src; 439 } else { 440 int error; 441 442 ASSERT(sin6 != NULL); 443 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 444 icmp->icmp_v6src = sin6->sin6_addr; 445 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 446 icmp->icmp_bound_v6src = icmp->icmp_v6src; 447 448 /* Rebuild the header template */ 449 error = icmp_build_hdrs(icmp); 450 if (error != 0) { 451 icmp->icmp_pending_op = -1; 452 /* 453 * TSYSERR 454 */ 455 goto done; 456 } 457 } 458 459 ire_mp = NULL; 460 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 461 /* 462 * request an IRE if src not 0 (INADDR_ANY) 463 */ 464 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 465 if (ire_mp == NULL) { 466 icmp->icmp_pending_op = -1; 467 error = ENOMEM; 468 goto done; 469 } 470 DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE; 471 } 472 done: 473 rw_exit(&icmp->icmp_rwlock); 474 if (error != 0) 475 return (error); 476 477 if (icmp->icmp_family == AF_INET6) { 478 error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto, 479 &sin6->sin6_addr, sin6->sin6_port, B_TRUE); 480 } else { 481 error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto, 482 sin->sin_addr.s_addr, sin->sin_port, B_TRUE); 483 } 484 rawip_post_ip_bind_connect(icmp, ire_mp, error); 485 return (error); 486 } 487 488 static void 489 rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error) 490 { 491 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 492 if (icmp->icmp_state == TS_UNBND) { 493 /* 494 * not yet bound - bind sent by icmp_bind_proto. 
495 */ 496 rw_exit(&icmp->icmp_rwlock); 497 return; 498 } 499 ASSERT(icmp->icmp_pending_op != -1); 500 icmp->icmp_pending_op = -1; 501 502 if (error != 0) { 503 if (icmp->icmp_state == TS_DATA_XFER) { 504 /* Connect failed */ 505 /* Revert back to the bound source */ 506 icmp->icmp_v6src = icmp->icmp_bound_v6src; 507 icmp->icmp_state = TS_IDLE; 508 if (icmp->icmp_family == AF_INET6) 509 (void) icmp_build_hdrs(icmp); 510 } else { 511 V6_SET_ZERO(icmp->icmp_v6src); 512 V6_SET_ZERO(icmp->icmp_bound_v6src); 513 icmp->icmp_state = TS_UNBND; 514 if (icmp->icmp_family == AF_INET6) 515 (void) icmp_build_hdrs(icmp); 516 } 517 } else { 518 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 519 ire_t *ire; 520 521 ire = (ire_t *)ire_mp->b_rptr; 522 /* 523 * If a broadcast/multicast address was bound set 524 * the source address to 0. 525 * This ensures no datagrams with broadcast address 526 * as source address are emitted (which would violate 527 * RFC1122 - Hosts requirements) 528 * Note: we get IRE_BROADCAST for IPv6 529 * to "mark" a multicast local address. 530 */ 531 532 533 if (ire->ire_type == IRE_BROADCAST && 534 icmp->icmp_state != TS_DATA_XFER) { 535 /* 536 * This was just a local bind to a 537 * MC/broadcast addr 538 */ 539 V6_SET_ZERO(icmp->icmp_v6src); 540 if (icmp->icmp_family == AF_INET6) 541 (void) icmp_build_hdrs(icmp); 542 } 543 } 544 545 } 546 rw_exit(&icmp->icmp_rwlock); 547 if (ire_mp != NULL) 548 freeb(ire_mp); 549 } 550 551 /* 552 * Send message to IP to just bind to the protocol. 
553 */ 554 static int 555 icmp_bind_proto(conn_t *connp) 556 { 557 icmp_t *icmp; 558 int error; 559 560 icmp = connp->conn_icmp; 561 562 if (icmp->icmp_family == AF_INET6) 563 error = ip_proto_bind_laddr_v6(connp, NULL, icmp->icmp_proto, 564 &sin6_null.sin6_addr, 0, B_TRUE); 565 else 566 error = ip_proto_bind_laddr_v4(connp, NULL, icmp->icmp_proto, 567 sin_null.sin_addr.s_addr, 0, B_TRUE); 568 569 rawip_post_ip_bind_connect(icmp, NULL, error); 570 return (error); 571 } 572 573 static void 574 icmp_tpi_connect(queue_t *q, mblk_t *mp) 575 { 576 conn_t *connp = Q_TO_CONN(q); 577 struct T_conn_req *tcr; 578 icmp_t *icmp; 579 struct sockaddr *sa; 580 socklen_t len; 581 int error; 582 583 icmp = connp->conn_icmp; 584 tcr = (struct T_conn_req *)mp->b_rptr; 585 /* Sanity checks */ 586 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 587 icmp_err_ack(q, mp, TPROTO, 0); 588 return; 589 } 590 591 if (tcr->OPT_length != 0) { 592 icmp_err_ack(q, mp, TBADOPT, 0); 593 return; 594 } 595 596 len = tcr->DEST_length; 597 598 switch (len) { 599 default: 600 icmp_err_ack(q, mp, TBADADDR, 0); 601 return; 602 case sizeof (sin_t): 603 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 604 sizeof (sin_t)); 605 break; 606 case sizeof (sin6_t): 607 sa = (struct sockaddr *)mi_offset_param(mp, 608 tcr->DEST_offset, sizeof (sin6_t)); 609 break; 610 } 611 612 error = proto_verify_ip_addr(icmp->icmp_family, sa, len); 613 if (error != 0) { 614 icmp_err_ack(q, mp, TSYSERR, error); 615 return; 616 } 617 618 error = rawip_do_connect(connp, sa, len); 619 if (error != 0) { 620 if (error < 0) { 621 icmp_err_ack(q, mp, -error, 0); 622 } else { 623 icmp_err_ack(q, mp, 0, error); 624 } 625 } else { 626 mblk_t *mp1; 627 628 /* 629 * We have to send a connection confirmation to 630 * keep TLI happy. 
631 */ 632 if (icmp->icmp_family == AF_INET) { 633 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 634 sizeof (sin_t), NULL, 0); 635 } else { 636 ASSERT(icmp->icmp_family == AF_INET6); 637 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 638 sizeof (sin6_t), NULL, 0); 639 } 640 if (mp1 == NULL) { 641 rw_exit(&icmp->icmp_rwlock); 642 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 643 return; 644 } 645 646 /* 647 * Send ok_ack for T_CONN_REQ 648 */ 649 mp = mi_tpi_ok_ack_alloc(mp); 650 if (mp == NULL) { 651 /* Unable to reuse the T_CONN_REQ for the ack. */ 652 freemsg(mp1); 653 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 654 return; 655 } 656 putnext(connp->conn_rq, mp); 657 putnext(connp->conn_rq, mp1); 658 } 659 } 660 661 static int 662 rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len) 663 { 664 icmp_t *icmp; 665 sin_t *sin; 666 sin6_t *sin6; 667 mblk_t *ire_mp; 668 int error; 669 ipaddr_t v4dst; 670 in6_addr_t v6dst; 671 672 icmp = connp->conn_icmp; 673 674 if (sa == NULL || !OK_32PTR((char *)sa)) { 675 return (EINVAL); 676 } 677 678 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 679 if (ire_mp == NULL) 680 return (ENOMEM); 681 DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE; 682 683 684 ASSERT(sa != NULL && len != 0); 685 686 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 687 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 688 rw_exit(&icmp->icmp_rwlock); 689 freeb(ire_mp); 690 return (-TOUTSTATE); 691 } 692 693 switch (len) { 694 case sizeof (sin_t): 695 sin = (sin_t *)sa; 696 697 ASSERT(icmp->icmp_family == AF_INET); 698 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 699 700 v4dst = sin->sin_addr.s_addr; 701 /* 702 * Interpret a zero destination to mean loopback. 703 * Update the T_CONN_REQ (sin/sin6) since it is used to 704 * generate the T_CONN_CON. 
705 */ 706 if (v4dst == INADDR_ANY) { 707 v4dst = htonl(INADDR_LOOPBACK); 708 } 709 710 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 711 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 712 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 713 icmp->icmp_ip_snd_options_len; 714 icmp->icmp_v6dst.sin6_addr = v6dst; 715 icmp->icmp_v6dst.sin6_family = AF_INET6; 716 icmp->icmp_v6dst.sin6_flowinfo = 0; 717 icmp->icmp_v6dst.sin6_port = 0; 718 719 /* 720 * If the destination address is multicast and 721 * an outgoing multicast interface has been set, 722 * use the address of that interface as our 723 * source address if no source address has been set. 724 */ 725 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 726 CLASSD(v4dst) && 727 icmp->icmp_multicast_if_addr != INADDR_ANY) { 728 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 729 &icmp->icmp_v6src); 730 } 731 break; 732 case sizeof (sin6_t): 733 sin6 = (sin6_t *)sa; 734 735 /* No support for mapped addresses on raw sockets */ 736 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 737 rw_exit(&icmp->icmp_rwlock); 738 freeb(ire_mp); 739 return (EADDRNOTAVAIL); 740 } 741 742 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 743 ASSERT(icmp->icmp_family == AF_INET6); 744 745 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 746 747 icmp->icmp_v6dst = *sin6; 748 icmp->icmp_v6dst.sin6_port = 0; 749 750 /* 751 * Interpret a zero destination to mean loopback. 752 * Update the T_CONN_REQ (sin/sin6) since it is used to 753 * generate the T_CONN_CON. 754 */ 755 if (IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6dst.sin6_addr)) { 756 icmp->icmp_v6dst.sin6_addr = ipv6_loopback; 757 } 758 /* 759 * If the destination address is multicast and 760 * an outgoing multicast interface has been set, 761 * then the ip bind logic will pick the correct source 762 * address (i.e. matching the outgoing multicast interface). 
763 */ 764 break; 765 } 766 767 icmp->icmp_pending_op = T_CONN_REQ; 768 769 if (icmp->icmp_state == TS_DATA_XFER) { 770 /* Already connected - clear out state */ 771 icmp->icmp_v6src = icmp->icmp_bound_v6src; 772 icmp->icmp_state = TS_IDLE; 773 } 774 775 icmp->icmp_state = TS_DATA_XFER; 776 rw_exit(&icmp->icmp_rwlock); 777 778 if (icmp->icmp_family == AF_INET6) { 779 error = ip_proto_bind_connected_v6(connp, &ire_mp, 780 icmp->icmp_proto, &icmp->icmp_v6src, 0, 781 &icmp->icmp_v6dst.sin6_addr, 782 NULL, sin6->sin6_port, B_TRUE, B_TRUE); 783 } else { 784 error = ip_proto_bind_connected_v4(connp, &ire_mp, 785 icmp->icmp_proto, &V4_PART_OF_V6(icmp->icmp_v6src), 0, 786 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr), sin->sin_port, 787 B_TRUE, B_TRUE); 788 } 789 rawip_post_ip_bind_connect(icmp, ire_mp, error); 790 return (error); 791 } 792 793 static void 794 icmp_close_free(conn_t *connp) 795 { 796 icmp_t *icmp = connp->conn_icmp; 797 798 /* If there are any options associated with the stream, free them. */ 799 if (icmp->icmp_ip_snd_options != NULL) { 800 mi_free((char *)icmp->icmp_ip_snd_options); 801 icmp->icmp_ip_snd_options = NULL; 802 icmp->icmp_ip_snd_options_len = 0; 803 } 804 805 if (icmp->icmp_filter != NULL) { 806 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 807 icmp->icmp_filter = NULL; 808 } 809 810 /* Free memory associated with sticky options */ 811 if (icmp->icmp_sticky_hdrs_len != 0) { 812 kmem_free(icmp->icmp_sticky_hdrs, 813 icmp->icmp_sticky_hdrs_len); 814 icmp->icmp_sticky_hdrs = NULL; 815 icmp->icmp_sticky_hdrs_len = 0; 816 } 817 ip6_pkt_free(&icmp->icmp_sticky_ipp); 818 819 /* 820 * Clear any fields which the kmem_cache constructor clears. 821 * Only icmp_connp needs to be preserved. 822 * TBD: We should make this more efficient to avoid clearing 823 * everything. 
824 */ 825 ASSERT(icmp->icmp_connp == connp); 826 bzero(icmp, sizeof (icmp_t)); 827 icmp->icmp_connp = connp; 828 } 829 830 static int 831 rawip_do_close(conn_t *connp) 832 { 833 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 834 835 ip_quiesce_conn(connp); 836 837 if (!IPCL_IS_NONSTR(connp)) { 838 qprocsoff(connp->conn_rq); 839 } 840 841 ASSERT(connp->conn_icmp->icmp_fallback_queue_head == NULL && 842 connp->conn_icmp->icmp_fallback_queue_tail == NULL); 843 icmp_close_free(connp); 844 845 /* 846 * Now we are truly single threaded on this stream, and can 847 * delete the things hanging off the connp, and finally the connp. 848 * We removed this connp from the fanout list, it cannot be 849 * accessed thru the fanouts, and we already waited for the 850 * conn_ref to drop to 0. We are already in close, so 851 * there cannot be any other thread from the top. qprocsoff 852 * has completed, and service has completed or won't run in 853 * future. 854 */ 855 ASSERT(connp->conn_ref == 1); 856 857 if (!IPCL_IS_NONSTR(connp)) { 858 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 859 } else { 860 ip_free_helper_stream(connp); 861 } 862 863 connp->conn_ref--; 864 ipcl_conn_destroy(connp); 865 866 return (0); 867 } 868 869 static int 870 icmp_close(queue_t *q, int flags) 871 { 872 conn_t *connp; 873 874 if (flags & SO_FALLBACK) { 875 /* 876 * stream is being closed while in fallback 877 * simply free the resources that were allocated 878 */ 879 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 880 qprocsoff(q); 881 goto done; 882 } 883 884 connp = Q_TO_CONN(q); 885 (void) rawip_do_close(connp); 886 done: 887 q->q_ptr = WR(q)->q_ptr = NULL; 888 return (0); 889 } 890 891 /* 892 * This routine handles each T_DISCON_REQ message passed to icmp 893 * as an indicating that ICMP is no longer connected. This results 894 * in sending a T_BIND_REQ to IP to restore the binding to just 895 * the local address. 896 * 897 * The disconnect completes in rawip_post_ip_bind_connect. 
898 */ 899 static int 900 icmp_do_disconnect(conn_t *connp) 901 { 902 icmp_t *icmp; 903 mblk_t *ire_mp; 904 int error; 905 906 icmp = connp->conn_icmp; 907 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 908 if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) { 909 rw_exit(&icmp->icmp_rwlock); 910 return (-TOUTSTATE); 911 } 912 icmp->icmp_pending_op = T_DISCON_REQ; 913 icmp->icmp_v6src = icmp->icmp_bound_v6src; 914 icmp->icmp_state = TS_IDLE; 915 916 917 if (icmp->icmp_family == AF_INET6) { 918 /* Rebuild the header template */ 919 error = icmp_build_hdrs(icmp); 920 if (error != 0) { 921 icmp->icmp_pending_op = -1; 922 rw_exit(&icmp->icmp_rwlock); 923 return (error); 924 } 925 } 926 927 rw_exit(&icmp->icmp_rwlock); 928 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 929 if (ire_mp == NULL) { 930 return (ENOMEM); 931 } 932 933 if (icmp->icmp_family == AF_INET6) { 934 error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto, 935 &icmp->icmp_bound_v6src, 0, B_TRUE); 936 } else { 937 938 error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto, 939 V4_PART_OF_V6(icmp->icmp_bound_v6src), 0, B_TRUE); 940 } 941 942 rawip_post_ip_bind_connect(icmp, ire_mp, error); 943 944 return (error); 945 } 946 947 static void 948 icmp_tpi_disconnect(queue_t *q, mblk_t *mp) 949 { 950 conn_t *connp = Q_TO_CONN(q); 951 int error; 952 953 /* 954 * Allocate the largest primitive we need to send back 955 * T_error_ack is > than T_ok_ack 956 */ 957 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 958 if (mp == NULL) { 959 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 960 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 961 return; 962 } 963 964 error = icmp_do_disconnect(connp); 965 966 if (error != 0) { 967 if (error > 0) { 968 icmp_err_ack(q, mp, 0, error); 969 } else { 970 icmp_err_ack(q, mp, -error, 0); 971 } 972 } else { 973 mp = mi_tpi_ok_ack_alloc(mp); 974 ASSERT(mp != NULL); 975 qreply(q, mp); 976 } 977 978 } 979 980 static int 981 icmp_disconnect(conn_t *connp) 982 { 983 int error; 984 icmp_t *icmp = connp->conn_icmp; 985 986 icmp->icmp_dgram_errind = B_FALSE; 987 988 error = icmp_do_disconnect(connp); 989 990 if (error < 0) 991 error = proto_tlitosyserr(-error); 992 return (error); 993 } 994 995 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 996 static void 997 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 998 { 999 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1000 qreply(q, mp); 1001 } 1002 1003 /* Shorthand to generate and send TPI error acks to our client */ 1004 static void 1005 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 1006 t_scalar_t t_error, int sys_error) 1007 { 1008 struct T_error_ack *teackp; 1009 1010 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1011 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1012 teackp = (struct T_error_ack *)mp->b_rptr; 1013 teackp->ERROR_prim = primitive; 1014 teackp->TLI_error = t_error; 1015 teackp->UNIX_error = sys_error; 1016 qreply(q, mp); 1017 } 1018 } 1019 1020 /* 1021 * icmp_icmp_error is called by icmp_input to process ICMP 1022 * messages passed up by IP. 1023 * Generates the appropriate permanent (non-transient) errors. 1024 * Assumes that IP has pulled up everything up to and including 1025 * the ICMP header. 
1026 */ 1027 static void 1028 icmp_icmp_error(conn_t *connp, mblk_t *mp) 1029 { 1030 icmph_t *icmph; 1031 ipha_t *ipha; 1032 int iph_hdr_length; 1033 sin_t sin; 1034 mblk_t *mp1; 1035 int error = 0; 1036 icmp_t *icmp = connp->conn_icmp; 1037 1038 ipha = (ipha_t *)mp->b_rptr; 1039 1040 ASSERT(OK_32PTR(mp->b_rptr)); 1041 1042 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1043 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1044 icmp_icmp_error_ipv6(connp, mp); 1045 return; 1046 } 1047 1048 /* 1049 * icmp does not support v4 mapped addresses 1050 * so we can never be here for a V6 socket 1051 * i.e. icmp_family == AF_INET6 1052 */ 1053 ASSERT((IPH_HDR_VERSION(ipha) == IPV4_VERSION) && 1054 (icmp->icmp_family == AF_INET)); 1055 1056 ASSERT(icmp->icmp_family == AF_INET); 1057 1058 /* Skip past the outer IP and ICMP headers */ 1059 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1060 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 1061 ipha = (ipha_t *)&icmph[1]; 1062 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1063 1064 switch (icmph->icmph_type) { 1065 case ICMP_DEST_UNREACHABLE: 1066 switch (icmph->icmph_code) { 1067 case ICMP_FRAGMENTATION_NEEDED: 1068 /* 1069 * IP has already adjusted the path MTU. 1070 */ 1071 break; 1072 case ICMP_PORT_UNREACHABLE: 1073 case ICMP_PROTOCOL_UNREACHABLE: 1074 error = ECONNREFUSED; 1075 break; 1076 default: 1077 /* Transient errors */ 1078 break; 1079 } 1080 break; 1081 default: 1082 /* Transient errors */ 1083 break; 1084 } 1085 if (error == 0) { 1086 freemsg(mp); 1087 return; 1088 } 1089 1090 /* 1091 * Deliver T_UDERROR_IND when the application has asked for it. 1092 * The socket layer enables this automatically when connected. 
1093 */ 1094 if (!icmp->icmp_dgram_errind) { 1095 freemsg(mp); 1096 return; 1097 } 1098 1099 sin = sin_null; 1100 sin.sin_family = AF_INET; 1101 sin.sin_addr.s_addr = ipha->ipha_dst; 1102 if (IPCL_IS_NONSTR(connp)) { 1103 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1104 if (icmp->icmp_state == TS_DATA_XFER) { 1105 if (sin.sin_addr.s_addr == 1106 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr)) { 1107 rw_exit(&icmp->icmp_rwlock); 1108 (*connp->conn_upcalls->su_set_error) 1109 (connp->conn_upper_handle, error); 1110 goto done; 1111 } 1112 } else { 1113 icmp->icmp_delayed_error = error; 1114 *((sin_t *)&icmp->icmp_delayed_addr) = sin; 1115 } 1116 rw_exit(&icmp->icmp_rwlock); 1117 } else { 1118 1119 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 1120 0, error); 1121 if (mp1 != NULL) 1122 putnext(connp->conn_rq, mp1); 1123 } 1124 done: 1125 freemsg(mp); 1126 } 1127 1128 /* 1129 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 1130 * for IPv6 packets. 1131 * Send permanent (non-transient) errors upstream. 1132 * Assumes that IP has pulled up all the extension headers as well 1133 * as the ICMPv6 header. 
1134 */ 1135 static void 1136 icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1137 { 1138 icmp6_t *icmp6; 1139 ip6_t *ip6h, *outer_ip6h; 1140 uint16_t iph_hdr_length; 1141 uint8_t *nexthdrp; 1142 sin6_t sin6; 1143 mblk_t *mp1; 1144 int error = 0; 1145 icmp_t *icmp = connp->conn_icmp; 1146 1147 outer_ip6h = (ip6_t *)mp->b_rptr; 1148 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1149 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1150 else 1151 iph_hdr_length = IPV6_HDR_LEN; 1152 1153 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1154 ip6h = (ip6_t *)&icmp6[1]; 1155 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1156 freemsg(mp); 1157 return; 1158 } 1159 1160 switch (icmp6->icmp6_type) { 1161 case ICMP6_DST_UNREACH: 1162 switch (icmp6->icmp6_code) { 1163 case ICMP6_DST_UNREACH_NOPORT: 1164 error = ECONNREFUSED; 1165 break; 1166 case ICMP6_DST_UNREACH_ADMIN: 1167 case ICMP6_DST_UNREACH_NOROUTE: 1168 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1169 case ICMP6_DST_UNREACH_ADDR: 1170 /* Transient errors */ 1171 break; 1172 default: 1173 break; 1174 } 1175 break; 1176 case ICMP6_PACKET_TOO_BIG: { 1177 struct T_unitdata_ind *tudi; 1178 struct T_opthdr *toh; 1179 size_t udi_size; 1180 mblk_t *newmp; 1181 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1182 sizeof (struct ip6_mtuinfo); 1183 sin6_t *sin6; 1184 struct ip6_mtuinfo *mtuinfo; 1185 1186 /* 1187 * If the application has requested to receive path mtu 1188 * information, send up an empty message containing an 1189 * IPV6_PATHMTU ancillary data item. 1190 */ 1191 if (!icmp->icmp_ipv6_recvpathmtu) 1192 break; 1193 1194 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1195 opt_length; 1196 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1197 BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors); 1198 break; 1199 } 1200 1201 /* 1202 * newmp->b_cont is left to NULL on purpose. This is an 1203 * empty message containing only ancillary data. 
1204 */ 1205 newmp->b_datap->db_type = M_PROTO; 1206 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1207 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1208 tudi->PRIM_type = T_UNITDATA_IND; 1209 tudi->SRC_length = sizeof (sin6_t); 1210 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1211 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1212 tudi->OPT_length = opt_length; 1213 1214 sin6 = (sin6_t *)&tudi[1]; 1215 bzero(sin6, sizeof (sin6_t)); 1216 sin6->sin6_family = AF_INET6; 1217 sin6->sin6_addr = icmp->icmp_v6dst.sin6_addr; 1218 1219 toh = (struct T_opthdr *)&sin6[1]; 1220 toh->level = IPPROTO_IPV6; 1221 toh->name = IPV6_PATHMTU; 1222 toh->len = opt_length; 1223 toh->status = 0; 1224 1225 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1226 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1227 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1228 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1229 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1230 /* 1231 * We've consumed everything we need from the original 1232 * message. Free it, then send our empty message. 1233 */ 1234 freemsg(mp); 1235 if (!IPCL_IS_NONSTR(connp)) { 1236 putnext(connp->conn_rq, newmp); 1237 } else { 1238 (*connp->conn_upcalls->su_recv) 1239 (connp->conn_upper_handle, newmp, 0, 0, &error, 1240 NULL); 1241 ASSERT(error == 0); 1242 } 1243 return; 1244 } 1245 case ICMP6_TIME_EXCEEDED: 1246 /* Transient errors */ 1247 break; 1248 case ICMP6_PARAM_PROB: 1249 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1250 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1251 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1252 (uchar_t *)nexthdrp) { 1253 error = ECONNREFUSED; 1254 break; 1255 } 1256 break; 1257 } 1258 if (error == 0) { 1259 freemsg(mp); 1260 return; 1261 } 1262 1263 /* 1264 * Deliver T_UDERROR_IND when the application has asked for it. 1265 * The socket layer enables this automatically when connected. 
1266 */ 1267 if (!icmp->icmp_dgram_errind) { 1268 freemsg(mp); 1269 return; 1270 } 1271 1272 sin6 = sin6_null; 1273 sin6.sin6_family = AF_INET6; 1274 sin6.sin6_addr = ip6h->ip6_dst; 1275 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1276 1277 if (IPCL_IS_NONSTR(connp)) { 1278 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1279 if (icmp->icmp_state == TS_DATA_XFER) { 1280 if (IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1281 &icmp->icmp_v6dst.sin6_addr)) { 1282 rw_exit(&icmp->icmp_rwlock); 1283 (*connp->conn_upcalls->su_set_error) 1284 (connp->conn_upper_handle, error); 1285 goto done; 1286 } 1287 } else { 1288 icmp->icmp_delayed_error = error; 1289 *((sin6_t *)&icmp->icmp_delayed_addr) = sin6; 1290 } 1291 rw_exit(&icmp->icmp_rwlock); 1292 } else { 1293 1294 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1295 NULL, 0, error); 1296 if (mp1 != NULL) 1297 putnext(connp->conn_rq, mp1); 1298 } 1299 done: 1300 freemsg(mp); 1301 } 1302 1303 /* 1304 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1305 * The local address is filled in if endpoint is bound. The remote address 1306 * is filled in if remote address has been precified ("connected endpoint") 1307 * (The concept of connected CLTS sockets is alien to published TPI 1308 * but we support it anyway). 
1309 */ 1310 static void 1311 icmp_addr_req(queue_t *q, mblk_t *mp) 1312 { 1313 icmp_t *icmp = Q_TO_ICMP(q); 1314 mblk_t *ackmp; 1315 struct T_addr_ack *taa; 1316 1317 /* Make it large enough for worst case */ 1318 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1319 2 * sizeof (sin6_t), 1); 1320 if (ackmp == NULL) { 1321 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1322 return; 1323 } 1324 taa = (struct T_addr_ack *)ackmp->b_rptr; 1325 1326 bzero(taa, sizeof (struct T_addr_ack)); 1327 ackmp->b_wptr = (uchar_t *)&taa[1]; 1328 1329 taa->PRIM_type = T_ADDR_ACK; 1330 ackmp->b_datap->db_type = M_PCPROTO; 1331 rw_enter(&icmp->icmp_rwlock, RW_READER); 1332 /* 1333 * Note: Following code assumes 32 bit alignment of basic 1334 * data structures like sin_t and struct T_addr_ack. 1335 */ 1336 if (icmp->icmp_state != TS_UNBND) { 1337 /* 1338 * Fill in local address 1339 */ 1340 taa->LOCADDR_offset = sizeof (*taa); 1341 if (icmp->icmp_family == AF_INET) { 1342 sin_t *sin; 1343 1344 taa->LOCADDR_length = sizeof (sin_t); 1345 sin = (sin_t *)&taa[1]; 1346 /* Fill zeroes and then intialize non-zero fields */ 1347 *sin = sin_null; 1348 sin->sin_family = AF_INET; 1349 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1350 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1351 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1352 sin->sin_addr.s_addr); 1353 } else { 1354 /* 1355 * INADDR_ANY 1356 * icmp_v6src is not set, we might be bound to 1357 * broadcast/multicast. 
Use icmp_bound_v6src as 1358 * local address instead (that could 1359 * also still be INADDR_ANY) 1360 */ 1361 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1362 sin->sin_addr.s_addr); 1363 } 1364 ackmp->b_wptr = (uchar_t *)&sin[1]; 1365 } else { 1366 sin6_t *sin6; 1367 1368 ASSERT(icmp->icmp_family == AF_INET6); 1369 taa->LOCADDR_length = sizeof (sin6_t); 1370 sin6 = (sin6_t *)&taa[1]; 1371 /* Fill zeroes and then intialize non-zero fields */ 1372 *sin6 = sin6_null; 1373 sin6->sin6_family = AF_INET6; 1374 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1375 sin6->sin6_addr = icmp->icmp_v6src; 1376 } else { 1377 /* 1378 * UNSPECIFIED 1379 * icmp_v6src is not set, we might be bound to 1380 * broadcast/multicast. Use icmp_bound_v6src as 1381 * local address instead (that could 1382 * also still be UNSPECIFIED) 1383 */ 1384 sin6->sin6_addr = icmp->icmp_bound_v6src; 1385 } 1386 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1387 } 1388 } 1389 rw_exit(&icmp->icmp_rwlock); 1390 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1391 qreply(q, ackmp); 1392 } 1393 1394 static void 1395 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1396 { 1397 *tap = icmp_g_t_info_ack; 1398 1399 if (icmp->icmp_family == AF_INET6) 1400 tap->ADDR_size = sizeof (sin6_t); 1401 else 1402 tap->ADDR_size = sizeof (sin_t); 1403 tap->CURRENT_state = icmp->icmp_state; 1404 tap->OPT_size = icmp_max_optsize; 1405 } 1406 1407 static void 1408 icmp_do_capability_ack(icmp_t *icmp, struct T_capability_ack *tcap, 1409 t_uscalar_t cap_bits1) 1410 { 1411 tcap->CAP_bits1 = 0; 1412 1413 if (cap_bits1 & TC1_INFO) { 1414 icmp_copy_info(&tcap->INFO_ack, icmp); 1415 tcap->CAP_bits1 |= TC1_INFO; 1416 } 1417 } 1418 1419 /* 1420 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1421 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1422 * icmp_g_t_info_ack. The current state of the stream is copied from 1423 * icmp_state. 
1424 */ 1425 static void 1426 icmp_capability_req(queue_t *q, mblk_t *mp) 1427 { 1428 icmp_t *icmp = Q_TO_ICMP(q); 1429 t_uscalar_t cap_bits1; 1430 struct T_capability_ack *tcap; 1431 1432 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1433 1434 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1435 mp->b_datap->db_type, T_CAPABILITY_ACK); 1436 if (!mp) 1437 return; 1438 1439 tcap = (struct T_capability_ack *)mp->b_rptr; 1440 1441 icmp_do_capability_ack(icmp, tcap, cap_bits1); 1442 1443 qreply(q, mp); 1444 } 1445 1446 /* 1447 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1448 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1449 * The current state of the stream is copied from icmp_state. 1450 */ 1451 static void 1452 icmp_info_req(queue_t *q, mblk_t *mp) 1453 { 1454 icmp_t *icmp = Q_TO_ICMP(q); 1455 1456 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1457 T_INFO_ACK); 1458 if (!mp) 1459 return; 1460 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1461 qreply(q, mp); 1462 } 1463 1464 /* For /dev/icmp aka AF_INET open */ 1465 static int 1466 icmp_tpi_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1467 int family) 1468 { 1469 conn_t *connp; 1470 dev_t conn_dev; 1471 icmp_stack_t *is; 1472 int error; 1473 1474 conn_dev = NULL; 1475 1476 /* If the stream is already open, return immediately. */ 1477 if (q->q_ptr != NULL) 1478 return (0); 1479 1480 if (sflag == MODOPEN) 1481 return (EINVAL); 1482 1483 /* 1484 * Since ICMP is not used so heavily, allocating from the small 1485 * arena should be sufficient. 
1486 */ 1487 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 1488 return (EBUSY); 1489 } 1490 1491 if (flag & SO_FALLBACK) { 1492 /* 1493 * Non streams socket needs a stream to fallback to 1494 */ 1495 RD(q)->q_ptr = (void *)conn_dev; 1496 WR(q)->q_qinfo = &icmp_fallback_sock_winit; 1497 WR(q)->q_ptr = (void *)ip_minor_arena_sa; 1498 qprocson(q); 1499 return (0); 1500 } 1501 1502 connp = icmp_open(family, credp, &error, KM_SLEEP); 1503 if (connp == NULL) { 1504 ASSERT(error != NULL); 1505 inet_minor_free(ip_minor_arena_sa, connp->conn_dev); 1506 return (error); 1507 } 1508 1509 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1510 connp->conn_dev = conn_dev; 1511 connp->conn_minor_arena = ip_minor_arena_sa; 1512 1513 is = connp->conn_icmp->icmp_is; 1514 1515 /* 1516 * Initialize the icmp_t structure for this stream. 1517 */ 1518 q->q_ptr = connp; 1519 WR(q)->q_ptr = connp; 1520 connp->conn_rq = q; 1521 connp->conn_wq = WR(q); 1522 1523 if (connp->conn_icmp->icmp_family == AF_INET6) { 1524 /* Build initial header template for transmit */ 1525 rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER); 1526 if ((error = icmp_build_hdrs(connp->conn_icmp)) != 0) { 1527 rw_exit(&connp->conn_icmp->icmp_rwlock); 1528 inet_minor_free(ip_minor_arena_sa, connp->conn_dev); 1529 ipcl_conn_destroy(connp); 1530 return (error); 1531 } 1532 rw_exit(&connp->conn_icmp->icmp_rwlock); 1533 } 1534 1535 1536 q->q_hiwat = is->is_recv_hiwat; 1537 WR(q)->q_hiwat = is->is_xmit_hiwat; 1538 WR(q)->q_lowat = is->is_xmit_lowat; 1539 1540 qprocson(q); 1541 1542 /* Set the Stream head write offset. 
*/ 1543 (void) proto_set_tx_wroff(q, connp, 1544 connp->conn_icmp->icmp_max_hdr_len + is->is_wroff_extra); 1545 (void) proto_set_rx_hiwat(connp->conn_rq, connp, q->q_hiwat); 1546 1547 mutex_enter(&connp->conn_lock); 1548 connp->conn_state_flags &= ~CONN_INCIPIENT; 1549 mutex_exit(&connp->conn_lock); 1550 1551 return (0); 1552 } 1553 1554 /* For /dev/icmp4 aka AF_INET open */ 1555 static int 1556 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1557 { 1558 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET)); 1559 } 1560 1561 /* For /dev/icmp6 aka AF_INET6 open */ 1562 static int 1563 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1564 { 1565 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET6)); 1566 } 1567 1568 /* 1569 * This is the open routine for icmp. It allocates a icmp_t structure for 1570 * the stream and, on the first open of the module, creates an ND table. 1571 */ 1572 /* ARGSUSED */ 1573 static conn_t * 1574 icmp_open(int family, cred_t *credp, int *err, int flags) 1575 { 1576 icmp_t *icmp; 1577 conn_t *connp; 1578 zoneid_t zoneid; 1579 netstack_t *ns; 1580 icmp_stack_t *is; 1581 boolean_t isv6 = B_FALSE; 1582 1583 *err = secpolicy_net_icmpaccess(credp); 1584 if (*err != 0) 1585 return (NULL); 1586 1587 if (family == AF_INET6) 1588 isv6 = B_TRUE; 1589 ns = netstack_find_by_cred(credp); 1590 ASSERT(ns != NULL); 1591 is = ns->netstack_icmp; 1592 ASSERT(is != NULL); 1593 1594 /* 1595 * For exclusive stacks we set the zoneid to zero 1596 * to make ICMP operate as if in the global zone. 1597 */ 1598 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 1599 zoneid = GLOBAL_ZONEID; 1600 else 1601 zoneid = crgetzoneid(credp); 1602 1603 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 1604 1605 connp = ipcl_conn_create(IPCL_RAWIPCONN, flags, ns); 1606 icmp = connp->conn_icmp; 1607 icmp->icmp_v6dst = sin6_null; 1608 1609 /* 1610 * ipcl_conn_create did a netstack_hold. 
Undo the hold that was 1611 * done by netstack_find_by_cred() 1612 */ 1613 netstack_rele(ns); 1614 1615 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1616 ASSERT(connp->conn_ulp == IPPROTO_ICMP); 1617 ASSERT(connp->conn_icmp == icmp); 1618 ASSERT(icmp->icmp_connp == connp); 1619 1620 /* Set the initial state of the stream and the privilege status. */ 1621 icmp->icmp_state = TS_UNBND; 1622 if (isv6) { 1623 icmp->icmp_ipversion = IPV6_VERSION; 1624 icmp->icmp_family = AF_INET6; 1625 connp->conn_ulp = IPPROTO_ICMPV6; 1626 /* May be changed by a SO_PROTOTYPE socket option. */ 1627 icmp->icmp_proto = IPPROTO_ICMPV6; 1628 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1629 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1630 icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; 1631 connp->conn_af_isv6 = B_TRUE; 1632 connp->conn_flags |= IPCL_ISV6; 1633 } else { 1634 icmp->icmp_ipversion = IPV4_VERSION; 1635 icmp->icmp_family = AF_INET; 1636 /* May be changed by a SO_PROTOTYPE socket option. */ 1637 icmp->icmp_proto = IPPROTO_ICMP; 1638 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1639 icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; 1640 connp->conn_af_isv6 = B_FALSE; 1641 connp->conn_flags &= ~IPCL_ISV6; 1642 } 1643 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1644 icmp->icmp_pending_op = -1; 1645 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1646 connp->conn_zoneid = zoneid; 1647 1648 /* 1649 * If the caller has the process-wide flag set, then default to MAC 1650 * exempt mode. This allows read-down to unlabeled hosts. 
1651 */ 1652 if (getpflags(NET_MAC_AWARE, credp) != 0) 1653 connp->conn_mac_exempt = B_TRUE; 1654 1655 connp->conn_ulp_labeled = is_system_labeled(); 1656 1657 icmp->icmp_is = is; 1658 1659 connp->conn_recv = icmp_input; 1660 crhold(credp); 1661 connp->conn_cred = credp; 1662 1663 rw_exit(&icmp->icmp_rwlock); 1664 1665 connp->conn_flow_cntrld = B_FALSE; 1666 return (connp); 1667 } 1668 1669 /* 1670 * Which ICMP options OK to set through T_UNITDATA_REQ... 1671 */ 1672 /* ARGSUSED */ 1673 static boolean_t 1674 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1675 { 1676 return (B_TRUE); 1677 } 1678 1679 /* 1680 * This routine gets default values of certain options whose default 1681 * values are maintained by protcol specific code 1682 */ 1683 /* ARGSUSED */ 1684 int 1685 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1686 { 1687 icmp_t *icmp = Q_TO_ICMP(q); 1688 icmp_stack_t *is = icmp->icmp_is; 1689 int *i1 = (int *)ptr; 1690 1691 switch (level) { 1692 case IPPROTO_IP: 1693 switch (name) { 1694 case IP_MULTICAST_TTL: 1695 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1696 return (sizeof (uchar_t)); 1697 case IP_MULTICAST_LOOP: 1698 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1699 return (sizeof (uchar_t)); 1700 } 1701 break; 1702 case IPPROTO_IPV6: 1703 switch (name) { 1704 case IPV6_MULTICAST_HOPS: 1705 *i1 = IP_DEFAULT_MULTICAST_TTL; 1706 return (sizeof (int)); 1707 case IPV6_MULTICAST_LOOP: 1708 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1709 return (sizeof (int)); 1710 case IPV6_UNICAST_HOPS: 1711 *i1 = is->is_ipv6_hoplimit; 1712 return (sizeof (int)); 1713 } 1714 break; 1715 case IPPROTO_ICMPV6: 1716 switch (name) { 1717 case ICMP6_FILTER: 1718 /* Make it look like "pass all" */ 1719 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1720 return (sizeof (icmp6_filter_t)); 1721 } 1722 break; 1723 } 1724 return (-1); 1725 } 1726 1727 /* 1728 * This routine retrieves the current status of socket options. 1729 * It returns the size of the option retrieved. 
1730 */ 1731 int 1732 icmp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 1733 { 1734 icmp_t *icmp = connp->conn_icmp; 1735 icmp_stack_t *is = icmp->icmp_is; 1736 int *i1 = (int *)ptr; 1737 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1738 int ret = 0; 1739 1740 ASSERT(RW_READ_HELD(&icmp->icmp_rwlock)); 1741 switch (level) { 1742 case SOL_SOCKET: 1743 switch (name) { 1744 case SO_DEBUG: 1745 *i1 = icmp->icmp_debug; 1746 break; 1747 case SO_TYPE: 1748 *i1 = SOCK_RAW; 1749 break; 1750 case SO_PROTOTYPE: 1751 *i1 = icmp->icmp_proto; 1752 break; 1753 case SO_REUSEADDR: 1754 *i1 = icmp->icmp_reuseaddr; 1755 break; 1756 1757 /* 1758 * The following three items are available here, 1759 * but are only meaningful to IP. 1760 */ 1761 case SO_DONTROUTE: 1762 *i1 = icmp->icmp_dontroute; 1763 break; 1764 case SO_USELOOPBACK: 1765 *i1 = icmp->icmp_useloopback; 1766 break; 1767 case SO_BROADCAST: 1768 *i1 = icmp->icmp_broadcast; 1769 break; 1770 1771 case SO_SNDBUF: 1772 ASSERT(icmp->icmp_xmit_hiwat <= INT_MAX); 1773 *i1 = icmp->icmp_xmit_hiwat; 1774 break; 1775 case SO_RCVBUF: 1776 ASSERT(icmp->icmp_recv_hiwat <= INT_MAX); 1777 *i1 = icmp->icmp_recv_hiwat; 1778 break; 1779 case SO_DGRAM_ERRIND: 1780 *i1 = icmp->icmp_dgram_errind; 1781 break; 1782 case SO_TIMESTAMP: 1783 *i1 = icmp->icmp_timestamp; 1784 break; 1785 case SO_MAC_EXEMPT: 1786 *i1 = connp->conn_mac_exempt; 1787 break; 1788 case SO_DOMAIN: 1789 *i1 = icmp->icmp_family; 1790 break; 1791 1792 /* 1793 * Following four not meaningful for icmp 1794 * Action is same as "default" to which we fallthrough 1795 * so we keep them in comments. 1796 * case SO_LINGER: 1797 * case SO_KEEPALIVE: 1798 * case SO_OOBINLINE: 1799 * case SO_ALLZONES: 1800 */ 1801 default: 1802 ret = -1; 1803 goto done; 1804 } 1805 break; 1806 case IPPROTO_IP: 1807 /* 1808 * Only allow IPv4 option processing on IPv4 sockets. 
1809 */ 1810 if (icmp->icmp_family != AF_INET) { 1811 ret = -1; 1812 goto done; 1813 } 1814 1815 switch (name) { 1816 case IP_OPTIONS: 1817 case T_IP_OPTIONS: 1818 /* Options are passed up with each packet */ 1819 ret = 0; 1820 goto done; 1821 case IP_HDRINCL: 1822 *i1 = (int)icmp->icmp_hdrincl; 1823 break; 1824 case IP_TOS: 1825 case T_IP_TOS: 1826 *i1 = (int)icmp->icmp_type_of_service; 1827 break; 1828 case IP_TTL: 1829 *i1 = (int)icmp->icmp_ttl; 1830 break; 1831 case IP_MULTICAST_IF: 1832 /* 0 address if not set */ 1833 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1834 ret = sizeof (ipaddr_t); 1835 goto done; 1836 case IP_MULTICAST_TTL: 1837 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1838 ret = sizeof (uchar_t); 1839 goto done; 1840 case IP_MULTICAST_LOOP: 1841 *ptr = connp->conn_multicast_loop; 1842 ret = sizeof (uint8_t); 1843 goto done; 1844 case IP_BOUND_IF: 1845 /* Zero if not set */ 1846 *i1 = icmp->icmp_bound_if; 1847 break; /* goto sizeof (int) option return */ 1848 case IP_UNSPEC_SRC: 1849 *ptr = icmp->icmp_unspec_source; 1850 break; /* goto sizeof (int) option return */ 1851 case IP_RECVIF: 1852 *ptr = icmp->icmp_recvif; 1853 break; /* goto sizeof (int) option return */ 1854 case IP_BROADCAST_TTL: 1855 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 1856 return (sizeof (uchar_t)); 1857 case IP_RECVPKTINFO: 1858 /* 1859 * This also handles IP_PKTINFO. 1860 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1861 * Differentiation is based on the size of the argument 1862 * passed in. 1863 * This option is handled in IP which will return an 1864 * error for IP_PKTINFO as it's not supported as a 1865 * sticky option. 1866 */ 1867 ret = -EINVAL; 1868 goto done; 1869 /* 1870 * Cannot "get" the value of following options 1871 * at this level. Action is same as "default" to 1872 * which we fallthrough so we keep them in comments. 
1873 * 1874 * case IP_ADD_MEMBERSHIP: 1875 * case IP_DROP_MEMBERSHIP: 1876 * case IP_BLOCK_SOURCE: 1877 * case IP_UNBLOCK_SOURCE: 1878 * case IP_ADD_SOURCE_MEMBERSHIP: 1879 * case IP_DROP_SOURCE_MEMBERSHIP: 1880 * case MCAST_JOIN_GROUP: 1881 * case MCAST_LEAVE_GROUP: 1882 * case MCAST_BLOCK_SOURCE: 1883 * case MCAST_UNBLOCK_SOURCE: 1884 * case MCAST_JOIN_SOURCE_GROUP: 1885 * case MCAST_LEAVE_SOURCE_GROUP: 1886 * case MRT_INIT: 1887 * case MRT_DONE: 1888 * case MRT_ADD_VIF: 1889 * case MRT_DEL_VIF: 1890 * case MRT_ADD_MFC: 1891 * case MRT_DEL_MFC: 1892 * case MRT_VERSION: 1893 * case MRT_ASSERT: 1894 * case IP_SEC_OPT: 1895 * case IP_NEXTHOP: 1896 */ 1897 default: 1898 ret = -1; 1899 goto done; 1900 } 1901 break; 1902 case IPPROTO_IPV6: 1903 /* 1904 * Only allow IPv6 option processing on native IPv6 sockets. 1905 */ 1906 if (icmp->icmp_family != AF_INET6) { 1907 ret = -1; 1908 goto done; 1909 } 1910 switch (name) { 1911 case IPV6_UNICAST_HOPS: 1912 *i1 = (unsigned int)icmp->icmp_ttl; 1913 break; 1914 case IPV6_MULTICAST_IF: 1915 /* 0 index if not set */ 1916 *i1 = icmp->icmp_multicast_if_index; 1917 break; 1918 case IPV6_MULTICAST_HOPS: 1919 *i1 = icmp->icmp_multicast_ttl; 1920 break; 1921 case IPV6_MULTICAST_LOOP: 1922 *i1 = connp->conn_multicast_loop; 1923 break; 1924 case IPV6_BOUND_IF: 1925 /* Zero if not set */ 1926 *i1 = icmp->icmp_bound_if; 1927 break; 1928 case IPV6_UNSPEC_SRC: 1929 *i1 = icmp->icmp_unspec_source; 1930 break; 1931 case IPV6_CHECKSUM: 1932 /* 1933 * Return offset or -1 if no checksum offset. 
1934 * Does not apply to IPPROTO_ICMPV6 1935 */ 1936 if (icmp->icmp_proto == IPPROTO_ICMPV6) { 1937 ret = -1; 1938 goto done; 1939 } 1940 1941 if (icmp->icmp_raw_checksum) { 1942 *i1 = icmp->icmp_checksum_off; 1943 } else { 1944 *i1 = -1; 1945 } 1946 break; 1947 case IPV6_JOIN_GROUP: 1948 case IPV6_LEAVE_GROUP: 1949 case MCAST_JOIN_GROUP: 1950 case MCAST_LEAVE_GROUP: 1951 case MCAST_BLOCK_SOURCE: 1952 case MCAST_UNBLOCK_SOURCE: 1953 case MCAST_JOIN_SOURCE_GROUP: 1954 case MCAST_LEAVE_SOURCE_GROUP: 1955 /* cannot "get" the value for these */ 1956 ret = -1; 1957 goto done; 1958 case IPV6_RECVPKTINFO: 1959 *i1 = icmp->icmp_ip_recvpktinfo; 1960 break; 1961 case IPV6_RECVTCLASS: 1962 *i1 = icmp->icmp_ipv6_recvtclass; 1963 break; 1964 case IPV6_RECVPATHMTU: 1965 *i1 = icmp->icmp_ipv6_recvpathmtu; 1966 break; 1967 case IPV6_V6ONLY: 1968 *i1 = 1; 1969 break; 1970 case IPV6_RECVHOPLIMIT: 1971 *i1 = icmp->icmp_ipv6_recvhoplimit; 1972 break; 1973 case IPV6_RECVHOPOPTS: 1974 *i1 = icmp->icmp_ipv6_recvhopopts; 1975 break; 1976 case IPV6_RECVDSTOPTS: 1977 *i1 = icmp->icmp_ipv6_recvdstopts; 1978 break; 1979 case _OLD_IPV6_RECVDSTOPTS: 1980 *i1 = icmp->icmp_old_ipv6_recvdstopts; 1981 break; 1982 case IPV6_RECVRTHDRDSTOPTS: 1983 *i1 = icmp->icmp_ipv6_recvrtdstopts; 1984 break; 1985 case IPV6_RECVRTHDR: 1986 *i1 = icmp->icmp_ipv6_recvrthdr; 1987 break; 1988 case IPV6_PKTINFO: { 1989 /* XXX assumes that caller has room for max size! 
*/ 1990 struct in6_pktinfo *pkti; 1991 1992 pkti = (struct in6_pktinfo *)ptr; 1993 if (ipp->ipp_fields & IPPF_IFINDEX) 1994 pkti->ipi6_ifindex = ipp->ipp_ifindex; 1995 else 1996 pkti->ipi6_ifindex = 0; 1997 if (ipp->ipp_fields & IPPF_ADDR) 1998 pkti->ipi6_addr = ipp->ipp_addr; 1999 else 2000 pkti->ipi6_addr = ipv6_all_zeros; 2001 ret = sizeof (struct in6_pktinfo); 2002 goto done; 2003 } 2004 case IPV6_NEXTHOP: { 2005 sin6_t *sin6 = (sin6_t *)ptr; 2006 2007 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2008 return (0); 2009 *sin6 = sin6_null; 2010 sin6->sin6_family = AF_INET6; 2011 sin6->sin6_addr = ipp->ipp_nexthop; 2012 ret = (sizeof (sin6_t)); 2013 goto done; 2014 } 2015 case IPV6_HOPOPTS: 2016 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2017 return (0); 2018 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 2019 return (0); 2020 bcopy((char *)ipp->ipp_hopopts + 2021 icmp->icmp_label_len_v6, ptr, 2022 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 2023 if (icmp->icmp_label_len_v6 > 0) { 2024 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2025 ptr[1] = (ipp->ipp_hopoptslen - 2026 icmp->icmp_label_len_v6 + 7) / 8 - 1; 2027 } 2028 ret = (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 2029 goto done; 2030 case IPV6_RTHDRDSTOPTS: 2031 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2032 return (0); 2033 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2034 ret = ipp->ipp_rtdstoptslen; 2035 goto done; 2036 case IPV6_RTHDR: 2037 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2038 return (0); 2039 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2040 ret = ipp->ipp_rthdrlen; 2041 goto done; 2042 case IPV6_DSTOPTS: 2043 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 2044 ret = 0; 2045 goto done; 2046 } 2047 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2048 ret = ipp->ipp_dstoptslen; 2049 goto done; 2050 case IPV6_PATHMTU: 2051 if (!(ipp->ipp_fields & IPPF_PATHMTU)) { 2052 ret = 0; 2053 } else { 2054 ret = ip_fill_mtuinfo( 2055 &icmp->icmp_v6dst.sin6_addr, 0, 2056 (struct ip6_mtuinfo *)ptr, 2057 
is->is_netstack); 2058 } 2059 goto done; 2060 case IPV6_TCLASS: 2061 if (ipp->ipp_fields & IPPF_TCLASS) 2062 *i1 = ipp->ipp_tclass; 2063 else 2064 *i1 = IPV6_FLOW_TCLASS( 2065 IPV6_DEFAULT_VERS_AND_FLOW); 2066 break; 2067 default: 2068 ret = -1; 2069 goto done; 2070 } 2071 break; 2072 case IPPROTO_ICMPV6: 2073 /* 2074 * Only allow IPv6 option processing on native IPv6 sockets. 2075 */ 2076 if (icmp->icmp_family != AF_INET6) { 2077 ret = -1; 2078 } 2079 2080 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2081 ret = -1; 2082 } 2083 2084 switch (name) { 2085 case ICMP6_FILTER: 2086 if (icmp->icmp_filter == NULL) { 2087 /* Make it look like "pass all" */ 2088 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 2089 } else { 2090 (void) bcopy(icmp->icmp_filter, ptr, 2091 sizeof (icmp6_filter_t)); 2092 } 2093 ret = sizeof (icmp6_filter_t); 2094 goto done; 2095 default: 2096 ret = -1; 2097 goto done; 2098 } 2099 default: 2100 ret = -1; 2101 goto done; 2102 } 2103 ret = sizeof (int); 2104 done: 2105 return (ret); 2106 } 2107 2108 /* 2109 * This routine retrieves the current status of socket options. 2110 * It returns the size of the option retrieved. 2111 */ 2112 int 2113 icmp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 2114 { 2115 conn_t *connp = Q_TO_CONN(q); 2116 icmp_t *icmp = connp->conn_icmp; 2117 int err; 2118 2119 rw_enter(&icmp->icmp_rwlock, RW_READER); 2120 err = icmp_opt_get(connp, level, name, ptr); 2121 rw_exit(&icmp->icmp_rwlock); 2122 return (err); 2123 } 2124 2125 int 2126 icmp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2127 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2128 void *thisdg_attrs, boolean_t checkonly) 2129 { 2130 2131 int *i1 = (int *)invalp; 2132 boolean_t onoff = (*i1 == 0) ? 
0 : 1; 2133 icmp_t *icmp = connp->conn_icmp; 2134 icmp_stack_t *is = icmp->icmp_is; 2135 int error; 2136 2137 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 2138 /* 2139 * For fixed length options, no sanity check 2140 * of passed in length is done. It is assumed *_optcom_req() 2141 * routines do the right thing. 2142 */ 2143 switch (level) { 2144 case SOL_SOCKET: 2145 switch (name) { 2146 case SO_DEBUG: 2147 if (!checkonly) 2148 icmp->icmp_debug = onoff; 2149 break; 2150 case SO_PROTOTYPE: 2151 if ((*i1 & 0xFF) != IPPROTO_ICMP && 2152 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 2153 secpolicy_net_rawaccess(cr) != 0) { 2154 *outlenp = 0; 2155 return (EACCES); 2156 } 2157 /* Can't use IPPROTO_RAW with IPv6 */ 2158 if ((*i1 & 0xFF) == IPPROTO_RAW && 2159 icmp->icmp_family == AF_INET6) { 2160 *outlenp = 0; 2161 return (EPROTONOSUPPORT); 2162 } 2163 if (checkonly) { 2164 /* T_CHECK case */ 2165 *(int *)outvalp = (*i1 & 0xFF); 2166 break; 2167 } 2168 icmp->icmp_proto = *i1 & 0xFF; 2169 if ((icmp->icmp_proto == IPPROTO_RAW || 2170 icmp->icmp_proto == IPPROTO_IGMP) && 2171 icmp->icmp_family == AF_INET) 2172 icmp->icmp_hdrincl = 1; 2173 else 2174 icmp->icmp_hdrincl = 0; 2175 2176 if (icmp->icmp_family == AF_INET6 && 2177 icmp->icmp_proto == IPPROTO_ICMPV6) { 2178 /* Set offset for icmp6_cksum */ 2179 icmp->icmp_raw_checksum = 0; 2180 icmp->icmp_checksum_off = 2; 2181 } 2182 if (icmp->icmp_proto == IPPROTO_UDP || 2183 icmp->icmp_proto == IPPROTO_TCP || 2184 icmp->icmp_proto == IPPROTO_SCTP) { 2185 icmp->icmp_no_tp_cksum = 1; 2186 icmp->icmp_sticky_ipp.ipp_fields |= 2187 IPPF_NO_CKSUM; 2188 } else { 2189 icmp->icmp_no_tp_cksum = 0; 2190 icmp->icmp_sticky_ipp.ipp_fields &= 2191 ~IPPF_NO_CKSUM; 2192 } 2193 2194 if (icmp->icmp_filter != NULL && 2195 icmp->icmp_proto != IPPROTO_ICMPV6) { 2196 kmem_free(icmp->icmp_filter, 2197 sizeof (icmp6_filter_t)); 2198 icmp->icmp_filter = NULL; 2199 } 2200 2201 /* Rebuild the header template */ 2202 error = icmp_build_hdrs(icmp); 2203 if (error != 0) { 
2204 *outlenp = 0; 2205 return (error); 2206 } 2207 2208 /* 2209 * For SCTP, we don't use icmp_bind_proto() for 2210 * raw socket binding. Note that we do not need 2211 * to set *outlenp. 2212 * FIXME: how does SCTP work? 2213 */ 2214 if (icmp->icmp_proto == IPPROTO_SCTP) 2215 return (0); 2216 2217 *outlenp = sizeof (int); 2218 *(int *)outvalp = *i1 & 0xFF; 2219 2220 /* Drop lock across the bind operation */ 2221 rw_exit(&icmp->icmp_rwlock); 2222 (void) icmp_bind_proto(connp); 2223 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2224 return (0); 2225 case SO_REUSEADDR: 2226 if (!checkonly) { 2227 icmp->icmp_reuseaddr = onoff; 2228 PASS_OPT_TO_IP(connp); 2229 } 2230 break; 2231 2232 /* 2233 * The following three items are available here, 2234 * but are only meaningful to IP. 2235 */ 2236 case SO_DONTROUTE: 2237 if (!checkonly) { 2238 icmp->icmp_dontroute = onoff; 2239 PASS_OPT_TO_IP(connp); 2240 } 2241 break; 2242 case SO_USELOOPBACK: 2243 if (!checkonly) { 2244 icmp->icmp_useloopback = onoff; 2245 PASS_OPT_TO_IP(connp); 2246 } 2247 break; 2248 case SO_BROADCAST: 2249 if (!checkonly) { 2250 icmp->icmp_broadcast = onoff; 2251 PASS_OPT_TO_IP(connp); 2252 } 2253 break; 2254 2255 case SO_SNDBUF: 2256 if (*i1 > is->is_max_buf) { 2257 *outlenp = 0; 2258 return (ENOBUFS); 2259 } 2260 if (!checkonly) { 2261 if (!IPCL_IS_NONSTR(connp)) { 2262 connp->conn_wq->q_hiwat = *i1; 2263 } 2264 icmp->icmp_xmit_hiwat = *i1; 2265 } 2266 break; 2267 case SO_RCVBUF: 2268 if (*i1 > is->is_max_buf) { 2269 *outlenp = 0; 2270 return (ENOBUFS); 2271 } 2272 if (!checkonly) { 2273 icmp->icmp_recv_hiwat = *i1; 2274 rw_exit(&icmp->icmp_rwlock); 2275 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2276 *i1); 2277 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2278 } 2279 break; 2280 case SO_DGRAM_ERRIND: 2281 if (!checkonly) 2282 icmp->icmp_dgram_errind = onoff; 2283 break; 2284 case SO_ALLZONES: 2285 /* 2286 * "soft" error (negative) 2287 * option not handled at this level 2288 * Note: Do not modify *outlenp 
2289 */ 2290 return (-EINVAL); 2291 case SO_TIMESTAMP: 2292 if (!checkonly) { 2293 icmp->icmp_timestamp = onoff; 2294 } 2295 break; 2296 case SO_MAC_EXEMPT: 2297 /* 2298 * "soft" error (negative) 2299 * option not handled at this level 2300 * Note: Do not modify *outlenp 2301 */ 2302 return (-EINVAL); 2303 /* 2304 * Following three not meaningful for icmp 2305 * Action is same as "default" so we keep them 2306 * in comments. 2307 * case SO_LINGER: 2308 * case SO_KEEPALIVE: 2309 * case SO_OOBINLINE: 2310 */ 2311 default: 2312 *outlenp = 0; 2313 return (EINVAL); 2314 } 2315 break; 2316 case IPPROTO_IP: 2317 /* 2318 * Only allow IPv4 option processing on IPv4 sockets. 2319 */ 2320 if (icmp->icmp_family != AF_INET) { 2321 *outlenp = 0; 2322 return (ENOPROTOOPT); 2323 } 2324 switch (name) { 2325 case IP_OPTIONS: 2326 case T_IP_OPTIONS: 2327 /* Save options for use by IP. */ 2328 if ((inlen & 0x3) || 2329 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2330 *outlenp = 0; 2331 return (EINVAL); 2332 } 2333 if (checkonly) 2334 break; 2335 2336 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2337 &icmp->icmp_ip_snd_options_len, 2338 icmp->icmp_label_len, invalp, inlen)) { 2339 *outlenp = 0; 2340 return (ENOMEM); 2341 } 2342 2343 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2344 icmp->icmp_ip_snd_options_len; 2345 rw_exit(&icmp->icmp_rwlock); 2346 (void) proto_set_tx_wroff(connp->conn_rq == NULL ? NULL: 2347 RD(connp->conn_rq), connp, 2348 icmp->icmp_max_hdr_len + is->is_wroff_extra); 2349 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2350 break; 2351 case IP_HDRINCL: 2352 if (!checkonly) 2353 icmp->icmp_hdrincl = onoff; 2354 break; 2355 case IP_TOS: 2356 case T_IP_TOS: 2357 if (!checkonly) { 2358 icmp->icmp_type_of_service = (uint8_t)*i1; 2359 } 2360 break; 2361 case IP_TTL: 2362 if (!checkonly) { 2363 icmp->icmp_ttl = (uint8_t)*i1; 2364 } 2365 break; 2366 case IP_MULTICAST_IF: 2367 /* 2368 * TODO should check OPTMGMT reply and undo this if 2369 * there is an error. 
2370 */ 2371 if (!checkonly) { 2372 icmp->icmp_multicast_if_addr = *i1; 2373 PASS_OPT_TO_IP(connp); 2374 } 2375 break; 2376 case IP_MULTICAST_TTL: 2377 if (!checkonly) 2378 icmp->icmp_multicast_ttl = *invalp; 2379 break; 2380 case IP_MULTICAST_LOOP: 2381 if (!checkonly) { 2382 connp->conn_multicast_loop = 2383 (*invalp == 0) ? 0 : 1; 2384 PASS_OPT_TO_IP(connp); 2385 } 2386 break; 2387 case IP_BOUND_IF: 2388 if (!checkonly) { 2389 icmp->icmp_bound_if = *i1; 2390 PASS_OPT_TO_IP(connp); 2391 } 2392 break; 2393 case IP_UNSPEC_SRC: 2394 if (!checkonly) { 2395 icmp->icmp_unspec_source = onoff; 2396 PASS_OPT_TO_IP(connp); 2397 } 2398 break; 2399 case IP_BROADCAST_TTL: 2400 if (!checkonly) 2401 connp->conn_broadcast_ttl = *invalp; 2402 break; 2403 case IP_RECVIF: 2404 if (!checkonly) { 2405 icmp->icmp_recvif = onoff; 2406 } 2407 /* 2408 * pass to ip 2409 */ 2410 return (-EINVAL); 2411 case IP_PKTINFO: { 2412 /* 2413 * This also handles IP_RECVPKTINFO. 2414 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2415 * Differentiation is based on the size of the argument 2416 * passed in. 2417 */ 2418 struct in_pktinfo *pktinfop; 2419 ip4_pkt_t *attr_pktinfop; 2420 2421 if (checkonly) 2422 break; 2423 2424 if (inlen == sizeof (int)) { 2425 /* 2426 * This is IP_RECVPKTINFO option. 2427 * Keep a local copy of wether this option is 2428 * set or not and pass it down to IP for 2429 * processing. 
2430 */ 2431 icmp->icmp_ip_recvpktinfo = onoff; 2432 return (-EINVAL); 2433 } 2434 2435 2436 if (inlen != sizeof (struct in_pktinfo)) { 2437 return (EINVAL); 2438 } 2439 2440 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2441 == NULL) { 2442 /* 2443 * sticky option is not supported 2444 */ 2445 return (EINVAL); 2446 } 2447 2448 pktinfop = (struct in_pktinfo *)invalp; 2449 2450 /* 2451 * Atleast one of the values should be specified 2452 */ 2453 if (pktinfop->ipi_ifindex == 0 && 2454 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2455 return (EINVAL); 2456 } 2457 2458 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2459 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2460 } 2461 break; 2462 case IP_ADD_MEMBERSHIP: 2463 case IP_DROP_MEMBERSHIP: 2464 case IP_BLOCK_SOURCE: 2465 case IP_UNBLOCK_SOURCE: 2466 case IP_ADD_SOURCE_MEMBERSHIP: 2467 case IP_DROP_SOURCE_MEMBERSHIP: 2468 case MCAST_JOIN_GROUP: 2469 case MCAST_LEAVE_GROUP: 2470 case MCAST_BLOCK_SOURCE: 2471 case MCAST_UNBLOCK_SOURCE: 2472 case MCAST_JOIN_SOURCE_GROUP: 2473 case MCAST_LEAVE_SOURCE_GROUP: 2474 case MRT_INIT: 2475 case MRT_DONE: 2476 case MRT_ADD_VIF: 2477 case MRT_DEL_VIF: 2478 case MRT_ADD_MFC: 2479 case MRT_DEL_MFC: 2480 case MRT_VERSION: 2481 case MRT_ASSERT: 2482 case IP_SEC_OPT: 2483 case IP_NEXTHOP: 2484 /* 2485 * "soft" error (negative) 2486 * option not handled at this level 2487 * Note: Do not modify *outlenp 2488 */ 2489 return (-EINVAL); 2490 default: 2491 *outlenp = 0; 2492 return (EINVAL); 2493 } 2494 break; 2495 case IPPROTO_IPV6: { 2496 ip6_pkt_t *ipp; 2497 boolean_t sticky; 2498 2499 if (icmp->icmp_family != AF_INET6) { 2500 *outlenp = 0; 2501 return (ENOPROTOOPT); 2502 } 2503 /* 2504 * Deal with both sticky options and ancillary data 2505 */ 2506 if (thisdg_attrs == NULL) { 2507 /* sticky options, or none */ 2508 ipp = &icmp->icmp_sticky_ipp; 2509 sticky = B_TRUE; 2510 } else { 2511 /* ancillary data */ 2512 ipp = (ip6_pkt_t *)thisdg_attrs; 2513 sticky = B_FALSE; 
2514 } 2515 2516 switch (name) { 2517 case IPV6_MULTICAST_IF: 2518 if (!checkonly) { 2519 icmp->icmp_multicast_if_index = *i1; 2520 PASS_OPT_TO_IP(connp); 2521 } 2522 break; 2523 case IPV6_UNICAST_HOPS: 2524 /* -1 means use default */ 2525 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2526 *outlenp = 0; 2527 return (EINVAL); 2528 } 2529 if (!checkonly) { 2530 if (*i1 == -1) { 2531 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2532 is->is_ipv6_hoplimit; 2533 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2534 /* Pass modified value to IP. */ 2535 *i1 = ipp->ipp_hoplimit; 2536 } else { 2537 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2538 (uint8_t)*i1; 2539 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2540 } 2541 /* Rebuild the header template */ 2542 error = icmp_build_hdrs(icmp); 2543 if (error != 0) { 2544 *outlenp = 0; 2545 return (error); 2546 } 2547 } 2548 break; 2549 case IPV6_MULTICAST_HOPS: 2550 /* -1 means use default */ 2551 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2552 *outlenp = 0; 2553 return (EINVAL); 2554 } 2555 if (!checkonly) { 2556 if (*i1 == -1) { 2557 icmp->icmp_multicast_ttl = 2558 ipp->ipp_multicast_hops = 2559 IP_DEFAULT_MULTICAST_TTL; 2560 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2561 /* Pass modified value to IP. */ 2562 *i1 = icmp->icmp_multicast_ttl; 2563 } else { 2564 icmp->icmp_multicast_ttl = 2565 ipp->ipp_multicast_hops = 2566 (uint8_t)*i1; 2567 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2568 } 2569 } 2570 break; 2571 case IPV6_MULTICAST_LOOP: 2572 if (*i1 != 0 && *i1 != 1) { 2573 *outlenp = 0; 2574 return (EINVAL); 2575 } 2576 if (!checkonly) { 2577 connp->conn_multicast_loop = *i1; 2578 PASS_OPT_TO_IP(connp); 2579 } 2580 break; 2581 case IPV6_CHECKSUM: 2582 /* 2583 * Integer offset into the user data of where the 2584 * checksum is located. 2585 * Offset of -1 disables option. 2586 * Does not apply to IPPROTO_ICMPV6. 
2587 */ 2588 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2589 *outlenp = 0; 2590 return (EINVAL); 2591 } 2592 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2593 /* Negative or not 16 bit aligned offset */ 2594 *outlenp = 0; 2595 return (EINVAL); 2596 } 2597 if (checkonly) 2598 break; 2599 2600 if (*i1 == -1) { 2601 icmp->icmp_raw_checksum = 0; 2602 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2603 } else { 2604 icmp->icmp_raw_checksum = 1; 2605 icmp->icmp_checksum_off = *i1; 2606 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2607 } 2608 /* Rebuild the header template */ 2609 error = icmp_build_hdrs(icmp); 2610 if (error != 0) { 2611 *outlenp = 0; 2612 return (error); 2613 } 2614 break; 2615 case IPV6_JOIN_GROUP: 2616 case IPV6_LEAVE_GROUP: 2617 case MCAST_JOIN_GROUP: 2618 case MCAST_LEAVE_GROUP: 2619 case MCAST_BLOCK_SOURCE: 2620 case MCAST_UNBLOCK_SOURCE: 2621 case MCAST_JOIN_SOURCE_GROUP: 2622 case MCAST_LEAVE_SOURCE_GROUP: 2623 /* 2624 * "soft" error (negative) 2625 * option not handled at this level 2626 * Note: Do not modify *outlenp 2627 */ 2628 return (-EINVAL); 2629 case IPV6_BOUND_IF: 2630 if (!checkonly) { 2631 icmp->icmp_bound_if = *i1; 2632 PASS_OPT_TO_IP(connp); 2633 } 2634 break; 2635 case IPV6_UNSPEC_SRC: 2636 if (!checkonly) { 2637 icmp->icmp_unspec_source = onoff; 2638 PASS_OPT_TO_IP(connp); 2639 } 2640 break; 2641 case IPV6_RECVTCLASS: 2642 if (!checkonly) { 2643 icmp->icmp_ipv6_recvtclass = onoff; 2644 PASS_OPT_TO_IP(connp); 2645 } 2646 break; 2647 /* 2648 * Set boolean switches for ancillary data delivery 2649 */ 2650 case IPV6_RECVPKTINFO: 2651 if (!checkonly) { 2652 icmp->icmp_ip_recvpktinfo = onoff; 2653 PASS_OPT_TO_IP(connp); 2654 } 2655 break; 2656 case IPV6_RECVPATHMTU: 2657 if (!checkonly) { 2658 icmp->icmp_ipv6_recvpathmtu = onoff; 2659 PASS_OPT_TO_IP(connp); 2660 } 2661 break; 2662 case IPV6_RECVHOPLIMIT: 2663 if (!checkonly) { 2664 icmp->icmp_ipv6_recvhoplimit = onoff; 2665 PASS_OPT_TO_IP(connp); 2666 } 2667 break; 2668 case 
IPV6_RECVHOPOPTS: 2669 if (!checkonly) { 2670 icmp->icmp_ipv6_recvhopopts = onoff; 2671 PASS_OPT_TO_IP(connp); 2672 } 2673 break; 2674 case IPV6_RECVDSTOPTS: 2675 if (!checkonly) { 2676 icmp->icmp_ipv6_recvdstopts = onoff; 2677 PASS_OPT_TO_IP(connp); 2678 } 2679 break; 2680 case _OLD_IPV6_RECVDSTOPTS: 2681 if (!checkonly) 2682 icmp->icmp_old_ipv6_recvdstopts = onoff; 2683 break; 2684 case IPV6_RECVRTHDRDSTOPTS: 2685 if (!checkonly) { 2686 icmp->icmp_ipv6_recvrtdstopts = onoff; 2687 PASS_OPT_TO_IP(connp); 2688 } 2689 break; 2690 case IPV6_RECVRTHDR: 2691 if (!checkonly) { 2692 icmp->icmp_ipv6_recvrthdr = onoff; 2693 PASS_OPT_TO_IP(connp); 2694 } 2695 break; 2696 /* 2697 * Set sticky options or ancillary data. 2698 * If sticky options, (re)build any extension headers 2699 * that might be needed as a result. 2700 */ 2701 case IPV6_PKTINFO: 2702 /* 2703 * The source address and ifindex are verified 2704 * in ip_opt_set(). For ancillary data the 2705 * source address is checked in ip_wput_v6. 2706 */ 2707 if (inlen != 0 && inlen != 2708 sizeof (struct in6_pktinfo)) { 2709 return (EINVAL); 2710 } 2711 if (checkonly) 2712 break; 2713 2714 if (inlen == 0) { 2715 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2716 ipp->ipp_sticky_ignored |= 2717 (IPPF_IFINDEX|IPPF_ADDR); 2718 } else { 2719 struct in6_pktinfo *pkti; 2720 2721 pkti = (struct in6_pktinfo *)invalp; 2722 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2723 ipp->ipp_addr = pkti->ipi6_addr; 2724 if (ipp->ipp_ifindex != 0) 2725 ipp->ipp_fields |= IPPF_IFINDEX; 2726 else 2727 ipp->ipp_fields &= ~IPPF_IFINDEX; 2728 if (!IN6_IS_ADDR_UNSPECIFIED( 2729 &ipp->ipp_addr)) 2730 ipp->ipp_fields |= IPPF_ADDR; 2731 else 2732 ipp->ipp_fields &= ~IPPF_ADDR; 2733 } 2734 if (sticky) { 2735 error = icmp_build_hdrs(icmp); 2736 if (error != 0) 2737 return (error); 2738 PASS_OPT_TO_IP(connp); 2739 } 2740 break; 2741 case IPV6_HOPLIMIT: 2742 /* This option can only be used as ancillary data. 
*/ 2743 if (sticky) 2744 return (EINVAL); 2745 if (inlen != 0 && inlen != sizeof (int)) 2746 return (EINVAL); 2747 if (checkonly) 2748 break; 2749 2750 if (inlen == 0) { 2751 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2752 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2753 } else { 2754 if (*i1 > 255 || *i1 < -1) 2755 return (EINVAL); 2756 if (*i1 == -1) 2757 ipp->ipp_hoplimit = 2758 is->is_ipv6_hoplimit; 2759 else 2760 ipp->ipp_hoplimit = *i1; 2761 ipp->ipp_fields |= IPPF_HOPLIMIT; 2762 } 2763 break; 2764 case IPV6_TCLASS: 2765 /* 2766 * IPV6_RECVTCLASS accepts -1 as use kernel default 2767 * and [0, 255] as the actualy traffic class. 2768 */ 2769 if (inlen != 0 && inlen != sizeof (int)) { 2770 return (EINVAL); 2771 } 2772 if (checkonly) 2773 break; 2774 2775 if (inlen == 0) { 2776 ipp->ipp_fields &= ~IPPF_TCLASS; 2777 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2778 } else { 2779 if (*i1 >= 256 || *i1 < -1) 2780 return (EINVAL); 2781 if (*i1 == -1) { 2782 ipp->ipp_tclass = 2783 IPV6_FLOW_TCLASS( 2784 IPV6_DEFAULT_VERS_AND_FLOW); 2785 } else { 2786 ipp->ipp_tclass = *i1; 2787 } 2788 ipp->ipp_fields |= IPPF_TCLASS; 2789 } 2790 if (sticky) { 2791 error = icmp_build_hdrs(icmp); 2792 if (error != 0) 2793 return (error); 2794 } 2795 break; 2796 case IPV6_NEXTHOP: 2797 /* 2798 * IP will verify that the nexthop is reachable 2799 * and fail for sticky options. 
2800 */ 2801 if (inlen != 0 && inlen != sizeof (sin6_t)) { 2802 return (EINVAL); 2803 } 2804 if (checkonly) 2805 break; 2806 2807 if (inlen == 0) { 2808 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2809 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2810 } else { 2811 sin6_t *sin6 = (sin6_t *)invalp; 2812 2813 if (sin6->sin6_family != AF_INET6) { 2814 return (EAFNOSUPPORT); 2815 } 2816 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 2817 return (EADDRNOTAVAIL); 2818 } 2819 ipp->ipp_nexthop = sin6->sin6_addr; 2820 if (!IN6_IS_ADDR_UNSPECIFIED( 2821 &ipp->ipp_nexthop)) 2822 ipp->ipp_fields |= IPPF_NEXTHOP; 2823 else 2824 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2825 } 2826 if (sticky) { 2827 error = icmp_build_hdrs(icmp); 2828 if (error != 0) 2829 return (error); 2830 PASS_OPT_TO_IP(connp); 2831 } 2832 break; 2833 case IPV6_HOPOPTS: { 2834 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2835 /* 2836 * Sanity checks - minimum size, size a multiple of 2837 * eight bytes, and matching size passed in. 2838 */ 2839 if (inlen != 0 && 2840 inlen != (8 * (hopts->ip6h_len + 1))) { 2841 return (EINVAL); 2842 } 2843 2844 if (checkonly) 2845 break; 2846 error = optcom_pkt_set(invalp, inlen, sticky, 2847 (uchar_t **)&ipp->ipp_hopopts, 2848 &ipp->ipp_hopoptslen, 2849 sticky ? icmp->icmp_label_len_v6 : 0); 2850 if (error != 0) 2851 return (error); 2852 if (ipp->ipp_hopoptslen == 0) { 2853 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2854 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2855 } else { 2856 ipp->ipp_fields |= IPPF_HOPOPTS; 2857 } 2858 if (sticky) { 2859 error = icmp_build_hdrs(icmp); 2860 if (error != 0) 2861 return (error); 2862 } 2863 break; 2864 } 2865 case IPV6_RTHDRDSTOPTS: { 2866 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2867 2868 /* 2869 * Sanity checks - minimum size, size a multiple of 2870 * eight bytes, and matching size passed in. 
2871 */ 2872 if (inlen != 0 && 2873 inlen != (8 * (dopts->ip6d_len + 1))) 2874 return (EINVAL); 2875 2876 if (checkonly) 2877 break; 2878 2879 if (inlen == 0) { 2880 if (sticky && 2881 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2882 kmem_free(ipp->ipp_rtdstopts, 2883 ipp->ipp_rtdstoptslen); 2884 ipp->ipp_rtdstopts = NULL; 2885 ipp->ipp_rtdstoptslen = 0; 2886 } 2887 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2888 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2889 } else { 2890 error = optcom_pkt_set(invalp, inlen, sticky, 2891 (uchar_t **)&ipp->ipp_rtdstopts, 2892 &ipp->ipp_rtdstoptslen, 0); 2893 if (error != 0) 2894 return (error); 2895 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2896 } 2897 if (sticky) { 2898 error = icmp_build_hdrs(icmp); 2899 if (error != 0) 2900 return (error); 2901 } 2902 break; 2903 } 2904 case IPV6_DSTOPTS: { 2905 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2906 2907 /* 2908 * Sanity checks - minimum size, size a multiple of 2909 * eight bytes, and matching size passed in. 2910 */ 2911 if (inlen != 0 && 2912 inlen != (8 * (dopts->ip6d_len + 1))) 2913 return (EINVAL); 2914 2915 if (checkonly) 2916 break; 2917 2918 if (inlen == 0) { 2919 if (sticky && 2920 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2921 kmem_free(ipp->ipp_dstopts, 2922 ipp->ipp_dstoptslen); 2923 ipp->ipp_dstopts = NULL; 2924 ipp->ipp_dstoptslen = 0; 2925 } 2926 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2927 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2928 } else { 2929 error = optcom_pkt_set(invalp, inlen, sticky, 2930 (uchar_t **)&ipp->ipp_dstopts, 2931 &ipp->ipp_dstoptslen, 0); 2932 if (error != 0) 2933 return (error); 2934 ipp->ipp_fields |= IPPF_DSTOPTS; 2935 } 2936 if (sticky) { 2937 error = icmp_build_hdrs(icmp); 2938 if (error != 0) 2939 return (error); 2940 } 2941 break; 2942 } 2943 case IPV6_RTHDR: { 2944 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2945 2946 /* 2947 * Sanity checks - minimum size, size a multiple of 2948 * eight bytes, and matching size passed in. 
2949 */ 2950 if (inlen != 0 && 2951 inlen != (8 * (rt->ip6r_len + 1))) 2952 return (EINVAL); 2953 2954 if (checkonly) 2955 break; 2956 2957 if (inlen == 0) { 2958 if (sticky && 2959 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2960 kmem_free(ipp->ipp_rthdr, 2961 ipp->ipp_rthdrlen); 2962 ipp->ipp_rthdr = NULL; 2963 ipp->ipp_rthdrlen = 0; 2964 } 2965 ipp->ipp_fields &= ~IPPF_RTHDR; 2966 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2967 } else { 2968 error = optcom_pkt_set(invalp, inlen, sticky, 2969 (uchar_t **)&ipp->ipp_rthdr, 2970 &ipp->ipp_rthdrlen, 0); 2971 if (error != 0) 2972 return (error); 2973 ipp->ipp_fields |= IPPF_RTHDR; 2974 } 2975 if (sticky) { 2976 error = icmp_build_hdrs(icmp); 2977 if (error != 0) 2978 return (error); 2979 } 2980 break; 2981 } 2982 2983 case IPV6_DONTFRAG: 2984 if (checkonly) 2985 break; 2986 2987 if (onoff) { 2988 ipp->ipp_fields |= IPPF_DONTFRAG; 2989 } else { 2990 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2991 } 2992 break; 2993 2994 case IPV6_USE_MIN_MTU: 2995 if (inlen != sizeof (int)) 2996 return (EINVAL); 2997 2998 if (*i1 < -1 || *i1 > 1) 2999 return (EINVAL); 3000 3001 if (checkonly) 3002 break; 3003 3004 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3005 ipp->ipp_use_min_mtu = *i1; 3006 break; 3007 3008 /* 3009 * This option can't be set. Its only returned via 3010 * getsockopt() or ancillary data. 3011 */ 3012 case IPV6_PATHMTU: 3013 return (EINVAL); 3014 3015 case IPV6_SEC_OPT: 3016 case IPV6_SRC_PREFERENCES: 3017 case IPV6_V6ONLY: 3018 /* Handled at IP level */ 3019 return (-EINVAL); 3020 default: 3021 *outlenp = 0; 3022 return (EINVAL); 3023 } 3024 break; 3025 } /* end IPPROTO_IPV6 */ 3026 3027 case IPPROTO_ICMPV6: 3028 /* 3029 * Only allow IPv6 option processing on IPv6 sockets. 
3030 */ 3031 if (icmp->icmp_family != AF_INET6) { 3032 *outlenp = 0; 3033 return (ENOPROTOOPT); 3034 } 3035 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 3036 *outlenp = 0; 3037 return (ENOPROTOOPT); 3038 } 3039 switch (name) { 3040 case ICMP6_FILTER: 3041 if (!checkonly) { 3042 if ((inlen != 0) && 3043 (inlen != sizeof (icmp6_filter_t))) 3044 return (EINVAL); 3045 3046 if (inlen == 0) { 3047 if (icmp->icmp_filter != NULL) { 3048 kmem_free(icmp->icmp_filter, 3049 sizeof (icmp6_filter_t)); 3050 icmp->icmp_filter = NULL; 3051 } 3052 } else { 3053 if (icmp->icmp_filter == NULL) { 3054 icmp->icmp_filter = kmem_alloc( 3055 sizeof (icmp6_filter_t), 3056 KM_NOSLEEP); 3057 if (icmp->icmp_filter == NULL) { 3058 *outlenp = 0; 3059 return (ENOBUFS); 3060 } 3061 } 3062 (void) bcopy(invalp, icmp->icmp_filter, 3063 inlen); 3064 } 3065 } 3066 break; 3067 3068 default: 3069 *outlenp = 0; 3070 return (EINVAL); 3071 } 3072 break; 3073 default: 3074 *outlenp = 0; 3075 return (EINVAL); 3076 } 3077 /* 3078 * Common case of OK return with outval same as inval. 3079 */ 3080 if (invalp != outvalp) { 3081 /* don't trust bcopy for identical src/dst */ 3082 (void) bcopy(invalp, outvalp, inlen); 3083 } 3084 *outlenp = inlen; 3085 return (0); 3086 } 3087 3088 /* This routine sets socket options. */ 3089 /* ARGSUSED */ 3090 int 3091 icmp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3092 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3093 void *thisdg_attrs, cred_t *cr) 3094 { 3095 boolean_t checkonly; 3096 int error; 3097 3098 error = 0; 3099 switch (optset_context) { 3100 case SETFN_OPTCOM_CHECKONLY: 3101 checkonly = B_TRUE; 3102 /* 3103 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3104 * inlen != 0 implies value supplied and 3105 * we have to "pretend" to set it. 3106 * inlen == 0 implies that there is no 3107 * value part in T_CHECK request and just validation 3108 * done elsewhere should be enough, we just return here. 
3109 */ 3110 if (inlen == 0) { 3111 *outlenp = 0; 3112 error = 0; 3113 goto done; 3114 } 3115 break; 3116 case SETFN_OPTCOM_NEGOTIATE: 3117 checkonly = B_FALSE; 3118 break; 3119 case SETFN_UD_NEGOTIATE: 3120 case SETFN_CONN_NEGOTIATE: 3121 checkonly = B_FALSE; 3122 /* 3123 * Negotiating local and "association-related" options 3124 * through T_UNITDATA_REQ. 3125 * 3126 * Following routine can filter out ones we do not 3127 * want to be "set" this way. 3128 */ 3129 if (!icmp_opt_allow_udr_set(level, name)) { 3130 *outlenp = 0; 3131 error = EINVAL; 3132 goto done; 3133 } 3134 break; 3135 default: 3136 /* 3137 * We should never get here 3138 */ 3139 *outlenp = 0; 3140 error = EINVAL; 3141 goto done; 3142 } 3143 3144 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3145 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3146 error = icmp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3147 outvalp, cr, thisdg_attrs, checkonly); 3148 3149 done: 3150 return (error); 3151 } 3152 3153 /* This routine sets socket options. */ 3154 /* ARGSUSED */ 3155 int 3156 icmp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3157 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3158 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3159 { 3160 conn_t *connp = Q_TO_CONN(q); 3161 icmp_t *icmp; 3162 int error; 3163 3164 icmp = connp->conn_icmp; 3165 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3166 error = icmp_opt_set(connp, optset_context, level, name, inlen, invalp, 3167 outlenp, outvalp, thisdg_attrs, cr); 3168 rw_exit(&icmp->icmp_rwlock); 3169 return (error); 3170 } 3171 3172 /* 3173 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 3174 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 3175 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 3176 * headers. 3177 * Returns failure if can't allocate memory. 
3178 */ 3179 static int 3180 icmp_build_hdrs(icmp_t *icmp) 3181 { 3182 icmp_stack_t *is = icmp->icmp_is; 3183 uchar_t *hdrs; 3184 uint_t hdrs_len; 3185 ip6_t *ip6h; 3186 ip6i_t *ip6i; 3187 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 3188 3189 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 3190 hdrs_len = ip_total_hdrs_len_v6(ipp); 3191 ASSERT(hdrs_len != 0); 3192 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 3193 /* Need to reallocate */ 3194 if (hdrs_len != 0) { 3195 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3196 if (hdrs == NULL) 3197 return (ENOMEM); 3198 } else { 3199 hdrs = NULL; 3200 } 3201 if (icmp->icmp_sticky_hdrs_len != 0) { 3202 kmem_free(icmp->icmp_sticky_hdrs, 3203 icmp->icmp_sticky_hdrs_len); 3204 } 3205 icmp->icmp_sticky_hdrs = hdrs; 3206 icmp->icmp_sticky_hdrs_len = hdrs_len; 3207 } 3208 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 3209 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 3210 3211 /* Set header fields not in ipp */ 3212 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3213 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 3214 ip6h = (ip6_t *)&ip6i[1]; 3215 3216 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 3217 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 3218 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 3219 } 3220 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 3221 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 3222 } 3223 } else { 3224 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 3225 } 3226 3227 if (!(ipp->ipp_fields & IPPF_ADDR)) 3228 ip6h->ip6_src = icmp->icmp_v6src; 3229 3230 /* Try to get everything in a single mblk */ 3231 if (hdrs_len > icmp->icmp_max_hdr_len) { 3232 icmp->icmp_max_hdr_len = hdrs_len; 3233 rw_exit(&icmp->icmp_rwlock); 3234 (void) proto_set_tx_wroff(icmp->icmp_connp->conn_rq, 3235 icmp->icmp_connp, 3236 icmp->icmp_max_hdr_len + is->is_wroff_extra); 3237 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3238 } 3239 return (0); 3240 } 3241 3242 /* 3243 * This routine retrieves the value of an ND variable in a icmpparam_t 3244 * structure. 
It is called through nd_getset when a user reads the 3245 * variable. 3246 */ 3247 /* ARGSUSED */ 3248 static int 3249 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3250 { 3251 icmpparam_t *icmppa = (icmpparam_t *)cp; 3252 3253 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 3254 return (0); 3255 } 3256 3257 /* 3258 * Walk through the param array specified registering each element with the 3259 * named dispatch (ND) handler. 3260 */ 3261 static boolean_t 3262 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) 3263 { 3264 for (; cnt-- > 0; icmppa++) { 3265 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 3266 if (!nd_load(ndp, icmppa->icmp_param_name, 3267 icmp_param_get, icmp_param_set, 3268 (caddr_t)icmppa)) { 3269 nd_free(ndp); 3270 return (B_FALSE); 3271 } 3272 } 3273 } 3274 if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL, 3275 NULL)) { 3276 nd_free(ndp); 3277 return (B_FALSE); 3278 } 3279 return (B_TRUE); 3280 } 3281 3282 /* This routine sets an ND variable in a icmpparam_t structure. */ 3283 /* ARGSUSED */ 3284 static int 3285 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3286 { 3287 long new_value; 3288 icmpparam_t *icmppa = (icmpparam_t *)cp; 3289 3290 /* 3291 * Fail the request if the new value does not lie within the 3292 * required bounds. 
3293 */ 3294 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3295 new_value < icmppa->icmp_param_min || 3296 new_value > icmppa->icmp_param_max) { 3297 return (EINVAL); 3298 } 3299 /* Set the new value */ 3300 icmppa->icmp_param_value = new_value; 3301 return (0); 3302 } 3303 static void 3304 icmp_queue_fallback(icmp_t *icmp, mblk_t *mp) 3305 { 3306 ASSERT(MUTEX_HELD(&icmp->icmp_recv_lock)); 3307 if (IPCL_IS_NONSTR(icmp->icmp_connp)) { 3308 /* 3309 * fallback has started but messages have not been moved yet 3310 */ 3311 if (icmp->icmp_fallback_queue_head == NULL) { 3312 ASSERT(icmp->icmp_fallback_queue_tail == NULL); 3313 icmp->icmp_fallback_queue_head = mp; 3314 icmp->icmp_fallback_queue_tail = mp; 3315 } else { 3316 ASSERT(icmp->icmp_fallback_queue_tail != NULL); 3317 icmp->icmp_fallback_queue_tail->b_next = mp; 3318 icmp->icmp_fallback_queue_tail = mp; 3319 } 3320 mutex_exit(&icmp->icmp_recv_lock); 3321 } else { 3322 /* 3323 * no more fallbacks possible, ok to drop lock. 3324 */ 3325 mutex_exit(&icmp->icmp_recv_lock); 3326 putnext(icmp->icmp_connp->conn_rq, mp); 3327 } 3328 } 3329 3330 /*ARGSUSED2*/ 3331 static void 3332 icmp_input(void *arg1, mblk_t *mp, void *arg2) 3333 { 3334 conn_t *connp = (conn_t *)arg1; 3335 struct T_unitdata_ind *tudi; 3336 uchar_t *rptr; 3337 icmp_t *icmp; 3338 icmp_stack_t *is; 3339 sin_t *sin; 3340 sin6_t *sin6; 3341 ip6_t *ip6h; 3342 ip6i_t *ip6i; 3343 mblk_t *mp1; 3344 int hdr_len; 3345 ipha_t *ipha; 3346 int udi_size; /* Size of T_unitdata_ind */ 3347 uint_t ipvers; 3348 ip6_pkt_t ipp; 3349 uint8_t nexthdr; 3350 ip_pktinfo_t *pinfo = NULL; 3351 mblk_t *options_mp = NULL; 3352 uint_t icmp_opt = 0; 3353 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 3354 uint_t hopstrip; 3355 int error; 3356 3357 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 3358 3359 icmp = connp->conn_icmp; 3360 is = icmp->icmp_is; 3361 rptr = mp->b_rptr; 3362 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3363 ASSERT(OK_32PTR(rptr)); 3364 3365 /* 3366 * IP 
should have prepended the options data in an M_CTL 3367 * Check M_CTL "type" to make sure are not here bcos of 3368 * a valid ICMP message 3369 */ 3370 if (DB_TYPE(mp) == M_CTL) { 3371 /* 3372 * FIXME: does IP still do this? 3373 * IP sends up the IPSEC_IN message for handling IPSEC 3374 * policy at the TCP level. We don't need it here. 3375 */ 3376 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 3377 mp1 = mp->b_cont; 3378 freeb(mp); 3379 mp = mp1; 3380 rptr = mp->b_rptr; 3381 } else if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3382 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3383 IN_PKTINFO) { 3384 /* 3385 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3386 * has been prepended to the packet by IP. We need to 3387 * extract the mblk and adjust the rptr 3388 */ 3389 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3390 options_mp = mp; 3391 mp = mp->b_cont; 3392 rptr = mp->b_rptr; 3393 } else { 3394 /* 3395 * ICMP messages. 3396 */ 3397 icmp_icmp_error(connp, mp); 3398 return; 3399 } 3400 } 3401 3402 /* 3403 * Discard message if it is misaligned or smaller than the IP header. 3404 */ 3405 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3406 freemsg(mp); 3407 if (options_mp != NULL) 3408 freeb(options_mp); 3409 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3410 return; 3411 } 3412 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3413 3414 /* Handle M_DATA messages containing IP packets messages */ 3415 if (ipvers == IPV4_VERSION) { 3416 /* 3417 * Special case where IP attaches 3418 * the IRE needs to be handled so that we don't send up 3419 * IRE to the user land. 
3420 */ 3421 ipha = (ipha_t *)rptr; 3422 hdr_len = IPH_HDR_LENGTH(ipha); 3423 3424 if (ipha->ipha_protocol == IPPROTO_TCP) { 3425 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3426 3427 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3428 TH_SYN) && mp->b_cont != NULL) { 3429 mp1 = mp->b_cont; 3430 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3431 freeb(mp1); 3432 mp->b_cont = NULL; 3433 } 3434 } 3435 } 3436 if (is->is_bsd_compat) { 3437 ushort_t len; 3438 len = ntohs(ipha->ipha_length); 3439 3440 if (mp->b_datap->db_ref > 1) { 3441 /* 3442 * Allocate a new IP header so that we can 3443 * modify ipha_length. 3444 */ 3445 mblk_t *mp1; 3446 3447 mp1 = allocb(hdr_len, BPRI_MED); 3448 if (!mp1) { 3449 freemsg(mp); 3450 if (options_mp != NULL) 3451 freeb(options_mp); 3452 BUMP_MIB(&is->is_rawip_mib, 3453 rawipInErrors); 3454 return; 3455 } 3456 bcopy(rptr, mp1->b_rptr, hdr_len); 3457 mp->b_rptr = rptr + hdr_len; 3458 rptr = mp1->b_rptr; 3459 ipha = (ipha_t *)rptr; 3460 mp1->b_cont = mp; 3461 mp1->b_wptr = rptr + hdr_len; 3462 mp = mp1; 3463 } 3464 len -= hdr_len; 3465 ipha->ipha_length = htons(len); 3466 } 3467 } 3468 3469 /* 3470 * This is the inbound data path. Packets are passed upstream as 3471 * T_UNITDATA_IND messages with full IP headers still attached. 3472 */ 3473 if (icmp->icmp_family == AF_INET) { 3474 ASSERT(ipvers == IPV4_VERSION); 3475 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3476 if (icmp->icmp_recvif && (pinfo != NULL) && 3477 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3478 udi_size += sizeof (struct T_opthdr) + 3479 sizeof (uint_t); 3480 } 3481 3482 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3483 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3484 udi_size += sizeof (struct T_opthdr) + 3485 sizeof (struct in_pktinfo); 3486 } 3487 3488 /* 3489 * If SO_TIMESTAMP is set allocate the appropriate sized 3490 * buffer. 
Since gethrestime() expects a pointer aligned 3491 * argument, we allocate space necessary for extra 3492 * alignment (even though it might not be used). 3493 */ 3494 if (icmp->icmp_timestamp) { 3495 udi_size += sizeof (struct T_opthdr) + 3496 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3497 } 3498 mp1 = allocb(udi_size, BPRI_MED); 3499 if (mp1 == NULL) { 3500 freemsg(mp); 3501 if (options_mp != NULL) 3502 freeb(options_mp); 3503 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3504 return; 3505 } 3506 mp1->b_cont = mp; 3507 mp = mp1; 3508 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3509 mp->b_datap->db_type = M_PROTO; 3510 mp->b_wptr = (uchar_t *)tudi + udi_size; 3511 tudi->PRIM_type = T_UNITDATA_IND; 3512 tudi->SRC_length = sizeof (sin_t); 3513 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3514 sin = (sin_t *)&tudi[1]; 3515 *sin = sin_null; 3516 sin->sin_family = AF_INET; 3517 sin->sin_addr.s_addr = ipha->ipha_src; 3518 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3519 sizeof (sin_t); 3520 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3521 tudi->OPT_length = udi_size; 3522 3523 /* 3524 * Add options if IP_RECVIF is set 3525 */ 3526 if (udi_size != 0) { 3527 char *dstopt; 3528 3529 dstopt = (char *)&sin[1]; 3530 if (icmp->icmp_recvif && (pinfo != NULL) && 3531 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3532 3533 struct T_opthdr *toh; 3534 uint_t *dstptr; 3535 3536 toh = (struct T_opthdr *)dstopt; 3537 toh->level = IPPROTO_IP; 3538 toh->name = IP_RECVIF; 3539 toh->len = sizeof (struct T_opthdr) + 3540 sizeof (uint_t); 3541 toh->status = 0; 3542 dstopt += sizeof (struct T_opthdr); 3543 dstptr = (uint_t *)dstopt; 3544 *dstptr = pinfo->ip_pkt_ifindex; 3545 dstopt += sizeof (uint_t); 3546 udi_size -= toh->len; 3547 } 3548 if (icmp->icmp_timestamp) { 3549 struct T_opthdr *toh; 3550 3551 toh = (struct T_opthdr *)dstopt; 3552 toh->level = SOL_SOCKET; 3553 toh->name = SCM_TIMESTAMP; 3554 toh->len = sizeof (struct T_opthdr) + 3555 sizeof (timestruc_t) + 
_POINTER_ALIGNMENT; 3556 toh->status = 0; 3557 dstopt += sizeof (struct T_opthdr); 3558 /* Align for gethrestime() */ 3559 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 3560 sizeof (intptr_t)); 3561 gethrestime((timestruc_t *)dstopt); 3562 dstopt = (char *)toh + toh->len; 3563 udi_size -= toh->len; 3564 } 3565 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3566 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3567 struct T_opthdr *toh; 3568 struct in_pktinfo *pktinfop; 3569 3570 toh = (struct T_opthdr *)dstopt; 3571 toh->level = IPPROTO_IP; 3572 toh->name = IP_PKTINFO; 3573 toh->len = sizeof (struct T_opthdr) + 3574 sizeof (in_pktinfo_t); 3575 toh->status = 0; 3576 dstopt += sizeof (struct T_opthdr); 3577 pktinfop = (struct in_pktinfo *)dstopt; 3578 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3579 pktinfop->ipi_spec_dst = 3580 pinfo->ip_pkt_match_addr; 3581 3582 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3583 3584 dstopt += sizeof (struct in_pktinfo); 3585 udi_size -= toh->len; 3586 } 3587 3588 /* Consumed all of allocated space */ 3589 ASSERT(udi_size == 0); 3590 } 3591 3592 if (options_mp != NULL) 3593 freeb(options_mp); 3594 3595 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3596 goto deliver; 3597 } 3598 3599 /* 3600 * We don't need options_mp in the IPv6 path. 3601 */ 3602 if (options_mp != NULL) { 3603 freeb(options_mp); 3604 options_mp = NULL; 3605 } 3606 3607 /* 3608 * Discard message if it is smaller than the IPv6 header 3609 * or if the header is malformed. 3610 */ 3611 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3612 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3613 icmp->icmp_family != AF_INET6) { 3614 freemsg(mp); 3615 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3616 return; 3617 } 3618 3619 /* Initialize */ 3620 ipp.ipp_fields = 0; 3621 hopstrip = 0; 3622 3623 ip6h = (ip6_t *)rptr; 3624 /* 3625 * Call on ip_find_hdr_v6 which gets the total hdr len 3626 * as well as individual lenghts of ext hdrs (and ptrs to 3627 * them). 
3628 */ 3629 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3630 /* Look for ifindex information */ 3631 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3632 ip6i = (ip6i_t *)ip6h; 3633 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3634 ASSERT(ip6i->ip6i_ifindex != 0); 3635 ipp.ipp_fields |= IPPF_IFINDEX; 3636 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3637 } 3638 rptr = (uchar_t *)&ip6i[1]; 3639 mp->b_rptr = rptr; 3640 if (rptr == mp->b_wptr) { 3641 mp1 = mp->b_cont; 3642 freeb(mp); 3643 mp = mp1; 3644 rptr = mp->b_rptr; 3645 } 3646 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3647 ip6h = (ip6_t *)rptr; 3648 } 3649 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3650 3651 /* 3652 * We need to lie a bit to the user because users inside 3653 * labeled compartments should not see their own labels. We 3654 * assume that in all other respects IP has checked the label, 3655 * and that the label is always first among the options. (If 3656 * it's not first, then this code won't see it, and the option 3657 * will be passed along to the user.) 3658 * 3659 * If we had multilevel ICMP sockets, then the following code 3660 * should be skipped for them to allow the user to see the 3661 * label. 3662 * 3663 * Alignment restrictions in the definition of IP options 3664 * (namely, the requirement that the 4-octet DOI goes on a 3665 * 4-octet boundary) mean that we know exactly where the option 3666 * should start, but we're lenient for other hosts. 3667 * 3668 * Note that there are no multilevel ICMP or raw IP sockets 3669 * yet, thus nobody ever sees the IP6OPT_LS option. 
3670 */ 3671 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3672 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3673 const uchar_t *ucp = 3674 (const uchar_t *)ipp.ipp_hopopts + 2; 3675 int remlen = ipp.ipp_hopoptslen - 2; 3676 3677 while (remlen > 0) { 3678 if (*ucp == IP6OPT_PAD1) { 3679 remlen--; 3680 ucp++; 3681 } else if (*ucp == IP6OPT_PADN) { 3682 remlen -= ucp[1] + 2; 3683 ucp += ucp[1] + 2; 3684 } else if (*ucp == ip6opt_ls) { 3685 hopstrip = (ucp - 3686 (const uchar_t *)ipp.ipp_hopopts) + 3687 ucp[1] + 2; 3688 hopstrip = (hopstrip + 7) & ~7; 3689 break; 3690 } else { 3691 /* label option must be first */ 3692 break; 3693 } 3694 } 3695 } 3696 } else { 3697 hdr_len = IPV6_HDR_LEN; 3698 ip6i = NULL; 3699 nexthdr = ip6h->ip6_nxt; 3700 } 3701 /* 3702 * One special case where IP attaches the IRE needs to 3703 * be handled so that we don't send up IRE to the user land. 3704 */ 3705 if (nexthdr == IPPROTO_TCP) { 3706 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3707 3708 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3709 mp->b_cont != NULL) { 3710 mp1 = mp->b_cont; 3711 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3712 freeb(mp1); 3713 mp->b_cont = NULL; 3714 } 3715 } 3716 } 3717 /* 3718 * Check a filter for ICMPv6 types if needed. 3719 * Verify raw checksums if needed. 
3720 */ 3721 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3722 if (icmp->icmp_filter != NULL) { 3723 int type; 3724 3725 /* Assumes that IP has done the pullupmsg */ 3726 type = mp->b_rptr[hdr_len]; 3727 3728 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3729 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3730 freemsg(mp); 3731 return; 3732 } 3733 } else { 3734 /* Checksum */ 3735 uint16_t *up; 3736 uint32_t sum; 3737 int remlen; 3738 3739 up = (uint16_t *)&ip6h->ip6_src; 3740 3741 remlen = msgdsize(mp) - hdr_len; 3742 sum = htons(icmp->icmp_proto + remlen) 3743 + up[0] + up[1] + up[2] + up[3] 3744 + up[4] + up[5] + up[6] + up[7] 3745 + up[8] + up[9] + up[10] + up[11] 3746 + up[12] + up[13] + up[14] + up[15]; 3747 sum = (sum & 0xffff) + (sum >> 16); 3748 sum = IP_CSUM(mp, hdr_len, sum); 3749 if (sum != 0) { 3750 /* IPv6 RAW checksum failed */ 3751 ip0dbg(("icmp_rput: RAW checksum " 3752 "failed %x\n", sum)); 3753 freemsg(mp); 3754 BUMP_MIB(&is->is_rawip_mib, 3755 rawipInCksumErrs); 3756 return; 3757 } 3758 } 3759 } 3760 /* Skip all the IPv6 headers per API */ 3761 mp->b_rptr += hdr_len; 3762 3763 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3764 3765 /* 3766 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3767 * maintain state information, instead of relying on icmp_t 3768 * structure, since there arent any locks protecting these members 3769 * and there is a window where there might be a race between a 3770 * thread setting options on the write side and a thread reading 3771 * these options on the read size. 
3772 */ 3773 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3774 IPPF_RTHDR|IPPF_IFINDEX)) { 3775 if (icmp->icmp_ipv6_recvhopopts && 3776 (ipp.ipp_fields & IPPF_HOPOPTS) && 3777 ipp.ipp_hopoptslen > hopstrip) { 3778 udi_size += sizeof (struct T_opthdr) + 3779 ipp.ipp_hopoptslen - hopstrip; 3780 icmp_opt |= IPPF_HOPOPTS; 3781 } 3782 if ((icmp->icmp_ipv6_recvdstopts || 3783 icmp->icmp_old_ipv6_recvdstopts) && 3784 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3785 udi_size += sizeof (struct T_opthdr) + 3786 ipp.ipp_dstoptslen; 3787 icmp_opt |= IPPF_DSTOPTS; 3788 } 3789 if (((icmp->icmp_ipv6_recvdstopts && 3790 icmp->icmp_ipv6_recvrthdr && 3791 (ipp.ipp_fields & IPPF_RTHDR)) || 3792 icmp->icmp_ipv6_recvrtdstopts) && 3793 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3794 udi_size += sizeof (struct T_opthdr) + 3795 ipp.ipp_rtdstoptslen; 3796 icmp_opt |= IPPF_RTDSTOPTS; 3797 } 3798 if (icmp->icmp_ipv6_recvrthdr && 3799 (ipp.ipp_fields & IPPF_RTHDR)) { 3800 udi_size += sizeof (struct T_opthdr) + 3801 ipp.ipp_rthdrlen; 3802 icmp_opt |= IPPF_RTHDR; 3803 } 3804 if (icmp->icmp_ip_recvpktinfo && 3805 (ipp.ipp_fields & IPPF_IFINDEX)) { 3806 udi_size += sizeof (struct T_opthdr) + 3807 sizeof (struct in6_pktinfo); 3808 icmp_opt |= IPPF_IFINDEX; 3809 } 3810 } 3811 if (icmp->icmp_ipv6_recvhoplimit) { 3812 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3813 icmp_ipv6_recvhoplimit = B_TRUE; 3814 } 3815 3816 if (icmp->icmp_ipv6_recvtclass) 3817 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3818 3819 /* 3820 * If SO_TIMESTAMP is set allocate the appropriate sized 3821 * buffer. Since gethrestime() expects a pointer aligned 3822 * argument, we allocate space necessary for extra 3823 * alignment (even though it might not be used). 
3824 */ 3825 if (icmp->icmp_timestamp) { 3826 udi_size += sizeof (struct T_opthdr) + 3827 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3828 } 3829 3830 mp1 = allocb(udi_size, BPRI_MED); 3831 if (mp1 == NULL) { 3832 freemsg(mp); 3833 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3834 return; 3835 } 3836 mp1->b_cont = mp; 3837 mp = mp1; 3838 mp->b_datap->db_type = M_PROTO; 3839 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3840 mp->b_wptr = (uchar_t *)tudi + udi_size; 3841 tudi->PRIM_type = T_UNITDATA_IND; 3842 tudi->SRC_length = sizeof (sin6_t); 3843 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3844 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3845 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3846 tudi->OPT_length = udi_size; 3847 sin6 = (sin6_t *)&tudi[1]; 3848 sin6->sin6_port = 0; 3849 sin6->sin6_family = AF_INET6; 3850 3851 sin6->sin6_addr = ip6h->ip6_src; 3852 /* No sin6_flowinfo per API */ 3853 sin6->sin6_flowinfo = 0; 3854 /* For link-scope source pass up scope id */ 3855 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3856 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3857 sin6->sin6_scope_id = ipp.ipp_ifindex; 3858 else 3859 sin6->sin6_scope_id = 0; 3860 3861 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3862 icmp->icmp_zoneid, is->is_netstack); 3863 3864 if (udi_size != 0) { 3865 uchar_t *dstopt; 3866 3867 dstopt = (uchar_t *)&sin6[1]; 3868 if (icmp_opt & IPPF_IFINDEX) { 3869 struct T_opthdr *toh; 3870 struct in6_pktinfo *pkti; 3871 3872 toh = (struct T_opthdr *)dstopt; 3873 toh->level = IPPROTO_IPV6; 3874 toh->name = IPV6_PKTINFO; 3875 toh->len = sizeof (struct T_opthdr) + 3876 sizeof (*pkti); 3877 toh->status = 0; 3878 dstopt += sizeof (struct T_opthdr); 3879 pkti = (struct in6_pktinfo *)dstopt; 3880 pkti->ipi6_addr = ip6h->ip6_dst; 3881 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3882 dstopt += sizeof (*pkti); 3883 udi_size -= toh->len; 3884 } 3885 if (icmp_ipv6_recvhoplimit) { 3886 struct T_opthdr *toh; 3887 3888 toh = 
(struct T_opthdr *)dstopt; 3889 toh->level = IPPROTO_IPV6; 3890 toh->name = IPV6_HOPLIMIT; 3891 toh->len = sizeof (struct T_opthdr) + 3892 sizeof (uint_t); 3893 toh->status = 0; 3894 dstopt += sizeof (struct T_opthdr); 3895 *(uint_t *)dstopt = ip6h->ip6_hops; 3896 dstopt += sizeof (uint_t); 3897 udi_size -= toh->len; 3898 } 3899 if (icmp->icmp_ipv6_recvtclass) { 3900 struct T_opthdr *toh; 3901 3902 toh = (struct T_opthdr *)dstopt; 3903 toh->level = IPPROTO_IPV6; 3904 toh->name = IPV6_TCLASS; 3905 toh->len = sizeof (struct T_opthdr) + 3906 sizeof (uint_t); 3907 toh->status = 0; 3908 dstopt += sizeof (struct T_opthdr); 3909 *(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3910 dstopt += sizeof (uint_t); 3911 udi_size -= toh->len; 3912 } 3913 if (icmp->icmp_timestamp) { 3914 struct T_opthdr *toh; 3915 3916 toh = (struct T_opthdr *)dstopt; 3917 toh->level = SOL_SOCKET; 3918 toh->name = SCM_TIMESTAMP; 3919 toh->len = sizeof (struct T_opthdr) + 3920 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3921 toh->status = 0; 3922 dstopt += sizeof (struct T_opthdr); 3923 /* Align for gethrestime() */ 3924 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 3925 sizeof (intptr_t)); 3926 gethrestime((timestruc_t *)dstopt); 3927 dstopt = (uchar_t *)toh + toh->len; 3928 udi_size -= toh->len; 3929 } 3930 3931 if (icmp_opt & IPPF_HOPOPTS) { 3932 struct T_opthdr *toh; 3933 3934 toh = (struct T_opthdr *)dstopt; 3935 toh->level = IPPROTO_IPV6; 3936 toh->name = IPV6_HOPOPTS; 3937 toh->len = sizeof (struct T_opthdr) + 3938 ipp.ipp_hopoptslen - hopstrip; 3939 toh->status = 0; 3940 dstopt += sizeof (struct T_opthdr); 3941 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 3942 ipp.ipp_hopoptslen - hopstrip); 3943 if (hopstrip > 0) { 3944 /* copy next header value and fake length */ 3945 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 3946 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 3947 hopstrip / 8; 3948 } 3949 dstopt += ipp.ipp_hopoptslen - hopstrip; 3950 udi_size -= toh->len; 3951 } 3952 if 
(icmp_opt & IPPF_RTDSTOPTS) { 3953 struct T_opthdr *toh; 3954 3955 toh = (struct T_opthdr *)dstopt; 3956 toh->level = IPPROTO_IPV6; 3957 toh->name = IPV6_DSTOPTS; 3958 toh->len = sizeof (struct T_opthdr) + 3959 ipp.ipp_rtdstoptslen; 3960 toh->status = 0; 3961 dstopt += sizeof (struct T_opthdr); 3962 bcopy(ipp.ipp_rtdstopts, dstopt, 3963 ipp.ipp_rtdstoptslen); 3964 dstopt += ipp.ipp_rtdstoptslen; 3965 udi_size -= toh->len; 3966 } 3967 if (icmp_opt & IPPF_RTHDR) { 3968 struct T_opthdr *toh; 3969 3970 toh = (struct T_opthdr *)dstopt; 3971 toh->level = IPPROTO_IPV6; 3972 toh->name = IPV6_RTHDR; 3973 toh->len = sizeof (struct T_opthdr) + 3974 ipp.ipp_rthdrlen; 3975 toh->status = 0; 3976 dstopt += sizeof (struct T_opthdr); 3977 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 3978 dstopt += ipp.ipp_rthdrlen; 3979 udi_size -= toh->len; 3980 } 3981 if (icmp_opt & IPPF_DSTOPTS) { 3982 struct T_opthdr *toh; 3983 3984 toh = (struct T_opthdr *)dstopt; 3985 toh->level = IPPROTO_IPV6; 3986 toh->name = IPV6_DSTOPTS; 3987 toh->len = sizeof (struct T_opthdr) + 3988 ipp.ipp_dstoptslen; 3989 toh->status = 0; 3990 dstopt += sizeof (struct T_opthdr); 3991 bcopy(ipp.ipp_dstopts, dstopt, 3992 ipp.ipp_dstoptslen); 3993 dstopt += ipp.ipp_dstoptslen; 3994 udi_size -= toh->len; 3995 } 3996 /* Consumed all of allocated space */ 3997 ASSERT(udi_size == 0); 3998 } 3999 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 4000 4001 deliver: 4002 if (IPCL_IS_NONSTR(connp)) { 4003 if ((*connp->conn_upcalls->su_recv) 4004 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 4005 NULL) < 0) { 4006 mutex_enter(&icmp->icmp_recv_lock); 4007 if (error == ENOSPC) { 4008 /* 4009 * let's confirm while holding the lock 4010 */ 4011 if ((*connp->conn_upcalls->su_recv) 4012 (connp->conn_upper_handle, NULL, 0, 0, 4013 &error, NULL) < 0) { 4014 if (error == ENOSPC) { 4015 connp->conn_flow_cntrld = 4016 B_TRUE; 4017 } else { 4018 ASSERT(error == EOPNOTSUPP); 4019 } 4020 } 4021 mutex_exit(&icmp->icmp_recv_lock); 4022 
} else { 4023 ASSERT(error == EOPNOTSUPP); 4024 icmp_queue_fallback(icmp, mp); 4025 } 4026 } 4027 } else { 4028 putnext(connp->conn_rq, mp); 4029 } 4030 ASSERT(MUTEX_NOT_HELD(&icmp->icmp_recv_lock)); 4031 } 4032 4033 /* 4034 * return SNMP stuff in buffer in mpdata 4035 */ 4036 mblk_t * 4037 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 4038 { 4039 mblk_t *mpdata; 4040 struct opthdr *optp; 4041 conn_t *connp = Q_TO_CONN(q); 4042 icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 4043 mblk_t *mp2ctl; 4044 4045 /* 4046 * make a copy of the original message 4047 */ 4048 mp2ctl = copymsg(mpctl); 4049 4050 if (mpctl == NULL || 4051 (mpdata = mpctl->b_cont) == NULL) { 4052 freemsg(mpctl); 4053 freemsg(mp2ctl); 4054 return (0); 4055 } 4056 4057 /* fixed length structure for IPv4 and IPv6 counters */ 4058 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4059 optp->level = EXPER_RAWIP; 4060 optp->name = 0; 4061 (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, 4062 sizeof (is->is_rawip_mib)); 4063 optp->len = msgdsize(mpdata); 4064 qreply(q, mpctl); 4065 4066 return (mp2ctl); 4067 } 4068 4069 /* 4070 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 4071 * TODO: If this ever actually tries to set anything, it needs to be 4072 * to do the appropriate locking. 
4073 */ 4074 /* ARGSUSED */ 4075 int 4076 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4077 uchar_t *ptr, int len) 4078 { 4079 switch (level) { 4080 case EXPER_RAWIP: 4081 return (0); 4082 default: 4083 return (1); 4084 } 4085 } 4086 4087 /* Report for ndd "icmp_status" */ 4088 /* ARGSUSED */ 4089 static int 4090 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4091 { 4092 conn_t *connp; 4093 ip_stack_t *ipst; 4094 char laddrbuf[INET6_ADDRSTRLEN]; 4095 char faddrbuf[INET6_ADDRSTRLEN]; 4096 int i; 4097 4098 (void) mi_mpprintf(mp, 4099 "RAWIP " MI_COL_HDRPAD_STR 4100 /* 01234567[89ABCDEF] */ 4101 " src addr dest addr state"); 4102 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 4103 4104 connp = Q_TO_CONN(q); 4105 ipst = connp->conn_netstack->netstack_ip; 4106 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4107 connf_t *connfp; 4108 char *state; 4109 4110 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4111 connp = NULL; 4112 4113 while ((connp = ipcl_get_next_conn(connfp, connp, 4114 IPCL_RAWIPCONN)) != NULL) { 4115 icmp_t *icmp; 4116 4117 mutex_enter(&(connp)->conn_lock); 4118 icmp = connp->conn_icmp; 4119 4120 if (icmp->icmp_state == TS_UNBND) 4121 state = "UNBOUND"; 4122 else if (icmp->icmp_state == TS_IDLE) 4123 state = "IDLE"; 4124 else if (icmp->icmp_state == TS_DATA_XFER) 4125 state = "CONNECTED"; 4126 else 4127 state = "UnkState"; 4128 4129 (void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s", 4130 (void *)icmp, 4131 inet_ntop(AF_INET6, &icmp->icmp_v6dst.sin6_addr, 4132 faddrbuf, 4133 sizeof (faddrbuf)), 4134 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 4135 sizeof (laddrbuf)), 4136 state); 4137 mutex_exit(&(connp)->conn_lock); 4138 } 4139 } 4140 return (0); 4141 } 4142 4143 /* 4144 * This routine creates a T_UDERROR_IND message and passes it upstream. 4145 * The address and options are copied from the T_UNITDATA_REQ message 4146 * passed in mp. This message is freed. 
4147 */ 4148 static void 4149 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 4150 { 4151 mblk_t *mp1; 4152 uchar_t *rptr = mp->b_rptr; 4153 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 4154 4155 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 4156 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 4157 tudr->OPT_length, err); 4158 if (mp1) 4159 qreply(q, mp1); 4160 freemsg(mp); 4161 } 4162 4163 4164 static int 4165 rawip_do_unbind(conn_t *connp) 4166 { 4167 icmp_t *icmp = connp->conn_icmp; 4168 4169 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4170 /* If a bind has not been done, we can't unbind. */ 4171 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 4172 rw_exit(&icmp->icmp_rwlock); 4173 return (-TOUTSTATE); 4174 } 4175 icmp->icmp_pending_op = T_UNBIND_REQ; 4176 rw_exit(&icmp->icmp_rwlock); 4177 4178 /* 4179 * Call ip to unbind 4180 */ 4181 4182 ip_unbind(connp); 4183 4184 /* 4185 * Once we're unbound from IP, the pending operation may be cleared 4186 * here. 4187 */ 4188 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4189 V6_SET_ZERO(icmp->icmp_v6src); 4190 V6_SET_ZERO(icmp->icmp_bound_v6src); 4191 icmp->icmp_pending_op = -1; 4192 icmp->icmp_state = TS_UNBND; 4193 if (icmp->icmp_family == AF_INET6) 4194 (void) icmp_build_hdrs(icmp); 4195 rw_exit(&icmp->icmp_rwlock); 4196 return (0); 4197 } 4198 4199 /* 4200 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 4201 * After some error checking, the message is passed downstream to ip. 
4202 */ 4203 static void 4204 icmp_tpi_unbind(queue_t *q, mblk_t *mp) 4205 { 4206 conn_t *connp = Q_TO_CONN(q); 4207 int error; 4208 4209 ASSERT(mp->b_cont == NULL); 4210 error = rawip_do_unbind(connp); 4211 if (error) { 4212 if (error < 0) { 4213 icmp_err_ack(q, mp, -error, 0); 4214 } else { 4215 icmp_err_ack(q, mp, 0, error); 4216 } 4217 return; 4218 } 4219 4220 /* 4221 * Convert mp into a T_OK_ACK 4222 */ 4223 4224 mp = mi_tpi_ok_ack_alloc(mp); 4225 4226 /* 4227 * should not happen in practice... T_OK_ACK is smaller than the 4228 * original message. 4229 */ 4230 ASSERT(mp != NULL); 4231 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4232 qreply(q, mp); 4233 } 4234 4235 4236 /* 4237 * Process IPv4 packets that already include an IP header. 4238 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 4239 * IPPROTO_IGMP). 4240 */ 4241 static int 4242 icmp_wput_hdrincl(queue_t *q, conn_t *connp, mblk_t *mp, icmp_t *icmp, 4243 ip4_pkt_t *pktinfop) 4244 { 4245 icmp_stack_t *is = icmp->icmp_is; 4246 ipha_t *ipha; 4247 int ip_hdr_length; 4248 int tp_hdr_len; 4249 mblk_t *mp1; 4250 uint_t pkt_len; 4251 ip_opt_info_t optinfo; 4252 4253 optinfo.ip_opt_flags = 0; 4254 optinfo.ip_opt_ill_index = 0; 4255 ipha = (ipha_t *)mp->b_rptr; 4256 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4257 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 4258 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4259 ASSERT(icmp != NULL); 4260 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4261 freemsg(mp); 4262 return (0); 4263 } 4264 ipha = (ipha_t *)mp->b_rptr; 4265 } 4266 ipha->ipha_version_and_hdr_length = 4267 (IP_VERSION<<4) | (ip_hdr_length>>2); 4268 4269 /* 4270 * For the socket of SOCK_RAW type, the checksum is provided in the 4271 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4272 * tell IP that the application has sent a complete IP header and not 4273 * to compute the transport checksum nor change the DF flag. 
4274 */ 4275 ipha->ipha_ident = IP_HDR_INCLUDED; 4276 ipha->ipha_hdr_checksum = 0; 4277 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 4278 /* Insert options if any */ 4279 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4280 /* 4281 * Put the IP header plus any transport header that is 4282 * checksumed by ip_wput into the first mblk. (ip_wput assumes 4283 * that at least the checksum field is in the first mblk.) 4284 */ 4285 switch (ipha->ipha_protocol) { 4286 case IPPROTO_UDP: 4287 tp_hdr_len = 8; 4288 break; 4289 case IPPROTO_TCP: 4290 tp_hdr_len = 20; 4291 break; 4292 default: 4293 tp_hdr_len = 0; 4294 break; 4295 } 4296 /* 4297 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 4298 * tp_hdr_len bytes will be in a single mblk. 4299 */ 4300 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 4301 tp_hdr_len)) { 4302 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 4303 tp_hdr_len)) { 4304 BUMP_MIB(&is->is_rawip_mib, 4305 rawipOutErrors); 4306 freemsg(mp); 4307 return (0); 4308 } 4309 ipha = (ipha_t *)mp->b_rptr; 4310 } 4311 4312 /* 4313 * if the length is larger then the max allowed IP packet, 4314 * then send an error and abort the processing. 
4315 */ 4316 pkt_len = ntohs(ipha->ipha_length) 4317 + icmp->icmp_ip_snd_options_len; 4318 if (pkt_len > IP_MAXPACKET) { 4319 return (EMSGSIZE); 4320 } 4321 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra + 4322 tp_hdr_len, BPRI_LO))) { 4323 return (ENOMEM); 4324 } 4325 mp1->b_rptr += is->is_wroff_extra; 4326 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4327 4328 ipha->ipha_length = htons((uint16_t)pkt_len); 4329 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4330 4331 /* Copy transport header if any */ 4332 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4333 mp1->b_wptr += tp_hdr_len; 4334 4335 /* Add options */ 4336 ipha = (ipha_t *)mp1->b_rptr; 4337 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 4338 icmp->icmp_ip_snd_options_len); 4339 4340 /* Drop IP header and transport header from original */ 4341 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4342 4343 mp1->b_cont = mp; 4344 mp = mp1; 4345 /* 4346 * Massage source route putting first source 4347 * route in ipha_dst. 4348 */ 4349 (void) ip_massage_options(ipha, is->is_netstack); 4350 } 4351 4352 if (pktinfop != NULL) { 4353 /* 4354 * Over write the source address provided in the header 4355 */ 4356 if (pktinfop->ip4_addr != INADDR_ANY) { 4357 ipha->ipha_src = pktinfop->ip4_addr; 4358 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4359 } 4360 4361 if (pktinfop->ip4_ill_index != 0) { 4362 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4363 } 4364 } 4365 4366 mblk_setcred(mp, connp->conn_cred); 4367 ip_output_options(connp, mp, q, IP_WPUT, &optinfo); 4368 return (0); 4369 } 4370 4371 static int 4372 icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4373 { 4374 int err; 4375 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4376 icmp_stack_t *is = icmp->icmp_is; 4377 conn_t *connp = icmp->icmp_connp; 4378 4379 err = tsol_compute_label(DB_CREDDEF(mp, connp->conn_cred), dst, 4380 opt_storage, connp->conn_mac_exempt, 4381 is->is_netstack->netstack_ip); 4382 if (err == 0) { 4383 err = 
tsol_update_options(&icmp->icmp_ip_snd_options, 4384 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4385 opt_storage); 4386 } 4387 if (err != 0) { 4388 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4389 DTRACE_PROBE4( 4390 tx__ip__log__drop__updatelabel__icmp, 4391 char *, "icmp(1) failed to update options(2) on mp(3)", 4392 icmp_t *, icmp, char *, opt_storage, mblk_t *, mp); 4393 return (err); 4394 } 4395 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4396 return (0); 4397 } 4398 4399 /* 4400 * This routine handles all messages passed downstream. It either 4401 * consumes the message or passes it downstream; it never queues a 4402 * a message. 4403 */ 4404 static void 4405 icmp_wput(queue_t *q, mblk_t *mp) 4406 { 4407 uchar_t *rptr = mp->b_rptr; 4408 ipha_t *ipha; 4409 mblk_t *mp1; 4410 #define tudr ((struct T_unitdata_req *)rptr) 4411 size_t ip_len; 4412 conn_t *connp = Q_TO_CONN(q); 4413 icmp_t *icmp = connp->conn_icmp; 4414 icmp_stack_t *is = icmp->icmp_is; 4415 sin6_t *sin6; 4416 sin_t *sin; 4417 ipaddr_t v4dst; 4418 ip4_pkt_t pktinfo; 4419 ip4_pkt_t *pktinfop = &pktinfo; 4420 ip6_pkt_t ipp_s; /* For ancillary data options */ 4421 ip6_pkt_t *ipp = &ipp_s; 4422 int error; 4423 4424 ipp->ipp_fields = 0; 4425 ipp->ipp_sticky_ignored = 0; 4426 4427 switch (mp->b_datap->db_type) { 4428 case M_DATA: 4429 if (icmp->icmp_hdrincl) { 4430 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4431 ipha = (ipha_t *)mp->b_rptr; 4432 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4433 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4434 BUMP_MIB(&is->is_rawip_mib, 4435 rawipOutErrors); 4436 freemsg(mp); 4437 return; 4438 } 4439 ipha = (ipha_t *)mp->b_rptr; 4440 } 4441 /* 4442 * If this connection was used for v6 (inconceivable!) 4443 * or if we have a new destination, then it's time to 4444 * figure a new label. 
4445 */ 4446 if (is_system_labeled() && 4447 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4448 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4449 ipha->ipha_dst)) { 4450 error = icmp_update_label(icmp, mp, 4451 ipha->ipha_dst); 4452 if (error != 0) { 4453 icmp_ud_err(q, mp, error); 4454 return; 4455 } 4456 } 4457 error = icmp_wput_hdrincl(q, connp, mp, icmp, NULL); 4458 if (error != 0) 4459 icmp_ud_err(q, mp, error); 4460 return; 4461 } 4462 freemsg(mp); 4463 return; 4464 case M_PROTO: 4465 case M_PCPROTO: 4466 ip_len = mp->b_wptr - rptr; 4467 if (ip_len >= sizeof (struct T_unitdata_req)) { 4468 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4469 if (((union T_primitives *)rptr)->type 4470 == T_UNITDATA_REQ) 4471 break; 4472 } 4473 /* FALLTHRU */ 4474 default: 4475 icmp_wput_other(q, mp); 4476 return; 4477 } 4478 4479 /* Handle T_UNITDATA_REQ messages here. */ 4480 4481 mp1 = mp->b_cont; 4482 if (mp1 == NULL) { 4483 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4484 icmp_ud_err(q, mp, EPROTO); 4485 return; 4486 } 4487 4488 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4489 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4490 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4491 return; 4492 } 4493 4494 switch (icmp->icmp_family) { 4495 case AF_INET6: 4496 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4497 if (!OK_32PTR((char *)sin6) || 4498 tudr->DEST_length != sizeof (sin6_t) || 4499 sin6->sin6_family != AF_INET6) { 4500 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4501 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4502 return; 4503 } 4504 4505 /* No support for mapped addresses on raw sockets */ 4506 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4507 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4508 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4509 return; 4510 } 4511 4512 /* 4513 * Destination is a native IPv6 address. 4514 * Send out an IPv6 format packet. 
4515 */ 4516 if (tudr->OPT_length != 0) { 4517 int error; 4518 4519 error = 0; 4520 if (icmp_unitdata_opt_process(q, mp, &error, 4521 (void *)ipp) < 0) { 4522 /* failure */ 4523 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4524 icmp_ud_err(q, mp, error); 4525 return; 4526 } 4527 ASSERT(error == 0); 4528 } 4529 4530 error = raw_ip_send_data_v6(q, connp, mp1, sin6, ipp); 4531 goto done; 4532 4533 case AF_INET: 4534 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4535 if (!OK_32PTR((char *)sin) || 4536 tudr->DEST_length != sizeof (sin_t) || 4537 sin->sin_family != AF_INET) { 4538 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4539 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4540 return; 4541 } 4542 /* Extract and ipaddr */ 4543 v4dst = sin->sin_addr.s_addr; 4544 break; 4545 4546 default: 4547 ASSERT(0); 4548 } 4549 4550 pktinfop->ip4_ill_index = 0; 4551 pktinfop->ip4_addr = INADDR_ANY; 4552 4553 /* 4554 * If options passed in, feed it for verification and handling 4555 */ 4556 if (tudr->OPT_length != 0) { 4557 int error; 4558 4559 error = 0; 4560 if (icmp_unitdata_opt_process(q, mp, &error, 4561 (void *)pktinfop) < 0) { 4562 /* failure */ 4563 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4564 icmp_ud_err(q, mp, error); 4565 return; 4566 } 4567 ASSERT(error == 0); 4568 /* 4569 * Note: Success in processing options. 
4570 * mp option buffer represented by 4571 * OPT_length/offset now potentially modified 4572 * and contain option setting results 4573 */ 4574 } 4575 4576 error = raw_ip_send_data_v4(q, connp, mp1, v4dst, pktinfop); 4577 done: 4578 if (error != 0) { 4579 icmp_ud_err(q, mp, error); 4580 return; 4581 } else { 4582 mp->b_cont = NULL; 4583 freeb(mp); 4584 } 4585 } 4586 4587 4588 /* ARGSUSED */ 4589 static void 4590 icmp_wput_fallback(queue_t *q, mblk_t *mp) 4591 { 4592 #ifdef DEBUG 4593 cmn_err(CE_CONT, "icmp_wput_fallback: Message during fallback \n"); 4594 #endif 4595 freemsg(mp); 4596 } 4597 4598 static int 4599 raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp, ipaddr_t v4dst, 4600 ip4_pkt_t *pktinfop) 4601 { 4602 ipha_t *ipha; 4603 size_t ip_len; 4604 icmp_t *icmp = connp->conn_icmp; 4605 icmp_stack_t *is = icmp->icmp_is; 4606 int ip_hdr_length; 4607 ip_opt_info_t optinfo; 4608 4609 optinfo.ip_opt_flags = 0; 4610 optinfo.ip_opt_ill_index = 0; 4611 4612 if (icmp->icmp_state == TS_UNBND) { 4613 /* If a port has not been bound to the stream, fail. 
*/ 4614 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4615 return (EPROTO); 4616 } 4617 4618 if (v4dst == INADDR_ANY) 4619 v4dst = htonl(INADDR_LOOPBACK); 4620 4621 /* Check if our saved options are valid; update if not */ 4622 if (is_system_labeled() && 4623 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4624 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst)) { 4625 int error = icmp_update_label(icmp, mp, v4dst); 4626 4627 if (error != 0) 4628 return (error); 4629 } 4630 4631 /* Protocol 255 contains full IP headers */ 4632 if (icmp->icmp_hdrincl) 4633 return (icmp_wput_hdrincl(q, connp, mp, icmp, pktinfop)); 4634 4635 /* Add an IP header */ 4636 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4637 ipha = (ipha_t *)&mp->b_rptr[-ip_hdr_length]; 4638 if ((uchar_t *)ipha < mp->b_datap->db_base || 4639 mp->b_datap->db_ref != 1 || 4640 !OK_32PTR(ipha)) { 4641 mblk_t *mp1; 4642 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, 4643 BPRI_LO))) { 4644 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4645 return (ENOMEM); 4646 } 4647 mp1->b_cont = mp; 4648 ipha = (ipha_t *)mp1->b_datap->db_lim; 4649 mp1->b_wptr = (uchar_t *)ipha; 4650 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4651 mp = mp1; 4652 } 4653 #ifdef _BIG_ENDIAN 4654 /* Set version, header length, and tos */ 4655 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4656 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4657 icmp->icmp_type_of_service); 4658 /* Set ttl and protocol */ 4659 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4660 #else 4661 /* Set version, header length, and tos */ 4662 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4663 ((icmp->icmp_type_of_service << 8) | 4664 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4665 /* Set ttl and protocol */ 4666 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4667 #endif 4668 if (pktinfop->ip4_addr != INADDR_ANY) { 4669 ipha->ipha_src = pktinfop->ip4_addr; 4670 
optinfo.ip_opt_flags = IP_VERIFY_SRC; 4671 } else { 4672 4673 /* 4674 * Copy our address into the packet. If this is zero, 4675 * ip will fill in the real source address. 4676 */ 4677 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4678 } 4679 4680 ipha->ipha_fragment_offset_and_flags = 0; 4681 4682 if (pktinfop->ip4_ill_index != 0) { 4683 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4684 } 4685 4686 4687 /* 4688 * For the socket of SOCK_RAW type, the checksum is provided in the 4689 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4690 * tell IP that the application has sent a complete IP header and not 4691 * to compute the transport checksum nor change the DF flag. 4692 */ 4693 ipha->ipha_ident = IP_HDR_INCLUDED; 4694 4695 /* Finish common formatting of the packet. */ 4696 mp->b_rptr = (uchar_t *)ipha; 4697 4698 ip_len = mp->b_wptr - (uchar_t *)ipha; 4699 if (mp->b_cont != NULL) 4700 ip_len += msgdsize(mp->b_cont); 4701 4702 /* 4703 * Set the length into the IP header. 4704 * If the length is greater than the maximum allowed by IP, 4705 * then free the message and return. Do not try and send it 4706 * as this can cause problems in layers below. 4707 */ 4708 if (ip_len > IP_MAXPACKET) { 4709 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4710 return (EMSGSIZE); 4711 } 4712 ipha->ipha_length = htons((uint16_t)ip_len); 4713 /* 4714 * Copy in the destination address request 4715 */ 4716 ipha->ipha_dst = v4dst; 4717 4718 /* 4719 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4720 */ 4721 if (CLASSD(v4dst)) 4722 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4723 4724 /* Copy in options if any */ 4725 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4726 bcopy(icmp->icmp_ip_snd_options, 4727 &ipha[1], icmp->icmp_ip_snd_options_len); 4728 /* 4729 * Massage source route putting first source route in ipha_dst. 4730 * Ignore the destination in the T_unitdata_req. 
4731 */ 4732 (void) ip_massage_options(ipha, is->is_netstack); 4733 } 4734 4735 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 4736 mblk_setcred(mp, connp->conn_cred); 4737 ip_output_options(connp, mp, q, IP_WPUT, &optinfo); 4738 return (0); 4739 } 4740 4741 static int 4742 icmp_update_label_v6(icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4743 { 4744 int err; 4745 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4746 icmp_stack_t *is = icmp->icmp_is; 4747 conn_t *connp = icmp->icmp_connp; 4748 4749 err = tsol_compute_label_v6(DB_CREDDEF(mp, connp->conn_cred), dst, 4750 opt_storage, connp->conn_mac_exempt, 4751 is->is_netstack->netstack_ip); 4752 if (err == 0) { 4753 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4754 &icmp->icmp_label_len_v6, opt_storage); 4755 } 4756 if (err != 0) { 4757 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4758 DTRACE_PROBE4( 4759 tx__ip__log__drop__updatelabel__icmp6, 4760 char *, "icmp(1) failed to update options(2) on mp(3)", 4761 icmp_t *, icmp, char *, opt_storage, mblk_t *, mp); 4762 return (err); 4763 } 4764 4765 icmp->icmp_v6lastdst = *dst; 4766 return (0); 4767 } 4768 4769 /* 4770 * raw_ip_send_data_v6(): 4771 * Assumes that icmp_wput did some sanity checking on the destination 4772 * address, but that the label may not yet be correct. 4773 */ 4774 static int 4775 raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp, sin6_t *sin6, 4776 ip6_pkt_t *ipp) 4777 { 4778 ip6_t *ip6h; 4779 ip6i_t *ip6i; /* mp->b_rptr even if no ip6i_t */ 4780 int ip_hdr_len = IPV6_HDR_LEN; 4781 size_t ip_len; 4782 icmp_t *icmp = connp->conn_icmp; 4783 icmp_stack_t *is = icmp->icmp_is; 4784 ip6_pkt_t *tipp; 4785 uint32_t csum = 0; 4786 uint_t ignore = 0; 4787 uint_t option_exists = 0, is_sticky = 0; 4788 uint8_t *cp; 4789 uint8_t *nxthdr_ptr; 4790 in6_addr_t ip6_dst; 4791 4792 /* 4793 * If the local address is a mapped address return 4794 * an error. 
4795 * It would be possible to send an IPv6 packet but the 4796 * response would never make it back to the application 4797 * since it is bound to a mapped address. 4798 */ 4799 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4800 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4801 return (EADDRNOTAVAIL); 4802 } 4803 4804 ignore = ipp->ipp_sticky_ignored; 4805 if (sin6->sin6_scope_id != 0 && 4806 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4807 /* 4808 * IPPF_SCOPE_ID is special. It's neither a sticky 4809 * option nor ancillary data. It needs to be 4810 * explicitly set in options_exists. 4811 */ 4812 option_exists |= IPPF_SCOPE_ID; 4813 } 4814 4815 /* 4816 * Compute the destination address 4817 */ 4818 ip6_dst = sin6->sin6_addr; 4819 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4820 ip6_dst = ipv6_loopback; 4821 4822 /* 4823 * If we're not going to the same destination as last time, then 4824 * recompute the label required. This is done in a separate routine to 4825 * avoid blowing up our stack here. 4826 */ 4827 if (is_system_labeled() && 4828 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst)) { 4829 int error = 0; 4830 4831 error = icmp_update_label_v6(icmp, mp, &ip6_dst); 4832 if (error != 0) 4833 return (error); 4834 } 4835 4836 /* 4837 * If there's a security label here, then we ignore any options the 4838 * user may try to set. We keep the peer's label as a hidden sticky 4839 * option. 4840 */ 4841 if (icmp->icmp_label_len_v6 > 0) { 4842 ignore &= ~IPPF_HOPOPTS; 4843 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4844 } 4845 4846 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4847 (ipp->ipp_fields == 0)) { 4848 /* No sticky options nor ancillary data. */ 4849 goto no_options; 4850 } 4851 4852 /* 4853 * Go through the options figuring out where each is going to 4854 * come from and build two masks. The first mask indicates if 4855 * the option exists at all. The second mask indicates if the 4856 * option is sticky or ancillary. 
4857 */ 4858 if (!(ignore & IPPF_HOPOPTS)) { 4859 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4860 option_exists |= IPPF_HOPOPTS; 4861 ip_hdr_len += ipp->ipp_hopoptslen; 4862 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4863 option_exists |= IPPF_HOPOPTS; 4864 is_sticky |= IPPF_HOPOPTS; 4865 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4866 } 4867 } 4868 4869 if (!(ignore & IPPF_RTHDR)) { 4870 if (ipp->ipp_fields & IPPF_RTHDR) { 4871 option_exists |= IPPF_RTHDR; 4872 ip_hdr_len += ipp->ipp_rthdrlen; 4873 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4874 option_exists |= IPPF_RTHDR; 4875 is_sticky |= IPPF_RTHDR; 4876 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4877 } 4878 } 4879 4880 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4881 /* 4882 * Need to have a router header to use these. 4883 */ 4884 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4885 option_exists |= IPPF_RTDSTOPTS; 4886 ip_hdr_len += ipp->ipp_rtdstoptslen; 4887 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4888 option_exists |= IPPF_RTDSTOPTS; 4889 is_sticky |= IPPF_RTDSTOPTS; 4890 ip_hdr_len += 4891 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4892 } 4893 } 4894 4895 if (!(ignore & IPPF_DSTOPTS)) { 4896 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4897 option_exists |= IPPF_DSTOPTS; 4898 ip_hdr_len += ipp->ipp_dstoptslen; 4899 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4900 option_exists |= IPPF_DSTOPTS; 4901 is_sticky |= IPPF_DSTOPTS; 4902 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4903 } 4904 } 4905 4906 if (!(ignore & IPPF_IFINDEX)) { 4907 if (ipp->ipp_fields & IPPF_IFINDEX) { 4908 option_exists |= IPPF_IFINDEX; 4909 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4910 option_exists |= IPPF_IFINDEX; 4911 is_sticky |= IPPF_IFINDEX; 4912 } 4913 } 4914 4915 if (!(ignore & IPPF_ADDR)) { 4916 if (ipp->ipp_fields & IPPF_ADDR) { 4917 option_exists |= IPPF_ADDR; 4918 } else if 
(icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4919 option_exists |= IPPF_ADDR; 4920 is_sticky |= IPPF_ADDR; 4921 } 4922 } 4923 4924 if (!(ignore & IPPF_DONTFRAG)) { 4925 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4926 option_exists |= IPPF_DONTFRAG; 4927 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4928 option_exists |= IPPF_DONTFRAG; 4929 is_sticky |= IPPF_DONTFRAG; 4930 } 4931 } 4932 4933 if (!(ignore & IPPF_USE_MIN_MTU)) { 4934 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4935 option_exists |= IPPF_USE_MIN_MTU; 4936 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4937 IPPF_USE_MIN_MTU) { 4938 option_exists |= IPPF_USE_MIN_MTU; 4939 is_sticky |= IPPF_USE_MIN_MTU; 4940 } 4941 } 4942 4943 if (!(ignore & IPPF_NEXTHOP)) { 4944 if (ipp->ipp_fields & IPPF_NEXTHOP) { 4945 option_exists |= IPPF_NEXTHOP; 4946 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) { 4947 option_exists |= IPPF_NEXTHOP; 4948 is_sticky |= IPPF_NEXTHOP; 4949 } 4950 } 4951 4952 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 4953 option_exists |= IPPF_HOPLIMIT; 4954 /* IPV6_HOPLIMIT can never be sticky */ 4955 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 4956 4957 if (!(ignore & IPPF_UNICAST_HOPS) && 4958 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 4959 option_exists |= IPPF_UNICAST_HOPS; 4960 is_sticky |= IPPF_UNICAST_HOPS; 4961 } 4962 4963 if (!(ignore & IPPF_MULTICAST_HOPS) && 4964 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 4965 option_exists |= IPPF_MULTICAST_HOPS; 4966 is_sticky |= IPPF_MULTICAST_HOPS; 4967 } 4968 4969 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 4970 /* This is a sticky socket option only */ 4971 option_exists |= IPPF_NO_CKSUM; 4972 is_sticky |= IPPF_NO_CKSUM; 4973 } 4974 4975 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 4976 /* This is a sticky socket option only */ 4977 option_exists |= IPPF_RAW_CKSUM; 4978 is_sticky |= IPPF_RAW_CKSUM; 4979 } 4980 4981 if (!(ignore 
& IPPF_TCLASS)) { 4982 if (ipp->ipp_fields & IPPF_TCLASS) { 4983 option_exists |= IPPF_TCLASS; 4984 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 4985 option_exists |= IPPF_TCLASS; 4986 is_sticky |= IPPF_TCLASS; 4987 } 4988 } 4989 4990 no_options: 4991 4992 /* 4993 * If any options carried in the ip6i_t were specified, we 4994 * need to account for the ip6i_t in the data we'll be sending 4995 * down. 4996 */ 4997 if (option_exists & IPPF_HAS_IP6I) 4998 ip_hdr_len += sizeof (ip6i_t); 4999 5000 /* check/fix buffer config, setup pointers into it */ 5001 ip6h = (ip6_t *)&mp->b_rptr[-ip_hdr_len]; 5002 if ((mp->b_datap->db_ref != 1) || 5003 ((unsigned char *)ip6h < mp->b_datap->db_base) || 5004 !OK_32PTR(ip6h)) { 5005 mblk_t *mp1; 5006 5007 /* Try to get everything in a single mblk next time */ 5008 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 5009 icmp->icmp_max_hdr_len = ip_hdr_len; 5010 5011 (void) proto_set_tx_wroff(q == NULL ? NULL:RD(q), connp, 5012 icmp->icmp_max_hdr_len + is->is_wroff_extra); 5013 } 5014 mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); 5015 if (!mp1) { 5016 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5017 return (ENOMEM); 5018 } 5019 mp1->b_cont = mp; 5020 mp1->b_wptr = mp1->b_datap->db_lim; 5021 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 5022 mp = mp1; 5023 } 5024 mp->b_rptr = (unsigned char *)ip6h; 5025 ip6i = (ip6i_t *)ip6h; 5026 5027 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 5028 if (option_exists & IPPF_HAS_IP6I) { 5029 ip6h = (ip6_t *)&ip6i[1]; 5030 ip6i->ip6i_flags = 0; 5031 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5032 5033 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 5034 if (option_exists & IPPF_SCOPE_ID) { 5035 ip6i->ip6i_flags |= IP6I_IFINDEX; 5036 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 5037 } else if (option_exists & IPPF_IFINDEX) { 5038 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 5039 ASSERT(tipp->ipp_ifindex != 0); 5040 ip6i->ip6i_flags |= IP6I_IFINDEX; 5041 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 5042 } 5043 5044 if (option_exists & IPPF_RAW_CKSUM) { 5045 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 5046 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 5047 } 5048 5049 if (option_exists & IPPF_NO_CKSUM) { 5050 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 5051 } 5052 5053 if (option_exists & IPPF_ADDR) { 5054 /* 5055 * Enable per-packet source address verification if 5056 * IPV6_PKTINFO specified the source address. 5057 * ip6_src is set in the transport's _wput function. 5058 */ 5059 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 5060 } 5061 5062 if (option_exists & IPPF_DONTFRAG) { 5063 ip6i->ip6i_flags |= IP6I_DONTFRAG; 5064 } 5065 5066 if (option_exists & IPPF_USE_MIN_MTU) { 5067 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 5068 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 5069 } 5070 5071 if (option_exists & IPPF_NEXTHOP) { 5072 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 5073 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 5074 ip6i->ip6i_flags |= IP6I_NEXTHOP; 5075 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 5076 } 5077 5078 /* 5079 * tell IP this is an ip6i_t private header 5080 */ 5081 ip6i->ip6i_nxt = IPPROTO_RAW; 5082 } 5083 5084 /* Initialize IPv6 header */ 5085 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5086 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 5087 5088 /* Set the hoplimit of the outgoing packet. 
*/ 5089 if (option_exists & IPPF_HOPLIMIT) { 5090 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 5091 ip6h->ip6_hops = ipp->ipp_hoplimit; 5092 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5093 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 5094 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 5095 if (option_exists & IPPF_MULTICAST_HOPS) 5096 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5097 } else { 5098 ip6h->ip6_hops = icmp->icmp_ttl; 5099 if (option_exists & IPPF_UNICAST_HOPS) 5100 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5101 } 5102 5103 if (option_exists & IPPF_ADDR) { 5104 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 5105 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 5106 ip6h->ip6_src = tipp->ipp_addr; 5107 } else { 5108 /* 5109 * The source address was not set using IPV6_PKTINFO. 5110 * First look at the bound source. 5111 * If unspecified fallback to __sin6_src_id. 5112 */ 5113 ip6h->ip6_src = icmp->icmp_v6src; 5114 if (sin6->__sin6_src_id != 0 && 5115 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 5116 ip_srcid_find_id(sin6->__sin6_src_id, 5117 &ip6h->ip6_src, icmp->icmp_zoneid, 5118 is->is_netstack); 5119 } 5120 } 5121 5122 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 5123 cp = (uint8_t *)&ip6h[1]; 5124 5125 /* 5126 * Here's where we have to start stringing together 5127 * any extension headers in the right order: 5128 * Hop-by-hop, destination, routing, and final destination opts. 
5129 */ 5130 if (option_exists & IPPF_HOPOPTS) { 5131 /* Hop-by-hop options */ 5132 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 5133 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 5134 5135 *nxthdr_ptr = IPPROTO_HOPOPTS; 5136 nxthdr_ptr = &hbh->ip6h_nxt; 5137 5138 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 5139 cp += tipp->ipp_hopoptslen; 5140 } 5141 /* 5142 * En-route destination options 5143 * Only do them if there's a routing header as well 5144 */ 5145 if (option_exists & IPPF_RTDSTOPTS) { 5146 ip6_dest_t *dst = (ip6_dest_t *)cp; 5147 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 5148 5149 *nxthdr_ptr = IPPROTO_DSTOPTS; 5150 nxthdr_ptr = &dst->ip6d_nxt; 5151 5152 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 5153 cp += tipp->ipp_rtdstoptslen; 5154 } 5155 /* 5156 * Routing header next 5157 */ 5158 if (option_exists & IPPF_RTHDR) { 5159 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 5160 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 5161 5162 *nxthdr_ptr = IPPROTO_ROUTING; 5163 nxthdr_ptr = &rt->ip6r_nxt; 5164 5165 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 5166 cp += tipp->ipp_rthdrlen; 5167 } 5168 /* 5169 * Do ultimate destination options 5170 */ 5171 if (option_exists & IPPF_DSTOPTS) { 5172 ip6_dest_t *dest = (ip6_dest_t *)cp; 5173 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 5174 5175 *nxthdr_ptr = IPPROTO_DSTOPTS; 5176 nxthdr_ptr = &dest->ip6d_nxt; 5177 5178 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 5179 cp += tipp->ipp_dstoptslen; 5180 } 5181 5182 /* 5183 * Now set the last header pointer to the proto passed in 5184 */ 5185 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 5186 *nxthdr_ptr = icmp->icmp_proto; 5187 5188 /* 5189 * Copy in the destination address 5190 */ 5191 ip6h->ip6_dst = ip6_dst; 5192 5193 ip6h->ip6_vcf = 5194 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 5195 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 5196 5197 if (option_exists & IPPF_TCLASS) { 5198 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 5199 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 5200 tipp->ipp_tclass); 5201 } 5202 if (option_exists & IPPF_RTHDR) { 5203 ip6_rthdr_t *rth; 5204 5205 /* 5206 * Perform any processing needed for source routing. 5207 * We know that all extension headers will be in the same mblk 5208 * as the IPv6 header. 5209 */ 5210 rth = ip_find_rthdr_v6(ip6h, mp->b_wptr); 5211 if (rth != NULL && rth->ip6r_segleft != 0) { 5212 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 5213 /* 5214 * Drop packet - only support Type 0 routing. 5215 * Notify the application as well. 5216 */ 5217 BUMP_MIB(&is->is_rawip_mib, 5218 rawipOutErrors); 5219 return (EPROTO); 5220 } 5221 /* 5222 * rth->ip6r_len is twice the number of 5223 * addresses in the header 5224 */ 5225 if (rth->ip6r_len & 0x1) { 5226 BUMP_MIB(&is->is_rawip_mib, 5227 rawipOutErrors); 5228 return (EPROTO); 5229 } 5230 /* 5231 * Shuffle the routing header and ip6_dst 5232 * addresses, and get the checksum difference 5233 * between the first hop (in ip6_dst) and 5234 * the destination (in the last routing hdr entry). 5235 */ 5236 csum = ip_massage_options_v6(ip6h, rth, 5237 is->is_netstack); 5238 /* 5239 * Verify that the first hop isn't a mapped address. 5240 * Routers along the path need to do this verification 5241 * for subsequent hops. 5242 */ 5243 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 5244 BUMP_MIB(&is->is_rawip_mib, 5245 rawipOutErrors); 5246 return (EADDRNOTAVAIL); 5247 } 5248 } 5249 } 5250 5251 ip_len = mp->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 5252 if (mp->b_cont != NULL) 5253 ip_len += msgdsize(mp->b_cont); 5254 5255 /* 5256 * Set the length into the IP header. 5257 * If the length is greater than the maximum allowed by IP, 5258 * then free the message and return. Do not try and send it 5259 * as this can cause problems in layers below. 
5260 */ 5261 if (ip_len > IP_MAXPACKET) { 5262 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5263 return (EMSGSIZE); 5264 } 5265 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 5266 uint_t cksum_off; /* From ip6i == mp->b_rptr */ 5267 uint16_t *cksum_ptr; 5268 uint_t ext_hdrs_len; 5269 5270 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 5271 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 5272 icmp->icmp_checksum_off == 2); 5273 5274 /* 5275 * We make it easy for IP to include our pseudo header 5276 * by putting our length in uh_checksum, modified (if 5277 * we have a routing header) by the checksum difference 5278 * between the ultimate destination and first hop addresses. 5279 * Note: ICMPv6 must always checksum the packet. 5280 */ 5281 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 5282 if (cksum_off + sizeof (uint16_t) > mp->b_wptr - mp->b_rptr) { 5283 if (!pullupmsg(mp, cksum_off + sizeof (uint16_t))) { 5284 BUMP_MIB(&is->is_rawip_mib, 5285 rawipOutErrors); 5286 freemsg(mp); 5287 return (0); 5288 } 5289 ip6i = (ip6i_t *)mp->b_rptr; 5290 if (ip6i->ip6i_nxt == IPPROTO_RAW) 5291 ip6h = (ip6_t *)&ip6i[1]; 5292 else 5293 ip6h = (ip6_t *)ip6i; 5294 } 5295 /* Add payload length to checksum */ 5296 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 5297 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 5298 csum += htons(ip_len - ext_hdrs_len); 5299 5300 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 5301 csum = (csum & 0xFFFF) + (csum >> 16); 5302 *cksum_ptr = (uint16_t)csum; 5303 } 5304 5305 #ifdef _LITTLE_ENDIAN 5306 ip_len = htons(ip_len); 5307 #endif 5308 ip6h->ip6_plen = (uint16_t)ip_len; 5309 5310 /* We're done. 
Pass the packet to IP */ 5311 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 5312 ip_output_v6(icmp->icmp_connp, mp, q, IP_WPUT); 5313 return (0); 5314 } 5315 5316 static void 5317 icmp_wput_other(queue_t *q, mblk_t *mp) 5318 { 5319 uchar_t *rptr = mp->b_rptr; 5320 struct iocblk *iocp; 5321 #define tudr ((struct T_unitdata_req *)rptr) 5322 conn_t *connp = Q_TO_CONN(q); 5323 icmp_t *icmp = connp->conn_icmp; 5324 icmp_stack_t *is = icmp->icmp_is; 5325 cred_t *cr; 5326 5327 cr = DB_CREDDEF(mp, connp->conn_cred); 5328 5329 switch (mp->b_datap->db_type) { 5330 case M_PROTO: 5331 case M_PCPROTO: 5332 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5333 /* 5334 * If the message does not contain a PRIM_type, 5335 * throw it away. 5336 */ 5337 freemsg(mp); 5338 return; 5339 } 5340 switch (((union T_primitives *)rptr)->type) { 5341 case T_ADDR_REQ: 5342 icmp_addr_req(q, mp); 5343 return; 5344 case O_T_BIND_REQ: 5345 case T_BIND_REQ: 5346 icmp_tpi_bind(q, mp); 5347 return; 5348 case T_CONN_REQ: 5349 icmp_tpi_connect(q, mp); 5350 return; 5351 case T_CAPABILITY_REQ: 5352 icmp_capability_req(q, mp); 5353 return; 5354 case T_INFO_REQ: 5355 icmp_info_req(q, mp); 5356 return; 5357 case T_UNITDATA_REQ: 5358 /* 5359 * If a T_UNITDATA_REQ gets here, the address must 5360 * be bad. Valid T_UNITDATA_REQs are found above 5361 * and break to below this switch. 
5362 */ 5363 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5364 return; 5365 case T_UNBIND_REQ: 5366 icmp_tpi_unbind(q, mp); 5367 return; 5368 5369 case T_SVR4_OPTMGMT_REQ: 5370 if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get, 5371 cr)) { 5372 /* Only IP can return anything meaningful */ 5373 (void) svr4_optcom_req(q, mp, cr, 5374 &icmp_opt_obj, B_TRUE); 5375 } 5376 return; 5377 5378 case T_OPTMGMT_REQ: 5379 /* Only IP can return anything meaningful */ 5380 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE); 5381 return; 5382 5383 case T_DISCON_REQ: 5384 icmp_tpi_disconnect(q, mp); 5385 return; 5386 5387 /* The following TPI message is not supported by icmp. */ 5388 case O_T_CONN_RES: 5389 case T_CONN_RES: 5390 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5391 return; 5392 5393 /* The following 3 TPI requests are illegal for icmp. */ 5394 case T_DATA_REQ: 5395 case T_EXDATA_REQ: 5396 case T_ORDREL_REQ: 5397 freemsg(mp); 5398 (void) putctl1(RD(q), M_ERROR, EPROTO); 5399 return; 5400 default: 5401 break; 5402 } 5403 break; 5404 case M_IOCTL: 5405 iocp = (struct iocblk *)mp->b_rptr; 5406 switch (iocp->ioc_cmd) { 5407 case TI_GETPEERNAME: 5408 if (icmp->icmp_state != TS_DATA_XFER) { 5409 /* 5410 * If a default destination address has not 5411 * been associated with the stream, then we 5412 * don't know the peer's name. 5413 */ 5414 iocp->ioc_error = ENOTCONN; 5415 err_ret:; 5416 iocp->ioc_count = 0; 5417 mp->b_datap->db_type = M_IOCACK; 5418 qreply(q, mp); 5419 return; 5420 } 5421 /* FALLTHRU */ 5422 case TI_GETMYNAME: 5423 /* 5424 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5425 * need to copyin the user's strbuf structure. 5426 * Processing will continue in the M_IOCDATA case 5427 * below. 
5428 */ 5429 mi_copyin(q, mp, NULL, 5430 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5431 return; 5432 case ND_SET: 5433 /* nd_getset performs the necessary error checking */ 5434 case ND_GET: 5435 if (nd_getset(q, is->is_nd, mp)) { 5436 qreply(q, mp); 5437 return; 5438 } 5439 break; 5440 case _SIOCSOCKFALLBACK: 5441 /* 5442 * socket is falling back to be a 5443 * streams socket. Nothing to do 5444 */ 5445 iocp->ioc_count = 0; 5446 iocp->ioc_rval = 0; 5447 qreply(q, mp); 5448 return; 5449 default: 5450 break; 5451 } 5452 break; 5453 case M_IOCDATA: 5454 icmp_wput_iocdata(q, mp); 5455 return; 5456 default: 5457 break; 5458 } 5459 ip_wput(q, mp); 5460 } 5461 5462 /* 5463 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5464 * messages. 5465 */ 5466 static void 5467 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5468 { 5469 mblk_t *mp1; 5470 STRUCT_HANDLE(strbuf, sb); 5471 icmp_t *icmp; 5472 uint_t addrlen; 5473 uint_t error; 5474 5475 /* Make sure it is one of ours. */ 5476 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5477 case TI_GETMYNAME: 5478 case TI_GETPEERNAME: 5479 break; 5480 default: 5481 icmp = Q_TO_ICMP(q); 5482 ip_output(icmp->icmp_connp, mp, q, IP_WPUT); 5483 return; 5484 } 5485 switch (mi_copy_state(q, mp, &mp1)) { 5486 case -1: 5487 return; 5488 case MI_COPY_CASE(MI_COPY_IN, 1): 5489 break; 5490 case MI_COPY_CASE(MI_COPY_OUT, 1): 5491 /* 5492 * The address has been copied out, so now 5493 * copyout the strbuf. 5494 */ 5495 mi_copyout(q, mp); 5496 return; 5497 case MI_COPY_CASE(MI_COPY_OUT, 2): 5498 /* 5499 * The address and strbuf have been copied out. 5500 * We're done, so just acknowledge the original 5501 * M_IOCTL. 5502 */ 5503 mi_copy_done(q, mp, 0); 5504 return; 5505 default: 5506 /* 5507 * Something strange has happened, so acknowledge 5508 * the original M_IOCTL with an EPROTO error. 
5509 */ 5510 mi_copy_done(q, mp, EPROTO); 5511 return; 5512 } 5513 /* 5514 * Now we have the strbuf structure for TI_GETMYNAME 5515 * and TI_GETPEERNAME. Next we copyout the requested 5516 * address and then we'll copyout the strbuf. 5517 */ 5518 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5519 (void *)mp1->b_rptr); 5520 icmp = Q_TO_ICMP(q); 5521 if (icmp->icmp_family == AF_INET) 5522 addrlen = sizeof (sin_t); 5523 else 5524 addrlen = sizeof (sin6_t); 5525 5526 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5527 mi_copy_done(q, mp, EINVAL); 5528 return; 5529 } 5530 5531 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5532 5533 if (mp1 == NULL) 5534 return; 5535 5536 rw_enter(&icmp->icmp_rwlock, RW_READER); 5537 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5538 case TI_GETMYNAME: 5539 error = rawip_do_getsockname(icmp, (void *)mp1->b_rptr, 5540 &addrlen); 5541 break; 5542 case TI_GETPEERNAME: 5543 error = rawip_do_getpeername(icmp, (void *)mp1->b_rptr, 5544 &addrlen); 5545 break; 5546 } 5547 rw_exit(&icmp->icmp_rwlock); 5548 5549 if (error != 0) { 5550 mi_copy_done(q, mp, error); 5551 } else { 5552 mp1->b_wptr += addrlen; 5553 STRUCT_FSET(sb, len, addrlen); 5554 5555 /* Copy out the address */ 5556 mi_copyout(q, mp); 5557 } 5558 } 5559 5560 static int 5561 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5562 void *thisdg_attrs) 5563 { 5564 conn_t *connp = Q_TO_CONN(q); 5565 struct T_unitdata_req *udreqp; 5566 int is_absreq_failure; 5567 cred_t *cr; 5568 5569 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5570 *errorp = 0; 5571 5572 cr = DB_CREDDEF(mp, connp->conn_cred); 5573 5574 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5575 udreqp->OPT_offset, cr, &icmp_opt_obj, 5576 thisdg_attrs, &is_absreq_failure); 5577 5578 if (*errorp != 0) { 5579 /* 5580 * Note: No special action needed in this 5581 * module for "is_absreq_failure" 5582 */ 5583 return (-1); /* failure */ 5584 } 5585 ASSERT(is_absreq_failure == 
0); 5586 return (0); /* success */ 5587 } 5588 5589 void 5590 icmp_ddi_g_init(void) 5591 { 5592 icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5593 icmp_opt_obj.odb_opt_arr_cnt); 5594 5595 /* 5596 * We want to be informed each time a stack is created or 5597 * destroyed in the kernel, so we can maintain the 5598 * set of icmp_stack_t's. 5599 */ 5600 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); 5601 } 5602 5603 void 5604 icmp_ddi_g_destroy(void) 5605 { 5606 netstack_unregister(NS_ICMP); 5607 } 5608 5609 #define INET_NAME "ip" 5610 5611 /* 5612 * Initialize the ICMP stack instance. 5613 */ 5614 static void * 5615 rawip_stack_init(netstackid_t stackid, netstack_t *ns) 5616 { 5617 icmp_stack_t *is; 5618 icmpparam_t *pa; 5619 int error = 0; 5620 major_t major; 5621 5622 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); 5623 is->is_netstack = ns; 5624 5625 pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); 5626 is->is_param_arr = pa; 5627 bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); 5628 5629 (void) icmp_param_register(&is->is_nd, 5630 is->is_param_arr, A_CNT(icmp_param_arr)); 5631 is->is_ksp = rawip_kstat_init(stackid); 5632 5633 major = mod_name_to_major(INET_NAME); 5634 error = ldi_ident_from_major(major, &is->is_ldi_ident); 5635 ASSERT(error == 0); 5636 return (is); 5637 } 5638 5639 /* 5640 * Free the ICMP stack instance. 
5641 */ 5642 static void 5643 rawip_stack_fini(netstackid_t stackid, void *arg) 5644 { 5645 icmp_stack_t *is = (icmp_stack_t *)arg; 5646 5647 nd_free(&is->is_nd); 5648 kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); 5649 is->is_param_arr = NULL; 5650 5651 rawip_kstat_fini(stackid, is->is_ksp); 5652 is->is_ksp = NULL; 5653 ldi_ident_release(is->is_ldi_ident); 5654 kmem_free(is, sizeof (*is)); 5655 } 5656 5657 static void * 5658 rawip_kstat_init(netstackid_t stackid) { 5659 kstat_t *ksp; 5660 5661 rawip_named_kstat_t template = { 5662 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5663 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5664 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5665 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5666 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5667 }; 5668 5669 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", 5670 KSTAT_TYPE_NAMED, 5671 NUM_OF_FIELDS(rawip_named_kstat_t), 5672 0, stackid); 5673 if (ksp == NULL || ksp->ks_data == NULL) 5674 return (NULL); 5675 5676 bcopy(&template, ksp->ks_data, sizeof (template)); 5677 ksp->ks_update = rawip_kstat_update; 5678 ksp->ks_private = (void *)(uintptr_t)stackid; 5679 5680 kstat_install(ksp); 5681 return (ksp); 5682 } 5683 5684 static void 5685 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) 5686 { 5687 if (ksp != NULL) { 5688 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 5689 kstat_delete_netstack(ksp, stackid); 5690 } 5691 } 5692 5693 static int 5694 rawip_kstat_update(kstat_t *ksp, int rw) 5695 { 5696 rawip_named_kstat_t *rawipkp; 5697 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; 5698 netstack_t *ns; 5699 icmp_stack_t *is; 5700 5701 if ((ksp == NULL) || (ksp->ks_data == NULL)) 5702 return (EIO); 5703 5704 if (rw == KSTAT_WRITE) 5705 return (EACCES); 5706 5707 rawipkp = (rawip_named_kstat_t *)ksp->ks_data; 5708 5709 ns = netstack_find_by_stackid(stackid); 5710 if (ns == NULL) 5711 return (-1); 5712 is = ns->netstack_icmp; 5713 if (is == NULL) { 5714 
netstack_rele(ns); 5715 return (-1); 5716 } 5717 rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; 5718 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; 5719 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; 5720 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; 5721 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; 5722 netstack_rele(ns); 5723 return (0); 5724 } 5725 5726 /* ARGSUSED */ 5727 int 5728 rawip_accept(sock_lower_handle_t lproto_handle, 5729 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 5730 cred_t *cr) 5731 { 5732 return (EOPNOTSUPP); 5733 } 5734 5735 /* ARGSUSED */ 5736 int 5737 rawip_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 5738 socklen_t len, cred_t *cr) 5739 { 5740 conn_t *connp = (conn_t *)proto_handle; 5741 int error; 5742 5743 /* Binding to a NULL address really means unbind */ 5744 if (sa == NULL) 5745 error = rawip_do_unbind(connp); 5746 else 5747 error = rawip_do_bind(connp, sa, len); 5748 5749 if (error < 0) { 5750 if (error == -TOUTSTATE) 5751 error = EINVAL; 5752 else 5753 error = proto_tlitosyserr(-error); 5754 } 5755 return (error); 5756 } 5757 5758 static int 5759 rawip_implicit_bind(conn_t *connp) 5760 { 5761 sin6_t sin6addr; 5762 sin_t *sin; 5763 sin6_t *sin6; 5764 socklen_t len; 5765 int error; 5766 5767 if (connp->conn_icmp->icmp_family == AF_INET) { 5768 len = sizeof (struct sockaddr_in); 5769 sin = (sin_t *)&sin6addr; 5770 *sin = sin_null; 5771 sin->sin_family = AF_INET; 5772 sin->sin_addr.s_addr = INADDR_ANY; 5773 } else { 5774 ASSERT(connp->conn_icmp->icmp_family == AF_INET6); 5775 len = sizeof (sin6_t); 5776 sin6 = (sin6_t *)&sin6addr; 5777 *sin6 = sin6_null; 5778 sin6->sin6_family = AF_INET6; 5779 V6_SET_ZERO(sin6->sin6_addr); 5780 } 5781 5782 error = rawip_do_bind(connp, (struct sockaddr *)&sin6addr, len); 5783 5784 return ((error < 0) ? 
proto_tlitosyserr(-error) : error); 5785 } 5786 5787 static int 5788 rawip_unbind(conn_t *connp) 5789 { 5790 int error; 5791 5792 error = rawip_do_unbind(connp); 5793 if (error < 0) { 5794 error = proto_tlitosyserr(-error); 5795 } 5796 return (error); 5797 } 5798 5799 /* ARGSUSED */ 5800 int 5801 rawip_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 5802 { 5803 return (EOPNOTSUPP); 5804 } 5805 5806 /* ARGSUSED */ 5807 int 5808 rawip_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 5809 socklen_t len, sock_connid_t *id, cred_t *cr) 5810 { 5811 conn_t *connp = (conn_t *)proto_handle; 5812 icmp_t *icmp = connp->conn_icmp; 5813 int error; 5814 boolean_t did_bind = B_FALSE; 5815 5816 if (sa == NULL) { 5817 /* 5818 * Disconnect 5819 * Make sure we are connected 5820 */ 5821 if (icmp->icmp_state != TS_DATA_XFER) 5822 return (EINVAL); 5823 5824 error = icmp_disconnect(connp); 5825 return (error); 5826 } 5827 5828 error = proto_verify_ip_addr(icmp->icmp_family, sa, len); 5829 if (error != 0) 5830 return (error); 5831 5832 /* do an implicit bind if necessary */ 5833 if (icmp->icmp_state == TS_UNBND) { 5834 error = rawip_implicit_bind(connp); 5835 /* 5836 * We could be racing with an actual bind, in which case 5837 * we would see EPROTO. We cross our fingers and try 5838 * to connect. 
5839 */ 5840 if (!(error == 0 || error == EPROTO)) 5841 return (error); 5842 did_bind = B_TRUE; 5843 } 5844 5845 /* 5846 * set SO_DGRAM_ERRIND 5847 */ 5848 icmp->icmp_dgram_errind = B_TRUE; 5849 5850 error = rawip_do_connect(connp, sa, len); 5851 5852 if (error != 0 && did_bind) { 5853 int unbind_err; 5854 5855 unbind_err = rawip_unbind(connp); 5856 ASSERT(unbind_err == 0); 5857 } 5858 5859 if (error == 0) { 5860 *id = 0; 5861 (*connp->conn_upcalls->su_connected) 5862 (connp->conn_upper_handle, 0, NULL, -1); 5863 } else if (error < 0) { 5864 error = proto_tlitosyserr(-error); 5865 } 5866 return (error); 5867 } 5868 5869 /* ARGSUSED */ 5870 void 5871 rawip_fallback(sock_lower_handle_t proto_handle, queue_t *q, 5872 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb) 5873 { 5874 conn_t *connp = (conn_t *)proto_handle; 5875 icmp_t *icmp; 5876 struct T_capability_ack tca; 5877 struct sockaddr_in6 laddr, faddr; 5878 socklen_t laddrlen, faddrlen; 5879 short opts; 5880 struct stroptions *stropt; 5881 mblk_t *stropt_mp; 5882 int error; 5883 5884 icmp = connp->conn_icmp; 5885 5886 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 5887 5888 /* 5889 * setup the fallback stream that was allocated 5890 */ 5891 connp->conn_dev = (dev_t)RD(q)->q_ptr; 5892 connp->conn_minor_arena = WR(q)->q_ptr; 5893 5894 RD(q)->q_ptr = WR(q)->q_ptr = connp; 5895 5896 WR(q)->q_qinfo = &icmpwinit; 5897 5898 connp->conn_rq = RD(q); 5899 connp->conn_wq = WR(q); 5900 5901 /* Notify stream head about options before sending up data */ 5902 stropt_mp->b_datap->db_type = M_SETOPTS; 5903 stropt_mp->b_wptr += sizeof (*stropt); 5904 stropt = (struct stroptions *)stropt_mp->b_rptr; 5905 stropt->so_flags = SO_WROFF | SO_HIWAT; 5906 stropt->so_wroff = 5907 (ushort_t)(icmp->icmp_max_hdr_len + icmp->icmp_is->is_wroff_extra); 5908 stropt->so_hiwat = icmp->icmp_recv_hiwat; 5909 putnext(RD(q), stropt_mp); 5910 5911 /* 5912 * free helper stream 5913 */ 5914 ip_free_helper_stream(connp); 
5915 5916 /* 5917 * Collect the information needed to sync with the sonode 5918 */ 5919 icmp_do_capability_ack(icmp, &tca, TC1_INFO); 5920 5921 laddrlen = faddrlen = sizeof (sin6_t); 5922 (void) rawip_getsockname((sock_lower_handle_t)connp, 5923 (struct sockaddr *)&laddr, &laddrlen, NULL); 5924 error = rawip_getpeername((sock_lower_handle_t)connp, 5925 (struct sockaddr *)&faddr, &faddrlen, NULL); 5926 if (error != 0) 5927 faddrlen = 0; 5928 opts = 0; 5929 if (icmp->icmp_dgram_errind) 5930 opts |= SO_DGRAM_ERRIND; 5931 if (icmp->icmp_dontroute) 5932 opts |= SO_DONTROUTE; 5933 5934 /* 5935 * Once we grab the drain lock, no data will be send up 5936 * to the socket. So we notify the socket that the endpoint 5937 * is quiescent and it's therefore safe move data from 5938 * the socket to the stream head. 5939 */ 5940 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 5941 (struct sockaddr *)&laddr, laddrlen, 5942 (struct sockaddr *)&faddr, faddrlen, opts); 5943 5944 /* 5945 * push up any packets that were queued in icmp_t 5946 */ 5947 5948 mutex_enter(&icmp->icmp_recv_lock); 5949 while (icmp->icmp_fallback_queue_head != NULL) { 5950 mblk_t *mp; 5951 5952 mp = icmp->icmp_fallback_queue_head; 5953 icmp->icmp_fallback_queue_head = mp->b_next; 5954 mp->b_next = NULL; 5955 mutex_exit(&icmp->icmp_recv_lock); 5956 putnext(RD(q), mp); 5957 mutex_enter(&icmp->icmp_recv_lock); 5958 } 5959 icmp->icmp_fallback_queue_tail = icmp->icmp_fallback_queue_head; 5960 /* 5961 * No longer a streams less socket 5962 */ 5963 connp->conn_flags &= ~IPCL_NONSTR; 5964 mutex_exit(&icmp->icmp_recv_lock); 5965 ASSERT(icmp->icmp_fallback_queue_head == NULL && 5966 icmp->icmp_fallback_queue_tail == NULL); 5967 5968 ASSERT(connp->conn_ref >= 1); 5969 } 5970 5971 /* ARGSUSED */ 5972 sock_lower_handle_t 5973 rawip_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 5974 uint_t *smodep, int *errorp, int flags, cred_t *credp) 5975 { 5976 conn_t *connp; 5977 5978 if (type != SOCK_RAW || 
(family != AF_INET && family != AF_INET6)) { 5979 *errorp = EPROTONOSUPPORT; 5980 return (NULL); 5981 } 5982 5983 connp = icmp_open(family, credp, errorp, flags); 5984 if (connp != NULL) { 5985 icmp_stack_t *is; 5986 5987 is = connp->conn_icmp->icmp_is; 5988 connp->conn_flags |= IPCL_NONSTR; 5989 5990 if (connp->conn_icmp->icmp_family == AF_INET6) { 5991 /* Build initial header template for transmit */ 5992 rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER); 5993 if ((*errorp = 5994 icmp_build_hdrs(connp->conn_icmp)) != 0) { 5995 rw_exit(&connp->conn_icmp->icmp_rwlock); 5996 ipcl_conn_destroy(connp); 5997 return (NULL); 5998 } 5999 rw_exit(&connp->conn_icmp->icmp_rwlock); 6000 } 6001 6002 connp->conn_icmp->icmp_recv_hiwat = is->is_recv_hiwat; 6003 connp->conn_icmp->icmp_xmit_hiwat = is->is_xmit_hiwat; 6004 6005 if ((*errorp = ip_create_helper_stream(connp, 6006 is->is_ldi_ident)) != 0) { 6007 cmn_err(CE_CONT, "create of IP helper stream failed\n"); 6008 (void) rawip_do_close(connp); 6009 return (NULL); 6010 } 6011 6012 mutex_enter(&connp->conn_lock); 6013 connp->conn_state_flags &= ~CONN_INCIPIENT; 6014 mutex_exit(&connp->conn_lock); 6015 *sock_downcalls = &sock_rawip_downcalls; 6016 *smodep = SM_ATOMIC; 6017 } else { 6018 ASSERT(*errorp != 0); 6019 } 6020 6021 return ((sock_lower_handle_t)connp); 6022 } 6023 6024 /* ARGSUSED */ 6025 void 6026 rawip_activate(sock_lower_handle_t proto_handle, 6027 sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, int flags, 6028 cred_t *cr) 6029 { 6030 conn_t *connp = (conn_t *)proto_handle; 6031 icmp_stack_t *is = connp->conn_icmp->icmp_is; 6032 struct sock_proto_props sopp; 6033 6034 connp->conn_upcalls = sock_upcalls; 6035 connp->conn_upper_handle = sock_handle; 6036 6037 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 6038 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 6039 sopp.sopp_wroff = connp->conn_icmp->icmp_max_hdr_len + 6040 is->is_wroff_extra; 6041 sopp.sopp_rxhiwat = 
is->is_recv_hiwat; 6042 sopp.sopp_rxlowat = icmp_mod_info.mi_lowat; 6043 sopp.sopp_maxblk = INFPSZ; 6044 sopp.sopp_maxpsz = IP_MAXPACKET; 6045 sopp.sopp_minpsz = (icmp_mod_info.mi_minpsz == 1) ? 0 : 6046 icmp_mod_info.mi_minpsz; 6047 6048 (*connp->conn_upcalls->su_set_proto_props) 6049 (connp->conn_upper_handle, &sopp); 6050 } 6051 6052 static int 6053 rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp) 6054 { 6055 sin_t *sin = (sin_t *)sa; 6056 sin6_t *sin6 = (sin6_t *)sa; 6057 6058 ASSERT(icmp != NULL); 6059 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 6060 6061 switch (icmp->icmp_family) { 6062 case AF_INET: 6063 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 6064 if (*salenp < sizeof (sin_t)) 6065 return (EINVAL); 6066 6067 *salenp = sizeof (sin_t); 6068 *sin = sin_null; 6069 sin->sin_family = AF_INET; 6070 if (icmp->icmp_state == TS_UNBND) { 6071 break; 6072 } 6073 6074 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 6075 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 6076 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_v6src); 6077 } else { 6078 /* 6079 * INADDR_ANY 6080 * icmp_v6src is not set, we might be bound to 6081 * broadcast/multicast. Use icmp_bound_v6src as 6082 * local address instead (that could 6083 * also still be INADDR_ANY) 6084 */ 6085 sin->sin_addr.s_addr = 6086 V4_PART_OF_V6(icmp->icmp_bound_v6src); 6087 } 6088 break; 6089 case AF_INET6: 6090 6091 if (*salenp < sizeof (sin6_t)) 6092 return (EINVAL); 6093 6094 *salenp = sizeof (sin6_t); 6095 *sin6 = sin6_null; 6096 sin6->sin6_family = AF_INET6; 6097 if (icmp->icmp_state == TS_UNBND) { 6098 break; 6099 } 6100 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 6101 sin6->sin6_addr = icmp->icmp_v6src; 6102 } else { 6103 /* 6104 * UNSPECIFIED 6105 * icmp_v6src is not set, we might be bound to 6106 * broadcast/multicast. 
Use icmp_bound_v6src as 6107 * local address instead (that could 6108 * also still be UNSPECIFIED) 6109 */ 6110 6111 sin6->sin6_addr = icmp->icmp_bound_v6src; 6112 } 6113 break; 6114 } 6115 return (0); 6116 } 6117 6118 static int 6119 rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp) 6120 { 6121 sin_t *sin = (sin_t *)sa; 6122 sin6_t *sin6 = (sin6_t *)sa; 6123 6124 ASSERT(icmp != NULL); 6125 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 6126 6127 if (icmp->icmp_state != TS_DATA_XFER) 6128 return (ENOTCONN); 6129 6130 sa->sa_family = icmp->icmp_family; 6131 switch (icmp->icmp_family) { 6132 case AF_INET: 6133 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 6134 6135 if (*salenp < sizeof (sin_t)) 6136 return (EINVAL); 6137 6138 *salenp = sizeof (sin_t); 6139 *sin = sin_null; 6140 sin->sin_family = AF_INET; 6141 sin->sin_addr.s_addr = 6142 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr); 6143 break; 6144 case AF_INET6: 6145 if (*salenp < sizeof (sin6_t)) 6146 return (EINVAL); 6147 6148 *salenp = sizeof (sin6_t); 6149 *sin6 = sin6_null; 6150 *sin6 = icmp->icmp_v6dst; 6151 break; 6152 } 6153 return (0); 6154 } 6155 6156 /* ARGSUSED */ 6157 int 6158 rawip_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6159 socklen_t *salenp, cred_t *cr) 6160 { 6161 conn_t *connp = (conn_t *)proto_handle; 6162 icmp_t *icmp = connp->conn_icmp; 6163 int error; 6164 6165 ASSERT(icmp != NULL); 6166 6167 rw_enter(&icmp->icmp_rwlock, RW_READER); 6168 6169 error = rawip_do_getpeername(icmp, sa, salenp); 6170 6171 rw_exit(&icmp->icmp_rwlock); 6172 6173 return (error); 6174 } 6175 6176 /* ARGSUSED */ 6177 int 6178 rawip_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6179 socklen_t *salenp, cred_t *cr) 6180 { 6181 conn_t *connp = (conn_t *)proto_handle; 6182 icmp_t *icmp = connp->conn_icmp; 6183 int error; 6184 6185 ASSERT(icmp != NULL); 6186 rw_enter(&icmp->icmp_rwlock, RW_READER); 6187 6188 error = rawip_do_getsockname(icmp, sa, salenp); 6189 6190 
rw_exit(&icmp->icmp_rwlock); 6191 6192 return (error); 6193 } 6194 6195 int 6196 rawip_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6197 const void *optvalp, socklen_t optlen, cred_t *cr) 6198 { 6199 conn_t *connp = (conn_t *)proto_handle; 6200 icmp_t *icmp = connp->conn_icmp; 6201 int error; 6202 6203 error = proto_opt_check(level, option_name, optlen, NULL, 6204 icmp_opt_obj.odb_opt_des_arr, 6205 icmp_opt_obj.odb_opt_arr_cnt, 6206 icmp_opt_obj.odb_topmost_tpiprovider, 6207 B_TRUE, B_FALSE, cr); 6208 6209 if (error != 0) { 6210 /* 6211 * option not recognized 6212 */ 6213 if (error < 0) { 6214 error = proto_tlitosyserr(-error); 6215 } 6216 return (error); 6217 } 6218 6219 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6220 error = icmp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, 6221 option_name, optlen, (uchar_t *)optvalp, (uint_t *)&optlen, 6222 (uchar_t *)optvalp, NULL, cr); 6223 rw_exit(&icmp->icmp_rwlock); 6224 6225 if (error < 0) { 6226 /* 6227 * Pass on to ip 6228 */ 6229 error = ip_set_options(connp, level, option_name, optvalp, 6230 optlen, cr); 6231 } 6232 6233 ASSERT(error >= 0); 6234 6235 return (error); 6236 } 6237 6238 int 6239 rawip_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6240 void *optvalp, socklen_t *optlen, cred_t *cr) 6241 { 6242 int error; 6243 conn_t *connp = (conn_t *)proto_handle; 6244 icmp_t *icmp = connp->conn_icmp; 6245 t_uscalar_t max_optbuf_len; 6246 void *optvalp_buf; 6247 int len; 6248 6249 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6250 icmp_opt_obj.odb_opt_des_arr, 6251 icmp_opt_obj.odb_opt_arr_cnt, 6252 icmp_opt_obj.odb_topmost_tpiprovider, 6253 B_FALSE, B_TRUE, cr); 6254 6255 if (error != 0) { 6256 if (error < 0) { 6257 error = proto_tlitosyserr(-error); 6258 } 6259 return (error); 6260 } 6261 6262 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6263 rw_enter(&icmp->icmp_rwlock, RW_READER); 6264 len = icmp_opt_get(connp, level, option_name, 
optvalp_buf); 6265 rw_exit(&icmp->icmp_rwlock); 6266 6267 if (len < 0) { 6268 /* 6269 * Pass on to IP 6270 */ 6271 kmem_free(optvalp_buf, max_optbuf_len); 6272 return (ip_get_options(connp, level, option_name, optvalp, 6273 optlen, cr)); 6274 } else { 6275 /* 6276 * update optlen and copy option value 6277 */ 6278 t_uscalar_t size = MIN(len, *optlen); 6279 bcopy(optvalp_buf, optvalp, size); 6280 bcopy(&size, optlen, sizeof (size)); 6281 6282 kmem_free(optvalp_buf, max_optbuf_len); 6283 return (0); 6284 } 6285 } 6286 6287 /* ARGSUSED */ 6288 int 6289 rawip_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 6290 { 6291 conn_t *connp = (conn_t *)proto_handle; 6292 (void) rawip_do_close(connp); 6293 return (0); 6294 } 6295 6296 /* ARGSUSED */ 6297 int 6298 rawip_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6299 { 6300 conn_t *connp = (conn_t *)proto_handle; 6301 6302 /* shut down the send side */ 6303 if (how != SHUT_RD) 6304 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6305 SOCK_OPCTL_SHUT_SEND, 0); 6306 /* shut down the recv side */ 6307 if (how != SHUT_WR) 6308 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6309 SOCK_OPCTL_SHUT_RECV, 0); 6310 return (0); 6311 } 6312 6313 void 6314 rawip_clr_flowctrl(sock_lower_handle_t proto_handle) 6315 { 6316 conn_t *connp = (conn_t *)proto_handle; 6317 icmp_t *icmp = connp->conn_icmp; 6318 6319 mutex_enter(&icmp->icmp_recv_lock); 6320 connp->conn_flow_cntrld = B_FALSE; 6321 mutex_exit(&icmp->icmp_recv_lock); 6322 } 6323 6324 int 6325 rawip_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6326 int mode, int32_t *rvalp, cred_t *cr) 6327 { 6328 conn_t *connp = (conn_t *)proto_handle; 6329 int error; 6330 6331 switch (cmd) { 6332 case ND_SET: 6333 case ND_GET: 6334 case _SIOCSOCKFALLBACK: 6335 case TI_GETPEERNAME: 6336 case TI_GETMYNAME: 6337 #ifdef DEBUG 6338 cmn_err(CE_CONT, "icmp_ioctl cmd 0x%x on non streams" 6339 " socket", cmd); 6340 #endif 6341 error = 
EINVAL; 6342 break; 6343 default: 6344 /* 6345 * Pass on to IP using helper stream 6346 */ 6347 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6348 cmd, arg, mode, cr, rvalp); 6349 break; 6350 } 6351 return (error); 6352 } 6353 6354 /* ARGSUSED */ 6355 int 6356 rawip_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 6357 cred_t *cr) 6358 { 6359 conn_t *connp = (conn_t *)proto_handle; 6360 icmp_t *icmp = connp->conn_icmp; 6361 icmp_stack_t *is = icmp->icmp_is; 6362 int error = 0; 6363 boolean_t bypass_dgram_errind = B_FALSE; 6364 6365 ASSERT(DB_TYPE(mp) == M_DATA); 6366 6367 if (is_system_labeled()) 6368 msg_setcredpid(mp, cr, curproc->p_pid); 6369 6370 /* do an implicit bind if necessary */ 6371 if (icmp->icmp_state == TS_UNBND) { 6372 error = rawip_implicit_bind(connp); 6373 /* 6374 * We could be racing with an actual bind, in which case 6375 * we would see EPROTO. We cross our fingers and try 6376 * to connect. 6377 */ 6378 if (!(error == 0 || error == EPROTO)) { 6379 freemsg(mp); 6380 return (error); 6381 } 6382 } 6383 6384 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6385 6386 if (msg->msg_name != NULL && icmp->icmp_state == TS_DATA_XFER) { 6387 error = EISCONN; 6388 goto done_lock; 6389 } 6390 6391 switch (icmp->icmp_family) { 6392 case AF_INET6: { 6393 sin6_t *sin6; 6394 ip6_pkt_t ipp_s; /* For ancillary data options */ 6395 ip6_pkt_t *ipp = &ipp_s; 6396 6397 sin6 = (sin6_t *)msg->msg_name; 6398 if (sin6 != NULL) { 6399 error = proto_verify_ip_addr(icmp->icmp_family, 6400 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6401 if (error != 0) { 6402 bypass_dgram_errind = B_TRUE; 6403 goto done_lock; 6404 } 6405 if (icmp->icmp_delayed_error != 0) { 6406 sin6_t *sin1 = (sin6_t *)msg->msg_name; 6407 sin6_t *sin2 = (sin6_t *) 6408 &icmp->icmp_delayed_addr; 6409 6410 error = icmp->icmp_delayed_error; 6411 icmp->icmp_delayed_error = 0; 6412 6413 /* Compare IP address and port */ 6414 6415 if (sin1->sin6_port == sin2->sin6_port && 6416 
IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 6417 &sin2->sin6_addr)) { 6418 goto done_lock; 6419 } 6420 } 6421 } else { 6422 /* 6423 * Use connected address 6424 */ 6425 if (icmp->icmp_state != TS_DATA_XFER) { 6426 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6427 error = EDESTADDRREQ; 6428 bypass_dgram_errind = B_TRUE; 6429 goto done_lock; 6430 } 6431 sin6 = &icmp->icmp_v6dst; 6432 } 6433 6434 /* No support for mapped addresses on raw sockets */ 6435 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6436 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6437 error = EADDRNOTAVAIL; 6438 goto done_lock; 6439 } 6440 6441 ipp->ipp_fields = 0; 6442 ipp->ipp_sticky_ignored = 0; 6443 6444 /* 6445 * If options passed in, feed it for verification and handling 6446 */ 6447 if (msg->msg_controllen != 0) { 6448 error = process_auxiliary_options(connp, 6449 msg->msg_control, msg->msg_controllen, 6450 ipp, &icmp_opt_obj, icmp_opt_set); 6451 if (error != 0) { 6452 goto done_lock; 6453 } 6454 } 6455 6456 rw_exit(&icmp->icmp_rwlock); 6457 6458 /* 6459 * Destination is a native IPv6 address. 6460 * Send out an IPv6 format packet. 
6461 */ 6462 6463 error = raw_ip_send_data_v6(connp->conn_wq, connp, mp, sin6, 6464 ipp); 6465 } 6466 break; 6467 case AF_INET: { 6468 sin_t *sin; 6469 ip4_pkt_t pktinfo; 6470 ip4_pkt_t *pktinfop = &pktinfo; 6471 ipaddr_t v4dst; 6472 6473 sin = (sin_t *)msg->msg_name; 6474 if (sin != NULL) { 6475 error = proto_verify_ip_addr(icmp->icmp_family, 6476 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6477 if (error != 0) { 6478 bypass_dgram_errind = B_TRUE; 6479 goto done_lock; 6480 } 6481 v4dst = sin->sin_addr.s_addr; 6482 if (icmp->icmp_delayed_error != 0) { 6483 sin_t *sin1 = (sin_t *)msg->msg_name; 6484 sin_t *sin2 = (sin_t *)&icmp->icmp_delayed_addr; 6485 6486 error = icmp->icmp_delayed_error; 6487 icmp->icmp_delayed_error = 0; 6488 6489 /* Compare IP address and port */ 6490 if (sin1->sin_port == sin2->sin_port && 6491 sin1->sin_addr.s_addr == 6492 sin2->sin_addr.s_addr) { 6493 goto done_lock; 6494 } 6495 6496 } 6497 } else { 6498 /* 6499 * Use connected address 6500 */ 6501 if (icmp->icmp_state != TS_DATA_XFER) { 6502 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6503 error = EDESTADDRREQ; 6504 bypass_dgram_errind = B_TRUE; 6505 goto done_lock; 6506 } 6507 v4dst = V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr); 6508 } 6509 6510 6511 pktinfop->ip4_ill_index = 0; 6512 pktinfop->ip4_addr = INADDR_ANY; 6513 6514 /* 6515 * If options passed in, feed it for verification and handling 6516 */ 6517 if (msg->msg_controllen != 0) { 6518 error = process_auxiliary_options(connp, 6519 msg->msg_control, msg->msg_controllen, 6520 pktinfop, &icmp_opt_obj, icmp_opt_set); 6521 if (error != 0) { 6522 goto done_lock; 6523 } 6524 } 6525 rw_exit(&icmp->icmp_rwlock); 6526 6527 error = raw_ip_send_data_v4(connp->conn_wq, connp, mp, 6528 v4dst, pktinfop); 6529 break; 6530 } 6531 6532 default: 6533 ASSERT(0); 6534 } 6535 6536 goto done; 6537 6538 done_lock: 6539 rw_exit(&icmp->icmp_rwlock); 6540 if (error != 0) { 6541 ASSERT(mp != NULL); 6542 freemsg(mp); 6543 } 6544 done: 6545 if 
(bypass_dgram_errind) 6546 return (error); 6547 return (icmp->icmp_dgram_errind ? error : 0); 6548 } 6549 6550 sock_downcalls_t sock_rawip_downcalls = { 6551 rawip_activate, 6552 rawip_accept, 6553 rawip_bind, 6554 rawip_listen, 6555 rawip_connect, 6556 rawip_getpeername, 6557 rawip_getsockname, 6558 rawip_getsockopt, 6559 rawip_setsockopt, 6560 rawip_send, 6561 NULL, 6562 NULL, 6563 NULL, 6564 rawip_shutdown, 6565 rawip_clr_flowctrl, 6566 rawip_ioctl, 6567 rawip_close 6568 }; 6569