1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/stropts.h> 30 #include <sys/strlog.h> 31 #include <sys/strsun.h> 32 #define _SUN_TPI_VERSION 2 33 #include <sys/tihdr.h> 34 #include <sys/timod.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/kmem.h> 41 #include <sys/policy.h> 42 #include <sys/priv.h> 43 #include <sys/zone.h> 44 #include <sys/time.h> 45 46 #include <sys/sockio.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/isa_defs.h> 50 #include <sys/suntpi.h> 51 #include <sys/xti_inet.h> 52 #include <sys/netstack.h> 53 54 #include <net/route.h> 55 #include <net/if.h> 56 57 #include <netinet/in.h> 58 #include <netinet/ip6.h> 59 #include <netinet/icmp6.h> 60 #include <inet/common.h> 61 #include <inet/ip.h> 62 #include <inet/ip6.h> 63 #include <inet/proto_set.h> 64 #include <inet/nd.h> 65 #include <inet/optcom.h> 66 #include <inet/snmpcom.h> 67 #include <inet/kstatcom.h> 68 #include <inet/rawip_impl.h> 69 70 #include <netinet/ip_mroute.h> 71 #include <inet/tcp.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 #include <inet/ipclassifier.h> 75 76 #include <sys/tsol/label.h> 77 #include <sys/tsol/tnet.h> 78 79 #include <inet/ip_ire.h> 80 #include <inet/ip_if.h> 81 82 #include <inet/ip_impl.h> 83 #include <sys/disp.h> 84 85 /* 86 * Synchronization notes: 87 * 88 * RAWIP is MT and uses the usual kernel synchronization primitives. There is 89 * one lock, which is icmp_rwlock. We also use conn_lock when updating things 90 * which affect the IP classifier lookup. 91 * The lock order is icmp_rwlock -> conn_lock. 92 * 93 * The icmp_rwlock: 94 * This protects most of the other fields in the icmp_t. The exact list of 95 * fields which are protected by each of the above locks is documented in 96 * the icmp_t structure definition. 97 * 98 * Plumbing notes: 99 * ICMP is always a device driver. 
For compatibility with mibopen() code 100 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough 101 * dummy module. 102 */ 103 104 static void icmp_addr_req(queue_t *q, mblk_t *mp); 105 static void icmp_tpi_bind(queue_t *q, mblk_t *mp); 106 static int icmp_bind_proto(conn_t *connp); 107 static int icmp_build_hdrs(icmp_t *icmp); 108 static void icmp_capability_req(queue_t *q, mblk_t *mp); 109 static int icmp_close(queue_t *q, int flags); 110 static void icmp_tpi_connect(queue_t *q, mblk_t *mp); 111 static void icmp_tpi_disconnect(queue_t *q, mblk_t *mp); 112 static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 113 int sys_error); 114 static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 115 t_scalar_t t_error, int sys_error); 116 static void icmp_icmp_error(conn_t *connp, mblk_t *mp); 117 static void icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp); 118 static void icmp_info_req(queue_t *q, mblk_t *mp); 119 static void icmp_input(void *, mblk_t *, void *); 120 static conn_t *icmp_open(int family, cred_t *credp, int *err, int flags); 121 static int icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 122 cred_t *credp); 123 static int icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 124 cred_t *credp); 125 static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, 126 int *errorp, void *thisdg_attrs); 127 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 128 int icmp_opt_set(conn_t *connp, uint_t optset_context, 129 int level, int name, uint_t inlen, 130 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 131 void *thisdg_attrs, cred_t *cr); 132 int icmp_opt_get(conn_t *connp, int level, int name, 133 uchar_t *ptr); 134 static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 135 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt); 136 static int icmp_param_set(queue_t *q, mblk_t *mp, char *value, 137 caddr_t cp, cred_t 
*cr); 138 static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 139 uchar_t *ptr, int len); 140 static int icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 141 cred_t *cr); 142 static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 143 static void icmp_tpi_unbind(queue_t *q, mblk_t *mp); 144 static int icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst); 145 static void icmp_wput(queue_t *q, mblk_t *mp); 146 static void icmp_wput_fallback(queue_t *q, mblk_t *mp); 147 static int raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp, 148 sin6_t *sin6, ip6_pkt_t *ipp); 149 static int raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp, 150 ipaddr_t v4dst, ip4_pkt_t *pktinfop); 151 static void icmp_wput_other(queue_t *q, mblk_t *mp); 152 static void icmp_wput_iocdata(queue_t *q, mblk_t *mp); 153 static void icmp_wput_restricted(queue_t *q, mblk_t *mp); 154 static void icmp_ulp_recv(conn_t *, mblk_t *); 155 156 static void *rawip_stack_init(netstackid_t stackid, netstack_t *ns); 157 static void rawip_stack_fini(netstackid_t stackid, void *arg); 158 159 static void *rawip_kstat_init(netstackid_t stackid); 160 static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp); 161 static int rawip_kstat_update(kstat_t *kp, int rw); 162 static void rawip_stack_shutdown(netstackid_t stackid, void *arg); 163 static int rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa, 164 uint_t *salenp); 165 static int rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa, 166 uint_t *salenp); 167 168 int rawip_getsockname(sock_lower_handle_t, struct sockaddr *, 169 socklen_t *, cred_t *); 170 int rawip_getpeername(sock_lower_handle_t, struct sockaddr *, 171 socklen_t *, cred_t *); 172 173 static struct module_info icmp_mod_info = { 174 5707, "icmp", 1, INFPSZ, 512, 128 175 }; 176 177 /* 178 * Entry points for ICMP as a device. 179 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices. 
180 */ 181 static struct qinit icmprinitv4 = { 182 NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info 183 }; 184 185 static struct qinit icmprinitv6 = { 186 NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info 187 }; 188 189 static struct qinit icmpwinit = { 190 (pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &icmp_mod_info 191 }; 192 193 /* ICMP entry point during fallback */ 194 static struct qinit icmp_fallback_sock_winit = { 195 (pfi_t)icmp_wput_fallback, NULL, NULL, NULL, NULL, &icmp_mod_info 196 }; 197 198 /* For AF_INET aka /dev/icmp */ 199 struct streamtab icmpinfov4 = { 200 &icmprinitv4, &icmpwinit 201 }; 202 203 /* For AF_INET6 aka /dev/icmp6 */ 204 struct streamtab icmpinfov6 = { 205 &icmprinitv6, &icmpwinit 206 }; 207 208 static sin_t sin_null; /* Zero address for quick clears */ 209 static sin6_t sin6_null; /* Zero address for quick clears */ 210 211 /* Default structure copied into T_INFO_ACK messages */ 212 static struct T_info_ack icmp_g_t_info_ack = { 213 T_INFO_ACK, 214 IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */ 215 T_INVALID, /* ETSDU_size. icmp does not support expedited data. */ 216 T_INVALID, /* CDATA_size. icmp does not support connect data. */ 217 T_INVALID, /* DDATA_size. icmp does not support disconnect data. */ 218 0, /* ADDR_size - filled in later. */ 219 0, /* OPT_size - not initialized here */ 220 IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */ 221 T_CLTS, /* SERV_type. icmp supports connection-less. */ 222 TS_UNBND, /* CURRENT_state. This is set from icmp_state. */ 223 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 224 }; 225 226 /* 227 * Table of ND variables supported by icmp. These are loaded into is_nd 228 * when the stack instance is created. 229 * All of these are alterable, within the min/max values given, at run time. 
230 */ 231 static icmpparam_t icmp_param_arr[] = { 232 /* min max value name */ 233 { 0, 128, 32, "icmp_wroff_extra" }, 234 { 1, 255, 255, "icmp_ipv4_ttl" }, 235 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"}, 236 { 0, 1, 1, "icmp_bsd_compat" }, 237 { 4096, 65536, 8192, "icmp_xmit_hiwat"}, 238 { 0, 65536, 1024, "icmp_xmit_lowat"}, 239 { 4096, 65536, 8192, "icmp_recv_hiwat"}, 240 { 65536, 1024*1024*1024, 256*1024, "icmp_max_buf"}, 241 }; 242 #define is_wroff_extra is_param_arr[0].icmp_param_value 243 #define is_ipv4_ttl is_param_arr[1].icmp_param_value 244 #define is_ipv6_hoplimit is_param_arr[2].icmp_param_value 245 #define is_bsd_compat is_param_arr[3].icmp_param_value 246 #define is_xmit_hiwat is_param_arr[4].icmp_param_value 247 #define is_xmit_lowat is_param_arr[5].icmp_param_value 248 #define is_recv_hiwat is_param_arr[6].icmp_param_value 249 #define is_max_buf is_param_arr[7].icmp_param_value 250 251 static int rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len); 252 static int rawip_do_connect(conn_t *connp, const struct sockaddr *sa, 253 socklen_t len, cred_t *cr); 254 static void rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error); 255 256 /* 257 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 258 * passed to icmp_wput. 259 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP 260 * protocol type placed in the message following the address. A T_BIND_ACK 261 * message is returned by ip_bind_v4/v6. 262 */ 263 static void 264 icmp_tpi_bind(queue_t *q, mblk_t *mp) 265 { 266 int error; 267 struct sockaddr *sa; 268 struct T_bind_req *tbr; 269 socklen_t len; 270 sin_t *sin; 271 sin6_t *sin6; 272 icmp_t *icmp; 273 conn_t *connp = Q_TO_CONN(q); 274 mblk_t *mp1; 275 cred_t *cr; 276 277 /* 278 * All Solaris components should pass a db_credp 279 * for this TPI message, hence we ASSERT. 
280 * But in case there is some other M_PROTO that looks 281 * like a TPI message sent by some other kernel 282 * component, we check and return an error. 283 */ 284 cr = msg_getcred(mp, NULL); 285 ASSERT(cr != NULL); 286 if (cr == NULL) { 287 icmp_err_ack(q, mp, TSYSERR, EINVAL); 288 return; 289 } 290 291 icmp = connp->conn_icmp; 292 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 293 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 294 "icmp_bind: bad req, len %u", 295 (uint_t)(mp->b_wptr - mp->b_rptr)); 296 icmp_err_ack(q, mp, TPROTO, 0); 297 return; 298 } 299 300 if (icmp->icmp_state != TS_UNBND) { 301 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 302 "icmp_bind: bad state, %d", icmp->icmp_state); 303 icmp_err_ack(q, mp, TOUTSTATE, 0); 304 return; 305 } 306 307 /* 308 * Reallocate the message to make sure we have enough room for an 309 * address and the protocol type. 310 */ 311 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 312 if (!mp1) { 313 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 314 return; 315 } 316 mp = mp1; 317 318 /* Reset the message type in preparation for shipping it back. 
*/ 319 DB_TYPE(mp) = M_PCPROTO; 320 tbr = (struct T_bind_req *)mp->b_rptr; 321 len = tbr->ADDR_length; 322 switch (len) { 323 case 0: /* request for a generic port */ 324 tbr->ADDR_offset = sizeof (struct T_bind_req); 325 if (icmp->icmp_family == AF_INET) { 326 tbr->ADDR_length = sizeof (sin_t); 327 sin = (sin_t *)&tbr[1]; 328 *sin = sin_null; 329 sin->sin_family = AF_INET; 330 mp->b_wptr = (uchar_t *)&sin[1]; 331 sa = (struct sockaddr *)sin; 332 len = sizeof (sin_t); 333 } else { 334 ASSERT(icmp->icmp_family == AF_INET6); 335 tbr->ADDR_length = sizeof (sin6_t); 336 sin6 = (sin6_t *)&tbr[1]; 337 *sin6 = sin6_null; 338 sin6->sin6_family = AF_INET6; 339 mp->b_wptr = (uchar_t *)&sin6[1]; 340 sa = (struct sockaddr *)sin6; 341 len = sizeof (sin6_t); 342 } 343 break; 344 345 case sizeof (sin_t): /* Complete IPv4 address */ 346 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 347 sizeof (sin_t)); 348 break; 349 350 case sizeof (sin6_t): /* Complete IPv6 address */ 351 sa = (struct sockaddr *)mi_offset_param(mp, 352 tbr->ADDR_offset, sizeof (sin6_t)); 353 break; 354 355 default: 356 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 357 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 358 icmp_err_ack(q, mp, TBADADDR, 0); 359 return; 360 } 361 362 error = rawip_do_bind(connp, sa, len); 363 done: 364 ASSERT(mp->b_cont == NULL); 365 if (error != 0) { 366 if (error > 0) { 367 icmp_err_ack(q, mp, TSYSERR, error); 368 } else { 369 icmp_err_ack(q, mp, -error, 0); 370 } 371 } else { 372 tbr->PRIM_type = T_BIND_ACK; 373 qreply(q, mp); 374 } 375 } 376 377 static int 378 rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len) 379 { 380 sin_t *sin; 381 sin6_t *sin6; 382 icmp_t *icmp; 383 int error = 0; 384 mblk_t *ire_mp; 385 386 387 icmp = connp->conn_icmp; 388 389 if (sa == NULL || !OK_32PTR((char *)sa)) { 390 return (EINVAL); 391 } 392 393 /* 394 * The state must be TS_UNBND. 
TPI mandates that users must send 395 * TPI primitives only 1 at a time and wait for the response before 396 * sending the next primitive. 397 */ 398 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 399 if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) { 400 error = -TOUTSTATE; 401 goto done; 402 } 403 404 ASSERT(len != 0); 405 switch (len) { 406 case sizeof (sin_t): /* Complete IPv4 address */ 407 sin = (sin_t *)sa; 408 if (sin->sin_family != AF_INET || 409 icmp->icmp_family != AF_INET) { 410 /* TSYSERR, EAFNOSUPPORT */ 411 error = EAFNOSUPPORT; 412 goto done; 413 } 414 break; 415 case sizeof (sin6_t): /* Complete IPv6 address */ 416 sin6 = (sin6_t *)sa; 417 if (sin6->sin6_family != AF_INET6 || 418 icmp->icmp_family != AF_INET6) { 419 /* TSYSERR, EAFNOSUPPORT */ 420 error = EAFNOSUPPORT; 421 goto done; 422 } 423 /* No support for mapped addresses on raw sockets */ 424 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 425 /* TSYSERR, EADDRNOTAVAIL */ 426 error = EADDRNOTAVAIL; 427 goto done; 428 } 429 break; 430 431 default: 432 /* TBADADDR */ 433 error = EADDRNOTAVAIL; 434 goto done; 435 } 436 437 icmp->icmp_pending_op = T_BIND_REQ; 438 icmp->icmp_state = TS_IDLE; 439 440 /* 441 * Copy the source address into our icmp structure. This address 442 * may still be zero; if so, ip will fill in the correct address 443 * each time an outbound packet is passed to it. 444 * If we are binding to a broadcast or multicast address then 445 * rawip_post_ip_bind_connect will clear the source address. 
446 */ 447 448 if (icmp->icmp_family == AF_INET) { 449 ASSERT(sin != NULL); 450 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 451 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 452 &icmp->icmp_v6src); 453 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 454 icmp->icmp_ip_snd_options_len; 455 icmp->icmp_bound_v6src = icmp->icmp_v6src; 456 } else { 457 int error; 458 459 ASSERT(sin6 != NULL); 460 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 461 icmp->icmp_v6src = sin6->sin6_addr; 462 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 463 icmp->icmp_bound_v6src = icmp->icmp_v6src; 464 465 /* Rebuild the header template */ 466 error = icmp_build_hdrs(icmp); 467 if (error != 0) { 468 icmp->icmp_pending_op = -1; 469 /* 470 * TSYSERR 471 */ 472 goto done; 473 } 474 } 475 476 ire_mp = NULL; 477 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 478 /* 479 * request an IRE if src not 0 (INADDR_ANY) 480 */ 481 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 482 if (ire_mp == NULL) { 483 icmp->icmp_pending_op = -1; 484 error = ENOMEM; 485 goto done; 486 } 487 DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE; 488 } 489 done: 490 rw_exit(&icmp->icmp_rwlock); 491 if (error != 0) 492 return (error); 493 494 if (icmp->icmp_family == AF_INET6) { 495 error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto, 496 &sin6->sin6_addr, sin6->sin6_port, B_TRUE); 497 } else { 498 error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto, 499 sin->sin_addr.s_addr, sin->sin_port, B_TRUE); 500 } 501 rawip_post_ip_bind_connect(icmp, ire_mp, error); 502 return (error); 503 } 504 505 static void 506 rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error) 507 { 508 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 509 if (icmp->icmp_state == TS_UNBND) { 510 /* 511 * not yet bound - bind sent by icmp_bind_proto. 
512 */ 513 rw_exit(&icmp->icmp_rwlock); 514 return; 515 } 516 ASSERT(icmp->icmp_pending_op != -1); 517 icmp->icmp_pending_op = -1; 518 519 if (error != 0) { 520 if (icmp->icmp_state == TS_DATA_XFER) { 521 /* Connect failed */ 522 /* Revert back to the bound source */ 523 icmp->icmp_v6src = icmp->icmp_bound_v6src; 524 icmp->icmp_state = TS_IDLE; 525 if (icmp->icmp_family == AF_INET6) 526 (void) icmp_build_hdrs(icmp); 527 } else { 528 V6_SET_ZERO(icmp->icmp_v6src); 529 V6_SET_ZERO(icmp->icmp_bound_v6src); 530 icmp->icmp_state = TS_UNBND; 531 if (icmp->icmp_family == AF_INET6) 532 (void) icmp_build_hdrs(icmp); 533 } 534 } else { 535 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 536 ire_t *ire; 537 538 ire = (ire_t *)ire_mp->b_rptr; 539 /* 540 * If a broadcast/multicast address was bound set 541 * the source address to 0. 542 * This ensures no datagrams with broadcast address 543 * as source address are emitted (which would violate 544 * RFC1122 - Hosts requirements) 545 * Note: we get IRE_BROADCAST for IPv6 546 * to "mark" a multicast local address. 547 */ 548 549 550 if (ire->ire_type == IRE_BROADCAST && 551 icmp->icmp_state != TS_DATA_XFER) { 552 /* 553 * This was just a local bind to a 554 * MC/broadcast addr 555 */ 556 V6_SET_ZERO(icmp->icmp_v6src); 557 if (icmp->icmp_family == AF_INET6) 558 (void) icmp_build_hdrs(icmp); 559 } 560 } 561 562 } 563 rw_exit(&icmp->icmp_rwlock); 564 if (ire_mp != NULL) 565 freeb(ire_mp); 566 } 567 568 /* 569 * Send message to IP to just bind to the protocol. 
570 */ 571 static int 572 icmp_bind_proto(conn_t *connp) 573 { 574 icmp_t *icmp; 575 int error; 576 577 icmp = connp->conn_icmp; 578 579 if (icmp->icmp_family == AF_INET6) 580 error = ip_proto_bind_laddr_v6(connp, NULL, icmp->icmp_proto, 581 &sin6_null.sin6_addr, 0, B_TRUE); 582 else 583 error = ip_proto_bind_laddr_v4(connp, NULL, icmp->icmp_proto, 584 sin_null.sin_addr.s_addr, 0, B_TRUE); 585 586 rawip_post_ip_bind_connect(icmp, NULL, error); 587 return (error); 588 } 589 590 static void 591 icmp_tpi_connect(queue_t *q, mblk_t *mp) 592 { 593 conn_t *connp = Q_TO_CONN(q); 594 struct T_conn_req *tcr; 595 icmp_t *icmp; 596 struct sockaddr *sa; 597 socklen_t len; 598 int error; 599 cred_t *cr; 600 601 /* 602 * All Solaris components should pass a db_credp 603 * for this TPI message, hence we ASSERT. 604 * But in case there is some other M_PROTO that looks 605 * like a TPI message sent by some other kernel 606 * component, we check and return an error. 607 */ 608 cr = msg_getcred(mp, NULL); 609 ASSERT(cr != NULL); 610 if (cr == NULL) { 611 icmp_err_ack(q, mp, TSYSERR, EINVAL); 612 return; 613 } 614 615 icmp = connp->conn_icmp; 616 tcr = (struct T_conn_req *)mp->b_rptr; 617 /* Sanity checks */ 618 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 619 icmp_err_ack(q, mp, TPROTO, 0); 620 return; 621 } 622 623 if (tcr->OPT_length != 0) { 624 icmp_err_ack(q, mp, TBADOPT, 0); 625 return; 626 } 627 628 len = tcr->DEST_length; 629 630 switch (len) { 631 default: 632 icmp_err_ack(q, mp, TBADADDR, 0); 633 return; 634 case sizeof (sin_t): 635 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 636 sizeof (sin_t)); 637 break; 638 case sizeof (sin6_t): 639 sa = (struct sockaddr *)mi_offset_param(mp, 640 tcr->DEST_offset, sizeof (sin6_t)); 641 break; 642 } 643 644 error = proto_verify_ip_addr(icmp->icmp_family, sa, len); 645 if (error != 0) { 646 icmp_err_ack(q, mp, TSYSERR, error); 647 return; 648 } 649 650 error = rawip_do_connect(connp, sa, len, cr); 651 if 
(error != 0) { 652 if (error < 0) { 653 icmp_err_ack(q, mp, -error, 0); 654 } else { 655 icmp_err_ack(q, mp, 0, error); 656 } 657 } else { 658 mblk_t *mp1; 659 660 /* 661 * We have to send a connection confirmation to 662 * keep TLI happy. 663 */ 664 if (icmp->icmp_family == AF_INET) { 665 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 666 sizeof (sin_t), NULL, 0); 667 } else { 668 ASSERT(icmp->icmp_family == AF_INET6); 669 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 670 sizeof (sin6_t), NULL, 0); 671 } 672 if (mp1 == NULL) { 673 rw_exit(&icmp->icmp_rwlock); 674 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 675 return; 676 } 677 678 /* 679 * Send ok_ack for T_CONN_REQ 680 */ 681 mp = mi_tpi_ok_ack_alloc(mp); 682 if (mp == NULL) { 683 /* Unable to reuse the T_CONN_REQ for the ack. */ 684 freemsg(mp1); 685 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 686 return; 687 } 688 putnext(connp->conn_rq, mp); 689 putnext(connp->conn_rq, mp1); 690 } 691 } 692 693 static int 694 rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 695 cred_t *cr) 696 { 697 icmp_t *icmp; 698 sin_t *sin; 699 sin6_t *sin6; 700 mblk_t *ire_mp; 701 int error; 702 ipaddr_t v4dst; 703 in6_addr_t v6dst; 704 705 icmp = connp->conn_icmp; 706 707 if (sa == NULL || !OK_32PTR((char *)sa)) { 708 return (EINVAL); 709 } 710 711 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 712 if (ire_mp == NULL) 713 return (ENOMEM); 714 DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE; 715 716 717 ASSERT(sa != NULL && len != 0); 718 719 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 720 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 721 rw_exit(&icmp->icmp_rwlock); 722 freeb(ire_mp); 723 return (-TOUTSTATE); 724 } 725 726 switch (len) { 727 case sizeof (sin_t): 728 sin = (sin_t *)sa; 729 730 ASSERT(icmp->icmp_family == AF_INET); 731 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 732 733 v4dst = sin->sin_addr.s_addr; 734 /* 735 * Interpret a zero destination to mean loopback. 
736 * Update the T_CONN_REQ (sin/sin6) since it is used to 737 * generate the T_CONN_CON. 738 */ 739 if (v4dst == INADDR_ANY) { 740 v4dst = htonl(INADDR_LOOPBACK); 741 } 742 743 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 744 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 745 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 746 icmp->icmp_ip_snd_options_len; 747 icmp->icmp_v6dst.sin6_addr = v6dst; 748 icmp->icmp_v6dst.sin6_family = AF_INET6; 749 icmp->icmp_v6dst.sin6_flowinfo = 0; 750 icmp->icmp_v6dst.sin6_port = 0; 751 752 /* 753 * If the destination address is multicast and 754 * an outgoing multicast interface has been set, 755 * use the address of that interface as our 756 * source address if no source address has been set. 757 */ 758 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 759 CLASSD(v4dst) && 760 icmp->icmp_multicast_if_addr != INADDR_ANY) { 761 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 762 &icmp->icmp_v6src); 763 } 764 break; 765 case sizeof (sin6_t): 766 sin6 = (sin6_t *)sa; 767 768 /* No support for mapped addresses on raw sockets */ 769 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 770 rw_exit(&icmp->icmp_rwlock); 771 freeb(ire_mp); 772 return (EADDRNOTAVAIL); 773 } 774 775 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 776 ASSERT(icmp->icmp_family == AF_INET6); 777 778 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 779 780 icmp->icmp_v6dst = *sin6; 781 icmp->icmp_v6dst.sin6_port = 0; 782 783 /* 784 * Interpret a zero destination to mean loopback. 785 * Update the T_CONN_REQ (sin/sin6) since it is used to 786 * generate the T_CONN_CON. 787 */ 788 if (IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6dst.sin6_addr)) { 789 icmp->icmp_v6dst.sin6_addr = ipv6_loopback; 790 } 791 /* 792 * If the destination address is multicast and 793 * an outgoing multicast interface has been set, 794 * then the ip bind logic will pick the correct source 795 * address (i.e. matching the outgoing multicast interface). 
796 */ 797 break; 798 } 799 800 icmp->icmp_pending_op = T_CONN_REQ; 801 802 if (icmp->icmp_state == TS_DATA_XFER) { 803 /* Already connected - clear out state */ 804 icmp->icmp_v6src = icmp->icmp_bound_v6src; 805 icmp->icmp_state = TS_IDLE; 806 } 807 808 icmp->icmp_state = TS_DATA_XFER; 809 rw_exit(&icmp->icmp_rwlock); 810 811 if (icmp->icmp_family == AF_INET6) { 812 error = ip_proto_bind_connected_v6(connp, &ire_mp, 813 icmp->icmp_proto, &icmp->icmp_v6src, 0, 814 &icmp->icmp_v6dst.sin6_addr, 815 NULL, sin6->sin6_port, B_TRUE, B_TRUE, cr); 816 } else { 817 error = ip_proto_bind_connected_v4(connp, &ire_mp, 818 icmp->icmp_proto, &V4_PART_OF_V6(icmp->icmp_v6src), 0, 819 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr), sin->sin_port, 820 B_TRUE, B_TRUE, cr); 821 } 822 rawip_post_ip_bind_connect(icmp, ire_mp, error); 823 return (error); 824 } 825 826 static void 827 icmp_close_free(conn_t *connp) 828 { 829 icmp_t *icmp = connp->conn_icmp; 830 831 /* If there are any options associated with the stream, free them. */ 832 if (icmp->icmp_ip_snd_options != NULL) { 833 mi_free((char *)icmp->icmp_ip_snd_options); 834 icmp->icmp_ip_snd_options = NULL; 835 icmp->icmp_ip_snd_options_len = 0; 836 } 837 838 if (icmp->icmp_filter != NULL) { 839 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 840 icmp->icmp_filter = NULL; 841 } 842 843 /* Free memory associated with sticky options */ 844 if (icmp->icmp_sticky_hdrs_len != 0) { 845 kmem_free(icmp->icmp_sticky_hdrs, 846 icmp->icmp_sticky_hdrs_len); 847 icmp->icmp_sticky_hdrs = NULL; 848 icmp->icmp_sticky_hdrs_len = 0; 849 } 850 ip6_pkt_free(&icmp->icmp_sticky_ipp); 851 852 /* 853 * Clear any fields which the kmem_cache constructor clears. 854 * Only icmp_connp needs to be preserved. 855 * TBD: We should make this more efficient to avoid clearing 856 * everything. 
857 */ 858 ASSERT(icmp->icmp_connp == connp); 859 bzero(icmp, sizeof (icmp_t)); 860 icmp->icmp_connp = connp; 861 } 862 863 static int 864 rawip_do_close(conn_t *connp) 865 { 866 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 867 868 ip_quiesce_conn(connp); 869 870 if (!IPCL_IS_NONSTR(connp)) { 871 qprocsoff(connp->conn_rq); 872 } 873 874 ASSERT(connp->conn_icmp->icmp_fallback_queue_head == NULL && 875 connp->conn_icmp->icmp_fallback_queue_tail == NULL); 876 icmp_close_free(connp); 877 878 /* 879 * Now we are truly single threaded on this stream, and can 880 * delete the things hanging off the connp, and finally the connp. 881 * We removed this connp from the fanout list, it cannot be 882 * accessed thru the fanouts, and we already waited for the 883 * conn_ref to drop to 0. We are already in close, so 884 * there cannot be any other thread from the top. qprocsoff 885 * has completed, and service has completed or won't run in 886 * future. 887 */ 888 ASSERT(connp->conn_ref == 1); 889 890 if (!IPCL_IS_NONSTR(connp)) { 891 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 892 } else { 893 ip_free_helper_stream(connp); 894 } 895 896 connp->conn_ref--; 897 ipcl_conn_destroy(connp); 898 899 return (0); 900 } 901 902 static int 903 icmp_close(queue_t *q, int flags) 904 { 905 conn_t *connp; 906 907 if (flags & SO_FALLBACK) { 908 /* 909 * stream is being closed while in fallback 910 * simply free the resources that were allocated 911 */ 912 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 913 qprocsoff(q); 914 goto done; 915 } 916 917 connp = Q_TO_CONN(q); 918 (void) rawip_do_close(connp); 919 done: 920 q->q_ptr = WR(q)->q_ptr = NULL; 921 return (0); 922 } 923 924 /* 925 * This routine handles each T_DISCON_REQ message passed to icmp 926 * as an indicating that ICMP is no longer connected. This results 927 * in sending a T_BIND_REQ to IP to restore the binding to just 928 * the local address. 929 * 930 * The disconnect completes in rawip_post_ip_bind_connect. 
931 */ 932 static int 933 icmp_do_disconnect(conn_t *connp) 934 { 935 icmp_t *icmp; 936 mblk_t *ire_mp; 937 int error; 938 939 icmp = connp->conn_icmp; 940 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 941 if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) { 942 rw_exit(&icmp->icmp_rwlock); 943 return (-TOUTSTATE); 944 } 945 icmp->icmp_pending_op = T_DISCON_REQ; 946 icmp->icmp_v6src = icmp->icmp_bound_v6src; 947 icmp->icmp_state = TS_IDLE; 948 949 950 if (icmp->icmp_family == AF_INET6) { 951 /* Rebuild the header template */ 952 error = icmp_build_hdrs(icmp); 953 if (error != 0) { 954 icmp->icmp_pending_op = -1; 955 rw_exit(&icmp->icmp_rwlock); 956 return (error); 957 } 958 } 959 960 rw_exit(&icmp->icmp_rwlock); 961 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 962 if (ire_mp == NULL) { 963 return (ENOMEM); 964 } 965 966 if (icmp->icmp_family == AF_INET6) { 967 error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto, 968 &icmp->icmp_bound_v6src, 0, B_TRUE); 969 } else { 970 971 error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto, 972 V4_PART_OF_V6(icmp->icmp_bound_v6src), 0, B_TRUE); 973 } 974 975 rawip_post_ip_bind_connect(icmp, ire_mp, error); 976 977 return (error); 978 } 979 980 static void 981 icmp_tpi_disconnect(queue_t *q, mblk_t *mp) 982 { 983 conn_t *connp = Q_TO_CONN(q); 984 int error; 985 986 /* 987 * Allocate the largest primitive we need to send back 988 * T_error_ack is > than T_ok_ack 989 */ 990 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 991 if (mp == NULL) { 992 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 993 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 994 return; 995 } 996 997 error = icmp_do_disconnect(connp); 998 999 if (error != 0) { 1000 if (error > 0) { 1001 icmp_err_ack(q, mp, 0, error); 1002 } else { 1003 icmp_err_ack(q, mp, -error, 0); 1004 } 1005 } else { 1006 mp = mi_tpi_ok_ack_alloc(mp); 1007 ASSERT(mp != NULL); 1008 qreply(q, mp); 1009 } 1010 1011 } 1012 1013 static int 1014 icmp_disconnect(conn_t *connp) 1015 { 1016 int error; 1017 icmp_t *icmp = connp->conn_icmp; 1018 1019 icmp->icmp_dgram_errind = B_FALSE; 1020 1021 error = icmp_do_disconnect(connp); 1022 1023 if (error < 0) 1024 error = proto_tlitosyserr(-error); 1025 return (error); 1026 } 1027 1028 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 1029 static void 1030 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1031 { 1032 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1033 qreply(q, mp); 1034 } 1035 1036 /* Shorthand to generate and send TPI error acks to our client */ 1037 static void 1038 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 1039 t_scalar_t t_error, int sys_error) 1040 { 1041 struct T_error_ack *teackp; 1042 1043 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1044 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1045 teackp = (struct T_error_ack *)mp->b_rptr; 1046 teackp->ERROR_prim = primitive; 1047 teackp->TLI_error = t_error; 1048 teackp->UNIX_error = sys_error; 1049 qreply(q, mp); 1050 } 1051 } 1052 1053 /* 1054 * icmp_icmp_error is called by icmp_input to process ICMP 1055 * messages passed up by IP. 1056 * Generates the appropriate permanent (non-transient) errors. 1057 * Assumes that IP has pulled up everything up to and including 1058 * the ICMP header. 
1059 */ 1060 static void 1061 icmp_icmp_error(conn_t *connp, mblk_t *mp) 1062 { 1063 icmph_t *icmph; 1064 ipha_t *ipha; 1065 int iph_hdr_length; 1066 sin_t sin; 1067 mblk_t *mp1; 1068 int error = 0; 1069 icmp_t *icmp = connp->conn_icmp; 1070 1071 ipha = (ipha_t *)mp->b_rptr; 1072 1073 ASSERT(OK_32PTR(mp->b_rptr)); 1074 1075 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1076 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1077 icmp_icmp_error_ipv6(connp, mp); 1078 return; 1079 } 1080 1081 /* 1082 * icmp does not support v4 mapped addresses 1083 * so we can never be here for a V6 socket 1084 * i.e. icmp_family == AF_INET6 1085 */ 1086 ASSERT((IPH_HDR_VERSION(ipha) == IPV4_VERSION) && 1087 (icmp->icmp_family == AF_INET)); 1088 1089 ASSERT(icmp->icmp_family == AF_INET); 1090 1091 /* Skip past the outer IP and ICMP headers */ 1092 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1093 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 1094 ipha = (ipha_t *)&icmph[1]; 1095 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1096 1097 switch (icmph->icmph_type) { 1098 case ICMP_DEST_UNREACHABLE: 1099 switch (icmph->icmph_code) { 1100 case ICMP_FRAGMENTATION_NEEDED: 1101 /* 1102 * IP has already adjusted the path MTU. 1103 */ 1104 break; 1105 case ICMP_PORT_UNREACHABLE: 1106 case ICMP_PROTOCOL_UNREACHABLE: 1107 error = ECONNREFUSED; 1108 break; 1109 default: 1110 /* Transient errors */ 1111 break; 1112 } 1113 break; 1114 default: 1115 /* Transient errors */ 1116 break; 1117 } 1118 if (error == 0) { 1119 freemsg(mp); 1120 return; 1121 } 1122 1123 /* 1124 * Deliver T_UDERROR_IND when the application has asked for it. 1125 * The socket layer enables this automatically when connected. 
1126 */ 1127 if (!icmp->icmp_dgram_errind) { 1128 freemsg(mp); 1129 return; 1130 } 1131 1132 sin = sin_null; 1133 sin.sin_family = AF_INET; 1134 sin.sin_addr.s_addr = ipha->ipha_dst; 1135 1136 if (IPCL_IS_NONSTR(connp)) { 1137 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1138 if (icmp->icmp_state == TS_DATA_XFER) { 1139 if (sin.sin_addr.s_addr == 1140 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr)) { 1141 rw_exit(&icmp->icmp_rwlock); 1142 (*connp->conn_upcalls->su_set_error) 1143 (connp->conn_upper_handle, error); 1144 goto done; 1145 } 1146 } else { 1147 icmp->icmp_delayed_error = error; 1148 *((sin_t *)&icmp->icmp_delayed_addr) = sin; 1149 } 1150 rw_exit(&icmp->icmp_rwlock); 1151 } else { 1152 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 1153 0, error); 1154 if (mp1 != NULL) 1155 putnext(connp->conn_rq, mp1); 1156 } 1157 done: 1158 ASSERT(!RW_ISWRITER(&icmp->icmp_rwlock)); 1159 freemsg(mp); 1160 } 1161 1162 /* 1163 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 1164 * for IPv6 packets. 1165 * Send permanent (non-transient) errors upstream. 1166 * Assumes that IP has pulled up all the extension headers as well 1167 * as the ICMPv6 header. 
1168 */ 1169 static void 1170 icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1171 { 1172 icmp6_t *icmp6; 1173 ip6_t *ip6h, *outer_ip6h; 1174 uint16_t iph_hdr_length; 1175 uint8_t *nexthdrp; 1176 sin6_t sin6; 1177 mblk_t *mp1; 1178 int error = 0; 1179 icmp_t *icmp = connp->conn_icmp; 1180 1181 outer_ip6h = (ip6_t *)mp->b_rptr; 1182 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1183 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1184 else 1185 iph_hdr_length = IPV6_HDR_LEN; 1186 1187 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1188 ip6h = (ip6_t *)&icmp6[1]; 1189 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1190 freemsg(mp); 1191 return; 1192 } 1193 1194 switch (icmp6->icmp6_type) { 1195 case ICMP6_DST_UNREACH: 1196 switch (icmp6->icmp6_code) { 1197 case ICMP6_DST_UNREACH_NOPORT: 1198 error = ECONNREFUSED; 1199 break; 1200 case ICMP6_DST_UNREACH_ADMIN: 1201 case ICMP6_DST_UNREACH_NOROUTE: 1202 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1203 case ICMP6_DST_UNREACH_ADDR: 1204 /* Transient errors */ 1205 break; 1206 default: 1207 break; 1208 } 1209 break; 1210 case ICMP6_PACKET_TOO_BIG: { 1211 struct T_unitdata_ind *tudi; 1212 struct T_opthdr *toh; 1213 size_t udi_size; 1214 mblk_t *newmp; 1215 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1216 sizeof (struct ip6_mtuinfo); 1217 sin6_t *sin6; 1218 struct ip6_mtuinfo *mtuinfo; 1219 1220 /* 1221 * If the application has requested to receive path mtu 1222 * information, send up an empty message containing an 1223 * IPV6_PATHMTU ancillary data item. 1224 */ 1225 if (!icmp->icmp_ipv6_recvpathmtu) 1226 break; 1227 1228 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1229 opt_length; 1230 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1231 BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors); 1232 break; 1233 } 1234 1235 /* 1236 * newmp->b_cont is left to NULL on purpose. This is an 1237 * empty message containing only ancillary data. 
1238 */ 1239 newmp->b_datap->db_type = M_PROTO; 1240 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1241 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1242 tudi->PRIM_type = T_UNITDATA_IND; 1243 tudi->SRC_length = sizeof (sin6_t); 1244 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1245 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1246 tudi->OPT_length = opt_length; 1247 1248 sin6 = (sin6_t *)&tudi[1]; 1249 bzero(sin6, sizeof (sin6_t)); 1250 sin6->sin6_family = AF_INET6; 1251 sin6->sin6_addr = icmp->icmp_v6dst.sin6_addr; 1252 1253 toh = (struct T_opthdr *)&sin6[1]; 1254 toh->level = IPPROTO_IPV6; 1255 toh->name = IPV6_PATHMTU; 1256 toh->len = opt_length; 1257 toh->status = 0; 1258 1259 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1260 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1261 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1262 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1263 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1264 /* 1265 * We've consumed everything we need from the original 1266 * message. Free it, then send our empty message. 1267 */ 1268 freemsg(mp); 1269 icmp_ulp_recv(connp, newmp); 1270 1271 return; 1272 } 1273 case ICMP6_TIME_EXCEEDED: 1274 /* Transient errors */ 1275 break; 1276 case ICMP6_PARAM_PROB: 1277 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1278 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1279 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1280 (uchar_t *)nexthdrp) { 1281 error = ECONNREFUSED; 1282 break; 1283 } 1284 break; 1285 } 1286 if (error == 0) { 1287 freemsg(mp); 1288 return; 1289 } 1290 1291 /* 1292 * Deliver T_UDERROR_IND when the application has asked for it. 1293 * The socket layer enables this automatically when connected. 
1294 */ 1295 if (!icmp->icmp_dgram_errind) { 1296 freemsg(mp); 1297 return; 1298 } 1299 1300 sin6 = sin6_null; 1301 sin6.sin6_family = AF_INET6; 1302 sin6.sin6_addr = ip6h->ip6_dst; 1303 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1304 1305 if (IPCL_IS_NONSTR(connp)) { 1306 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1307 if (icmp->icmp_state == TS_DATA_XFER) { 1308 if (IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1309 &icmp->icmp_v6dst.sin6_addr)) { 1310 rw_exit(&icmp->icmp_rwlock); 1311 (*connp->conn_upcalls->su_set_error) 1312 (connp->conn_upper_handle, error); 1313 goto done; 1314 } 1315 } else { 1316 icmp->icmp_delayed_error = error; 1317 *((sin6_t *)&icmp->icmp_delayed_addr) = sin6; 1318 } 1319 rw_exit(&icmp->icmp_rwlock); 1320 } else { 1321 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1322 NULL, 0, error); 1323 if (mp1 != NULL) 1324 putnext(connp->conn_rq, mp1); 1325 } 1326 done: 1327 ASSERT(!RW_ISWRITER(&icmp->icmp_rwlock)); 1328 freemsg(mp); 1329 } 1330 1331 /* 1332 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1333 * The local address is filled in if endpoint is bound. The remote address 1334 * is filled in if remote address has been precified ("connected endpoint") 1335 * (The concept of connected CLTS sockets is alien to published TPI 1336 * but we support it anyway). 
1337 */ 1338 static void 1339 icmp_addr_req(queue_t *q, mblk_t *mp) 1340 { 1341 icmp_t *icmp = Q_TO_ICMP(q); 1342 mblk_t *ackmp; 1343 struct T_addr_ack *taa; 1344 1345 /* Make it large enough for worst case */ 1346 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1347 2 * sizeof (sin6_t), 1); 1348 if (ackmp == NULL) { 1349 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1350 return; 1351 } 1352 taa = (struct T_addr_ack *)ackmp->b_rptr; 1353 1354 bzero(taa, sizeof (struct T_addr_ack)); 1355 ackmp->b_wptr = (uchar_t *)&taa[1]; 1356 1357 taa->PRIM_type = T_ADDR_ACK; 1358 ackmp->b_datap->db_type = M_PCPROTO; 1359 rw_enter(&icmp->icmp_rwlock, RW_READER); 1360 /* 1361 * Note: Following code assumes 32 bit alignment of basic 1362 * data structures like sin_t and struct T_addr_ack. 1363 */ 1364 if (icmp->icmp_state != TS_UNBND) { 1365 /* 1366 * Fill in local address 1367 */ 1368 taa->LOCADDR_offset = sizeof (*taa); 1369 if (icmp->icmp_family == AF_INET) { 1370 sin_t *sin; 1371 1372 taa->LOCADDR_length = sizeof (sin_t); 1373 sin = (sin_t *)&taa[1]; 1374 /* Fill zeroes and then intialize non-zero fields */ 1375 *sin = sin_null; 1376 sin->sin_family = AF_INET; 1377 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1378 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1379 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1380 sin->sin_addr.s_addr); 1381 } else { 1382 /* 1383 * INADDR_ANY 1384 * icmp_v6src is not set, we might be bound to 1385 * broadcast/multicast. 
Use icmp_bound_v6src as 1386 * local address instead (that could 1387 * also still be INADDR_ANY) 1388 */ 1389 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1390 sin->sin_addr.s_addr); 1391 } 1392 ackmp->b_wptr = (uchar_t *)&sin[1]; 1393 } else { 1394 sin6_t *sin6; 1395 1396 ASSERT(icmp->icmp_family == AF_INET6); 1397 taa->LOCADDR_length = sizeof (sin6_t); 1398 sin6 = (sin6_t *)&taa[1]; 1399 /* Fill zeroes and then intialize non-zero fields */ 1400 *sin6 = sin6_null; 1401 sin6->sin6_family = AF_INET6; 1402 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1403 sin6->sin6_addr = icmp->icmp_v6src; 1404 } else { 1405 /* 1406 * UNSPECIFIED 1407 * icmp_v6src is not set, we might be bound to 1408 * broadcast/multicast. Use icmp_bound_v6src as 1409 * local address instead (that could 1410 * also still be UNSPECIFIED) 1411 */ 1412 sin6->sin6_addr = icmp->icmp_bound_v6src; 1413 } 1414 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1415 } 1416 } 1417 rw_exit(&icmp->icmp_rwlock); 1418 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1419 qreply(q, ackmp); 1420 } 1421 1422 static void 1423 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1424 { 1425 *tap = icmp_g_t_info_ack; 1426 1427 if (icmp->icmp_family == AF_INET6) 1428 tap->ADDR_size = sizeof (sin6_t); 1429 else 1430 tap->ADDR_size = sizeof (sin_t); 1431 tap->CURRENT_state = icmp->icmp_state; 1432 tap->OPT_size = icmp_max_optsize; 1433 } 1434 1435 static void 1436 icmp_do_capability_ack(icmp_t *icmp, struct T_capability_ack *tcap, 1437 t_uscalar_t cap_bits1) 1438 { 1439 tcap->CAP_bits1 = 0; 1440 1441 if (cap_bits1 & TC1_INFO) { 1442 icmp_copy_info(&tcap->INFO_ack, icmp); 1443 tcap->CAP_bits1 |= TC1_INFO; 1444 } 1445 } 1446 1447 /* 1448 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1449 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1450 * icmp_g_t_info_ack. The current state of the stream is copied from 1451 * icmp_state. 
1452 */ 1453 static void 1454 icmp_capability_req(queue_t *q, mblk_t *mp) 1455 { 1456 icmp_t *icmp = Q_TO_ICMP(q); 1457 t_uscalar_t cap_bits1; 1458 struct T_capability_ack *tcap; 1459 1460 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1461 1462 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1463 mp->b_datap->db_type, T_CAPABILITY_ACK); 1464 if (!mp) 1465 return; 1466 1467 tcap = (struct T_capability_ack *)mp->b_rptr; 1468 1469 icmp_do_capability_ack(icmp, tcap, cap_bits1); 1470 1471 qreply(q, mp); 1472 } 1473 1474 /* 1475 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1476 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1477 * The current state of the stream is copied from icmp_state. 1478 */ 1479 static void 1480 icmp_info_req(queue_t *q, mblk_t *mp) 1481 { 1482 icmp_t *icmp = Q_TO_ICMP(q); 1483 1484 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1485 T_INFO_ACK); 1486 if (!mp) 1487 return; 1488 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1489 qreply(q, mp); 1490 } 1491 1492 /* For /dev/icmp aka AF_INET open */ 1493 static int 1494 icmp_tpi_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1495 int family) 1496 { 1497 conn_t *connp; 1498 dev_t conn_dev; 1499 icmp_stack_t *is; 1500 int error; 1501 1502 conn_dev = NULL; 1503 1504 /* If the stream is already open, return immediately. */ 1505 if (q->q_ptr != NULL) 1506 return (0); 1507 1508 if (sflag == MODOPEN) 1509 return (EINVAL); 1510 1511 /* 1512 * Since ICMP is not used so heavily, allocating from the small 1513 * arena should be sufficient. 
1514 */ 1515 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 1516 return (EBUSY); 1517 } 1518 1519 if (flag & SO_FALLBACK) { 1520 /* 1521 * Non streams socket needs a stream to fallback to 1522 */ 1523 RD(q)->q_ptr = (void *)conn_dev; 1524 WR(q)->q_qinfo = &icmp_fallback_sock_winit; 1525 WR(q)->q_ptr = (void *)ip_minor_arena_sa; 1526 qprocson(q); 1527 return (0); 1528 } 1529 1530 connp = icmp_open(family, credp, &error, KM_SLEEP); 1531 if (connp == NULL) { 1532 ASSERT(error != NULL); 1533 inet_minor_free(ip_minor_arena_sa, connp->conn_dev); 1534 return (error); 1535 } 1536 1537 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1538 connp->conn_dev = conn_dev; 1539 connp->conn_minor_arena = ip_minor_arena_sa; 1540 1541 is = connp->conn_icmp->icmp_is; 1542 1543 /* 1544 * Initialize the icmp_t structure for this stream. 1545 */ 1546 q->q_ptr = connp; 1547 WR(q)->q_ptr = connp; 1548 connp->conn_rq = q; 1549 connp->conn_wq = WR(q); 1550 1551 if (connp->conn_icmp->icmp_family == AF_INET6) { 1552 /* Build initial header template for transmit */ 1553 rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER); 1554 if ((error = icmp_build_hdrs(connp->conn_icmp)) != 0) { 1555 rw_exit(&connp->conn_icmp->icmp_rwlock); 1556 inet_minor_free(ip_minor_arena_sa, connp->conn_dev); 1557 ipcl_conn_destroy(connp); 1558 return (error); 1559 } 1560 rw_exit(&connp->conn_icmp->icmp_rwlock); 1561 } 1562 1563 1564 q->q_hiwat = is->is_recv_hiwat; 1565 WR(q)->q_hiwat = is->is_xmit_hiwat; 1566 WR(q)->q_lowat = is->is_xmit_lowat; 1567 1568 qprocson(q); 1569 1570 /* Set the Stream head write offset. 
*/ 1571 (void) proto_set_tx_wroff(q, connp, 1572 connp->conn_icmp->icmp_max_hdr_len + is->is_wroff_extra); 1573 (void) proto_set_rx_hiwat(connp->conn_rq, connp, q->q_hiwat); 1574 1575 mutex_enter(&connp->conn_lock); 1576 connp->conn_state_flags &= ~CONN_INCIPIENT; 1577 mutex_exit(&connp->conn_lock); 1578 1579 return (0); 1580 } 1581 1582 /* For /dev/icmp4 aka AF_INET open */ 1583 static int 1584 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1585 { 1586 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET)); 1587 } 1588 1589 /* For /dev/icmp6 aka AF_INET6 open */ 1590 static int 1591 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1592 { 1593 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET6)); 1594 } 1595 1596 /* 1597 * This is the open routine for icmp. It allocates a icmp_t structure for 1598 * the stream and, on the first open of the module, creates an ND table. 1599 */ 1600 /* ARGSUSED */ 1601 static conn_t * 1602 icmp_open(int family, cred_t *credp, int *err, int flags) 1603 { 1604 icmp_t *icmp; 1605 conn_t *connp; 1606 zoneid_t zoneid; 1607 netstack_t *ns; 1608 icmp_stack_t *is; 1609 boolean_t isv6 = B_FALSE; 1610 1611 *err = secpolicy_net_icmpaccess(credp); 1612 if (*err != 0) 1613 return (NULL); 1614 1615 if (family == AF_INET6) 1616 isv6 = B_TRUE; 1617 ns = netstack_find_by_cred(credp); 1618 ASSERT(ns != NULL); 1619 is = ns->netstack_icmp; 1620 ASSERT(is != NULL); 1621 1622 /* 1623 * For exclusive stacks we set the zoneid to zero 1624 * to make ICMP operate as if in the global zone. 1625 */ 1626 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 1627 zoneid = GLOBAL_ZONEID; 1628 else 1629 zoneid = crgetzoneid(credp); 1630 1631 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 1632 1633 connp = ipcl_conn_create(IPCL_RAWIPCONN, flags, ns); 1634 icmp = connp->conn_icmp; 1635 icmp->icmp_v6dst = sin6_null; 1636 1637 /* 1638 * ipcl_conn_create did a netstack_hold. 
Undo the hold that was 1639 * done by netstack_find_by_cred() 1640 */ 1641 netstack_rele(ns); 1642 1643 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1644 ASSERT(connp->conn_ulp == IPPROTO_ICMP); 1645 ASSERT(connp->conn_icmp == icmp); 1646 ASSERT(icmp->icmp_connp == connp); 1647 1648 /* Set the initial state of the stream and the privilege status. */ 1649 icmp->icmp_state = TS_UNBND; 1650 if (isv6) { 1651 icmp->icmp_ipversion = IPV6_VERSION; 1652 icmp->icmp_family = AF_INET6; 1653 connp->conn_ulp = IPPROTO_ICMPV6; 1654 /* May be changed by a SO_PROTOTYPE socket option. */ 1655 icmp->icmp_proto = IPPROTO_ICMPV6; 1656 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1657 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1658 icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; 1659 connp->conn_af_isv6 = B_TRUE; 1660 connp->conn_flags |= IPCL_ISV6; 1661 } else { 1662 icmp->icmp_ipversion = IPV4_VERSION; 1663 icmp->icmp_family = AF_INET; 1664 /* May be changed by a SO_PROTOTYPE socket option. */ 1665 icmp->icmp_proto = IPPROTO_ICMP; 1666 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1667 icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; 1668 connp->conn_af_isv6 = B_FALSE; 1669 connp->conn_flags &= ~IPCL_ISV6; 1670 } 1671 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1672 icmp->icmp_pending_op = -1; 1673 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1674 connp->conn_zoneid = zoneid; 1675 1676 /* 1677 * If the caller has the process-wide flag set, then default to MAC 1678 * exempt mode. This allows read-down to unlabeled hosts. 
1679 */ 1680 if (getpflags(NET_MAC_AWARE, credp) != 0) 1681 connp->conn_mac_exempt = B_TRUE; 1682 1683 connp->conn_ulp_labeled = is_system_labeled(); 1684 1685 icmp->icmp_is = is; 1686 1687 connp->conn_recv = icmp_input; 1688 crhold(credp); 1689 connp->conn_cred = credp; 1690 1691 rw_exit(&icmp->icmp_rwlock); 1692 1693 connp->conn_flow_cntrld = B_FALSE; 1694 return (connp); 1695 } 1696 1697 /* 1698 * Which ICMP options OK to set through T_UNITDATA_REQ... 1699 */ 1700 /* ARGSUSED */ 1701 static boolean_t 1702 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1703 { 1704 return (B_TRUE); 1705 } 1706 1707 /* 1708 * This routine gets default values of certain options whose default 1709 * values are maintained by protcol specific code 1710 */ 1711 /* ARGSUSED */ 1712 int 1713 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1714 { 1715 icmp_t *icmp = Q_TO_ICMP(q); 1716 icmp_stack_t *is = icmp->icmp_is; 1717 int *i1 = (int *)ptr; 1718 1719 switch (level) { 1720 case IPPROTO_IP: 1721 switch (name) { 1722 case IP_MULTICAST_TTL: 1723 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1724 return (sizeof (uchar_t)); 1725 case IP_MULTICAST_LOOP: 1726 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1727 return (sizeof (uchar_t)); 1728 } 1729 break; 1730 case IPPROTO_IPV6: 1731 switch (name) { 1732 case IPV6_MULTICAST_HOPS: 1733 *i1 = IP_DEFAULT_MULTICAST_TTL; 1734 return (sizeof (int)); 1735 case IPV6_MULTICAST_LOOP: 1736 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1737 return (sizeof (int)); 1738 case IPV6_UNICAST_HOPS: 1739 *i1 = is->is_ipv6_hoplimit; 1740 return (sizeof (int)); 1741 } 1742 break; 1743 case IPPROTO_ICMPV6: 1744 switch (name) { 1745 case ICMP6_FILTER: 1746 /* Make it look like "pass all" */ 1747 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1748 return (sizeof (icmp6_filter_t)); 1749 } 1750 break; 1751 } 1752 return (-1); 1753 } 1754 1755 /* 1756 * This routine retrieves the current status of socket options. 1757 * It returns the size of the option retrieved. 
1758 */ 1759 int 1760 icmp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 1761 { 1762 icmp_t *icmp = connp->conn_icmp; 1763 icmp_stack_t *is = icmp->icmp_is; 1764 int *i1 = (int *)ptr; 1765 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1766 int ret = 0; 1767 1768 ASSERT(RW_READ_HELD(&icmp->icmp_rwlock)); 1769 switch (level) { 1770 case SOL_SOCKET: 1771 switch (name) { 1772 case SO_DEBUG: 1773 *i1 = icmp->icmp_debug; 1774 break; 1775 case SO_TYPE: 1776 *i1 = SOCK_RAW; 1777 break; 1778 case SO_PROTOTYPE: 1779 *i1 = icmp->icmp_proto; 1780 break; 1781 case SO_REUSEADDR: 1782 *i1 = icmp->icmp_reuseaddr; 1783 break; 1784 1785 /* 1786 * The following three items are available here, 1787 * but are only meaningful to IP. 1788 */ 1789 case SO_DONTROUTE: 1790 *i1 = icmp->icmp_dontroute; 1791 break; 1792 case SO_USELOOPBACK: 1793 *i1 = icmp->icmp_useloopback; 1794 break; 1795 case SO_BROADCAST: 1796 *i1 = icmp->icmp_broadcast; 1797 break; 1798 1799 case SO_SNDBUF: 1800 ASSERT(icmp->icmp_xmit_hiwat <= INT_MAX); 1801 *i1 = icmp->icmp_xmit_hiwat; 1802 break; 1803 case SO_RCVBUF: 1804 ASSERT(icmp->icmp_recv_hiwat <= INT_MAX); 1805 *i1 = icmp->icmp_recv_hiwat; 1806 break; 1807 case SO_DGRAM_ERRIND: 1808 *i1 = icmp->icmp_dgram_errind; 1809 break; 1810 case SO_TIMESTAMP: 1811 *i1 = icmp->icmp_timestamp; 1812 break; 1813 case SO_MAC_EXEMPT: 1814 *i1 = connp->conn_mac_exempt; 1815 break; 1816 case SO_DOMAIN: 1817 *i1 = icmp->icmp_family; 1818 break; 1819 1820 /* 1821 * Following four not meaningful for icmp 1822 * Action is same as "default" to which we fallthrough 1823 * so we keep them in comments. 1824 * case SO_LINGER: 1825 * case SO_KEEPALIVE: 1826 * case SO_OOBINLINE: 1827 * case SO_ALLZONES: 1828 */ 1829 default: 1830 ret = -1; 1831 goto done; 1832 } 1833 break; 1834 case IPPROTO_IP: 1835 /* 1836 * Only allow IPv4 option processing on IPv4 sockets. 
1837 */ 1838 if (icmp->icmp_family != AF_INET) { 1839 ret = -1; 1840 goto done; 1841 } 1842 1843 switch (name) { 1844 case IP_OPTIONS: 1845 case T_IP_OPTIONS: 1846 /* Options are passed up with each packet */ 1847 ret = 0; 1848 goto done; 1849 case IP_HDRINCL: 1850 *i1 = (int)icmp->icmp_hdrincl; 1851 break; 1852 case IP_TOS: 1853 case T_IP_TOS: 1854 *i1 = (int)icmp->icmp_type_of_service; 1855 break; 1856 case IP_TTL: 1857 *i1 = (int)icmp->icmp_ttl; 1858 break; 1859 case IP_MULTICAST_IF: 1860 /* 0 address if not set */ 1861 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1862 ret = sizeof (ipaddr_t); 1863 goto done; 1864 case IP_MULTICAST_TTL: 1865 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1866 ret = sizeof (uchar_t); 1867 goto done; 1868 case IP_MULTICAST_LOOP: 1869 *ptr = connp->conn_multicast_loop; 1870 ret = sizeof (uint8_t); 1871 goto done; 1872 case IP_BOUND_IF: 1873 /* Zero if not set */ 1874 *i1 = icmp->icmp_bound_if; 1875 break; /* goto sizeof (int) option return */ 1876 case IP_UNSPEC_SRC: 1877 *ptr = icmp->icmp_unspec_source; 1878 break; /* goto sizeof (int) option return */ 1879 case IP_RECVIF: 1880 *ptr = icmp->icmp_recvif; 1881 break; /* goto sizeof (int) option return */ 1882 case IP_BROADCAST_TTL: 1883 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 1884 return (sizeof (uchar_t)); 1885 case IP_RECVPKTINFO: 1886 /* 1887 * This also handles IP_PKTINFO. 1888 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1889 * Differentiation is based on the size of the argument 1890 * passed in. 1891 * This option is handled in IP which will return an 1892 * error for IP_PKTINFO as it's not supported as a 1893 * sticky option. 1894 */ 1895 ret = -EINVAL; 1896 goto done; 1897 /* 1898 * Cannot "get" the value of following options 1899 * at this level. Action is same as "default" to 1900 * which we fallthrough so we keep them in comments. 
1901 * 1902 * case IP_ADD_MEMBERSHIP: 1903 * case IP_DROP_MEMBERSHIP: 1904 * case IP_BLOCK_SOURCE: 1905 * case IP_UNBLOCK_SOURCE: 1906 * case IP_ADD_SOURCE_MEMBERSHIP: 1907 * case IP_DROP_SOURCE_MEMBERSHIP: 1908 * case MCAST_JOIN_GROUP: 1909 * case MCAST_LEAVE_GROUP: 1910 * case MCAST_BLOCK_SOURCE: 1911 * case MCAST_UNBLOCK_SOURCE: 1912 * case MCAST_JOIN_SOURCE_GROUP: 1913 * case MCAST_LEAVE_SOURCE_GROUP: 1914 * case MRT_INIT: 1915 * case MRT_DONE: 1916 * case MRT_ADD_VIF: 1917 * case MRT_DEL_VIF: 1918 * case MRT_ADD_MFC: 1919 * case MRT_DEL_MFC: 1920 * case MRT_VERSION: 1921 * case MRT_ASSERT: 1922 * case IP_SEC_OPT: 1923 * case IP_NEXTHOP: 1924 */ 1925 default: 1926 ret = -1; 1927 goto done; 1928 } 1929 break; 1930 case IPPROTO_IPV6: 1931 /* 1932 * Only allow IPv6 option processing on native IPv6 sockets. 1933 */ 1934 if (icmp->icmp_family != AF_INET6) { 1935 ret = -1; 1936 goto done; 1937 } 1938 switch (name) { 1939 case IPV6_UNICAST_HOPS: 1940 *i1 = (unsigned int)icmp->icmp_ttl; 1941 break; 1942 case IPV6_MULTICAST_IF: 1943 /* 0 index if not set */ 1944 *i1 = icmp->icmp_multicast_if_index; 1945 break; 1946 case IPV6_MULTICAST_HOPS: 1947 *i1 = icmp->icmp_multicast_ttl; 1948 break; 1949 case IPV6_MULTICAST_LOOP: 1950 *i1 = connp->conn_multicast_loop; 1951 break; 1952 case IPV6_BOUND_IF: 1953 /* Zero if not set */ 1954 *i1 = icmp->icmp_bound_if; 1955 break; 1956 case IPV6_UNSPEC_SRC: 1957 *i1 = icmp->icmp_unspec_source; 1958 break; 1959 case IPV6_CHECKSUM: 1960 /* 1961 * Return offset or -1 if no checksum offset. 
1962 * Does not apply to IPPROTO_ICMPV6 1963 */ 1964 if (icmp->icmp_proto == IPPROTO_ICMPV6) { 1965 ret = -1; 1966 goto done; 1967 } 1968 1969 if (icmp->icmp_raw_checksum) { 1970 *i1 = icmp->icmp_checksum_off; 1971 } else { 1972 *i1 = -1; 1973 } 1974 break; 1975 case IPV6_JOIN_GROUP: 1976 case IPV6_LEAVE_GROUP: 1977 case MCAST_JOIN_GROUP: 1978 case MCAST_LEAVE_GROUP: 1979 case MCAST_BLOCK_SOURCE: 1980 case MCAST_UNBLOCK_SOURCE: 1981 case MCAST_JOIN_SOURCE_GROUP: 1982 case MCAST_LEAVE_SOURCE_GROUP: 1983 /* cannot "get" the value for these */ 1984 ret = -1; 1985 goto done; 1986 case IPV6_RECVPKTINFO: 1987 *i1 = icmp->icmp_ip_recvpktinfo; 1988 break; 1989 case IPV6_RECVTCLASS: 1990 *i1 = icmp->icmp_ipv6_recvtclass; 1991 break; 1992 case IPV6_RECVPATHMTU: 1993 *i1 = icmp->icmp_ipv6_recvpathmtu; 1994 break; 1995 case IPV6_V6ONLY: 1996 *i1 = 1; 1997 break; 1998 case IPV6_RECVHOPLIMIT: 1999 *i1 = icmp->icmp_ipv6_recvhoplimit; 2000 break; 2001 case IPV6_RECVHOPOPTS: 2002 *i1 = icmp->icmp_ipv6_recvhopopts; 2003 break; 2004 case IPV6_RECVDSTOPTS: 2005 *i1 = icmp->icmp_ipv6_recvdstopts; 2006 break; 2007 case _OLD_IPV6_RECVDSTOPTS: 2008 *i1 = icmp->icmp_old_ipv6_recvdstopts; 2009 break; 2010 case IPV6_RECVRTHDRDSTOPTS: 2011 *i1 = icmp->icmp_ipv6_recvrtdstopts; 2012 break; 2013 case IPV6_RECVRTHDR: 2014 *i1 = icmp->icmp_ipv6_recvrthdr; 2015 break; 2016 case IPV6_PKTINFO: { 2017 /* XXX assumes that caller has room for max size! 
*/ 2018 struct in6_pktinfo *pkti; 2019 2020 pkti = (struct in6_pktinfo *)ptr; 2021 if (ipp->ipp_fields & IPPF_IFINDEX) 2022 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2023 else 2024 pkti->ipi6_ifindex = 0; 2025 if (ipp->ipp_fields & IPPF_ADDR) 2026 pkti->ipi6_addr = ipp->ipp_addr; 2027 else 2028 pkti->ipi6_addr = ipv6_all_zeros; 2029 ret = sizeof (struct in6_pktinfo); 2030 goto done; 2031 } 2032 case IPV6_NEXTHOP: { 2033 sin6_t *sin6 = (sin6_t *)ptr; 2034 2035 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2036 return (0); 2037 *sin6 = sin6_null; 2038 sin6->sin6_family = AF_INET6; 2039 sin6->sin6_addr = ipp->ipp_nexthop; 2040 ret = (sizeof (sin6_t)); 2041 goto done; 2042 } 2043 case IPV6_HOPOPTS: 2044 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2045 return (0); 2046 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 2047 return (0); 2048 bcopy((char *)ipp->ipp_hopopts + 2049 icmp->icmp_label_len_v6, ptr, 2050 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 2051 if (icmp->icmp_label_len_v6 > 0) { 2052 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2053 ptr[1] = (ipp->ipp_hopoptslen - 2054 icmp->icmp_label_len_v6 + 7) / 8 - 1; 2055 } 2056 ret = (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 2057 goto done; 2058 case IPV6_RTHDRDSTOPTS: 2059 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2060 return (0); 2061 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2062 ret = ipp->ipp_rtdstoptslen; 2063 goto done; 2064 case IPV6_RTHDR: 2065 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2066 return (0); 2067 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2068 ret = ipp->ipp_rthdrlen; 2069 goto done; 2070 case IPV6_DSTOPTS: 2071 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 2072 ret = 0; 2073 goto done; 2074 } 2075 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2076 ret = ipp->ipp_dstoptslen; 2077 goto done; 2078 case IPV6_PATHMTU: 2079 if (!(ipp->ipp_fields & IPPF_PATHMTU)) { 2080 ret = 0; 2081 } else { 2082 ret = ip_fill_mtuinfo( 2083 &icmp->icmp_v6dst.sin6_addr, 0, 2084 (struct ip6_mtuinfo *)ptr, 2085 
is->is_netstack); 2086 } 2087 goto done; 2088 case IPV6_TCLASS: 2089 if (ipp->ipp_fields & IPPF_TCLASS) 2090 *i1 = ipp->ipp_tclass; 2091 else 2092 *i1 = IPV6_FLOW_TCLASS( 2093 IPV6_DEFAULT_VERS_AND_FLOW); 2094 break; 2095 default: 2096 ret = -1; 2097 goto done; 2098 } 2099 break; 2100 case IPPROTO_ICMPV6: 2101 /* 2102 * Only allow IPv6 option processing on native IPv6 sockets. 2103 */ 2104 if (icmp->icmp_family != AF_INET6) { 2105 ret = -1; 2106 } 2107 2108 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2109 ret = -1; 2110 } 2111 2112 switch (name) { 2113 case ICMP6_FILTER: 2114 if (icmp->icmp_filter == NULL) { 2115 /* Make it look like "pass all" */ 2116 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 2117 } else { 2118 (void) bcopy(icmp->icmp_filter, ptr, 2119 sizeof (icmp6_filter_t)); 2120 } 2121 ret = sizeof (icmp6_filter_t); 2122 goto done; 2123 default: 2124 ret = -1; 2125 goto done; 2126 } 2127 default: 2128 ret = -1; 2129 goto done; 2130 } 2131 ret = sizeof (int); 2132 done: 2133 return (ret); 2134 } 2135 2136 /* 2137 * This routine retrieves the current status of socket options. 2138 * It returns the size of the option retrieved. 2139 */ 2140 int 2141 icmp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 2142 { 2143 conn_t *connp = Q_TO_CONN(q); 2144 icmp_t *icmp = connp->conn_icmp; 2145 int err; 2146 2147 rw_enter(&icmp->icmp_rwlock, RW_READER); 2148 err = icmp_opt_get(connp, level, name, ptr); 2149 rw_exit(&icmp->icmp_rwlock); 2150 return (err); 2151 } 2152 2153 int 2154 icmp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2155 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2156 void *thisdg_attrs, boolean_t checkonly) 2157 { 2158 2159 int *i1 = (int *)invalp; 2160 boolean_t onoff = (*i1 == 0) ? 
0 : 1; 2161 icmp_t *icmp = connp->conn_icmp; 2162 icmp_stack_t *is = icmp->icmp_is; 2163 int error; 2164 2165 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 2166 /* 2167 * For fixed length options, no sanity check 2168 * of passed in length is done. It is assumed *_optcom_req() 2169 * routines do the right thing. 2170 */ 2171 switch (level) { 2172 case SOL_SOCKET: 2173 switch (name) { 2174 case SO_DEBUG: 2175 if (!checkonly) 2176 icmp->icmp_debug = onoff; 2177 break; 2178 case SO_PROTOTYPE: 2179 if ((*i1 & 0xFF) != IPPROTO_ICMP && 2180 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 2181 secpolicy_net_rawaccess(cr) != 0) { 2182 *outlenp = 0; 2183 return (EACCES); 2184 } 2185 /* Can't use IPPROTO_RAW with IPv6 */ 2186 if ((*i1 & 0xFF) == IPPROTO_RAW && 2187 icmp->icmp_family == AF_INET6) { 2188 *outlenp = 0; 2189 return (EPROTONOSUPPORT); 2190 } 2191 if (checkonly) { 2192 /* T_CHECK case */ 2193 *(int *)outvalp = (*i1 & 0xFF); 2194 break; 2195 } 2196 icmp->icmp_proto = *i1 & 0xFF; 2197 if ((icmp->icmp_proto == IPPROTO_RAW || 2198 icmp->icmp_proto == IPPROTO_IGMP) && 2199 icmp->icmp_family == AF_INET) 2200 icmp->icmp_hdrincl = 1; 2201 else 2202 icmp->icmp_hdrincl = 0; 2203 2204 if (icmp->icmp_family == AF_INET6 && 2205 icmp->icmp_proto == IPPROTO_ICMPV6) { 2206 /* Set offset for icmp6_cksum */ 2207 icmp->icmp_raw_checksum = 0; 2208 icmp->icmp_checksum_off = 2; 2209 } 2210 if (icmp->icmp_proto == IPPROTO_UDP || 2211 icmp->icmp_proto == IPPROTO_TCP || 2212 icmp->icmp_proto == IPPROTO_SCTP) { 2213 icmp->icmp_no_tp_cksum = 1; 2214 icmp->icmp_sticky_ipp.ipp_fields |= 2215 IPPF_NO_CKSUM; 2216 } else { 2217 icmp->icmp_no_tp_cksum = 0; 2218 icmp->icmp_sticky_ipp.ipp_fields &= 2219 ~IPPF_NO_CKSUM; 2220 } 2221 2222 if (icmp->icmp_filter != NULL && 2223 icmp->icmp_proto != IPPROTO_ICMPV6) { 2224 kmem_free(icmp->icmp_filter, 2225 sizeof (icmp6_filter_t)); 2226 icmp->icmp_filter = NULL; 2227 } 2228 2229 /* Rebuild the header template */ 2230 error = icmp_build_hdrs(icmp); 2231 if (error != 0) { 
2232 *outlenp = 0; 2233 return (error); 2234 } 2235 2236 /* 2237 * For SCTP, we don't use icmp_bind_proto() for 2238 * raw socket binding. Note that we do not need 2239 * to set *outlenp. 2240 * FIXME: how does SCTP work? 2241 */ 2242 if (icmp->icmp_proto == IPPROTO_SCTP) 2243 return (0); 2244 2245 *outlenp = sizeof (int); 2246 *(int *)outvalp = *i1 & 0xFF; 2247 2248 /* Drop lock across the bind operation */ 2249 rw_exit(&icmp->icmp_rwlock); 2250 (void) icmp_bind_proto(connp); 2251 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2252 return (0); 2253 case SO_REUSEADDR: 2254 if (!checkonly) { 2255 icmp->icmp_reuseaddr = onoff; 2256 PASS_OPT_TO_IP(connp); 2257 } 2258 break; 2259 2260 /* 2261 * The following three items are available here, 2262 * but are only meaningful to IP. 2263 */ 2264 case SO_DONTROUTE: 2265 if (!checkonly) { 2266 icmp->icmp_dontroute = onoff; 2267 PASS_OPT_TO_IP(connp); 2268 } 2269 break; 2270 case SO_USELOOPBACK: 2271 if (!checkonly) { 2272 icmp->icmp_useloopback = onoff; 2273 PASS_OPT_TO_IP(connp); 2274 } 2275 break; 2276 case SO_BROADCAST: 2277 if (!checkonly) { 2278 icmp->icmp_broadcast = onoff; 2279 PASS_OPT_TO_IP(connp); 2280 } 2281 break; 2282 2283 case SO_SNDBUF: 2284 if (*i1 > is->is_max_buf) { 2285 *outlenp = 0; 2286 return (ENOBUFS); 2287 } 2288 if (!checkonly) { 2289 if (!IPCL_IS_NONSTR(connp)) { 2290 connp->conn_wq->q_hiwat = *i1; 2291 } 2292 icmp->icmp_xmit_hiwat = *i1; 2293 } 2294 break; 2295 case SO_RCVBUF: 2296 if (*i1 > is->is_max_buf) { 2297 *outlenp = 0; 2298 return (ENOBUFS); 2299 } 2300 if (!checkonly) { 2301 icmp->icmp_recv_hiwat = *i1; 2302 rw_exit(&icmp->icmp_rwlock); 2303 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2304 *i1); 2305 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2306 } 2307 break; 2308 case SO_DGRAM_ERRIND: 2309 if (!checkonly) 2310 icmp->icmp_dgram_errind = onoff; 2311 break; 2312 case SO_ALLZONES: 2313 /* 2314 * "soft" error (negative) 2315 * option not handled at this level 2316 * Note: Do not modify *outlenp 
2317 */ 2318 return (-EINVAL); 2319 case SO_TIMESTAMP: 2320 if (!checkonly) { 2321 icmp->icmp_timestamp = onoff; 2322 } 2323 break; 2324 case SO_MAC_EXEMPT: 2325 /* 2326 * "soft" error (negative) 2327 * option not handled at this level 2328 * Note: Do not modify *outlenp 2329 */ 2330 return (-EINVAL); 2331 case SO_RCVTIMEO: 2332 case SO_SNDTIMEO: 2333 /* 2334 * Pass these two options in order for third part 2335 * protocol usage. Here just return directly. 2336 */ 2337 return (0); 2338 /* 2339 * Following three not meaningful for icmp 2340 * Action is same as "default" so we keep them 2341 * in comments. 2342 * case SO_LINGER: 2343 * case SO_KEEPALIVE: 2344 * case SO_OOBINLINE: 2345 */ 2346 default: 2347 *outlenp = 0; 2348 return (EINVAL); 2349 } 2350 break; 2351 case IPPROTO_IP: 2352 /* 2353 * Only allow IPv4 option processing on IPv4 sockets. 2354 */ 2355 if (icmp->icmp_family != AF_INET) { 2356 *outlenp = 0; 2357 return (ENOPROTOOPT); 2358 } 2359 switch (name) { 2360 case IP_OPTIONS: 2361 case T_IP_OPTIONS: 2362 /* Save options for use by IP. */ 2363 if ((inlen & 0x3) || 2364 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2365 *outlenp = 0; 2366 return (EINVAL); 2367 } 2368 if (checkonly) 2369 break; 2370 2371 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2372 &icmp->icmp_ip_snd_options_len, 2373 icmp->icmp_label_len, invalp, inlen)) { 2374 *outlenp = 0; 2375 return (ENOMEM); 2376 } 2377 2378 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2379 icmp->icmp_ip_snd_options_len; 2380 rw_exit(&icmp->icmp_rwlock); 2381 (void) proto_set_tx_wroff(connp->conn_rq == NULL ? 
NULL: 2382 RD(connp->conn_rq), connp, 2383 icmp->icmp_max_hdr_len + is->is_wroff_extra); 2384 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2385 break; 2386 case IP_HDRINCL: 2387 if (!checkonly) 2388 icmp->icmp_hdrincl = onoff; 2389 break; 2390 case IP_TOS: 2391 case T_IP_TOS: 2392 if (!checkonly) { 2393 icmp->icmp_type_of_service = (uint8_t)*i1; 2394 } 2395 break; 2396 case IP_TTL: 2397 if (!checkonly) { 2398 icmp->icmp_ttl = (uint8_t)*i1; 2399 } 2400 break; 2401 case IP_MULTICAST_IF: 2402 /* 2403 * TODO should check OPTMGMT reply and undo this if 2404 * there is an error. 2405 */ 2406 if (!checkonly) { 2407 icmp->icmp_multicast_if_addr = *i1; 2408 PASS_OPT_TO_IP(connp); 2409 } 2410 break; 2411 case IP_MULTICAST_TTL: 2412 if (!checkonly) 2413 icmp->icmp_multicast_ttl = *invalp; 2414 break; 2415 case IP_MULTICAST_LOOP: 2416 if (!checkonly) { 2417 connp->conn_multicast_loop = 2418 (*invalp == 0) ? 0 : 1; 2419 PASS_OPT_TO_IP(connp); 2420 } 2421 break; 2422 case IP_BOUND_IF: 2423 if (!checkonly) { 2424 icmp->icmp_bound_if = *i1; 2425 PASS_OPT_TO_IP(connp); 2426 } 2427 break; 2428 case IP_UNSPEC_SRC: 2429 if (!checkonly) { 2430 icmp->icmp_unspec_source = onoff; 2431 PASS_OPT_TO_IP(connp); 2432 } 2433 break; 2434 case IP_BROADCAST_TTL: 2435 if (!checkonly) 2436 connp->conn_broadcast_ttl = *invalp; 2437 break; 2438 case IP_RECVIF: 2439 if (!checkonly) { 2440 icmp->icmp_recvif = onoff; 2441 } 2442 /* 2443 * pass to ip 2444 */ 2445 return (-EINVAL); 2446 case IP_PKTINFO: { 2447 /* 2448 * This also handles IP_RECVPKTINFO. 2449 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2450 * Differentiation is based on the size of the argument 2451 * passed in. 2452 */ 2453 struct in_pktinfo *pktinfop; 2454 ip4_pkt_t *attr_pktinfop; 2455 2456 if (checkonly) 2457 break; 2458 2459 if (inlen == sizeof (int)) { 2460 /* 2461 * This is IP_RECVPKTINFO option. 2462 * Keep a local copy of wether this option is 2463 * set or not and pass it down to IP for 2464 * processing. 
2465 */ 2466 icmp->icmp_ip_recvpktinfo = onoff; 2467 return (-EINVAL); 2468 } 2469 2470 2471 if (inlen != sizeof (struct in_pktinfo)) { 2472 return (EINVAL); 2473 } 2474 2475 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2476 == NULL) { 2477 /* 2478 * sticky option is not supported 2479 */ 2480 return (EINVAL); 2481 } 2482 2483 pktinfop = (struct in_pktinfo *)invalp; 2484 2485 /* 2486 * Atleast one of the values should be specified 2487 */ 2488 if (pktinfop->ipi_ifindex == 0 && 2489 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2490 return (EINVAL); 2491 } 2492 2493 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2494 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2495 } 2496 break; 2497 case IP_ADD_MEMBERSHIP: 2498 case IP_DROP_MEMBERSHIP: 2499 case IP_BLOCK_SOURCE: 2500 case IP_UNBLOCK_SOURCE: 2501 case IP_ADD_SOURCE_MEMBERSHIP: 2502 case IP_DROP_SOURCE_MEMBERSHIP: 2503 case MCAST_JOIN_GROUP: 2504 case MCAST_LEAVE_GROUP: 2505 case MCAST_BLOCK_SOURCE: 2506 case MCAST_UNBLOCK_SOURCE: 2507 case MCAST_JOIN_SOURCE_GROUP: 2508 case MCAST_LEAVE_SOURCE_GROUP: 2509 case MRT_INIT: 2510 case MRT_DONE: 2511 case MRT_ADD_VIF: 2512 case MRT_DEL_VIF: 2513 case MRT_ADD_MFC: 2514 case MRT_DEL_MFC: 2515 case MRT_VERSION: 2516 case MRT_ASSERT: 2517 case IP_SEC_OPT: 2518 case IP_NEXTHOP: 2519 /* 2520 * "soft" error (negative) 2521 * option not handled at this level 2522 * Note: Do not modify *outlenp 2523 */ 2524 return (-EINVAL); 2525 default: 2526 *outlenp = 0; 2527 return (EINVAL); 2528 } 2529 break; 2530 case IPPROTO_IPV6: { 2531 ip6_pkt_t *ipp; 2532 boolean_t sticky; 2533 2534 if (icmp->icmp_family != AF_INET6) { 2535 *outlenp = 0; 2536 return (ENOPROTOOPT); 2537 } 2538 /* 2539 * Deal with both sticky options and ancillary data 2540 */ 2541 if (thisdg_attrs == NULL) { 2542 /* sticky options, or none */ 2543 ipp = &icmp->icmp_sticky_ipp; 2544 sticky = B_TRUE; 2545 } else { 2546 /* ancillary data */ 2547 ipp = (ip6_pkt_t *)thisdg_attrs; 2548 sticky = B_FALSE; 
2549 } 2550 2551 switch (name) { 2552 case IPV6_MULTICAST_IF: 2553 if (!checkonly) { 2554 icmp->icmp_multicast_if_index = *i1; 2555 PASS_OPT_TO_IP(connp); 2556 } 2557 break; 2558 case IPV6_UNICAST_HOPS: 2559 /* -1 means use default */ 2560 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2561 *outlenp = 0; 2562 return (EINVAL); 2563 } 2564 if (!checkonly) { 2565 if (*i1 == -1) { 2566 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2567 is->is_ipv6_hoplimit; 2568 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2569 /* Pass modified value to IP. */ 2570 *i1 = ipp->ipp_hoplimit; 2571 } else { 2572 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2573 (uint8_t)*i1; 2574 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2575 } 2576 /* Rebuild the header template */ 2577 error = icmp_build_hdrs(icmp); 2578 if (error != 0) { 2579 *outlenp = 0; 2580 return (error); 2581 } 2582 } 2583 break; 2584 case IPV6_MULTICAST_HOPS: 2585 /* -1 means use default */ 2586 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2587 *outlenp = 0; 2588 return (EINVAL); 2589 } 2590 if (!checkonly) { 2591 if (*i1 == -1) { 2592 icmp->icmp_multicast_ttl = 2593 ipp->ipp_multicast_hops = 2594 IP_DEFAULT_MULTICAST_TTL; 2595 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2596 /* Pass modified value to IP. */ 2597 *i1 = icmp->icmp_multicast_ttl; 2598 } else { 2599 icmp->icmp_multicast_ttl = 2600 ipp->ipp_multicast_hops = 2601 (uint8_t)*i1; 2602 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2603 } 2604 } 2605 break; 2606 case IPV6_MULTICAST_LOOP: 2607 if (*i1 != 0 && *i1 != 1) { 2608 *outlenp = 0; 2609 return (EINVAL); 2610 } 2611 if (!checkonly) { 2612 connp->conn_multicast_loop = *i1; 2613 PASS_OPT_TO_IP(connp); 2614 } 2615 break; 2616 case IPV6_CHECKSUM: 2617 /* 2618 * Integer offset into the user data of where the 2619 * checksum is located. 2620 * Offset of -1 disables option. 2621 * Does not apply to IPPROTO_ICMPV6. 
2622 */ 2623 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2624 *outlenp = 0; 2625 return (EINVAL); 2626 } 2627 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2628 /* Negative or not 16 bit aligned offset */ 2629 *outlenp = 0; 2630 return (EINVAL); 2631 } 2632 if (checkonly) 2633 break; 2634 2635 if (*i1 == -1) { 2636 icmp->icmp_raw_checksum = 0; 2637 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2638 } else { 2639 icmp->icmp_raw_checksum = 1; 2640 icmp->icmp_checksum_off = *i1; 2641 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2642 } 2643 /* Rebuild the header template */ 2644 error = icmp_build_hdrs(icmp); 2645 if (error != 0) { 2646 *outlenp = 0; 2647 return (error); 2648 } 2649 break; 2650 case IPV6_JOIN_GROUP: 2651 case IPV6_LEAVE_GROUP: 2652 case MCAST_JOIN_GROUP: 2653 case MCAST_LEAVE_GROUP: 2654 case MCAST_BLOCK_SOURCE: 2655 case MCAST_UNBLOCK_SOURCE: 2656 case MCAST_JOIN_SOURCE_GROUP: 2657 case MCAST_LEAVE_SOURCE_GROUP: 2658 /* 2659 * "soft" error (negative) 2660 * option not handled at this level 2661 * Note: Do not modify *outlenp 2662 */ 2663 return (-EINVAL); 2664 case IPV6_BOUND_IF: 2665 if (!checkonly) { 2666 icmp->icmp_bound_if = *i1; 2667 PASS_OPT_TO_IP(connp); 2668 } 2669 break; 2670 case IPV6_UNSPEC_SRC: 2671 if (!checkonly) { 2672 icmp->icmp_unspec_source = onoff; 2673 PASS_OPT_TO_IP(connp); 2674 } 2675 break; 2676 case IPV6_RECVTCLASS: 2677 if (!checkonly) { 2678 icmp->icmp_ipv6_recvtclass = onoff; 2679 PASS_OPT_TO_IP(connp); 2680 } 2681 break; 2682 /* 2683 * Set boolean switches for ancillary data delivery 2684 */ 2685 case IPV6_RECVPKTINFO: 2686 if (!checkonly) { 2687 icmp->icmp_ip_recvpktinfo = onoff; 2688 PASS_OPT_TO_IP(connp); 2689 } 2690 break; 2691 case IPV6_RECVPATHMTU: 2692 if (!checkonly) { 2693 icmp->icmp_ipv6_recvpathmtu = onoff; 2694 PASS_OPT_TO_IP(connp); 2695 } 2696 break; 2697 case IPV6_RECVHOPLIMIT: 2698 if (!checkonly) { 2699 icmp->icmp_ipv6_recvhoplimit = onoff; 2700 PASS_OPT_TO_IP(connp); 2701 } 2702 break; 2703 case 
IPV6_RECVHOPOPTS: 2704 if (!checkonly) { 2705 icmp->icmp_ipv6_recvhopopts = onoff; 2706 PASS_OPT_TO_IP(connp); 2707 } 2708 break; 2709 case IPV6_RECVDSTOPTS: 2710 if (!checkonly) { 2711 icmp->icmp_ipv6_recvdstopts = onoff; 2712 PASS_OPT_TO_IP(connp); 2713 } 2714 break; 2715 case _OLD_IPV6_RECVDSTOPTS: 2716 if (!checkonly) 2717 icmp->icmp_old_ipv6_recvdstopts = onoff; 2718 break; 2719 case IPV6_RECVRTHDRDSTOPTS: 2720 if (!checkonly) { 2721 icmp->icmp_ipv6_recvrtdstopts = onoff; 2722 PASS_OPT_TO_IP(connp); 2723 } 2724 break; 2725 case IPV6_RECVRTHDR: 2726 if (!checkonly) { 2727 icmp->icmp_ipv6_recvrthdr = onoff; 2728 PASS_OPT_TO_IP(connp); 2729 } 2730 break; 2731 /* 2732 * Set sticky options or ancillary data. 2733 * If sticky options, (re)build any extension headers 2734 * that might be needed as a result. 2735 */ 2736 case IPV6_PKTINFO: 2737 /* 2738 * The source address and ifindex are verified 2739 * in ip_opt_set(). For ancillary data the 2740 * source address is checked in ip_wput_v6. 2741 */ 2742 if (inlen != 0 && inlen != 2743 sizeof (struct in6_pktinfo)) { 2744 return (EINVAL); 2745 } 2746 if (checkonly) 2747 break; 2748 2749 if (inlen == 0) { 2750 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2751 ipp->ipp_sticky_ignored |= 2752 (IPPF_IFINDEX|IPPF_ADDR); 2753 } else { 2754 struct in6_pktinfo *pkti; 2755 2756 pkti = (struct in6_pktinfo *)invalp; 2757 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2758 ipp->ipp_addr = pkti->ipi6_addr; 2759 if (ipp->ipp_ifindex != 0) 2760 ipp->ipp_fields |= IPPF_IFINDEX; 2761 else 2762 ipp->ipp_fields &= ~IPPF_IFINDEX; 2763 if (!IN6_IS_ADDR_UNSPECIFIED( 2764 &ipp->ipp_addr)) 2765 ipp->ipp_fields |= IPPF_ADDR; 2766 else 2767 ipp->ipp_fields &= ~IPPF_ADDR; 2768 } 2769 if (sticky) { 2770 error = icmp_build_hdrs(icmp); 2771 if (error != 0) 2772 return (error); 2773 PASS_OPT_TO_IP(connp); 2774 } 2775 break; 2776 case IPV6_HOPLIMIT: 2777 /* This option can only be used as ancillary data. 
*/ 2778 if (sticky) 2779 return (EINVAL); 2780 if (inlen != 0 && inlen != sizeof (int)) 2781 return (EINVAL); 2782 if (checkonly) 2783 break; 2784 2785 if (inlen == 0) { 2786 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2787 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2788 } else { 2789 if (*i1 > 255 || *i1 < -1) 2790 return (EINVAL); 2791 if (*i1 == -1) 2792 ipp->ipp_hoplimit = 2793 is->is_ipv6_hoplimit; 2794 else 2795 ipp->ipp_hoplimit = *i1; 2796 ipp->ipp_fields |= IPPF_HOPLIMIT; 2797 } 2798 break; 2799 case IPV6_TCLASS: 2800 /* 2801 * IPV6_RECVTCLASS accepts -1 as use kernel default 2802 * and [0, 255] as the actualy traffic class. 2803 */ 2804 if (inlen != 0 && inlen != sizeof (int)) { 2805 return (EINVAL); 2806 } 2807 if (checkonly) 2808 break; 2809 2810 if (inlen == 0) { 2811 ipp->ipp_fields &= ~IPPF_TCLASS; 2812 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2813 } else { 2814 if (*i1 >= 256 || *i1 < -1) 2815 return (EINVAL); 2816 if (*i1 == -1) { 2817 ipp->ipp_tclass = 2818 IPV6_FLOW_TCLASS( 2819 IPV6_DEFAULT_VERS_AND_FLOW); 2820 } else { 2821 ipp->ipp_tclass = *i1; 2822 } 2823 ipp->ipp_fields |= IPPF_TCLASS; 2824 } 2825 if (sticky) { 2826 error = icmp_build_hdrs(icmp); 2827 if (error != 0) 2828 return (error); 2829 } 2830 break; 2831 case IPV6_NEXTHOP: 2832 /* 2833 * IP will verify that the nexthop is reachable 2834 * and fail for sticky options. 
2835 */ 2836 if (inlen != 0 && inlen != sizeof (sin6_t)) { 2837 return (EINVAL); 2838 } 2839 if (checkonly) 2840 break; 2841 2842 if (inlen == 0) { 2843 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2844 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2845 } else { 2846 sin6_t *sin6 = (sin6_t *)invalp; 2847 2848 if (sin6->sin6_family != AF_INET6) { 2849 return (EAFNOSUPPORT); 2850 } 2851 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 2852 return (EADDRNOTAVAIL); 2853 } 2854 ipp->ipp_nexthop = sin6->sin6_addr; 2855 if (!IN6_IS_ADDR_UNSPECIFIED( 2856 &ipp->ipp_nexthop)) 2857 ipp->ipp_fields |= IPPF_NEXTHOP; 2858 else 2859 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2860 } 2861 if (sticky) { 2862 error = icmp_build_hdrs(icmp); 2863 if (error != 0) 2864 return (error); 2865 PASS_OPT_TO_IP(connp); 2866 } 2867 break; 2868 case IPV6_HOPOPTS: { 2869 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2870 /* 2871 * Sanity checks - minimum size, size a multiple of 2872 * eight bytes, and matching size passed in. 2873 */ 2874 if (inlen != 0 && 2875 inlen != (8 * (hopts->ip6h_len + 1))) { 2876 return (EINVAL); 2877 } 2878 2879 if (checkonly) 2880 break; 2881 error = optcom_pkt_set(invalp, inlen, sticky, 2882 (uchar_t **)&ipp->ipp_hopopts, 2883 &ipp->ipp_hopoptslen, 2884 sticky ? icmp->icmp_label_len_v6 : 0); 2885 if (error != 0) 2886 return (error); 2887 if (ipp->ipp_hopoptslen == 0) { 2888 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2889 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2890 } else { 2891 ipp->ipp_fields |= IPPF_HOPOPTS; 2892 } 2893 if (sticky) { 2894 error = icmp_build_hdrs(icmp); 2895 if (error != 0) 2896 return (error); 2897 } 2898 break; 2899 } 2900 case IPV6_RTHDRDSTOPTS: { 2901 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2902 2903 /* 2904 * Sanity checks - minimum size, size a multiple of 2905 * eight bytes, and matching size passed in. 
2906 */ 2907 if (inlen != 0 && 2908 inlen != (8 * (dopts->ip6d_len + 1))) 2909 return (EINVAL); 2910 2911 if (checkonly) 2912 break; 2913 2914 if (inlen == 0) { 2915 if (sticky && 2916 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2917 kmem_free(ipp->ipp_rtdstopts, 2918 ipp->ipp_rtdstoptslen); 2919 ipp->ipp_rtdstopts = NULL; 2920 ipp->ipp_rtdstoptslen = 0; 2921 } 2922 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2923 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2924 } else { 2925 error = optcom_pkt_set(invalp, inlen, sticky, 2926 (uchar_t **)&ipp->ipp_rtdstopts, 2927 &ipp->ipp_rtdstoptslen, 0); 2928 if (error != 0) 2929 return (error); 2930 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2931 } 2932 if (sticky) { 2933 error = icmp_build_hdrs(icmp); 2934 if (error != 0) 2935 return (error); 2936 } 2937 break; 2938 } 2939 case IPV6_DSTOPTS: { 2940 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2941 2942 /* 2943 * Sanity checks - minimum size, size a multiple of 2944 * eight bytes, and matching size passed in. 2945 */ 2946 if (inlen != 0 && 2947 inlen != (8 * (dopts->ip6d_len + 1))) 2948 return (EINVAL); 2949 2950 if (checkonly) 2951 break; 2952 2953 if (inlen == 0) { 2954 if (sticky && 2955 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2956 kmem_free(ipp->ipp_dstopts, 2957 ipp->ipp_dstoptslen); 2958 ipp->ipp_dstopts = NULL; 2959 ipp->ipp_dstoptslen = 0; 2960 } 2961 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2962 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2963 } else { 2964 error = optcom_pkt_set(invalp, inlen, sticky, 2965 (uchar_t **)&ipp->ipp_dstopts, 2966 &ipp->ipp_dstoptslen, 0); 2967 if (error != 0) 2968 return (error); 2969 ipp->ipp_fields |= IPPF_DSTOPTS; 2970 } 2971 if (sticky) { 2972 error = icmp_build_hdrs(icmp); 2973 if (error != 0) 2974 return (error); 2975 } 2976 break; 2977 } 2978 case IPV6_RTHDR: { 2979 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2980 2981 /* 2982 * Sanity checks - minimum size, size a multiple of 2983 * eight bytes, and matching size passed in. 
2984 */ 2985 if (inlen != 0 && 2986 inlen != (8 * (rt->ip6r_len + 1))) 2987 return (EINVAL); 2988 2989 if (checkonly) 2990 break; 2991 2992 if (inlen == 0) { 2993 if (sticky && 2994 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2995 kmem_free(ipp->ipp_rthdr, 2996 ipp->ipp_rthdrlen); 2997 ipp->ipp_rthdr = NULL; 2998 ipp->ipp_rthdrlen = 0; 2999 } 3000 ipp->ipp_fields &= ~IPPF_RTHDR; 3001 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3002 } else { 3003 error = optcom_pkt_set(invalp, inlen, sticky, 3004 (uchar_t **)&ipp->ipp_rthdr, 3005 &ipp->ipp_rthdrlen, 0); 3006 if (error != 0) 3007 return (error); 3008 ipp->ipp_fields |= IPPF_RTHDR; 3009 } 3010 if (sticky) { 3011 error = icmp_build_hdrs(icmp); 3012 if (error != 0) 3013 return (error); 3014 } 3015 break; 3016 } 3017 3018 case IPV6_DONTFRAG: 3019 if (checkonly) 3020 break; 3021 3022 if (onoff) { 3023 ipp->ipp_fields |= IPPF_DONTFRAG; 3024 } else { 3025 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3026 } 3027 break; 3028 3029 case IPV6_USE_MIN_MTU: 3030 if (inlen != sizeof (int)) 3031 return (EINVAL); 3032 3033 if (*i1 < -1 || *i1 > 1) 3034 return (EINVAL); 3035 3036 if (checkonly) 3037 break; 3038 3039 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3040 ipp->ipp_use_min_mtu = *i1; 3041 break; 3042 3043 /* 3044 * This option can't be set. Its only returned via 3045 * getsockopt() or ancillary data. 3046 */ 3047 case IPV6_PATHMTU: 3048 return (EINVAL); 3049 3050 case IPV6_SEC_OPT: 3051 case IPV6_SRC_PREFERENCES: 3052 case IPV6_V6ONLY: 3053 /* Handled at IP level */ 3054 return (-EINVAL); 3055 default: 3056 *outlenp = 0; 3057 return (EINVAL); 3058 } 3059 break; 3060 } /* end IPPROTO_IPV6 */ 3061 3062 case IPPROTO_ICMPV6: 3063 /* 3064 * Only allow IPv6 option processing on IPv6 sockets. 
3065 */ 3066 if (icmp->icmp_family != AF_INET6) { 3067 *outlenp = 0; 3068 return (ENOPROTOOPT); 3069 } 3070 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 3071 *outlenp = 0; 3072 return (ENOPROTOOPT); 3073 } 3074 switch (name) { 3075 case ICMP6_FILTER: 3076 if (!checkonly) { 3077 if ((inlen != 0) && 3078 (inlen != sizeof (icmp6_filter_t))) 3079 return (EINVAL); 3080 3081 if (inlen == 0) { 3082 if (icmp->icmp_filter != NULL) { 3083 kmem_free(icmp->icmp_filter, 3084 sizeof (icmp6_filter_t)); 3085 icmp->icmp_filter = NULL; 3086 } 3087 } else { 3088 if (icmp->icmp_filter == NULL) { 3089 icmp->icmp_filter = kmem_alloc( 3090 sizeof (icmp6_filter_t), 3091 KM_NOSLEEP); 3092 if (icmp->icmp_filter == NULL) { 3093 *outlenp = 0; 3094 return (ENOBUFS); 3095 } 3096 } 3097 (void) bcopy(invalp, icmp->icmp_filter, 3098 inlen); 3099 } 3100 } 3101 break; 3102 3103 default: 3104 *outlenp = 0; 3105 return (EINVAL); 3106 } 3107 break; 3108 default: 3109 *outlenp = 0; 3110 return (EINVAL); 3111 } 3112 /* 3113 * Common case of OK return with outval same as inval. 3114 */ 3115 if (invalp != outvalp) { 3116 /* don't trust bcopy for identical src/dst */ 3117 (void) bcopy(invalp, outvalp, inlen); 3118 } 3119 *outlenp = inlen; 3120 return (0); 3121 } 3122 3123 /* This routine sets socket options. */ 3124 /* ARGSUSED */ 3125 int 3126 icmp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3127 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3128 void *thisdg_attrs, cred_t *cr) 3129 { 3130 boolean_t checkonly; 3131 int error; 3132 3133 error = 0; 3134 switch (optset_context) { 3135 case SETFN_OPTCOM_CHECKONLY: 3136 checkonly = B_TRUE; 3137 /* 3138 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3139 * inlen != 0 implies value supplied and 3140 * we have to "pretend" to set it. 3141 * inlen == 0 implies that there is no 3142 * value part in T_CHECK request and just validation 3143 * done elsewhere should be enough, we just return here. 
3144 */ 3145 if (inlen == 0) { 3146 *outlenp = 0; 3147 error = 0; 3148 goto done; 3149 } 3150 break; 3151 case SETFN_OPTCOM_NEGOTIATE: 3152 checkonly = B_FALSE; 3153 break; 3154 case SETFN_UD_NEGOTIATE: 3155 case SETFN_CONN_NEGOTIATE: 3156 checkonly = B_FALSE; 3157 /* 3158 * Negotiating local and "association-related" options 3159 * through T_UNITDATA_REQ. 3160 * 3161 * Following routine can filter out ones we do not 3162 * want to be "set" this way. 3163 */ 3164 if (!icmp_opt_allow_udr_set(level, name)) { 3165 *outlenp = 0; 3166 error = EINVAL; 3167 goto done; 3168 } 3169 break; 3170 default: 3171 /* 3172 * We should never get here 3173 */ 3174 *outlenp = 0; 3175 error = EINVAL; 3176 goto done; 3177 } 3178 3179 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3180 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3181 error = icmp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3182 outvalp, cr, thisdg_attrs, checkonly); 3183 3184 done: 3185 return (error); 3186 } 3187 3188 /* This routine sets socket options. */ 3189 /* ARGSUSED */ 3190 int 3191 icmp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3192 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3193 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3194 { 3195 conn_t *connp = Q_TO_CONN(q); 3196 icmp_t *icmp; 3197 int error; 3198 3199 icmp = connp->conn_icmp; 3200 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3201 error = icmp_opt_set(connp, optset_context, level, name, inlen, invalp, 3202 outlenp, outvalp, thisdg_attrs, cr); 3203 rw_exit(&icmp->icmp_rwlock); 3204 return (error); 3205 } 3206 3207 /* 3208 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 3209 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 3210 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 3211 * headers. 3212 * Returns failure if can't allocate memory. 
3213 */ 3214 static int 3215 icmp_build_hdrs(icmp_t *icmp) 3216 { 3217 icmp_stack_t *is = icmp->icmp_is; 3218 uchar_t *hdrs; 3219 uint_t hdrs_len; 3220 ip6_t *ip6h; 3221 ip6i_t *ip6i; 3222 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 3223 3224 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 3225 hdrs_len = ip_total_hdrs_len_v6(ipp); 3226 ASSERT(hdrs_len != 0); 3227 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 3228 /* Need to reallocate */ 3229 if (hdrs_len != 0) { 3230 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3231 if (hdrs == NULL) 3232 return (ENOMEM); 3233 } else { 3234 hdrs = NULL; 3235 } 3236 if (icmp->icmp_sticky_hdrs_len != 0) { 3237 kmem_free(icmp->icmp_sticky_hdrs, 3238 icmp->icmp_sticky_hdrs_len); 3239 } 3240 icmp->icmp_sticky_hdrs = hdrs; 3241 icmp->icmp_sticky_hdrs_len = hdrs_len; 3242 } 3243 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 3244 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 3245 3246 /* Set header fields not in ipp */ 3247 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3248 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 3249 ip6h = (ip6_t *)&ip6i[1]; 3250 3251 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 3252 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 3253 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 3254 } 3255 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 3256 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 3257 } 3258 } else { 3259 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 3260 } 3261 3262 if (!(ipp->ipp_fields & IPPF_ADDR)) 3263 ip6h->ip6_src = icmp->icmp_v6src; 3264 3265 /* Try to get everything in a single mblk */ 3266 if (hdrs_len > icmp->icmp_max_hdr_len) { 3267 icmp->icmp_max_hdr_len = hdrs_len; 3268 rw_exit(&icmp->icmp_rwlock); 3269 (void) proto_set_tx_wroff(icmp->icmp_connp->conn_rq, 3270 icmp->icmp_connp, 3271 icmp->icmp_max_hdr_len + is->is_wroff_extra); 3272 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3273 } 3274 return (0); 3275 } 3276 3277 /* 3278 * This routine retrieves the value of an ND variable in a icmpparam_t 3279 * structure. 
It is called through nd_getset when a user reads the 3280 * variable. 3281 */ 3282 /* ARGSUSED */ 3283 static int 3284 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3285 { 3286 icmpparam_t *icmppa = (icmpparam_t *)cp; 3287 3288 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 3289 return (0); 3290 } 3291 3292 /* 3293 * Walk through the param array specified registering each element with the 3294 * named dispatch (ND) handler. 3295 */ 3296 static boolean_t 3297 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) 3298 { 3299 for (; cnt-- > 0; icmppa++) { 3300 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 3301 if (!nd_load(ndp, icmppa->icmp_param_name, 3302 icmp_param_get, icmp_param_set, 3303 (caddr_t)icmppa)) { 3304 nd_free(ndp); 3305 return (B_FALSE); 3306 } 3307 } 3308 } 3309 if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL, 3310 NULL)) { 3311 nd_free(ndp); 3312 return (B_FALSE); 3313 } 3314 return (B_TRUE); 3315 } 3316 3317 /* This routine sets an ND variable in a icmpparam_t structure. */ 3318 /* ARGSUSED */ 3319 static int 3320 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3321 { 3322 long new_value; 3323 icmpparam_t *icmppa = (icmpparam_t *)cp; 3324 3325 /* 3326 * Fail the request if the new value does not lie within the 3327 * required bounds. 
3328 */ 3329 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3330 new_value < icmppa->icmp_param_min || 3331 new_value > icmppa->icmp_param_max) { 3332 return (EINVAL); 3333 } 3334 /* Set the new value */ 3335 icmppa->icmp_param_value = new_value; 3336 return (0); 3337 } 3338 3339 static mblk_t * 3340 icmp_queue_fallback(icmp_t *icmp, mblk_t *mp) 3341 { 3342 ASSERT(MUTEX_HELD(&icmp->icmp_recv_lock)); 3343 if (IPCL_IS_NONSTR(icmp->icmp_connp)) { 3344 /* 3345 * fallback has started but messages have not been moved yet 3346 */ 3347 if (icmp->icmp_fallback_queue_head == NULL) { 3348 ASSERT(icmp->icmp_fallback_queue_tail == NULL); 3349 icmp->icmp_fallback_queue_head = mp; 3350 icmp->icmp_fallback_queue_tail = mp; 3351 } else { 3352 ASSERT(icmp->icmp_fallback_queue_tail != NULL); 3353 icmp->icmp_fallback_queue_tail->b_next = mp; 3354 icmp->icmp_fallback_queue_tail = mp; 3355 } 3356 return (NULL); 3357 } else { 3358 /* 3359 * Fallback completed, let the caller putnext() the mblk. 3360 */ 3361 return (mp); 3362 } 3363 } 3364 3365 /* 3366 * Deliver data to ULP. In case we have a socket, and it's falling back to 3367 * TPI, then we'll queue the mp for later processing. 
3368 */ 3369 static void 3370 icmp_ulp_recv(conn_t *connp, mblk_t *mp) 3371 { 3372 3373 if (IPCL_IS_NONSTR(connp)) { 3374 icmp_t *icmp = connp->conn_icmp; 3375 int error; 3376 3377 if ((*connp->conn_upcalls->su_recv) 3378 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 3379 NULL) < 0) { 3380 mutex_enter(&icmp->icmp_recv_lock); 3381 if (error == ENOSPC) { 3382 /* 3383 * let's confirm while holding the lock 3384 */ 3385 if ((*connp->conn_upcalls->su_recv) 3386 (connp->conn_upper_handle, NULL, 0, 0, 3387 &error, NULL) < 0) { 3388 ASSERT(error == ENOSPC); 3389 if (error == ENOSPC) { 3390 connp->conn_flow_cntrld = 3391 B_TRUE; 3392 } 3393 } 3394 mutex_exit(&icmp->icmp_recv_lock); 3395 } else { 3396 ASSERT(error == EOPNOTSUPP); 3397 mp = icmp_queue_fallback(icmp, mp); 3398 mutex_exit(&icmp->icmp_recv_lock); 3399 if (mp != NULL) 3400 putnext(connp->conn_rq, mp); 3401 } 3402 } 3403 ASSERT(MUTEX_NOT_HELD(&icmp->icmp_recv_lock)); 3404 } else { 3405 putnext(connp->conn_rq, mp); 3406 } 3407 } 3408 3409 /*ARGSUSED2*/ 3410 static void 3411 icmp_input(void *arg1, mblk_t *mp, void *arg2) 3412 { 3413 conn_t *connp = (conn_t *)arg1; 3414 struct T_unitdata_ind *tudi; 3415 uchar_t *rptr; 3416 icmp_t *icmp; 3417 icmp_stack_t *is; 3418 sin_t *sin; 3419 sin6_t *sin6; 3420 ip6_t *ip6h; 3421 ip6i_t *ip6i; 3422 mblk_t *mp1; 3423 int hdr_len; 3424 ipha_t *ipha; 3425 int udi_size; /* Size of T_unitdata_ind */ 3426 uint_t ipvers; 3427 ip6_pkt_t ipp; 3428 uint8_t nexthdr; 3429 ip_pktinfo_t *pinfo = NULL; 3430 mblk_t *options_mp = NULL; 3431 uint_t icmp_opt = 0; 3432 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 3433 uint_t hopstrip; 3434 3435 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 3436 3437 icmp = connp->conn_icmp; 3438 is = icmp->icmp_is; 3439 rptr = mp->b_rptr; 3440 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3441 ASSERT(OK_32PTR(rptr)); 3442 3443 /* 3444 * IP should have prepended the options data in an M_CTL 3445 * Check M_CTL "type" to make sure are not here bcos of 
3446 * a valid ICMP message 3447 */ 3448 if (DB_TYPE(mp) == M_CTL) { 3449 /* 3450 * FIXME: does IP still do this? 3451 * IP sends up the IPSEC_IN message for handling IPSEC 3452 * policy at the TCP level. We don't need it here. 3453 */ 3454 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 3455 mp1 = mp->b_cont; 3456 freeb(mp); 3457 mp = mp1; 3458 rptr = mp->b_rptr; 3459 } else if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3460 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3461 IN_PKTINFO) { 3462 /* 3463 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3464 * has been prepended to the packet by IP. We need to 3465 * extract the mblk and adjust the rptr 3466 */ 3467 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3468 options_mp = mp; 3469 mp = mp->b_cont; 3470 rptr = mp->b_rptr; 3471 } else { 3472 /* 3473 * ICMP messages. 3474 */ 3475 icmp_icmp_error(connp, mp); 3476 return; 3477 } 3478 } 3479 3480 /* 3481 * Discard message if it is misaligned or smaller than the IP header. 3482 */ 3483 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3484 freemsg(mp); 3485 if (options_mp != NULL) 3486 freeb(options_mp); 3487 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3488 return; 3489 } 3490 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3491 3492 /* Handle M_DATA messages containing IP packets messages */ 3493 if (ipvers == IPV4_VERSION) { 3494 /* 3495 * Special case where IP attaches 3496 * the IRE needs to be handled so that we don't send up 3497 * IRE to the user land. 
3498 */ 3499 ipha = (ipha_t *)rptr; 3500 hdr_len = IPH_HDR_LENGTH(ipha); 3501 3502 if (ipha->ipha_protocol == IPPROTO_TCP) { 3503 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3504 3505 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3506 TH_SYN) && mp->b_cont != NULL) { 3507 mp1 = mp->b_cont; 3508 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3509 freeb(mp1); 3510 mp->b_cont = NULL; 3511 } 3512 } 3513 } 3514 if (is->is_bsd_compat) { 3515 ushort_t len; 3516 len = ntohs(ipha->ipha_length); 3517 3518 if (mp->b_datap->db_ref > 1) { 3519 /* 3520 * Allocate a new IP header so that we can 3521 * modify ipha_length. 3522 */ 3523 mblk_t *mp1; 3524 3525 mp1 = allocb(hdr_len, BPRI_MED); 3526 if (!mp1) { 3527 freemsg(mp); 3528 if (options_mp != NULL) 3529 freeb(options_mp); 3530 BUMP_MIB(&is->is_rawip_mib, 3531 rawipInErrors); 3532 return; 3533 } 3534 bcopy(rptr, mp1->b_rptr, hdr_len); 3535 mp->b_rptr = rptr + hdr_len; 3536 rptr = mp1->b_rptr; 3537 ipha = (ipha_t *)rptr; 3538 mp1->b_cont = mp; 3539 mp1->b_wptr = rptr + hdr_len; 3540 mp = mp1; 3541 } 3542 len -= hdr_len; 3543 ipha->ipha_length = htons(len); 3544 } 3545 } 3546 3547 /* 3548 * This is the inbound data path. Packets are passed upstream as 3549 * T_UNITDATA_IND messages with full IP headers still attached. 3550 */ 3551 if (icmp->icmp_family == AF_INET) { 3552 ASSERT(ipvers == IPV4_VERSION); 3553 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3554 if (icmp->icmp_recvif && (pinfo != NULL) && 3555 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3556 udi_size += sizeof (struct T_opthdr) + 3557 sizeof (uint_t); 3558 } 3559 3560 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3561 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3562 udi_size += sizeof (struct T_opthdr) + 3563 sizeof (struct in_pktinfo); 3564 } 3565 3566 /* 3567 * If SO_TIMESTAMP is set allocate the appropriate sized 3568 * buffer. 
Since gethrestime() expects a pointer aligned 3569 * argument, we allocate space necessary for extra 3570 * alignment (even though it might not be used). 3571 */ 3572 if (icmp->icmp_timestamp) { 3573 udi_size += sizeof (struct T_opthdr) + 3574 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3575 } 3576 mp1 = allocb(udi_size, BPRI_MED); 3577 if (mp1 == NULL) { 3578 freemsg(mp); 3579 if (options_mp != NULL) 3580 freeb(options_mp); 3581 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3582 return; 3583 } 3584 mp1->b_cont = mp; 3585 mp = mp1; 3586 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3587 mp->b_datap->db_type = M_PROTO; 3588 mp->b_wptr = (uchar_t *)tudi + udi_size; 3589 tudi->PRIM_type = T_UNITDATA_IND; 3590 tudi->SRC_length = sizeof (sin_t); 3591 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3592 sin = (sin_t *)&tudi[1]; 3593 *sin = sin_null; 3594 sin->sin_family = AF_INET; 3595 sin->sin_addr.s_addr = ipha->ipha_src; 3596 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3597 sizeof (sin_t); 3598 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3599 tudi->OPT_length = udi_size; 3600 3601 /* 3602 * Add options if IP_RECVIF is set 3603 */ 3604 if (udi_size != 0) { 3605 char *dstopt; 3606 3607 dstopt = (char *)&sin[1]; 3608 if (icmp->icmp_recvif && (pinfo != NULL) && 3609 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3610 3611 struct T_opthdr *toh; 3612 uint_t *dstptr; 3613 3614 toh = (struct T_opthdr *)dstopt; 3615 toh->level = IPPROTO_IP; 3616 toh->name = IP_RECVIF; 3617 toh->len = sizeof (struct T_opthdr) + 3618 sizeof (uint_t); 3619 toh->status = 0; 3620 dstopt += sizeof (struct T_opthdr); 3621 dstptr = (uint_t *)dstopt; 3622 *dstptr = pinfo->ip_pkt_ifindex; 3623 dstopt += sizeof (uint_t); 3624 udi_size -= toh->len; 3625 } 3626 if (icmp->icmp_timestamp) { 3627 struct T_opthdr *toh; 3628 3629 toh = (struct T_opthdr *)dstopt; 3630 toh->level = SOL_SOCKET; 3631 toh->name = SCM_TIMESTAMP; 3632 toh->len = sizeof (struct T_opthdr) + 3633 sizeof (timestruc_t) + 
_POINTER_ALIGNMENT; 3634 toh->status = 0; 3635 dstopt += sizeof (struct T_opthdr); 3636 /* Align for gethrestime() */ 3637 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 3638 sizeof (intptr_t)); 3639 gethrestime((timestruc_t *)dstopt); 3640 dstopt = (char *)toh + toh->len; 3641 udi_size -= toh->len; 3642 } 3643 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3644 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3645 struct T_opthdr *toh; 3646 struct in_pktinfo *pktinfop; 3647 3648 toh = (struct T_opthdr *)dstopt; 3649 toh->level = IPPROTO_IP; 3650 toh->name = IP_PKTINFO; 3651 toh->len = sizeof (struct T_opthdr) + 3652 sizeof (in_pktinfo_t); 3653 toh->status = 0; 3654 dstopt += sizeof (struct T_opthdr); 3655 pktinfop = (struct in_pktinfo *)dstopt; 3656 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3657 pktinfop->ipi_spec_dst = 3658 pinfo->ip_pkt_match_addr; 3659 3660 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3661 3662 dstopt += sizeof (struct in_pktinfo); 3663 udi_size -= toh->len; 3664 } 3665 3666 /* Consumed all of allocated space */ 3667 ASSERT(udi_size == 0); 3668 } 3669 3670 if (options_mp != NULL) 3671 freeb(options_mp); 3672 3673 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3674 goto deliver; 3675 } 3676 3677 /* 3678 * We don't need options_mp in the IPv6 path. 3679 */ 3680 if (options_mp != NULL) { 3681 freeb(options_mp); 3682 options_mp = NULL; 3683 } 3684 3685 /* 3686 * Discard message if it is smaller than the IPv6 header 3687 * or if the header is malformed. 3688 */ 3689 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3690 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3691 icmp->icmp_family != AF_INET6) { 3692 freemsg(mp); 3693 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3694 return; 3695 } 3696 3697 /* Initialize */ 3698 ipp.ipp_fields = 0; 3699 hopstrip = 0; 3700 3701 ip6h = (ip6_t *)rptr; 3702 /* 3703 * Call on ip_find_hdr_v6 which gets the total hdr len 3704 * as well as individual lenghts of ext hdrs (and ptrs to 3705 * them). 
3706 */ 3707 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3708 /* Look for ifindex information */ 3709 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3710 ip6i = (ip6i_t *)ip6h; 3711 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3712 ASSERT(ip6i->ip6i_ifindex != 0); 3713 ipp.ipp_fields |= IPPF_IFINDEX; 3714 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3715 } 3716 rptr = (uchar_t *)&ip6i[1]; 3717 mp->b_rptr = rptr; 3718 if (rptr == mp->b_wptr) { 3719 mp1 = mp->b_cont; 3720 freeb(mp); 3721 mp = mp1; 3722 rptr = mp->b_rptr; 3723 } 3724 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3725 ip6h = (ip6_t *)rptr; 3726 } 3727 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3728 3729 /* 3730 * We need to lie a bit to the user because users inside 3731 * labeled compartments should not see their own labels. We 3732 * assume that in all other respects IP has checked the label, 3733 * and that the label is always first among the options. (If 3734 * it's not first, then this code won't see it, and the option 3735 * will be passed along to the user.) 3736 * 3737 * If we had multilevel ICMP sockets, then the following code 3738 * should be skipped for them to allow the user to see the 3739 * label. 3740 * 3741 * Alignment restrictions in the definition of IP options 3742 * (namely, the requirement that the 4-octet DOI goes on a 3743 * 4-octet boundary) mean that we know exactly where the option 3744 * should start, but we're lenient for other hosts. 3745 * 3746 * Note that there are no multilevel ICMP or raw IP sockets 3747 * yet, thus nobody ever sees the IP6OPT_LS option. 
3748 */ 3749 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3750 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3751 const uchar_t *ucp = 3752 (const uchar_t *)ipp.ipp_hopopts + 2; 3753 int remlen = ipp.ipp_hopoptslen - 2; 3754 3755 while (remlen > 0) { 3756 if (*ucp == IP6OPT_PAD1) { 3757 remlen--; 3758 ucp++; 3759 } else if (*ucp == IP6OPT_PADN) { 3760 remlen -= ucp[1] + 2; 3761 ucp += ucp[1] + 2; 3762 } else if (*ucp == ip6opt_ls) { 3763 hopstrip = (ucp - 3764 (const uchar_t *)ipp.ipp_hopopts) + 3765 ucp[1] + 2; 3766 hopstrip = (hopstrip + 7) & ~7; 3767 break; 3768 } else { 3769 /* label option must be first */ 3770 break; 3771 } 3772 } 3773 } 3774 } else { 3775 hdr_len = IPV6_HDR_LEN; 3776 ip6i = NULL; 3777 nexthdr = ip6h->ip6_nxt; 3778 } 3779 /* 3780 * One special case where IP attaches the IRE needs to 3781 * be handled so that we don't send up IRE to the user land. 3782 */ 3783 if (nexthdr == IPPROTO_TCP) { 3784 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3785 3786 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3787 mp->b_cont != NULL) { 3788 mp1 = mp->b_cont; 3789 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3790 freeb(mp1); 3791 mp->b_cont = NULL; 3792 } 3793 } 3794 } 3795 /* 3796 * Check a filter for ICMPv6 types if needed. 3797 * Verify raw checksums if needed. 
3798 */ 3799 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3800 if (icmp->icmp_filter != NULL) { 3801 int type; 3802 3803 /* Assumes that IP has done the pullupmsg */ 3804 type = mp->b_rptr[hdr_len]; 3805 3806 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3807 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3808 freemsg(mp); 3809 return; 3810 } 3811 } else { 3812 /* Checksum */ 3813 uint16_t *up; 3814 uint32_t sum; 3815 int remlen; 3816 3817 up = (uint16_t *)&ip6h->ip6_src; 3818 3819 remlen = msgdsize(mp) - hdr_len; 3820 sum = htons(icmp->icmp_proto + remlen) 3821 + up[0] + up[1] + up[2] + up[3] 3822 + up[4] + up[5] + up[6] + up[7] 3823 + up[8] + up[9] + up[10] + up[11] 3824 + up[12] + up[13] + up[14] + up[15]; 3825 sum = (sum & 0xffff) + (sum >> 16); 3826 sum = IP_CSUM(mp, hdr_len, sum); 3827 if (sum != 0) { 3828 /* IPv6 RAW checksum failed */ 3829 ip0dbg(("icmp_rput: RAW checksum " 3830 "failed %x\n", sum)); 3831 freemsg(mp); 3832 BUMP_MIB(&is->is_rawip_mib, 3833 rawipInCksumErrs); 3834 return; 3835 } 3836 } 3837 } 3838 /* Skip all the IPv6 headers per API */ 3839 mp->b_rptr += hdr_len; 3840 3841 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3842 3843 /* 3844 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3845 * maintain state information, instead of relying on icmp_t 3846 * structure, since there arent any locks protecting these members 3847 * and there is a window where there might be a race between a 3848 * thread setting options on the write side and a thread reading 3849 * these options on the read size. 
3850 */ 3851 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3852 IPPF_RTHDR|IPPF_IFINDEX)) { 3853 if (icmp->icmp_ipv6_recvhopopts && 3854 (ipp.ipp_fields & IPPF_HOPOPTS) && 3855 ipp.ipp_hopoptslen > hopstrip) { 3856 udi_size += sizeof (struct T_opthdr) + 3857 ipp.ipp_hopoptslen - hopstrip; 3858 icmp_opt |= IPPF_HOPOPTS; 3859 } 3860 if ((icmp->icmp_ipv6_recvdstopts || 3861 icmp->icmp_old_ipv6_recvdstopts) && 3862 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3863 udi_size += sizeof (struct T_opthdr) + 3864 ipp.ipp_dstoptslen; 3865 icmp_opt |= IPPF_DSTOPTS; 3866 } 3867 if (((icmp->icmp_ipv6_recvdstopts && 3868 icmp->icmp_ipv6_recvrthdr && 3869 (ipp.ipp_fields & IPPF_RTHDR)) || 3870 icmp->icmp_ipv6_recvrtdstopts) && 3871 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3872 udi_size += sizeof (struct T_opthdr) + 3873 ipp.ipp_rtdstoptslen; 3874 icmp_opt |= IPPF_RTDSTOPTS; 3875 } 3876 if (icmp->icmp_ipv6_recvrthdr && 3877 (ipp.ipp_fields & IPPF_RTHDR)) { 3878 udi_size += sizeof (struct T_opthdr) + 3879 ipp.ipp_rthdrlen; 3880 icmp_opt |= IPPF_RTHDR; 3881 } 3882 if (icmp->icmp_ip_recvpktinfo && 3883 (ipp.ipp_fields & IPPF_IFINDEX)) { 3884 udi_size += sizeof (struct T_opthdr) + 3885 sizeof (struct in6_pktinfo); 3886 icmp_opt |= IPPF_IFINDEX; 3887 } 3888 } 3889 if (icmp->icmp_ipv6_recvhoplimit) { 3890 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3891 icmp_ipv6_recvhoplimit = B_TRUE; 3892 } 3893 3894 if (icmp->icmp_ipv6_recvtclass) 3895 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3896 3897 /* 3898 * If SO_TIMESTAMP is set allocate the appropriate sized 3899 * buffer. Since gethrestime() expects a pointer aligned 3900 * argument, we allocate space necessary for extra 3901 * alignment (even though it might not be used). 
3902 */ 3903 if (icmp->icmp_timestamp) { 3904 udi_size += sizeof (struct T_opthdr) + 3905 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3906 } 3907 3908 mp1 = allocb(udi_size, BPRI_MED); 3909 if (mp1 == NULL) { 3910 freemsg(mp); 3911 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3912 return; 3913 } 3914 mp1->b_cont = mp; 3915 mp = mp1; 3916 mp->b_datap->db_type = M_PROTO; 3917 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3918 mp->b_wptr = (uchar_t *)tudi + udi_size; 3919 tudi->PRIM_type = T_UNITDATA_IND; 3920 tudi->SRC_length = sizeof (sin6_t); 3921 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3922 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3923 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3924 tudi->OPT_length = udi_size; 3925 sin6 = (sin6_t *)&tudi[1]; 3926 sin6->sin6_port = 0; 3927 sin6->sin6_family = AF_INET6; 3928 3929 sin6->sin6_addr = ip6h->ip6_src; 3930 /* No sin6_flowinfo per API */ 3931 sin6->sin6_flowinfo = 0; 3932 /* For link-scope source pass up scope id */ 3933 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3934 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3935 sin6->sin6_scope_id = ipp.ipp_ifindex; 3936 else 3937 sin6->sin6_scope_id = 0; 3938 3939 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3940 icmp->icmp_zoneid, is->is_netstack); 3941 3942 if (udi_size != 0) { 3943 uchar_t *dstopt; 3944 3945 dstopt = (uchar_t *)&sin6[1]; 3946 if (icmp_opt & IPPF_IFINDEX) { 3947 struct T_opthdr *toh; 3948 struct in6_pktinfo *pkti; 3949 3950 toh = (struct T_opthdr *)dstopt; 3951 toh->level = IPPROTO_IPV6; 3952 toh->name = IPV6_PKTINFO; 3953 toh->len = sizeof (struct T_opthdr) + 3954 sizeof (*pkti); 3955 toh->status = 0; 3956 dstopt += sizeof (struct T_opthdr); 3957 pkti = (struct in6_pktinfo *)dstopt; 3958 pkti->ipi6_addr = ip6h->ip6_dst; 3959 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3960 dstopt += sizeof (*pkti); 3961 udi_size -= toh->len; 3962 } 3963 if (icmp_ipv6_recvhoplimit) { 3964 struct T_opthdr *toh; 3965 3966 toh = 
(struct T_opthdr *)dstopt; 3967 toh->level = IPPROTO_IPV6; 3968 toh->name = IPV6_HOPLIMIT; 3969 toh->len = sizeof (struct T_opthdr) + 3970 sizeof (uint_t); 3971 toh->status = 0; 3972 dstopt += sizeof (struct T_opthdr); 3973 *(uint_t *)dstopt = ip6h->ip6_hops; 3974 dstopt += sizeof (uint_t); 3975 udi_size -= toh->len; 3976 } 3977 if (icmp->icmp_ipv6_recvtclass) { 3978 struct T_opthdr *toh; 3979 3980 toh = (struct T_opthdr *)dstopt; 3981 toh->level = IPPROTO_IPV6; 3982 toh->name = IPV6_TCLASS; 3983 toh->len = sizeof (struct T_opthdr) + 3984 sizeof (uint_t); 3985 toh->status = 0; 3986 dstopt += sizeof (struct T_opthdr); 3987 *(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3988 dstopt += sizeof (uint_t); 3989 udi_size -= toh->len; 3990 } 3991 if (icmp->icmp_timestamp) { 3992 struct T_opthdr *toh; 3993 3994 toh = (struct T_opthdr *)dstopt; 3995 toh->level = SOL_SOCKET; 3996 toh->name = SCM_TIMESTAMP; 3997 toh->len = sizeof (struct T_opthdr) + 3998 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3999 toh->status = 0; 4000 dstopt += sizeof (struct T_opthdr); 4001 /* Align for gethrestime() */ 4002 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4003 sizeof (intptr_t)); 4004 gethrestime((timestruc_t *)dstopt); 4005 dstopt = (uchar_t *)toh + toh->len; 4006 udi_size -= toh->len; 4007 } 4008 4009 if (icmp_opt & IPPF_HOPOPTS) { 4010 struct T_opthdr *toh; 4011 4012 toh = (struct T_opthdr *)dstopt; 4013 toh->level = IPPROTO_IPV6; 4014 toh->name = IPV6_HOPOPTS; 4015 toh->len = sizeof (struct T_opthdr) + 4016 ipp.ipp_hopoptslen - hopstrip; 4017 toh->status = 0; 4018 dstopt += sizeof (struct T_opthdr); 4019 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 4020 ipp.ipp_hopoptslen - hopstrip); 4021 if (hopstrip > 0) { 4022 /* copy next header value and fake length */ 4023 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 4024 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 4025 hopstrip / 8; 4026 } 4027 dstopt += ipp.ipp_hopoptslen - hopstrip; 4028 udi_size -= toh->len; 4029 } 4030 if 
(icmp_opt & IPPF_RTDSTOPTS) { 4031 struct T_opthdr *toh; 4032 4033 toh = (struct T_opthdr *)dstopt; 4034 toh->level = IPPROTO_IPV6; 4035 toh->name = IPV6_DSTOPTS; 4036 toh->len = sizeof (struct T_opthdr) + 4037 ipp.ipp_rtdstoptslen; 4038 toh->status = 0; 4039 dstopt += sizeof (struct T_opthdr); 4040 bcopy(ipp.ipp_rtdstopts, dstopt, 4041 ipp.ipp_rtdstoptslen); 4042 dstopt += ipp.ipp_rtdstoptslen; 4043 udi_size -= toh->len; 4044 } 4045 if (icmp_opt & IPPF_RTHDR) { 4046 struct T_opthdr *toh; 4047 4048 toh = (struct T_opthdr *)dstopt; 4049 toh->level = IPPROTO_IPV6; 4050 toh->name = IPV6_RTHDR; 4051 toh->len = sizeof (struct T_opthdr) + 4052 ipp.ipp_rthdrlen; 4053 toh->status = 0; 4054 dstopt += sizeof (struct T_opthdr); 4055 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4056 dstopt += ipp.ipp_rthdrlen; 4057 udi_size -= toh->len; 4058 } 4059 if (icmp_opt & IPPF_DSTOPTS) { 4060 struct T_opthdr *toh; 4061 4062 toh = (struct T_opthdr *)dstopt; 4063 toh->level = IPPROTO_IPV6; 4064 toh->name = IPV6_DSTOPTS; 4065 toh->len = sizeof (struct T_opthdr) + 4066 ipp.ipp_dstoptslen; 4067 toh->status = 0; 4068 dstopt += sizeof (struct T_opthdr); 4069 bcopy(ipp.ipp_dstopts, dstopt, 4070 ipp.ipp_dstoptslen); 4071 dstopt += ipp.ipp_dstoptslen; 4072 udi_size -= toh->len; 4073 } 4074 /* Consumed all of allocated space */ 4075 ASSERT(udi_size == 0); 4076 } 4077 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 4078 4079 deliver: 4080 icmp_ulp_recv(connp, mp); 4081 4082 } 4083 4084 /* 4085 * return SNMP stuff in buffer in mpdata 4086 */ 4087 mblk_t * 4088 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 4089 { 4090 mblk_t *mpdata; 4091 struct opthdr *optp; 4092 conn_t *connp = Q_TO_CONN(q); 4093 icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 4094 mblk_t *mp2ctl; 4095 4096 /* 4097 * make a copy of the original message 4098 */ 4099 mp2ctl = copymsg(mpctl); 4100 4101 if (mpctl == NULL || 4102 (mpdata = mpctl->b_cont) == NULL) { 4103 freemsg(mpctl); 4104 freemsg(mp2ctl); 4105 return (0); 4106 } 
4107 4108 /* fixed length structure for IPv4 and IPv6 counters */ 4109 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4110 optp->level = EXPER_RAWIP; 4111 optp->name = 0; 4112 (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, 4113 sizeof (is->is_rawip_mib)); 4114 optp->len = msgdsize(mpdata); 4115 qreply(q, mpctl); 4116 4117 return (mp2ctl); 4118 } 4119 4120 /* 4121 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 4122 * TODO: If this ever actually tries to set anything, it needs to be 4123 * to do the appropriate locking. 4124 */ 4125 /* ARGSUSED */ 4126 int 4127 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4128 uchar_t *ptr, int len) 4129 { 4130 switch (level) { 4131 case EXPER_RAWIP: 4132 return (0); 4133 default: 4134 return (1); 4135 } 4136 } 4137 4138 /* Report for ndd "icmp_status" */ 4139 /* ARGSUSED */ 4140 static int 4141 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4142 { 4143 conn_t *connp; 4144 ip_stack_t *ipst; 4145 char laddrbuf[INET6_ADDRSTRLEN]; 4146 char faddrbuf[INET6_ADDRSTRLEN]; 4147 int i; 4148 4149 (void) mi_mpprintf(mp, 4150 "RAWIP " MI_COL_HDRPAD_STR 4151 /* 01234567[89ABCDEF] */ 4152 " src addr dest addr state"); 4153 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 4154 4155 connp = Q_TO_CONN(q); 4156 ipst = connp->conn_netstack->netstack_ip; 4157 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4158 connf_t *connfp; 4159 char *state; 4160 4161 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4162 connp = NULL; 4163 4164 while ((connp = ipcl_get_next_conn(connfp, connp, 4165 IPCL_RAWIPCONN)) != NULL) { 4166 icmp_t *icmp; 4167 4168 mutex_enter(&(connp)->conn_lock); 4169 icmp = connp->conn_icmp; 4170 4171 if (icmp->icmp_state == TS_UNBND) 4172 state = "UNBOUND"; 4173 else if (icmp->icmp_state == TS_IDLE) 4174 state = "IDLE"; 4175 else if (icmp->icmp_state == TS_DATA_XFER) 4176 state = "CONNECTED"; 4177 else 4178 state = "UnkState"; 4179 4180 (void) 
mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s", 4181 (void *)icmp, 4182 inet_ntop(AF_INET6, &icmp->icmp_v6dst.sin6_addr, 4183 faddrbuf, 4184 sizeof (faddrbuf)), 4185 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 4186 sizeof (laddrbuf)), 4187 state); 4188 mutex_exit(&(connp)->conn_lock); 4189 } 4190 } 4191 return (0); 4192 } 4193 4194 /* 4195 * This routine creates a T_UDERROR_IND message and passes it upstream. 4196 * The address and options are copied from the T_UNITDATA_REQ message 4197 * passed in mp. This message is freed. 4198 */ 4199 static void 4200 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 4201 { 4202 mblk_t *mp1; 4203 uchar_t *rptr = mp->b_rptr; 4204 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 4205 4206 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 4207 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 4208 tudr->OPT_length, err); 4209 if (mp1) 4210 qreply(q, mp1); 4211 freemsg(mp); 4212 } 4213 4214 4215 static int 4216 rawip_do_unbind(conn_t *connp) 4217 { 4218 icmp_t *icmp = connp->conn_icmp; 4219 4220 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4221 /* If a bind has not been done, we can't unbind. */ 4222 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 4223 rw_exit(&icmp->icmp_rwlock); 4224 return (-TOUTSTATE); 4225 } 4226 icmp->icmp_pending_op = T_UNBIND_REQ; 4227 rw_exit(&icmp->icmp_rwlock); 4228 4229 /* 4230 * Call ip to unbind 4231 */ 4232 4233 ip_unbind(connp); 4234 4235 /* 4236 * Once we're unbound from IP, the pending operation may be cleared 4237 * here. 4238 */ 4239 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4240 V6_SET_ZERO(icmp->icmp_v6src); 4241 V6_SET_ZERO(icmp->icmp_bound_v6src); 4242 icmp->icmp_pending_op = -1; 4243 icmp->icmp_state = TS_UNBND; 4244 if (icmp->icmp_family == AF_INET6) 4245 (void) icmp_build_hdrs(icmp); 4246 rw_exit(&icmp->icmp_rwlock); 4247 return (0); 4248 } 4249 4250 /* 4251 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 
4252 * After some error checking, the message is passed downstream to ip. 4253 */ 4254 static void 4255 icmp_tpi_unbind(queue_t *q, mblk_t *mp) 4256 { 4257 conn_t *connp = Q_TO_CONN(q); 4258 int error; 4259 4260 ASSERT(mp->b_cont == NULL); 4261 error = rawip_do_unbind(connp); 4262 if (error) { 4263 if (error < 0) { 4264 icmp_err_ack(q, mp, -error, 0); 4265 } else { 4266 icmp_err_ack(q, mp, 0, error); 4267 } 4268 return; 4269 } 4270 4271 /* 4272 * Convert mp into a T_OK_ACK 4273 */ 4274 4275 mp = mi_tpi_ok_ack_alloc(mp); 4276 4277 /* 4278 * should not happen in practice... T_OK_ACK is smaller than the 4279 * original message. 4280 */ 4281 ASSERT(mp != NULL); 4282 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4283 qreply(q, mp); 4284 } 4285 4286 4287 /* 4288 * Process IPv4 packets that already include an IP header. 4289 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 4290 * IPPROTO_IGMP). 4291 */ 4292 static int 4293 icmp_wput_hdrincl(queue_t *q, conn_t *connp, mblk_t *mp, icmp_t *icmp, 4294 ip4_pkt_t *pktinfop) 4295 { 4296 icmp_stack_t *is = icmp->icmp_is; 4297 ipha_t *ipha; 4298 int ip_hdr_length; 4299 int tp_hdr_len; 4300 mblk_t *mp1; 4301 uint_t pkt_len; 4302 ip_opt_info_t optinfo; 4303 4304 optinfo.ip_opt_flags = 0; 4305 optinfo.ip_opt_ill_index = 0; 4306 ipha = (ipha_t *)mp->b_rptr; 4307 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4308 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 4309 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4310 ASSERT(icmp != NULL); 4311 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4312 freemsg(mp); 4313 return (0); 4314 } 4315 ipha = (ipha_t *)mp->b_rptr; 4316 } 4317 ipha->ipha_version_and_hdr_length = 4318 (IP_VERSION<<4) | (ip_hdr_length>>2); 4319 4320 /* 4321 * For the socket of SOCK_RAW type, the checksum is provided in the 4322 * pre-built packet. 
We set the ipha_ident field to IP_HDR_INCLUDED to 4323 * tell IP that the application has sent a complete IP header and not 4324 * to compute the transport checksum nor change the DF flag. 4325 */ 4326 ipha->ipha_ident = IP_HDR_INCLUDED; 4327 ipha->ipha_hdr_checksum = 0; 4328 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 4329 /* Insert options if any */ 4330 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4331 /* 4332 * Put the IP header plus any transport header that is 4333 * checksumed by ip_wput into the first mblk. (ip_wput assumes 4334 * that at least the checksum field is in the first mblk.) 4335 */ 4336 switch (ipha->ipha_protocol) { 4337 case IPPROTO_UDP: 4338 tp_hdr_len = 8; 4339 break; 4340 case IPPROTO_TCP: 4341 tp_hdr_len = 20; 4342 break; 4343 default: 4344 tp_hdr_len = 0; 4345 break; 4346 } 4347 /* 4348 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 4349 * tp_hdr_len bytes will be in a single mblk. 4350 */ 4351 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 4352 tp_hdr_len)) { 4353 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 4354 tp_hdr_len)) { 4355 BUMP_MIB(&is->is_rawip_mib, 4356 rawipOutErrors); 4357 freemsg(mp); 4358 return (0); 4359 } 4360 ipha = (ipha_t *)mp->b_rptr; 4361 } 4362 4363 /* 4364 * if the length is larger then the max allowed IP packet, 4365 * then send an error and abort the processing. 
4366 */ 4367 pkt_len = ntohs(ipha->ipha_length) 4368 + icmp->icmp_ip_snd_options_len; 4369 if (pkt_len > IP_MAXPACKET) { 4370 return (EMSGSIZE); 4371 } 4372 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra + 4373 tp_hdr_len, BPRI_LO))) { 4374 return (ENOMEM); 4375 } 4376 mp1->b_rptr += is->is_wroff_extra; 4377 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4378 4379 ipha->ipha_length = htons((uint16_t)pkt_len); 4380 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4381 4382 /* Copy transport header if any */ 4383 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4384 mp1->b_wptr += tp_hdr_len; 4385 4386 /* Add options */ 4387 ipha = (ipha_t *)mp1->b_rptr; 4388 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 4389 icmp->icmp_ip_snd_options_len); 4390 4391 /* Drop IP header and transport header from original */ 4392 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4393 4394 mp1->b_cont = mp; 4395 mp = mp1; 4396 /* 4397 * Massage source route putting first source 4398 * route in ipha_dst. 4399 */ 4400 (void) ip_massage_options(ipha, is->is_netstack); 4401 } 4402 4403 if (pktinfop != NULL) { 4404 /* 4405 * Over write the source address provided in the header 4406 */ 4407 if (pktinfop->ip4_addr != INADDR_ANY) { 4408 ipha->ipha_src = pktinfop->ip4_addr; 4409 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4410 } 4411 4412 if (pktinfop->ip4_ill_index != 0) { 4413 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4414 } 4415 } 4416 4417 ip_output_options(connp, mp, q, IP_WPUT, &optinfo); 4418 return (0); 4419 } 4420 4421 static int 4422 icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4423 { 4424 int err; 4425 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4426 icmp_stack_t *is = icmp->icmp_is; 4427 conn_t *connp = icmp->icmp_connp; 4428 cred_t *cr; 4429 4430 /* 4431 * All Solaris components should pass a db_credp 4432 * for this message, hence we ASSERT. 4433 * On production kernels we return an error to be robust against 4434 * random streams modules sitting on top of us. 
4435 */ 4436 cr = msg_getcred(mp, NULL); 4437 ASSERT(cr != NULL); 4438 if (cr == NULL) 4439 return (EINVAL); 4440 4441 err = tsol_compute_label(cr, dst, 4442 opt_storage, connp->conn_mac_exempt, 4443 is->is_netstack->netstack_ip); 4444 if (err == 0) { 4445 err = tsol_update_options(&icmp->icmp_ip_snd_options, 4446 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4447 opt_storage); 4448 } 4449 if (err != 0) { 4450 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4451 DTRACE_PROBE4( 4452 tx__ip__log__drop__updatelabel__icmp, 4453 char *, "icmp(1) failed to update options(2) on mp(3)", 4454 icmp_t *, icmp, char *, opt_storage, mblk_t *, mp); 4455 return (err); 4456 } 4457 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4458 return (0); 4459 } 4460 4461 /* 4462 * This routine handles all messages passed downstream. It either 4463 * consumes the message or passes it downstream; it never queues a 4464 * a message. 4465 */ 4466 static void 4467 icmp_wput(queue_t *q, mblk_t *mp) 4468 { 4469 uchar_t *rptr = mp->b_rptr; 4470 ipha_t *ipha; 4471 mblk_t *mp1; 4472 #define tudr ((struct T_unitdata_req *)rptr) 4473 size_t ip_len; 4474 conn_t *connp = Q_TO_CONN(q); 4475 icmp_t *icmp = connp->conn_icmp; 4476 icmp_stack_t *is = icmp->icmp_is; 4477 sin6_t *sin6; 4478 sin_t *sin; 4479 ipaddr_t v4dst; 4480 ip4_pkt_t pktinfo; 4481 ip4_pkt_t *pktinfop = &pktinfo; 4482 ip6_pkt_t ipp_s; /* For ancillary data options */ 4483 ip6_pkt_t *ipp = &ipp_s; 4484 int error; 4485 4486 ipp->ipp_fields = 0; 4487 ipp->ipp_sticky_ignored = 0; 4488 4489 switch (mp->b_datap->db_type) { 4490 case M_DATA: 4491 if (icmp->icmp_hdrincl) { 4492 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4493 ipha = (ipha_t *)mp->b_rptr; 4494 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4495 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4496 BUMP_MIB(&is->is_rawip_mib, 4497 rawipOutErrors); 4498 freemsg(mp); 4499 return; 4500 } 4501 ipha = (ipha_t *)mp->b_rptr; 4502 } 4503 /* 4504 * If this connection was used for 
v6 (inconceivable!) 4505 * or if we have a new destination, then it's time to 4506 * figure a new label. 4507 */ 4508 if (is_system_labeled() && 4509 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4510 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4511 ipha->ipha_dst)) { 4512 error = icmp_update_label(icmp, mp, 4513 ipha->ipha_dst); 4514 if (error != 0) { 4515 icmp_ud_err(q, mp, error); 4516 return; 4517 } 4518 } 4519 error = icmp_wput_hdrincl(q, connp, mp, icmp, NULL); 4520 if (error != 0) 4521 icmp_ud_err(q, mp, error); 4522 return; 4523 } 4524 freemsg(mp); 4525 return; 4526 case M_PROTO: 4527 case M_PCPROTO: 4528 ip_len = mp->b_wptr - rptr; 4529 if (ip_len >= sizeof (struct T_unitdata_req)) { 4530 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4531 if (((union T_primitives *)rptr)->type 4532 == T_UNITDATA_REQ) 4533 break; 4534 } 4535 /* FALLTHRU */ 4536 default: 4537 icmp_wput_other(q, mp); 4538 return; 4539 } 4540 4541 /* Handle T_UNITDATA_REQ messages here. */ 4542 4543 mp1 = mp->b_cont; 4544 if (mp1 == NULL) { 4545 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4546 icmp_ud_err(q, mp, EPROTO); 4547 return; 4548 } 4549 4550 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4551 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4552 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4553 return; 4554 } 4555 4556 switch (icmp->icmp_family) { 4557 case AF_INET6: 4558 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4559 if (!OK_32PTR((char *)sin6) || 4560 tudr->DEST_length != sizeof (sin6_t) || 4561 sin6->sin6_family != AF_INET6) { 4562 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4563 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4564 return; 4565 } 4566 4567 /* No support for mapped addresses on raw sockets */ 4568 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4569 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4570 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4571 return; 4572 } 4573 4574 /* 4575 * Destination is a native IPv6 address. 4576 * Send out an IPv6 format packet. 
4577 */ 4578 if (tudr->OPT_length != 0) { 4579 int error; 4580 4581 error = 0; 4582 if (icmp_unitdata_opt_process(q, mp, &error, 4583 (void *)ipp) < 0) { 4584 /* failure */ 4585 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4586 icmp_ud_err(q, mp, error); 4587 return; 4588 } 4589 ASSERT(error == 0); 4590 } 4591 4592 error = raw_ip_send_data_v6(q, connp, mp1, sin6, ipp); 4593 goto done; 4594 4595 case AF_INET: 4596 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4597 if (!OK_32PTR((char *)sin) || 4598 tudr->DEST_length != sizeof (sin_t) || 4599 sin->sin_family != AF_INET) { 4600 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4601 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4602 return; 4603 } 4604 /* Extract and ipaddr */ 4605 v4dst = sin->sin_addr.s_addr; 4606 break; 4607 4608 default: 4609 ASSERT(0); 4610 } 4611 4612 pktinfop->ip4_ill_index = 0; 4613 pktinfop->ip4_addr = INADDR_ANY; 4614 4615 /* 4616 * If options passed in, feed it for verification and handling 4617 */ 4618 if (tudr->OPT_length != 0) { 4619 int error; 4620 4621 error = 0; 4622 if (icmp_unitdata_opt_process(q, mp, &error, 4623 (void *)pktinfop) < 0) { 4624 /* failure */ 4625 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4626 icmp_ud_err(q, mp, error); 4627 return; 4628 } 4629 ASSERT(error == 0); 4630 /* 4631 * Note: Success in processing options. 
4632 * mp option buffer represented by 4633 * OPT_length/offset now potentially modified 4634 * and contain option setting results 4635 */ 4636 } 4637 4638 error = raw_ip_send_data_v4(q, connp, mp1, v4dst, pktinfop); 4639 done: 4640 if (error != 0) { 4641 icmp_ud_err(q, mp, error); 4642 return; 4643 } else { 4644 mp->b_cont = NULL; 4645 freeb(mp); 4646 } 4647 } 4648 4649 4650 /* ARGSUSED */ 4651 static void 4652 icmp_wput_fallback(queue_t *q, mblk_t *mp) 4653 { 4654 #ifdef DEBUG 4655 cmn_err(CE_CONT, "icmp_wput_fallback: Message during fallback \n"); 4656 #endif 4657 freemsg(mp); 4658 } 4659 4660 static int 4661 raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp, ipaddr_t v4dst, 4662 ip4_pkt_t *pktinfop) 4663 { 4664 ipha_t *ipha; 4665 size_t ip_len; 4666 icmp_t *icmp = connp->conn_icmp; 4667 icmp_stack_t *is = icmp->icmp_is; 4668 int ip_hdr_length; 4669 ip_opt_info_t optinfo; 4670 4671 optinfo.ip_opt_flags = 0; 4672 optinfo.ip_opt_ill_index = 0; 4673 4674 if (icmp->icmp_state == TS_UNBND) { 4675 /* If a port has not been bound to the stream, fail. 
*/ 4676 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4677 return (EPROTO); 4678 } 4679 4680 if (v4dst == INADDR_ANY) 4681 v4dst = htonl(INADDR_LOOPBACK); 4682 4683 /* Check if our saved options are valid; update if not */ 4684 if (is_system_labeled() && 4685 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4686 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst)) { 4687 int error = icmp_update_label(icmp, mp, v4dst); 4688 4689 if (error != 0) 4690 return (error); 4691 } 4692 4693 /* Protocol 255 contains full IP headers */ 4694 if (icmp->icmp_hdrincl) 4695 return (icmp_wput_hdrincl(q, connp, mp, icmp, pktinfop)); 4696 4697 /* Add an IP header */ 4698 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4699 ipha = (ipha_t *)&mp->b_rptr[-ip_hdr_length]; 4700 if ((uchar_t *)ipha < mp->b_datap->db_base || 4701 mp->b_datap->db_ref != 1 || 4702 !OK_32PTR(ipha)) { 4703 mblk_t *mp1; 4704 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, 4705 BPRI_LO))) { 4706 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4707 return (ENOMEM); 4708 } 4709 mp1->b_cont = mp; 4710 ipha = (ipha_t *)mp1->b_datap->db_lim; 4711 mp1->b_wptr = (uchar_t *)ipha; 4712 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4713 mp = mp1; 4714 } 4715 #ifdef _BIG_ENDIAN 4716 /* Set version, header length, and tos */ 4717 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4718 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4719 icmp->icmp_type_of_service); 4720 /* Set ttl and protocol */ 4721 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4722 #else 4723 /* Set version, header length, and tos */ 4724 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4725 ((icmp->icmp_type_of_service << 8) | 4726 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4727 /* Set ttl and protocol */ 4728 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4729 #endif 4730 if (pktinfop->ip4_addr != INADDR_ANY) { 4731 ipha->ipha_src = pktinfop->ip4_addr; 4732 
optinfo.ip_opt_flags = IP_VERIFY_SRC; 4733 } else { 4734 4735 /* 4736 * Copy our address into the packet. If this is zero, 4737 * ip will fill in the real source address. 4738 */ 4739 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4740 } 4741 4742 ipha->ipha_fragment_offset_and_flags = 0; 4743 4744 if (pktinfop->ip4_ill_index != 0) { 4745 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4746 } 4747 4748 4749 /* 4750 * For the socket of SOCK_RAW type, the checksum is provided in the 4751 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4752 * tell IP that the application has sent a complete IP header and not 4753 * to compute the transport checksum nor change the DF flag. 4754 */ 4755 ipha->ipha_ident = IP_HDR_INCLUDED; 4756 4757 /* Finish common formatting of the packet. */ 4758 mp->b_rptr = (uchar_t *)ipha; 4759 4760 ip_len = mp->b_wptr - (uchar_t *)ipha; 4761 if (mp->b_cont != NULL) 4762 ip_len += msgdsize(mp->b_cont); 4763 4764 /* 4765 * Set the length into the IP header. 4766 * If the length is greater than the maximum allowed by IP, 4767 * then free the message and return. Do not try and send it 4768 * as this can cause problems in layers below. 4769 */ 4770 if (ip_len > IP_MAXPACKET) { 4771 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4772 return (EMSGSIZE); 4773 } 4774 ipha->ipha_length = htons((uint16_t)ip_len); 4775 /* 4776 * Copy in the destination address request 4777 */ 4778 ipha->ipha_dst = v4dst; 4779 4780 /* 4781 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4782 */ 4783 if (CLASSD(v4dst)) 4784 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4785 4786 /* Copy in options if any */ 4787 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4788 bcopy(icmp->icmp_ip_snd_options, 4789 &ipha[1], icmp->icmp_ip_snd_options_len); 4790 /* 4791 * Massage source route putting first source route in ipha_dst. 4792 * Ignore the destination in the T_unitdata_req. 
4793 */ 4794 (void) ip_massage_options(ipha, is->is_netstack); 4795 } 4796 4797 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 4798 ip_output_options(connp, mp, q, IP_WPUT, &optinfo); 4799 return (0); 4800 } 4801 4802 static int 4803 icmp_update_label_v6(icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4804 { 4805 int err; 4806 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4807 icmp_stack_t *is = icmp->icmp_is; 4808 conn_t *connp = icmp->icmp_connp; 4809 cred_t *cr; 4810 4811 /* 4812 * All Solaris components should pass a db_credp 4813 * for this message, hence we ASSERT. 4814 * On production kernels we return an error to be robust against 4815 * random streams modules sitting on top of us. 4816 */ 4817 cr = msg_getcred(mp, NULL); 4818 ASSERT(cr != NULL); 4819 if (cr == NULL) 4820 return (EINVAL); 4821 4822 err = tsol_compute_label_v6(cr, dst, 4823 opt_storage, connp->conn_mac_exempt, 4824 is->is_netstack->netstack_ip); 4825 if (err == 0) { 4826 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4827 &icmp->icmp_label_len_v6, opt_storage); 4828 } 4829 if (err != 0) { 4830 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4831 DTRACE_PROBE4( 4832 tx__ip__log__drop__updatelabel__icmp6, 4833 char *, "icmp(1) failed to update options(2) on mp(3)", 4834 icmp_t *, icmp, char *, opt_storage, mblk_t *, mp); 4835 return (err); 4836 } 4837 4838 icmp->icmp_v6lastdst = *dst; 4839 return (0); 4840 } 4841 4842 /* 4843 * raw_ip_send_data_v6(): 4844 * Assumes that icmp_wput did some sanity checking on the destination 4845 * address, but that the label may not yet be correct. 
4846 */ 4847 static int 4848 raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp, sin6_t *sin6, 4849 ip6_pkt_t *ipp) 4850 { 4851 ip6_t *ip6h; 4852 ip6i_t *ip6i; /* mp->b_rptr even if no ip6i_t */ 4853 int ip_hdr_len = IPV6_HDR_LEN; 4854 size_t ip_len; 4855 icmp_t *icmp = connp->conn_icmp; 4856 icmp_stack_t *is = icmp->icmp_is; 4857 ip6_pkt_t *tipp; 4858 uint32_t csum = 0; 4859 uint_t ignore = 0; 4860 uint_t option_exists = 0, is_sticky = 0; 4861 uint8_t *cp; 4862 uint8_t *nxthdr_ptr; 4863 in6_addr_t ip6_dst; 4864 4865 /* 4866 * If the local address is a mapped address return 4867 * an error. 4868 * It would be possible to send an IPv6 packet but the 4869 * response would never make it back to the application 4870 * since it is bound to a mapped address. 4871 */ 4872 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4873 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4874 return (EADDRNOTAVAIL); 4875 } 4876 4877 ignore = ipp->ipp_sticky_ignored; 4878 if (sin6->sin6_scope_id != 0 && 4879 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4880 /* 4881 * IPPF_SCOPE_ID is special. It's neither a sticky 4882 * option nor ancillary data. It needs to be 4883 * explicitly set in options_exists. 4884 */ 4885 option_exists |= IPPF_SCOPE_ID; 4886 } 4887 4888 /* 4889 * Compute the destination address 4890 */ 4891 ip6_dst = sin6->sin6_addr; 4892 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4893 ip6_dst = ipv6_loopback; 4894 4895 /* 4896 * If we're not going to the same destination as last time, then 4897 * recompute the label required. This is done in a separate routine to 4898 * avoid blowing up our stack here. 4899 */ 4900 if (is_system_labeled() && 4901 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst)) { 4902 int error = 0; 4903 4904 error = icmp_update_label_v6(icmp, mp, &ip6_dst); 4905 if (error != 0) 4906 return (error); 4907 } 4908 4909 /* 4910 * If there's a security label here, then we ignore any options the 4911 * user may try to set. 
We keep the peer's label as a hidden sticky 4912 * option. 4913 */ 4914 if (icmp->icmp_label_len_v6 > 0) { 4915 ignore &= ~IPPF_HOPOPTS; 4916 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4917 } 4918 4919 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4920 (ipp->ipp_fields == 0)) { 4921 /* No sticky options nor ancillary data. */ 4922 goto no_options; 4923 } 4924 4925 /* 4926 * Go through the options figuring out where each is going to 4927 * come from and build two masks. The first mask indicates if 4928 * the option exists at all. The second mask indicates if the 4929 * option is sticky or ancillary. 4930 */ 4931 if (!(ignore & IPPF_HOPOPTS)) { 4932 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4933 option_exists |= IPPF_HOPOPTS; 4934 ip_hdr_len += ipp->ipp_hopoptslen; 4935 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4936 option_exists |= IPPF_HOPOPTS; 4937 is_sticky |= IPPF_HOPOPTS; 4938 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4939 } 4940 } 4941 4942 if (!(ignore & IPPF_RTHDR)) { 4943 if (ipp->ipp_fields & IPPF_RTHDR) { 4944 option_exists |= IPPF_RTHDR; 4945 ip_hdr_len += ipp->ipp_rthdrlen; 4946 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4947 option_exists |= IPPF_RTHDR; 4948 is_sticky |= IPPF_RTHDR; 4949 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4950 } 4951 } 4952 4953 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4954 /* 4955 * Need to have a router header to use these. 
4956 */ 4957 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4958 option_exists |= IPPF_RTDSTOPTS; 4959 ip_hdr_len += ipp->ipp_rtdstoptslen; 4960 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4961 option_exists |= IPPF_RTDSTOPTS; 4962 is_sticky |= IPPF_RTDSTOPTS; 4963 ip_hdr_len += 4964 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4965 } 4966 } 4967 4968 if (!(ignore & IPPF_DSTOPTS)) { 4969 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4970 option_exists |= IPPF_DSTOPTS; 4971 ip_hdr_len += ipp->ipp_dstoptslen; 4972 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4973 option_exists |= IPPF_DSTOPTS; 4974 is_sticky |= IPPF_DSTOPTS; 4975 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4976 } 4977 } 4978 4979 if (!(ignore & IPPF_IFINDEX)) { 4980 if (ipp->ipp_fields & IPPF_IFINDEX) { 4981 option_exists |= IPPF_IFINDEX; 4982 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4983 option_exists |= IPPF_IFINDEX; 4984 is_sticky |= IPPF_IFINDEX; 4985 } 4986 } 4987 4988 if (!(ignore & IPPF_ADDR)) { 4989 if (ipp->ipp_fields & IPPF_ADDR) { 4990 option_exists |= IPPF_ADDR; 4991 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4992 option_exists |= IPPF_ADDR; 4993 is_sticky |= IPPF_ADDR; 4994 } 4995 } 4996 4997 if (!(ignore & IPPF_DONTFRAG)) { 4998 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4999 option_exists |= IPPF_DONTFRAG; 5000 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 5001 option_exists |= IPPF_DONTFRAG; 5002 is_sticky |= IPPF_DONTFRAG; 5003 } 5004 } 5005 5006 if (!(ignore & IPPF_USE_MIN_MTU)) { 5007 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 5008 option_exists |= IPPF_USE_MIN_MTU; 5009 } else if (icmp->icmp_sticky_ipp.ipp_fields & 5010 IPPF_USE_MIN_MTU) { 5011 option_exists |= IPPF_USE_MIN_MTU; 5012 is_sticky |= IPPF_USE_MIN_MTU; 5013 } 5014 } 5015 5016 if (!(ignore & IPPF_NEXTHOP)) { 5017 if (ipp->ipp_fields & IPPF_NEXTHOP) { 5018 option_exists |= IPPF_NEXTHOP; 5019 } else if (icmp->icmp_sticky_ipp.ipp_fields & 
IPPF_NEXTHOP) { 5020 option_exists |= IPPF_NEXTHOP; 5021 is_sticky |= IPPF_NEXTHOP; 5022 } 5023 } 5024 5025 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 5026 option_exists |= IPPF_HOPLIMIT; 5027 /* IPV6_HOPLIMIT can never be sticky */ 5028 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 5029 5030 if (!(ignore & IPPF_UNICAST_HOPS) && 5031 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 5032 option_exists |= IPPF_UNICAST_HOPS; 5033 is_sticky |= IPPF_UNICAST_HOPS; 5034 } 5035 5036 if (!(ignore & IPPF_MULTICAST_HOPS) && 5037 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 5038 option_exists |= IPPF_MULTICAST_HOPS; 5039 is_sticky |= IPPF_MULTICAST_HOPS; 5040 } 5041 5042 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 5043 /* This is a sticky socket option only */ 5044 option_exists |= IPPF_NO_CKSUM; 5045 is_sticky |= IPPF_NO_CKSUM; 5046 } 5047 5048 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 5049 /* This is a sticky socket option only */ 5050 option_exists |= IPPF_RAW_CKSUM; 5051 is_sticky |= IPPF_RAW_CKSUM; 5052 } 5053 5054 if (!(ignore & IPPF_TCLASS)) { 5055 if (ipp->ipp_fields & IPPF_TCLASS) { 5056 option_exists |= IPPF_TCLASS; 5057 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 5058 option_exists |= IPPF_TCLASS; 5059 is_sticky |= IPPF_TCLASS; 5060 } 5061 } 5062 5063 no_options: 5064 5065 /* 5066 * If any options carried in the ip6i_t were specified, we 5067 * need to account for the ip6i_t in the data we'll be sending 5068 * down. 
5069 */ 5070 if (option_exists & IPPF_HAS_IP6I) 5071 ip_hdr_len += sizeof (ip6i_t); 5072 5073 /* check/fix buffer config, setup pointers into it */ 5074 ip6h = (ip6_t *)&mp->b_rptr[-ip_hdr_len]; 5075 if ((mp->b_datap->db_ref != 1) || 5076 ((unsigned char *)ip6h < mp->b_datap->db_base) || 5077 !OK_32PTR(ip6h)) { 5078 mblk_t *mp1; 5079 5080 /* Try to get everything in a single mblk next time */ 5081 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 5082 icmp->icmp_max_hdr_len = ip_hdr_len; 5083 5084 (void) proto_set_tx_wroff(q == NULL ? NULL:RD(q), connp, 5085 icmp->icmp_max_hdr_len + is->is_wroff_extra); 5086 } 5087 mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); 5088 if (!mp1) { 5089 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5090 return (ENOMEM); 5091 } 5092 mp1->b_cont = mp; 5093 mp1->b_wptr = mp1->b_datap->db_lim; 5094 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 5095 mp = mp1; 5096 } 5097 mp->b_rptr = (unsigned char *)ip6h; 5098 ip6i = (ip6i_t *)ip6h; 5099 5100 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 5101 if (option_exists & IPPF_HAS_IP6I) { 5102 ip6h = (ip6_t *)&ip6i[1]; 5103 ip6i->ip6i_flags = 0; 5104 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5105 5106 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 5107 if (option_exists & IPPF_SCOPE_ID) { 5108 ip6i->ip6i_flags |= IP6I_IFINDEX; 5109 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 5110 } else if (option_exists & IPPF_IFINDEX) { 5111 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 5112 ASSERT(tipp->ipp_ifindex != 0); 5113 ip6i->ip6i_flags |= IP6I_IFINDEX; 5114 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 5115 } 5116 5117 if (option_exists & IPPF_RAW_CKSUM) { 5118 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 5119 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 5120 } 5121 5122 if (option_exists & IPPF_NO_CKSUM) { 5123 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 5124 } 5125 5126 if (option_exists & IPPF_ADDR) { 5127 /* 5128 * Enable per-packet source address verification if 5129 * IPV6_PKTINFO specified the source address. 5130 * ip6_src is set in the transport's _wput function. 5131 */ 5132 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 5133 } 5134 5135 if (option_exists & IPPF_DONTFRAG) { 5136 ip6i->ip6i_flags |= IP6I_DONTFRAG; 5137 } 5138 5139 if (option_exists & IPPF_USE_MIN_MTU) { 5140 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 5141 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 5142 } 5143 5144 if (option_exists & IPPF_NEXTHOP) { 5145 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 5146 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 5147 ip6i->ip6i_flags |= IP6I_NEXTHOP; 5148 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 5149 } 5150 5151 /* 5152 * tell IP this is an ip6i_t private header 5153 */ 5154 ip6i->ip6i_nxt = IPPROTO_RAW; 5155 } 5156 5157 /* Initialize IPv6 header */ 5158 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5159 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 5160 5161 /* Set the hoplimit of the outgoing packet. 
*/ 5162 if (option_exists & IPPF_HOPLIMIT) { 5163 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 5164 ip6h->ip6_hops = ipp->ipp_hoplimit; 5165 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5166 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 5167 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 5168 if (option_exists & IPPF_MULTICAST_HOPS) 5169 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5170 } else { 5171 ip6h->ip6_hops = icmp->icmp_ttl; 5172 if (option_exists & IPPF_UNICAST_HOPS) 5173 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5174 } 5175 5176 if (option_exists & IPPF_ADDR) { 5177 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 5178 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 5179 ip6h->ip6_src = tipp->ipp_addr; 5180 } else { 5181 /* 5182 * The source address was not set using IPV6_PKTINFO. 5183 * First look at the bound source. 5184 * If unspecified fallback to __sin6_src_id. 5185 */ 5186 ip6h->ip6_src = icmp->icmp_v6src; 5187 if (sin6->__sin6_src_id != 0 && 5188 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 5189 ip_srcid_find_id(sin6->__sin6_src_id, 5190 &ip6h->ip6_src, icmp->icmp_zoneid, 5191 is->is_netstack); 5192 } 5193 } 5194 5195 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 5196 cp = (uint8_t *)&ip6h[1]; 5197 5198 /* 5199 * Here's where we have to start stringing together 5200 * any extension headers in the right order: 5201 * Hop-by-hop, destination, routing, and final destination opts. 
5202 */ 5203 if (option_exists & IPPF_HOPOPTS) { 5204 /* Hop-by-hop options */ 5205 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 5206 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 5207 5208 *nxthdr_ptr = IPPROTO_HOPOPTS; 5209 nxthdr_ptr = &hbh->ip6h_nxt; 5210 5211 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 5212 cp += tipp->ipp_hopoptslen; 5213 } 5214 /* 5215 * En-route destination options 5216 * Only do them if there's a routing header as well 5217 */ 5218 if (option_exists & IPPF_RTDSTOPTS) { 5219 ip6_dest_t *dst = (ip6_dest_t *)cp; 5220 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 5221 5222 *nxthdr_ptr = IPPROTO_DSTOPTS; 5223 nxthdr_ptr = &dst->ip6d_nxt; 5224 5225 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 5226 cp += tipp->ipp_rtdstoptslen; 5227 } 5228 /* 5229 * Routing header next 5230 */ 5231 if (option_exists & IPPF_RTHDR) { 5232 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 5233 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 5234 5235 *nxthdr_ptr = IPPROTO_ROUTING; 5236 nxthdr_ptr = &rt->ip6r_nxt; 5237 5238 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 5239 cp += tipp->ipp_rthdrlen; 5240 } 5241 /* 5242 * Do ultimate destination options 5243 */ 5244 if (option_exists & IPPF_DSTOPTS) { 5245 ip6_dest_t *dest = (ip6_dest_t *)cp; 5246 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 5247 5248 *nxthdr_ptr = IPPROTO_DSTOPTS; 5249 nxthdr_ptr = &dest->ip6d_nxt; 5250 5251 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 5252 cp += tipp->ipp_dstoptslen; 5253 } 5254 5255 /* 5256 * Now set the last header pointer to the proto passed in 5257 */ 5258 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 5259 *nxthdr_ptr = icmp->icmp_proto; 5260 5261 /* 5262 * Copy in the destination address 5263 */ 5264 ip6h->ip6_dst = ip6_dst; 5265 5266 ip6h->ip6_vcf = 5267 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 5268 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 5269 5270 if (option_exists & IPPF_TCLASS) { 5271 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 5272 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 5273 tipp->ipp_tclass); 5274 } 5275 if (option_exists & IPPF_RTHDR) { 5276 ip6_rthdr_t *rth; 5277 5278 /* 5279 * Perform any processing needed for source routing. 5280 * We know that all extension headers will be in the same mblk 5281 * as the IPv6 header. 5282 */ 5283 rth = ip_find_rthdr_v6(ip6h, mp->b_wptr); 5284 if (rth != NULL && rth->ip6r_segleft != 0) { 5285 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 5286 /* 5287 * Drop packet - only support Type 0 routing. 5288 * Notify the application as well. 5289 */ 5290 BUMP_MIB(&is->is_rawip_mib, 5291 rawipOutErrors); 5292 return (EPROTO); 5293 } 5294 /* 5295 * rth->ip6r_len is twice the number of 5296 * addresses in the header 5297 */ 5298 if (rth->ip6r_len & 0x1) { 5299 BUMP_MIB(&is->is_rawip_mib, 5300 rawipOutErrors); 5301 return (EPROTO); 5302 } 5303 /* 5304 * Shuffle the routing header and ip6_dst 5305 * addresses, and get the checksum difference 5306 * between the first hop (in ip6_dst) and 5307 * the destination (in the last routing hdr entry). 5308 */ 5309 csum = ip_massage_options_v6(ip6h, rth, 5310 is->is_netstack); 5311 /* 5312 * Verify that the first hop isn't a mapped address. 5313 * Routers along the path need to do this verification 5314 * for subsequent hops. 5315 */ 5316 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 5317 BUMP_MIB(&is->is_rawip_mib, 5318 rawipOutErrors); 5319 return (EADDRNOTAVAIL); 5320 } 5321 } 5322 } 5323 5324 ip_len = mp->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 5325 if (mp->b_cont != NULL) 5326 ip_len += msgdsize(mp->b_cont); 5327 5328 /* 5329 * Set the length into the IP header. 5330 * If the length is greater than the maximum allowed by IP, 5331 * then free the message and return. Do not try and send it 5332 * as this can cause problems in layers below. 
5333 */ 5334 if (ip_len > IP_MAXPACKET) { 5335 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5336 return (EMSGSIZE); 5337 } 5338 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 5339 uint_t cksum_off; /* From ip6i == mp->b_rptr */ 5340 uint16_t *cksum_ptr; 5341 uint_t ext_hdrs_len; 5342 5343 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 5344 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 5345 icmp->icmp_checksum_off == 2); 5346 5347 /* 5348 * We make it easy for IP to include our pseudo header 5349 * by putting our length in uh_checksum, modified (if 5350 * we have a routing header) by the checksum difference 5351 * between the ultimate destination and first hop addresses. 5352 * Note: ICMPv6 must always checksum the packet. 5353 */ 5354 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 5355 if (cksum_off + sizeof (uint16_t) > mp->b_wptr - mp->b_rptr) { 5356 if (!pullupmsg(mp, cksum_off + sizeof (uint16_t))) { 5357 BUMP_MIB(&is->is_rawip_mib, 5358 rawipOutErrors); 5359 freemsg(mp); 5360 return (0); 5361 } 5362 ip6i = (ip6i_t *)mp->b_rptr; 5363 if (ip6i->ip6i_nxt == IPPROTO_RAW) 5364 ip6h = (ip6_t *)&ip6i[1]; 5365 else 5366 ip6h = (ip6_t *)ip6i; 5367 } 5368 /* Add payload length to checksum */ 5369 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 5370 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 5371 csum += htons(ip_len - ext_hdrs_len); 5372 5373 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 5374 csum = (csum & 0xFFFF) + (csum >> 16); 5375 *cksum_ptr = (uint16_t)csum; 5376 } 5377 5378 #ifdef _LITTLE_ENDIAN 5379 ip_len = htons(ip_len); 5380 #endif 5381 ip6h->ip6_plen = (uint16_t)ip_len; 5382 5383 /* We're done. 
Pass the packet to IP */ 5384 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 5385 ip_output_v6(icmp->icmp_connp, mp, q, IP_WPUT); 5386 return (0); 5387 } 5388 5389 static void 5390 icmp_wput_other(queue_t *q, mblk_t *mp) 5391 { 5392 uchar_t *rptr = mp->b_rptr; 5393 struct iocblk *iocp; 5394 #define tudr ((struct T_unitdata_req *)rptr) 5395 conn_t *connp = Q_TO_CONN(q); 5396 icmp_t *icmp = connp->conn_icmp; 5397 icmp_stack_t *is = icmp->icmp_is; 5398 cred_t *cr; 5399 5400 switch (mp->b_datap->db_type) { 5401 case M_PROTO: 5402 case M_PCPROTO: 5403 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5404 /* 5405 * If the message does not contain a PRIM_type, 5406 * throw it away. 5407 */ 5408 freemsg(mp); 5409 return; 5410 } 5411 switch (((union T_primitives *)rptr)->type) { 5412 case T_ADDR_REQ: 5413 icmp_addr_req(q, mp); 5414 return; 5415 case O_T_BIND_REQ: 5416 case T_BIND_REQ: 5417 icmp_tpi_bind(q, mp); 5418 return; 5419 case T_CONN_REQ: 5420 icmp_tpi_connect(q, mp); 5421 return; 5422 case T_CAPABILITY_REQ: 5423 icmp_capability_req(q, mp); 5424 return; 5425 case T_INFO_REQ: 5426 icmp_info_req(q, mp); 5427 return; 5428 case T_UNITDATA_REQ: 5429 /* 5430 * If a T_UNITDATA_REQ gets here, the address must 5431 * be bad. Valid T_UNITDATA_REQs are found above 5432 * and break to below this switch. 5433 */ 5434 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5435 return; 5436 case T_UNBIND_REQ: 5437 icmp_tpi_unbind(q, mp); 5438 return; 5439 5440 case T_SVR4_OPTMGMT_REQ: 5441 /* 5442 * All Solaris components should pass a db_credp 5443 * for this TPI message, hence we ASSERT. 5444 * But in case there is some other M_PROTO that looks 5445 * like a TPI message sent by some other kernel 5446 * component, we check and return an error. 
5447 */ 5448 cr = msg_getcred(mp, NULL); 5449 ASSERT(cr != NULL); 5450 if (cr == NULL) { 5451 icmp_err_ack(q, mp, TSYSERR, EINVAL); 5452 return; 5453 } 5454 5455 if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get, 5456 cr)) { 5457 /* Only IP can return anything meaningful */ 5458 (void) svr4_optcom_req(q, mp, cr, 5459 &icmp_opt_obj, B_TRUE); 5460 } 5461 return; 5462 5463 case T_OPTMGMT_REQ: 5464 /* 5465 * All Solaris components should pass a db_credp 5466 * for this TPI message, hence we ASSERT. 5467 * But in case there is some other M_PROTO that looks 5468 * like a TPI message sent by some other kernel 5469 * component, we check and return an error. 5470 */ 5471 cr = msg_getcred(mp, NULL); 5472 ASSERT(cr != NULL); 5473 if (cr == NULL) { 5474 icmp_err_ack(q, mp, TSYSERR, EINVAL); 5475 return; 5476 } 5477 /* Only IP can return anything meaningful */ 5478 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE); 5479 return; 5480 5481 case T_DISCON_REQ: 5482 icmp_tpi_disconnect(q, mp); 5483 return; 5484 5485 /* The following TPI message is not supported by icmp. */ 5486 case O_T_CONN_RES: 5487 case T_CONN_RES: 5488 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5489 return; 5490 5491 /* The following 3 TPI requests are illegal for icmp. */ 5492 case T_DATA_REQ: 5493 case T_EXDATA_REQ: 5494 case T_ORDREL_REQ: 5495 freemsg(mp); 5496 (void) putctl1(RD(q), M_ERROR, EPROTO); 5497 return; 5498 default: 5499 break; 5500 } 5501 break; 5502 case M_IOCTL: 5503 iocp = (struct iocblk *)mp->b_rptr; 5504 switch (iocp->ioc_cmd) { 5505 case TI_GETPEERNAME: 5506 if (icmp->icmp_state != TS_DATA_XFER) { 5507 /* 5508 * If a default destination address has not 5509 * been associated with the stream, then we 5510 * don't know the peer's name. 
5511 */ 5512 iocp->ioc_error = ENOTCONN; 5513 err_ret:; 5514 iocp->ioc_count = 0; 5515 mp->b_datap->db_type = M_IOCACK; 5516 qreply(q, mp); 5517 return; 5518 } 5519 /* FALLTHRU */ 5520 case TI_GETMYNAME: 5521 /* 5522 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5523 * need to copyin the user's strbuf structure. 5524 * Processing will continue in the M_IOCDATA case 5525 * below. 5526 */ 5527 mi_copyin(q, mp, NULL, 5528 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5529 return; 5530 case ND_SET: 5531 /* nd_getset performs the necessary error checking */ 5532 case ND_GET: 5533 if (nd_getset(q, is->is_nd, mp)) { 5534 qreply(q, mp); 5535 return; 5536 } 5537 break; 5538 case _SIOCSOCKFALLBACK: 5539 /* 5540 * socket is falling back to be a 5541 * streams socket. Nothing to do 5542 */ 5543 iocp->ioc_count = 0; 5544 iocp->ioc_rval = 0; 5545 qreply(q, mp); 5546 return; 5547 default: 5548 break; 5549 } 5550 break; 5551 case M_IOCDATA: 5552 icmp_wput_iocdata(q, mp); 5553 return; 5554 default: 5555 break; 5556 } 5557 ip_wput(q, mp); 5558 } 5559 5560 /* 5561 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5562 * messages. 5563 */ 5564 static void 5565 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5566 { 5567 mblk_t *mp1; 5568 STRUCT_HANDLE(strbuf, sb); 5569 icmp_t *icmp; 5570 uint_t addrlen; 5571 uint_t error; 5572 5573 /* Make sure it is one of ours. */ 5574 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5575 case TI_GETMYNAME: 5576 case TI_GETPEERNAME: 5577 break; 5578 default: 5579 icmp = Q_TO_ICMP(q); 5580 ip_output(icmp->icmp_connp, mp, q, IP_WPUT); 5581 return; 5582 } 5583 switch (mi_copy_state(q, mp, &mp1)) { 5584 case -1: 5585 return; 5586 case MI_COPY_CASE(MI_COPY_IN, 1): 5587 break; 5588 case MI_COPY_CASE(MI_COPY_OUT, 1): 5589 /* 5590 * The address has been copied out, so now 5591 * copyout the strbuf. 5592 */ 5593 mi_copyout(q, mp); 5594 return; 5595 case MI_COPY_CASE(MI_COPY_OUT, 2): 5596 /* 5597 * The address and strbuf have been copied out. 
5598 * We're done, so just acknowledge the original 5599 * M_IOCTL. 5600 */ 5601 mi_copy_done(q, mp, 0); 5602 return; 5603 default: 5604 /* 5605 * Something strange has happened, so acknowledge 5606 * the original M_IOCTL with an EPROTO error. 5607 */ 5608 mi_copy_done(q, mp, EPROTO); 5609 return; 5610 } 5611 /* 5612 * Now we have the strbuf structure for TI_GETMYNAME 5613 * and TI_GETPEERNAME. Next we copyout the requested 5614 * address and then we'll copyout the strbuf. 5615 */ 5616 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5617 (void *)mp1->b_rptr); 5618 icmp = Q_TO_ICMP(q); 5619 if (icmp->icmp_family == AF_INET) 5620 addrlen = sizeof (sin_t); 5621 else 5622 addrlen = sizeof (sin6_t); 5623 5624 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5625 mi_copy_done(q, mp, EINVAL); 5626 return; 5627 } 5628 5629 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5630 5631 if (mp1 == NULL) 5632 return; 5633 5634 rw_enter(&icmp->icmp_rwlock, RW_READER); 5635 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5636 case TI_GETMYNAME: 5637 error = rawip_do_getsockname(icmp, (void *)mp1->b_rptr, 5638 &addrlen); 5639 break; 5640 case TI_GETPEERNAME: 5641 error = rawip_do_getpeername(icmp, (void *)mp1->b_rptr, 5642 &addrlen); 5643 break; 5644 } 5645 rw_exit(&icmp->icmp_rwlock); 5646 5647 if (error != 0) { 5648 mi_copy_done(q, mp, error); 5649 } else { 5650 mp1->b_wptr += addrlen; 5651 STRUCT_FSET(sb, len, addrlen); 5652 5653 /* Copy out the address */ 5654 mi_copyout(q, mp); 5655 } 5656 } 5657 5658 static int 5659 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5660 void *thisdg_attrs) 5661 { 5662 struct T_unitdata_req *udreqp; 5663 int is_absreq_failure; 5664 cred_t *cr; 5665 5666 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5667 *errorp = 0; 5668 5669 /* 5670 * All Solaris components should pass a db_credp 5671 * for this TPI message, hence we ASSERT. 
5672 * But in case there is some other M_PROTO that looks 5673 * like a TPI message sent by some other kernel 5674 * component, we check and return an error. 5675 */ 5676 cr = msg_getcred(mp, NULL); 5677 ASSERT(cr != NULL); 5678 if (cr == NULL) 5679 return (-1); 5680 5681 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5682 udreqp->OPT_offset, cr, &icmp_opt_obj, 5683 thisdg_attrs, &is_absreq_failure); 5684 5685 if (*errorp != 0) { 5686 /* 5687 * Note: No special action needed in this 5688 * module for "is_absreq_failure" 5689 */ 5690 return (-1); /* failure */ 5691 } 5692 ASSERT(is_absreq_failure == 0); 5693 return (0); /* success */ 5694 } 5695 5696 void 5697 icmp_ddi_g_init(void) 5698 { 5699 icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5700 icmp_opt_obj.odb_opt_arr_cnt); 5701 5702 /* 5703 * We want to be informed each time a stack is created or 5704 * destroyed in the kernel, so we can maintain the 5705 * set of icmp_stack_t's. 5706 */ 5707 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); 5708 } 5709 5710 void 5711 icmp_ddi_g_destroy(void) 5712 { 5713 netstack_unregister(NS_ICMP); 5714 } 5715 5716 #define INET_NAME "ip" 5717 5718 /* 5719 * Initialize the ICMP stack instance. 
5720 */ 5721 static void * 5722 rawip_stack_init(netstackid_t stackid, netstack_t *ns) 5723 { 5724 icmp_stack_t *is; 5725 icmpparam_t *pa; 5726 int error = 0; 5727 major_t major; 5728 5729 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); 5730 is->is_netstack = ns; 5731 5732 pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); 5733 is->is_param_arr = pa; 5734 bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); 5735 5736 (void) icmp_param_register(&is->is_nd, 5737 is->is_param_arr, A_CNT(icmp_param_arr)); 5738 is->is_ksp = rawip_kstat_init(stackid); 5739 5740 major = mod_name_to_major(INET_NAME); 5741 error = ldi_ident_from_major(major, &is->is_ldi_ident); 5742 ASSERT(error == 0); 5743 return (is); 5744 } 5745 5746 /* 5747 * Free the ICMP stack instance. 5748 */ 5749 static void 5750 rawip_stack_fini(netstackid_t stackid, void *arg) 5751 { 5752 icmp_stack_t *is = (icmp_stack_t *)arg; 5753 5754 nd_free(&is->is_nd); 5755 kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); 5756 is->is_param_arr = NULL; 5757 5758 rawip_kstat_fini(stackid, is->is_ksp); 5759 is->is_ksp = NULL; 5760 ldi_ident_release(is->is_ldi_ident); 5761 kmem_free(is, sizeof (*is)); 5762 } 5763 5764 static void * 5765 rawip_kstat_init(netstackid_t stackid) { 5766 kstat_t *ksp; 5767 5768 rawip_named_kstat_t template = { 5769 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5770 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5771 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5772 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5773 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5774 }; 5775 5776 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", 5777 KSTAT_TYPE_NAMED, 5778 NUM_OF_FIELDS(rawip_named_kstat_t), 5779 0, stackid); 5780 if (ksp == NULL || ksp->ks_data == NULL) 5781 return (NULL); 5782 5783 bcopy(&template, ksp->ks_data, sizeof (template)); 5784 ksp->ks_update = rawip_kstat_update; 5785 ksp->ks_private = (void *)(uintptr_t)stackid; 5786 5787 kstat_install(ksp); 5788 return (ksp); 5789 } 
5790 5791 static void 5792 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) 5793 { 5794 if (ksp != NULL) { 5795 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 5796 kstat_delete_netstack(ksp, stackid); 5797 } 5798 } 5799 5800 static int 5801 rawip_kstat_update(kstat_t *ksp, int rw) 5802 { 5803 rawip_named_kstat_t *rawipkp; 5804 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; 5805 netstack_t *ns; 5806 icmp_stack_t *is; 5807 5808 if ((ksp == NULL) || (ksp->ks_data == NULL)) 5809 return (EIO); 5810 5811 if (rw == KSTAT_WRITE) 5812 return (EACCES); 5813 5814 rawipkp = (rawip_named_kstat_t *)ksp->ks_data; 5815 5816 ns = netstack_find_by_stackid(stackid); 5817 if (ns == NULL) 5818 return (-1); 5819 is = ns->netstack_icmp; 5820 if (is == NULL) { 5821 netstack_rele(ns); 5822 return (-1); 5823 } 5824 rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; 5825 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; 5826 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; 5827 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; 5828 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; 5829 netstack_rele(ns); 5830 return (0); 5831 } 5832 5833 /* ARGSUSED */ 5834 int 5835 rawip_accept(sock_lower_handle_t lproto_handle, 5836 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 5837 cred_t *cr) 5838 { 5839 return (EOPNOTSUPP); 5840 } 5841 5842 /* ARGSUSED */ 5843 int 5844 rawip_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 5845 socklen_t len, cred_t *cr) 5846 { 5847 conn_t *connp = (conn_t *)proto_handle; 5848 int error; 5849 5850 /* All Solaris components should pass a cred for this operation. 
*/ 5851 ASSERT(cr != NULL); 5852 5853 /* Binding to a NULL address really means unbind */ 5854 if (sa == NULL) 5855 error = rawip_do_unbind(connp); 5856 else 5857 error = rawip_do_bind(connp, sa, len); 5858 5859 if (error < 0) { 5860 if (error == -TOUTSTATE) 5861 error = EINVAL; 5862 else 5863 error = proto_tlitosyserr(-error); 5864 } 5865 return (error); 5866 } 5867 5868 static int 5869 rawip_implicit_bind(conn_t *connp) 5870 { 5871 sin6_t sin6addr; 5872 sin_t *sin; 5873 sin6_t *sin6; 5874 socklen_t len; 5875 int error; 5876 5877 if (connp->conn_icmp->icmp_family == AF_INET) { 5878 len = sizeof (struct sockaddr_in); 5879 sin = (sin_t *)&sin6addr; 5880 *sin = sin_null; 5881 sin->sin_family = AF_INET; 5882 sin->sin_addr.s_addr = INADDR_ANY; 5883 } else { 5884 ASSERT(connp->conn_icmp->icmp_family == AF_INET6); 5885 len = sizeof (sin6_t); 5886 sin6 = (sin6_t *)&sin6addr; 5887 *sin6 = sin6_null; 5888 sin6->sin6_family = AF_INET6; 5889 V6_SET_ZERO(sin6->sin6_addr); 5890 } 5891 5892 error = rawip_do_bind(connp, (struct sockaddr *)&sin6addr, len); 5893 5894 return ((error < 0) ? proto_tlitosyserr(-error) : error); 5895 } 5896 5897 static int 5898 rawip_unbind(conn_t *connp) 5899 { 5900 int error; 5901 5902 error = rawip_do_unbind(connp); 5903 if (error < 0) { 5904 error = proto_tlitosyserr(-error); 5905 } 5906 return (error); 5907 } 5908 5909 /* ARGSUSED */ 5910 int 5911 rawip_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 5912 { 5913 return (EOPNOTSUPP); 5914 } 5915 5916 /* ARGSUSED */ 5917 int 5918 rawip_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 5919 socklen_t len, sock_connid_t *id, cred_t *cr) 5920 { 5921 conn_t *connp = (conn_t *)proto_handle; 5922 icmp_t *icmp = connp->conn_icmp; 5923 int error; 5924 boolean_t did_bind = B_FALSE; 5925 5926 /* All Solaris components should pass a cred for this operation. 
*/ 5927 ASSERT(cr != NULL); 5928 5929 if (sa == NULL) { 5930 /* 5931 * Disconnect 5932 * Make sure we are connected 5933 */ 5934 if (icmp->icmp_state != TS_DATA_XFER) 5935 return (EINVAL); 5936 5937 error = icmp_disconnect(connp); 5938 return (error); 5939 } 5940 5941 error = proto_verify_ip_addr(icmp->icmp_family, sa, len); 5942 if (error != 0) 5943 return (error); 5944 5945 /* do an implicit bind if necessary */ 5946 if (icmp->icmp_state == TS_UNBND) { 5947 error = rawip_implicit_bind(connp); 5948 /* 5949 * We could be racing with an actual bind, in which case 5950 * we would see EPROTO. We cross our fingers and try 5951 * to connect. 5952 */ 5953 if (!(error == 0 || error == EPROTO)) 5954 return (error); 5955 did_bind = B_TRUE; 5956 } 5957 5958 /* 5959 * set SO_DGRAM_ERRIND 5960 */ 5961 icmp->icmp_dgram_errind = B_TRUE; 5962 5963 error = rawip_do_connect(connp, sa, len, cr); 5964 5965 if (error != 0 && did_bind) { 5966 int unbind_err; 5967 5968 unbind_err = rawip_unbind(connp); 5969 ASSERT(unbind_err == 0); 5970 } 5971 5972 if (error == 0) { 5973 *id = 0; 5974 (*connp->conn_upcalls->su_connected) 5975 (connp->conn_upper_handle, 0, NULL, -1); 5976 } else if (error < 0) { 5977 error = proto_tlitosyserr(-error); 5978 } 5979 return (error); 5980 } 5981 5982 /* ARGSUSED */ 5983 int 5984 rawip_fallback(sock_lower_handle_t proto_handle, queue_t *q, 5985 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb) 5986 { 5987 conn_t *connp = (conn_t *)proto_handle; 5988 icmp_t *icmp; 5989 struct T_capability_ack tca; 5990 struct sockaddr_in6 laddr, faddr; 5991 socklen_t laddrlen, faddrlen; 5992 short opts; 5993 struct stroptions *stropt; 5994 mblk_t *stropt_mp; 5995 int error; 5996 5997 icmp = connp->conn_icmp; 5998 5999 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 6000 6001 /* 6002 * setup the fallback stream that was allocated 6003 */ 6004 connp->conn_dev = (dev_t)RD(q)->q_ptr; 6005 connp->conn_minor_arena = WR(q)->q_ptr; 6006 6007 RD(q)->q_ptr 
= WR(q)->q_ptr = connp; 6008 6009 WR(q)->q_qinfo = &icmpwinit; 6010 6011 connp->conn_rq = RD(q); 6012 connp->conn_wq = WR(q); 6013 6014 /* Notify stream head about options before sending up data */ 6015 stropt_mp->b_datap->db_type = M_SETOPTS; 6016 stropt_mp->b_wptr += sizeof (*stropt); 6017 stropt = (struct stroptions *)stropt_mp->b_rptr; 6018 stropt->so_flags = SO_WROFF | SO_HIWAT; 6019 stropt->so_wroff = 6020 (ushort_t)(icmp->icmp_max_hdr_len + icmp->icmp_is->is_wroff_extra); 6021 stropt->so_hiwat = icmp->icmp_recv_hiwat; 6022 putnext(RD(q), stropt_mp); 6023 6024 /* 6025 * free helper stream 6026 */ 6027 ip_free_helper_stream(connp); 6028 6029 /* 6030 * Collect the information needed to sync with the sonode 6031 */ 6032 icmp_do_capability_ack(icmp, &tca, TC1_INFO); 6033 6034 laddrlen = faddrlen = sizeof (sin6_t); 6035 (void) rawip_getsockname((sock_lower_handle_t)connp, 6036 (struct sockaddr *)&laddr, &laddrlen, CRED()); 6037 error = rawip_getpeername((sock_lower_handle_t)connp, 6038 (struct sockaddr *)&faddr, &faddrlen, CRED()); 6039 if (error != 0) 6040 faddrlen = 0; 6041 opts = 0; 6042 if (icmp->icmp_dgram_errind) 6043 opts |= SO_DGRAM_ERRIND; 6044 if (icmp->icmp_dontroute) 6045 opts |= SO_DONTROUTE; 6046 6047 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 6048 (struct sockaddr *)&laddr, laddrlen, 6049 (struct sockaddr *)&faddr, faddrlen, opts); 6050 6051 /* 6052 * Attempts to send data up during fallback will result in it being 6053 * queued in udp_t. Now we push up any queued packets. 
6054 */ 6055 mutex_enter(&icmp->icmp_recv_lock); 6056 while (icmp->icmp_fallback_queue_head != NULL) { 6057 mblk_t *mp; 6058 6059 mp = icmp->icmp_fallback_queue_head; 6060 icmp->icmp_fallback_queue_head = mp->b_next; 6061 mp->b_next = NULL; 6062 mutex_exit(&icmp->icmp_recv_lock); 6063 putnext(RD(q), mp); 6064 mutex_enter(&icmp->icmp_recv_lock); 6065 } 6066 icmp->icmp_fallback_queue_tail = icmp->icmp_fallback_queue_head; 6067 6068 /* 6069 * No longer a streams less socket 6070 */ 6071 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6072 connp->conn_flags &= ~IPCL_NONSTR; 6073 rw_exit(&icmp->icmp_rwlock); 6074 6075 mutex_exit(&icmp->icmp_recv_lock); 6076 6077 ASSERT(icmp->icmp_fallback_queue_head == NULL && 6078 icmp->icmp_fallback_queue_tail == NULL); 6079 6080 ASSERT(connp->conn_ref >= 1); 6081 6082 return (0); 6083 } 6084 6085 /* ARGSUSED */ 6086 sock_lower_handle_t 6087 rawip_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 6088 uint_t *smodep, int *errorp, int flags, cred_t *credp) 6089 { 6090 conn_t *connp; 6091 6092 if (type != SOCK_RAW || (family != AF_INET && family != AF_INET6)) { 6093 *errorp = EPROTONOSUPPORT; 6094 return (NULL); 6095 } 6096 6097 connp = icmp_open(family, credp, errorp, flags); 6098 if (connp != NULL) { 6099 icmp_stack_t *is; 6100 6101 is = connp->conn_icmp->icmp_is; 6102 connp->conn_flags |= IPCL_NONSTR; 6103 6104 if (connp->conn_icmp->icmp_family == AF_INET6) { 6105 /* Build initial header template for transmit */ 6106 rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER); 6107 if ((*errorp = 6108 icmp_build_hdrs(connp->conn_icmp)) != 0) { 6109 rw_exit(&connp->conn_icmp->icmp_rwlock); 6110 ipcl_conn_destroy(connp); 6111 return (NULL); 6112 } 6113 rw_exit(&connp->conn_icmp->icmp_rwlock); 6114 } 6115 6116 connp->conn_icmp->icmp_recv_hiwat = is->is_recv_hiwat; 6117 connp->conn_icmp->icmp_xmit_hiwat = is->is_xmit_hiwat; 6118 6119 if ((*errorp = ip_create_helper_stream(connp, 6120 is->is_ldi_ident)) != 0) { 6121 
cmn_err(CE_CONT, "create of IP helper stream failed\n"); 6122 (void) rawip_do_close(connp); 6123 return (NULL); 6124 } 6125 6126 mutex_enter(&connp->conn_lock); 6127 connp->conn_state_flags &= ~CONN_INCIPIENT; 6128 mutex_exit(&connp->conn_lock); 6129 *sock_downcalls = &sock_rawip_downcalls; 6130 *smodep = SM_ATOMIC; 6131 } else { 6132 ASSERT(*errorp != 0); 6133 } 6134 6135 return ((sock_lower_handle_t)connp); 6136 } 6137 6138 /* ARGSUSED */ 6139 void 6140 rawip_activate(sock_lower_handle_t proto_handle, 6141 sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, int flags, 6142 cred_t *cr) 6143 { 6144 conn_t *connp = (conn_t *)proto_handle; 6145 icmp_stack_t *is = connp->conn_icmp->icmp_is; 6146 struct sock_proto_props sopp; 6147 6148 /* All Solaris components should pass a cred for this operation. */ 6149 ASSERT(cr != NULL); 6150 6151 connp->conn_upcalls = sock_upcalls; 6152 connp->conn_upper_handle = sock_handle; 6153 6154 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 6155 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 6156 sopp.sopp_wroff = connp->conn_icmp->icmp_max_hdr_len + 6157 is->is_wroff_extra; 6158 sopp.sopp_rxhiwat = is->is_recv_hiwat; 6159 sopp.sopp_rxlowat = icmp_mod_info.mi_lowat; 6160 sopp.sopp_maxblk = INFPSZ; 6161 sopp.sopp_maxpsz = IP_MAXPACKET; 6162 sopp.sopp_minpsz = (icmp_mod_info.mi_minpsz == 1) ? 
0 : 6163 icmp_mod_info.mi_minpsz; 6164 6165 (*connp->conn_upcalls->su_set_proto_props) 6166 (connp->conn_upper_handle, &sopp); 6167 } 6168 6169 static int 6170 rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp) 6171 { 6172 sin_t *sin = (sin_t *)sa; 6173 sin6_t *sin6 = (sin6_t *)sa; 6174 6175 ASSERT(icmp != NULL); 6176 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 6177 6178 switch (icmp->icmp_family) { 6179 case AF_INET: 6180 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 6181 if (*salenp < sizeof (sin_t)) 6182 return (EINVAL); 6183 6184 *salenp = sizeof (sin_t); 6185 *sin = sin_null; 6186 sin->sin_family = AF_INET; 6187 if (icmp->icmp_state == TS_UNBND) { 6188 break; 6189 } 6190 6191 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 6192 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 6193 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_v6src); 6194 } else { 6195 /* 6196 * INADDR_ANY 6197 * icmp_v6src is not set, we might be bound to 6198 * broadcast/multicast. Use icmp_bound_v6src as 6199 * local address instead (that could 6200 * also still be INADDR_ANY) 6201 */ 6202 sin->sin_addr.s_addr = 6203 V4_PART_OF_V6(icmp->icmp_bound_v6src); 6204 } 6205 break; 6206 case AF_INET6: 6207 6208 if (*salenp < sizeof (sin6_t)) 6209 return (EINVAL); 6210 6211 *salenp = sizeof (sin6_t); 6212 *sin6 = sin6_null; 6213 sin6->sin6_family = AF_INET6; 6214 if (icmp->icmp_state == TS_UNBND) { 6215 break; 6216 } 6217 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 6218 sin6->sin6_addr = icmp->icmp_v6src; 6219 } else { 6220 /* 6221 * UNSPECIFIED 6222 * icmp_v6src is not set, we might be bound to 6223 * broadcast/multicast. 
Use icmp_bound_v6src as 6224 * local address instead (that could 6225 * also still be UNSPECIFIED) 6226 */ 6227 6228 sin6->sin6_addr = icmp->icmp_bound_v6src; 6229 } 6230 break; 6231 } 6232 return (0); 6233 } 6234 6235 static int 6236 rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp) 6237 { 6238 sin_t *sin = (sin_t *)sa; 6239 sin6_t *sin6 = (sin6_t *)sa; 6240 6241 ASSERT(icmp != NULL); 6242 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 6243 6244 if (icmp->icmp_state != TS_DATA_XFER) 6245 return (ENOTCONN); 6246 6247 sa->sa_family = icmp->icmp_family; 6248 switch (icmp->icmp_family) { 6249 case AF_INET: 6250 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 6251 6252 if (*salenp < sizeof (sin_t)) 6253 return (EINVAL); 6254 6255 *salenp = sizeof (sin_t); 6256 *sin = sin_null; 6257 sin->sin_family = AF_INET; 6258 sin->sin_addr.s_addr = 6259 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr); 6260 break; 6261 case AF_INET6: 6262 if (*salenp < sizeof (sin6_t)) 6263 return (EINVAL); 6264 6265 *salenp = sizeof (sin6_t); 6266 *sin6 = sin6_null; 6267 *sin6 = icmp->icmp_v6dst; 6268 break; 6269 } 6270 return (0); 6271 } 6272 6273 /* ARGSUSED */ 6274 int 6275 rawip_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6276 socklen_t *salenp, cred_t *cr) 6277 { 6278 conn_t *connp = (conn_t *)proto_handle; 6279 icmp_t *icmp = connp->conn_icmp; 6280 int error; 6281 6282 /* All Solaris components should pass a cred for this operation. 
*/ 6283 ASSERT(cr != NULL); 6284 6285 ASSERT(icmp != NULL); 6286 6287 rw_enter(&icmp->icmp_rwlock, RW_READER); 6288 6289 error = rawip_do_getpeername(icmp, sa, salenp); 6290 6291 rw_exit(&icmp->icmp_rwlock); 6292 6293 return (error); 6294 } 6295 6296 /* ARGSUSED */ 6297 int 6298 rawip_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6299 socklen_t *salenp, cred_t *cr) 6300 { 6301 conn_t *connp = (conn_t *)proto_handle; 6302 icmp_t *icmp = connp->conn_icmp; 6303 int error; 6304 6305 /* All Solaris components should pass a cred for this operation. */ 6306 ASSERT(cr != NULL); 6307 6308 ASSERT(icmp != NULL); 6309 rw_enter(&icmp->icmp_rwlock, RW_READER); 6310 6311 error = rawip_do_getsockname(icmp, sa, salenp); 6312 6313 rw_exit(&icmp->icmp_rwlock); 6314 6315 return (error); 6316 } 6317 6318 int 6319 rawip_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6320 const void *optvalp, socklen_t optlen, cred_t *cr) 6321 { 6322 conn_t *connp = (conn_t *)proto_handle; 6323 icmp_t *icmp = connp->conn_icmp; 6324 int error; 6325 6326 /* All Solaris components should pass a cred for this operation. 
*/ 6327 ASSERT(cr != NULL); 6328 6329 error = proto_opt_check(level, option_name, optlen, NULL, 6330 icmp_opt_obj.odb_opt_des_arr, 6331 icmp_opt_obj.odb_opt_arr_cnt, 6332 icmp_opt_obj.odb_topmost_tpiprovider, 6333 B_TRUE, B_FALSE, cr); 6334 6335 if (error != 0) { 6336 /* 6337 * option not recognized 6338 */ 6339 if (error < 0) { 6340 error = proto_tlitosyserr(-error); 6341 } 6342 return (error); 6343 } 6344 6345 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6346 error = icmp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, 6347 option_name, optlen, (uchar_t *)optvalp, (uint_t *)&optlen, 6348 (uchar_t *)optvalp, NULL, cr); 6349 rw_exit(&icmp->icmp_rwlock); 6350 6351 if (error < 0) { 6352 /* 6353 * Pass on to ip 6354 */ 6355 error = ip_set_options(connp, level, option_name, optvalp, 6356 optlen, cr); 6357 } 6358 6359 ASSERT(error >= 0); 6360 6361 return (error); 6362 } 6363 6364 int 6365 rawip_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6366 void *optvalp, socklen_t *optlen, cred_t *cr) 6367 { 6368 int error; 6369 conn_t *connp = (conn_t *)proto_handle; 6370 icmp_t *icmp = connp->conn_icmp; 6371 t_uscalar_t max_optbuf_len; 6372 void *optvalp_buf; 6373 int len; 6374 6375 /* All Solaris components should pass a cred for this operation. 
*/ 6376 ASSERT(cr != NULL); 6377 6378 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6379 icmp_opt_obj.odb_opt_des_arr, 6380 icmp_opt_obj.odb_opt_arr_cnt, 6381 icmp_opt_obj.odb_topmost_tpiprovider, 6382 B_FALSE, B_TRUE, cr); 6383 6384 if (error != 0) { 6385 if (error < 0) { 6386 error = proto_tlitosyserr(-error); 6387 } 6388 return (error); 6389 } 6390 6391 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6392 rw_enter(&icmp->icmp_rwlock, RW_READER); 6393 len = icmp_opt_get(connp, level, option_name, optvalp_buf); 6394 rw_exit(&icmp->icmp_rwlock); 6395 6396 if (len < 0) { 6397 /* 6398 * Pass on to IP 6399 */ 6400 kmem_free(optvalp_buf, max_optbuf_len); 6401 return (ip_get_options(connp, level, option_name, optvalp, 6402 optlen, cr)); 6403 } else { 6404 /* 6405 * update optlen and copy option value 6406 */ 6407 t_uscalar_t size = MIN(len, *optlen); 6408 bcopy(optvalp_buf, optvalp, size); 6409 bcopy(&size, optlen, sizeof (size)); 6410 6411 kmem_free(optvalp_buf, max_optbuf_len); 6412 return (0); 6413 } 6414 } 6415 6416 /* ARGSUSED */ 6417 int 6418 rawip_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 6419 { 6420 conn_t *connp = (conn_t *)proto_handle; 6421 6422 /* All Solaris components should pass a cred for this operation. */ 6423 ASSERT(cr != NULL); 6424 6425 (void) rawip_do_close(connp); 6426 return (0); 6427 } 6428 6429 /* ARGSUSED */ 6430 int 6431 rawip_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6432 { 6433 conn_t *connp = (conn_t *)proto_handle; 6434 6435 /* All Solaris components should pass a cred for this operation. 
*/ 6436 ASSERT(cr != NULL); 6437 6438 /* shut down the send side */ 6439 if (how != SHUT_RD) 6440 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6441 SOCK_OPCTL_SHUT_SEND, 0); 6442 /* shut down the recv side */ 6443 if (how != SHUT_WR) 6444 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6445 SOCK_OPCTL_SHUT_RECV, 0); 6446 return (0); 6447 } 6448 6449 void 6450 rawip_clr_flowctrl(sock_lower_handle_t proto_handle) 6451 { 6452 conn_t *connp = (conn_t *)proto_handle; 6453 icmp_t *icmp = connp->conn_icmp; 6454 6455 mutex_enter(&icmp->icmp_recv_lock); 6456 connp->conn_flow_cntrld = B_FALSE; 6457 mutex_exit(&icmp->icmp_recv_lock); 6458 } 6459 6460 int 6461 rawip_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6462 int mode, int32_t *rvalp, cred_t *cr) 6463 { 6464 conn_t *connp = (conn_t *)proto_handle; 6465 int error; 6466 6467 /* All Solaris components should pass a cred for this operation. */ 6468 ASSERT(cr != NULL); 6469 6470 switch (cmd) { 6471 case ND_SET: 6472 case ND_GET: 6473 case _SIOCSOCKFALLBACK: 6474 case TI_GETPEERNAME: 6475 case TI_GETMYNAME: 6476 #ifdef DEBUG 6477 cmn_err(CE_CONT, "icmp_ioctl cmd 0x%x on non streams" 6478 " socket", cmd); 6479 #endif 6480 error = EINVAL; 6481 break; 6482 default: 6483 /* 6484 * Pass on to IP using helper stream 6485 */ 6486 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6487 cmd, arg, mode, cr, rvalp); 6488 break; 6489 } 6490 return (error); 6491 } 6492 6493 /* ARGSUSED */ 6494 int 6495 rawip_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 6496 cred_t *cr) 6497 { 6498 conn_t *connp = (conn_t *)proto_handle; 6499 icmp_t *icmp = connp->conn_icmp; 6500 icmp_stack_t *is = icmp->icmp_is; 6501 int error = 0; 6502 boolean_t bypass_dgram_errind = B_FALSE; 6503 6504 ASSERT(DB_TYPE(mp) == M_DATA); 6505 6506 /* All Solaris components should pass a cred for this operation. 
*/ 6507 ASSERT(cr != NULL); 6508 6509 /* If labeled then sockfs should have already set db_credp */ 6510 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 6511 6512 /* do an implicit bind if necessary */ 6513 if (icmp->icmp_state == TS_UNBND) { 6514 error = rawip_implicit_bind(connp); 6515 /* 6516 * We could be racing with an actual bind, in which case 6517 * we would see EPROTO. We cross our fingers and try 6518 * to connect. 6519 */ 6520 if (!(error == 0 || error == EPROTO)) { 6521 freemsg(mp); 6522 return (error); 6523 } 6524 } 6525 6526 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6527 6528 if (msg->msg_name != NULL && icmp->icmp_state == TS_DATA_XFER) { 6529 error = EISCONN; 6530 goto done_lock; 6531 } 6532 6533 switch (icmp->icmp_family) { 6534 case AF_INET6: { 6535 sin6_t *sin6; 6536 ip6_pkt_t ipp_s; /* For ancillary data options */ 6537 ip6_pkt_t *ipp = &ipp_s; 6538 6539 sin6 = (sin6_t *)msg->msg_name; 6540 if (sin6 != NULL) { 6541 error = proto_verify_ip_addr(icmp->icmp_family, 6542 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6543 if (error != 0) { 6544 bypass_dgram_errind = B_TRUE; 6545 goto done_lock; 6546 } 6547 if (icmp->icmp_delayed_error != 0) { 6548 sin6_t *sin1 = (sin6_t *)msg->msg_name; 6549 sin6_t *sin2 = (sin6_t *) 6550 &icmp->icmp_delayed_addr; 6551 6552 error = icmp->icmp_delayed_error; 6553 icmp->icmp_delayed_error = 0; 6554 6555 /* Compare IP address and port */ 6556 6557 if (sin1->sin6_port == sin2->sin6_port && 6558 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 6559 &sin2->sin6_addr)) { 6560 goto done_lock; 6561 } 6562 } 6563 } else { 6564 /* 6565 * Use connected address 6566 */ 6567 if (icmp->icmp_state != TS_DATA_XFER) { 6568 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6569 error = EDESTADDRREQ; 6570 bypass_dgram_errind = B_TRUE; 6571 goto done_lock; 6572 } 6573 sin6 = &icmp->icmp_v6dst; 6574 } 6575 6576 /* No support for mapped addresses on raw sockets */ 6577 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6578 
BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6579 error = EADDRNOTAVAIL; 6580 goto done_lock; 6581 } 6582 6583 ipp->ipp_fields = 0; 6584 ipp->ipp_sticky_ignored = 0; 6585 6586 /* 6587 * If options passed in, feed it for verification and handling 6588 */ 6589 if (msg->msg_controllen != 0) { 6590 error = process_auxiliary_options(connp, 6591 msg->msg_control, msg->msg_controllen, 6592 ipp, &icmp_opt_obj, icmp_opt_set, cr); 6593 if (error != 0) { 6594 goto done_lock; 6595 } 6596 } 6597 6598 rw_exit(&icmp->icmp_rwlock); 6599 6600 /* 6601 * Destination is a native IPv6 address. 6602 * Send out an IPv6 format packet. 6603 */ 6604 6605 error = raw_ip_send_data_v6(connp->conn_wq, connp, mp, sin6, 6606 ipp); 6607 } 6608 break; 6609 case AF_INET: { 6610 sin_t *sin; 6611 ip4_pkt_t pktinfo; 6612 ip4_pkt_t *pktinfop = &pktinfo; 6613 ipaddr_t v4dst; 6614 6615 sin = (sin_t *)msg->msg_name; 6616 if (sin != NULL) { 6617 error = proto_verify_ip_addr(icmp->icmp_family, 6618 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6619 if (error != 0) { 6620 bypass_dgram_errind = B_TRUE; 6621 goto done_lock; 6622 } 6623 v4dst = sin->sin_addr.s_addr; 6624 if (icmp->icmp_delayed_error != 0) { 6625 sin_t *sin1 = (sin_t *)msg->msg_name; 6626 sin_t *sin2 = (sin_t *)&icmp->icmp_delayed_addr; 6627 6628 error = icmp->icmp_delayed_error; 6629 icmp->icmp_delayed_error = 0; 6630 6631 /* Compare IP address and port */ 6632 if (sin1->sin_port == sin2->sin_port && 6633 sin1->sin_addr.s_addr == 6634 sin2->sin_addr.s_addr) { 6635 goto done_lock; 6636 } 6637 6638 } 6639 } else { 6640 /* 6641 * Use connected address 6642 */ 6643 if (icmp->icmp_state != TS_DATA_XFER) { 6644 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6645 error = EDESTADDRREQ; 6646 bypass_dgram_errind = B_TRUE; 6647 goto done_lock; 6648 } 6649 v4dst = V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr); 6650 } 6651 6652 6653 pktinfop->ip4_ill_index = 0; 6654 pktinfop->ip4_addr = INADDR_ANY; 6655 6656 /* 6657 * If options passed in, feed it for 
verification and handling 6658 */ 6659 if (msg->msg_controllen != 0) { 6660 error = process_auxiliary_options(connp, 6661 msg->msg_control, msg->msg_controllen, 6662 pktinfop, &icmp_opt_obj, icmp_opt_set, cr); 6663 if (error != 0) { 6664 goto done_lock; 6665 } 6666 } 6667 rw_exit(&icmp->icmp_rwlock); 6668 6669 error = raw_ip_send_data_v4(connp->conn_wq, connp, mp, 6670 v4dst, pktinfop); 6671 break; 6672 } 6673 6674 default: 6675 ASSERT(0); 6676 } 6677 6678 goto done; 6679 6680 done_lock: 6681 rw_exit(&icmp->icmp_rwlock); 6682 if (error != 0) { 6683 ASSERT(mp != NULL); 6684 freemsg(mp); 6685 } 6686 done: 6687 if (bypass_dgram_errind) 6688 return (error); 6689 return (icmp->icmp_dgram_errind ? error : 0); 6690 } 6691 6692 sock_downcalls_t sock_rawip_downcalls = { 6693 rawip_activate, 6694 rawip_accept, 6695 rawip_bind, 6696 rawip_listen, 6697 rawip_connect, 6698 rawip_getpeername, 6699 rawip_getsockname, 6700 rawip_getsockopt, 6701 rawip_setsockopt, 6702 rawip_send, 6703 NULL, 6704 NULL, 6705 NULL, 6706 rawip_shutdown, 6707 rawip_clr_flowctrl, 6708 rawip_ioctl, 6709 rawip_close 6710 }; 6711