1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/stropts.h> 30 #include <sys/strlog.h> 31 #include <sys/strsun.h> 32 #define _SUN_TPI_VERSION 2 33 #include <sys/tihdr.h> 34 #include <sys/timod.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/kmem.h> 41 #include <sys/policy.h> 42 #include <sys/priv.h> 43 #include <sys/zone.h> 44 #include <sys/time.h> 45 46 #include <sys/sockio.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/isa_defs.h> 50 #include <sys/suntpi.h> 51 #include <sys/xti_inet.h> 52 #include <sys/netstack.h> 53 54 #include <net/route.h> 55 #include <net/if.h> 56 57 #include <netinet/in.h> 58 #include <netinet/ip6.h> 59 #include <netinet/icmp6.h> 60 #include <inet/common.h> 61 #include <inet/ip.h> 62 #include <inet/ip6.h> 63 #include <inet/proto_set.h> 64 #include <inet/nd.h> 65 #include <inet/optcom.h> 66 #include <inet/snmpcom.h> 67 #include <inet/kstatcom.h> 68 #include <inet/rawip_impl.h> 69 70 #include <netinet/ip_mroute.h> 71 #include <inet/tcp.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 #include <inet/ipclassifier.h> 75 76 #include <sys/tsol/label.h> 77 #include <sys/tsol/tnet.h> 78 79 #include <inet/ip_ire.h> 80 #include <inet/ip_if.h> 81 82 #include <inet/ip_impl.h> 83 #include <sys/disp.h> 84 85 /* 86 * Synchronization notes: 87 * 88 * RAWIP is MT and uses the usual kernel synchronization primitives. There is 89 * one lock, which is icmp_rwlock. We also use conn_lock when updating things 90 * which affect the IP classifier lookup. 91 * The lock order is icmp_rwlock -> conn_lock. 92 * 93 * The icmp_rwlock: 94 * This protects most of the other fields in the icmp_t. The exact list of 95 * fields which are protected by each of the above locks is documented in 96 * the icmp_t structure definition. 97 * 98 * Plumbing notes: 99 * ICMP is always a device driver. 
For compatibility with mibopen() code 100 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough 101 * dummy module. 102 */ 103 104 static void icmp_addr_req(queue_t *q, mblk_t *mp); 105 static void icmp_tpi_bind(queue_t *q, mblk_t *mp); 106 static int icmp_bind_proto(conn_t *connp); 107 static int icmp_build_hdrs(icmp_t *icmp); 108 static void icmp_capability_req(queue_t *q, mblk_t *mp); 109 static int icmp_close(queue_t *q, int flags); 110 static void icmp_tpi_connect(queue_t *q, mblk_t *mp); 111 static void icmp_tpi_disconnect(queue_t *q, mblk_t *mp); 112 static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 113 int sys_error); 114 static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 115 t_scalar_t t_error, int sys_error); 116 static void icmp_icmp_error(conn_t *connp, mblk_t *mp); 117 static void icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp); 118 static void icmp_info_req(queue_t *q, mblk_t *mp); 119 static void icmp_input(void *, mblk_t *, void *); 120 static conn_t *icmp_open(int family, cred_t *credp, int *err, int flags); 121 static int icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 122 cred_t *credp); 123 static int icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 124 cred_t *credp); 125 static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, 126 int *errorp, void *thisdg_attrs); 127 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 128 int icmp_opt_set(conn_t *connp, uint_t optset_context, 129 int level, int name, uint_t inlen, 130 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 131 void *thisdg_attrs, cred_t *cr); 132 int icmp_opt_get(conn_t *connp, int level, int name, 133 uchar_t *ptr); 134 static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 135 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt); 136 static int icmp_param_set(queue_t *q, mblk_t *mp, char *value, 137 caddr_t cp, cred_t 
*cr); 138 static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 139 uchar_t *ptr, int len); 140 static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 141 static void icmp_tpi_unbind(queue_t *q, mblk_t *mp); 142 static int icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst); 143 static void icmp_wput(queue_t *q, mblk_t *mp); 144 static void icmp_wput_fallback(queue_t *q, mblk_t *mp); 145 static int raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp, 146 sin6_t *sin6, ip6_pkt_t *ipp); 147 static int raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp, 148 ipaddr_t v4dst, ip4_pkt_t *pktinfop); 149 static void icmp_wput_other(queue_t *q, mblk_t *mp); 150 static void icmp_wput_iocdata(queue_t *q, mblk_t *mp); 151 static void icmp_wput_restricted(queue_t *q, mblk_t *mp); 152 static void icmp_ulp_recv(conn_t *, mblk_t *); 153 154 static void *rawip_stack_init(netstackid_t stackid, netstack_t *ns); 155 static void rawip_stack_fini(netstackid_t stackid, void *arg); 156 157 static void *rawip_kstat_init(netstackid_t stackid); 158 static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp); 159 static int rawip_kstat_update(kstat_t *kp, int rw); 160 static void rawip_stack_shutdown(netstackid_t stackid, void *arg); 161 static int rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa, 162 uint_t *salenp); 163 static int rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa, 164 uint_t *salenp); 165 166 int rawip_getsockname(sock_lower_handle_t, struct sockaddr *, 167 socklen_t *, cred_t *); 168 int rawip_getpeername(sock_lower_handle_t, struct sockaddr *, 169 socklen_t *, cred_t *); 170 171 static struct module_info icmp_mod_info = { 172 5707, "icmp", 1, INFPSZ, 512, 128 173 }; 174 175 /* 176 * Entry points for ICMP as a device. 177 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices. 
178 */ 179 static struct qinit icmprinitv4 = { 180 NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info 181 }; 182 183 static struct qinit icmprinitv6 = { 184 NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info 185 }; 186 187 static struct qinit icmpwinit = { 188 (pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &icmp_mod_info 189 }; 190 191 /* ICMP entry point during fallback */ 192 static struct qinit icmp_fallback_sock_winit = { 193 (pfi_t)icmp_wput_fallback, NULL, NULL, NULL, NULL, &icmp_mod_info 194 }; 195 196 /* For AF_INET aka /dev/icmp */ 197 struct streamtab icmpinfov4 = { 198 &icmprinitv4, &icmpwinit 199 }; 200 201 /* For AF_INET6 aka /dev/icmp6 */ 202 struct streamtab icmpinfov6 = { 203 &icmprinitv6, &icmpwinit 204 }; 205 206 static sin_t sin_null; /* Zero address for quick clears */ 207 static sin6_t sin6_null; /* Zero address for quick clears */ 208 209 /* Default structure copied into T_INFO_ACK messages */ 210 static struct T_info_ack icmp_g_t_info_ack = { 211 T_INFO_ACK, 212 IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */ 213 T_INVALID, /* ETSDU_size. icmp does not support expedited data. */ 214 T_INVALID, /* CDATA_size. icmp does not support connect data. */ 215 T_INVALID, /* DDATA_size. icmp does not support disconnect data. */ 216 0, /* ADDR_size - filled in later. */ 217 0, /* OPT_size - not initialized here */ 218 IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */ 219 T_CLTS, /* SERV_type. icmp supports connection-less. */ 220 TS_UNBND, /* CURRENT_state. This is set from icmp_state. */ 221 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 222 }; 223 224 /* 225 * Table of ND variables supported by icmp. These are loaded into is_nd 226 * when the stack instance is created. 227 * All of these are alterable, within the min/max values given, at run time. 
228 */ 229 static icmpparam_t icmp_param_arr[] = { 230 /* min max value name */ 231 { 0, 128, 32, "icmp_wroff_extra" }, 232 { 1, 255, 255, "icmp_ipv4_ttl" }, 233 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"}, 234 { 0, 1, 1, "icmp_bsd_compat" }, 235 { 4096, 65536, 8192, "icmp_xmit_hiwat"}, 236 { 0, 65536, 1024, "icmp_xmit_lowat"}, 237 { 4096, 65536, 8192, "icmp_recv_hiwat"}, 238 { 65536, 1024*1024*1024, 256*1024, "icmp_max_buf"}, 239 }; 240 #define is_wroff_extra is_param_arr[0].icmp_param_value 241 #define is_ipv4_ttl is_param_arr[1].icmp_param_value 242 #define is_ipv6_hoplimit is_param_arr[2].icmp_param_value 243 #define is_bsd_compat is_param_arr[3].icmp_param_value 244 #define is_xmit_hiwat is_param_arr[4].icmp_param_value 245 #define is_xmit_lowat is_param_arr[5].icmp_param_value 246 #define is_recv_hiwat is_param_arr[6].icmp_param_value 247 #define is_max_buf is_param_arr[7].icmp_param_value 248 249 static int rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len); 250 static int rawip_do_connect(conn_t *connp, const struct sockaddr *sa, 251 socklen_t len, cred_t *cr); 252 static void rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error); 253 254 /* 255 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 256 * passed to icmp_wput. 257 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP 258 * protocol type placed in the message following the address. A T_BIND_ACK 259 * message is returned by ip_bind_v4/v6. 260 */ 261 static void 262 icmp_tpi_bind(queue_t *q, mblk_t *mp) 263 { 264 int error; 265 struct sockaddr *sa; 266 struct T_bind_req *tbr; 267 socklen_t len; 268 sin_t *sin; 269 sin6_t *sin6; 270 icmp_t *icmp; 271 conn_t *connp = Q_TO_CONN(q); 272 mblk_t *mp1; 273 cred_t *cr; 274 275 /* 276 * All Solaris components should pass a db_credp 277 * for this TPI message, hence we ASSERT. 
278 * But in case there is some other M_PROTO that looks 279 * like a TPI message sent by some other kernel 280 * component, we check and return an error. 281 */ 282 cr = msg_getcred(mp, NULL); 283 ASSERT(cr != NULL); 284 if (cr == NULL) { 285 icmp_err_ack(q, mp, TSYSERR, EINVAL); 286 return; 287 } 288 289 icmp = connp->conn_icmp; 290 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 291 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 292 "icmp_bind: bad req, len %u", 293 (uint_t)(mp->b_wptr - mp->b_rptr)); 294 icmp_err_ack(q, mp, TPROTO, 0); 295 return; 296 } 297 298 if (icmp->icmp_state != TS_UNBND) { 299 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 300 "icmp_bind: bad state, %d", icmp->icmp_state); 301 icmp_err_ack(q, mp, TOUTSTATE, 0); 302 return; 303 } 304 305 /* 306 * Reallocate the message to make sure we have enough room for an 307 * address and the protocol type. 308 */ 309 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 310 if (!mp1) { 311 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 312 return; 313 } 314 mp = mp1; 315 316 /* Reset the message type in preparation for shipping it back. 
*/ 317 DB_TYPE(mp) = M_PCPROTO; 318 tbr = (struct T_bind_req *)mp->b_rptr; 319 len = tbr->ADDR_length; 320 switch (len) { 321 case 0: /* request for a generic port */ 322 tbr->ADDR_offset = sizeof (struct T_bind_req); 323 if (icmp->icmp_family == AF_INET) { 324 tbr->ADDR_length = sizeof (sin_t); 325 sin = (sin_t *)&tbr[1]; 326 *sin = sin_null; 327 sin->sin_family = AF_INET; 328 mp->b_wptr = (uchar_t *)&sin[1]; 329 sa = (struct sockaddr *)sin; 330 len = sizeof (sin_t); 331 } else { 332 ASSERT(icmp->icmp_family == AF_INET6); 333 tbr->ADDR_length = sizeof (sin6_t); 334 sin6 = (sin6_t *)&tbr[1]; 335 *sin6 = sin6_null; 336 sin6->sin6_family = AF_INET6; 337 mp->b_wptr = (uchar_t *)&sin6[1]; 338 sa = (struct sockaddr *)sin6; 339 len = sizeof (sin6_t); 340 } 341 break; 342 343 case sizeof (sin_t): /* Complete IPv4 address */ 344 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 345 sizeof (sin_t)); 346 break; 347 348 case sizeof (sin6_t): /* Complete IPv6 address */ 349 sa = (struct sockaddr *)mi_offset_param(mp, 350 tbr->ADDR_offset, sizeof (sin6_t)); 351 break; 352 353 default: 354 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 355 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 356 icmp_err_ack(q, mp, TBADADDR, 0); 357 return; 358 } 359 360 error = rawip_do_bind(connp, sa, len); 361 done: 362 ASSERT(mp->b_cont == NULL); 363 if (error != 0) { 364 if (error > 0) { 365 icmp_err_ack(q, mp, TSYSERR, error); 366 } else { 367 icmp_err_ack(q, mp, -error, 0); 368 } 369 } else { 370 tbr->PRIM_type = T_BIND_ACK; 371 qreply(q, mp); 372 } 373 } 374 375 static int 376 rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len) 377 { 378 sin_t *sin; 379 sin6_t *sin6; 380 icmp_t *icmp; 381 int error = 0; 382 mblk_t *ire_mp; 383 384 385 icmp = connp->conn_icmp; 386 387 if (sa == NULL || !OK_32PTR((char *)sa)) { 388 return (EINVAL); 389 } 390 391 /* 392 * The state must be TS_UNBND. 
TPI mandates that users must send 393 * TPI primitives only 1 at a time and wait for the response before 394 * sending the next primitive. 395 */ 396 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 397 if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) { 398 error = -TOUTSTATE; 399 goto done; 400 } 401 402 ASSERT(len != 0); 403 switch (len) { 404 case sizeof (sin_t): /* Complete IPv4 address */ 405 sin = (sin_t *)sa; 406 if (sin->sin_family != AF_INET || 407 icmp->icmp_family != AF_INET) { 408 /* TSYSERR, EAFNOSUPPORT */ 409 error = EAFNOSUPPORT; 410 goto done; 411 } 412 break; 413 case sizeof (sin6_t): /* Complete IPv6 address */ 414 sin6 = (sin6_t *)sa; 415 if (sin6->sin6_family != AF_INET6 || 416 icmp->icmp_family != AF_INET6) { 417 /* TSYSERR, EAFNOSUPPORT */ 418 error = EAFNOSUPPORT; 419 goto done; 420 } 421 /* No support for mapped addresses on raw sockets */ 422 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 423 /* TSYSERR, EADDRNOTAVAIL */ 424 error = EADDRNOTAVAIL; 425 goto done; 426 } 427 break; 428 429 default: 430 /* TBADADDR */ 431 error = EADDRNOTAVAIL; 432 goto done; 433 } 434 435 icmp->icmp_pending_op = T_BIND_REQ; 436 icmp->icmp_state = TS_IDLE; 437 438 /* 439 * Copy the source address into our icmp structure. This address 440 * may still be zero; if so, ip will fill in the correct address 441 * each time an outbound packet is passed to it. 442 * If we are binding to a broadcast or multicast address then 443 * rawip_post_ip_bind_connect will clear the source address. 
444 */ 445 446 if (icmp->icmp_family == AF_INET) { 447 ASSERT(sin != NULL); 448 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 449 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 450 &icmp->icmp_v6src); 451 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 452 icmp->icmp_ip_snd_options_len; 453 icmp->icmp_bound_v6src = icmp->icmp_v6src; 454 } else { 455 int error; 456 457 ASSERT(sin6 != NULL); 458 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 459 icmp->icmp_v6src = sin6->sin6_addr; 460 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 461 icmp->icmp_bound_v6src = icmp->icmp_v6src; 462 463 /* Rebuild the header template */ 464 error = icmp_build_hdrs(icmp); 465 if (error != 0) { 466 icmp->icmp_pending_op = -1; 467 /* 468 * TSYSERR 469 */ 470 goto done; 471 } 472 } 473 474 ire_mp = NULL; 475 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 476 /* 477 * request an IRE if src not 0 (INADDR_ANY) 478 */ 479 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 480 if (ire_mp == NULL) { 481 icmp->icmp_pending_op = -1; 482 error = ENOMEM; 483 goto done; 484 } 485 DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE; 486 } 487 done: 488 rw_exit(&icmp->icmp_rwlock); 489 if (error != 0) 490 return (error); 491 492 if (icmp->icmp_family == AF_INET6) { 493 error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto, 494 &sin6->sin6_addr, sin6->sin6_port, B_TRUE); 495 } else { 496 error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto, 497 sin->sin_addr.s_addr, sin->sin_port, B_TRUE); 498 } 499 rawip_post_ip_bind_connect(icmp, ire_mp, error); 500 return (error); 501 } 502 503 static void 504 rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error) 505 { 506 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 507 if (icmp->icmp_state == TS_UNBND) { 508 /* 509 * not yet bound - bind sent by icmp_bind_proto. 
510 */ 511 rw_exit(&icmp->icmp_rwlock); 512 return; 513 } 514 ASSERT(icmp->icmp_pending_op != -1); 515 icmp->icmp_pending_op = -1; 516 517 if (error != 0) { 518 if (icmp->icmp_state == TS_DATA_XFER) { 519 /* Connect failed */ 520 /* Revert back to the bound source */ 521 icmp->icmp_v6src = icmp->icmp_bound_v6src; 522 icmp->icmp_state = TS_IDLE; 523 if (icmp->icmp_family == AF_INET6) 524 (void) icmp_build_hdrs(icmp); 525 } else { 526 V6_SET_ZERO(icmp->icmp_v6src); 527 V6_SET_ZERO(icmp->icmp_bound_v6src); 528 icmp->icmp_state = TS_UNBND; 529 if (icmp->icmp_family == AF_INET6) 530 (void) icmp_build_hdrs(icmp); 531 } 532 } else { 533 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 534 ire_t *ire; 535 536 ire = (ire_t *)ire_mp->b_rptr; 537 /* 538 * If a broadcast/multicast address was bound set 539 * the source address to 0. 540 * This ensures no datagrams with broadcast address 541 * as source address are emitted (which would violate 542 * RFC1122 - Hosts requirements) 543 * Note: we get IRE_BROADCAST for IPv6 544 * to "mark" a multicast local address. 545 */ 546 547 548 if (ire->ire_type == IRE_BROADCAST && 549 icmp->icmp_state != TS_DATA_XFER) { 550 /* 551 * This was just a local bind to a 552 * MC/broadcast addr 553 */ 554 V6_SET_ZERO(icmp->icmp_v6src); 555 if (icmp->icmp_family == AF_INET6) 556 (void) icmp_build_hdrs(icmp); 557 } 558 } 559 560 } 561 rw_exit(&icmp->icmp_rwlock); 562 if (ire_mp != NULL) 563 freeb(ire_mp); 564 } 565 566 /* 567 * Send message to IP to just bind to the protocol. 
568 */ 569 static int 570 icmp_bind_proto(conn_t *connp) 571 { 572 icmp_t *icmp; 573 int error; 574 575 icmp = connp->conn_icmp; 576 577 if (icmp->icmp_family == AF_INET6) 578 error = ip_proto_bind_laddr_v6(connp, NULL, icmp->icmp_proto, 579 &sin6_null.sin6_addr, 0, B_TRUE); 580 else 581 error = ip_proto_bind_laddr_v4(connp, NULL, icmp->icmp_proto, 582 sin_null.sin_addr.s_addr, 0, B_TRUE); 583 584 rawip_post_ip_bind_connect(icmp, NULL, error); 585 return (error); 586 } 587 588 static void 589 icmp_tpi_connect(queue_t *q, mblk_t *mp) 590 { 591 conn_t *connp = Q_TO_CONN(q); 592 struct T_conn_req *tcr; 593 icmp_t *icmp; 594 struct sockaddr *sa; 595 socklen_t len; 596 int error; 597 cred_t *cr; 598 599 /* 600 * All Solaris components should pass a db_credp 601 * for this TPI message, hence we ASSERT. 602 * But in case there is some other M_PROTO that looks 603 * like a TPI message sent by some other kernel 604 * component, we check and return an error. 605 */ 606 cr = msg_getcred(mp, NULL); 607 ASSERT(cr != NULL); 608 if (cr == NULL) { 609 icmp_err_ack(q, mp, TSYSERR, EINVAL); 610 return; 611 } 612 613 icmp = connp->conn_icmp; 614 tcr = (struct T_conn_req *)mp->b_rptr; 615 /* Sanity checks */ 616 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 617 icmp_err_ack(q, mp, TPROTO, 0); 618 return; 619 } 620 621 if (tcr->OPT_length != 0) { 622 icmp_err_ack(q, mp, TBADOPT, 0); 623 return; 624 } 625 626 len = tcr->DEST_length; 627 628 switch (len) { 629 default: 630 icmp_err_ack(q, mp, TBADADDR, 0); 631 return; 632 case sizeof (sin_t): 633 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 634 sizeof (sin_t)); 635 break; 636 case sizeof (sin6_t): 637 sa = (struct sockaddr *)mi_offset_param(mp, 638 tcr->DEST_offset, sizeof (sin6_t)); 639 break; 640 } 641 642 error = proto_verify_ip_addr(icmp->icmp_family, sa, len); 643 if (error != 0) { 644 icmp_err_ack(q, mp, TSYSERR, error); 645 return; 646 } 647 648 error = rawip_do_connect(connp, sa, len, cr); 649 if 
(error != 0) { 650 if (error < 0) { 651 icmp_err_ack(q, mp, -error, 0); 652 } else { 653 icmp_err_ack(q, mp, 0, error); 654 } 655 } else { 656 mblk_t *mp1; 657 658 /* 659 * We have to send a connection confirmation to 660 * keep TLI happy. 661 */ 662 if (icmp->icmp_family == AF_INET) { 663 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 664 sizeof (sin_t), NULL, 0); 665 } else { 666 ASSERT(icmp->icmp_family == AF_INET6); 667 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 668 sizeof (sin6_t), NULL, 0); 669 } 670 if (mp1 == NULL) { 671 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 672 return; 673 } 674 675 /* 676 * Send ok_ack for T_CONN_REQ 677 */ 678 mp = mi_tpi_ok_ack_alloc(mp); 679 if (mp == NULL) { 680 /* Unable to reuse the T_CONN_REQ for the ack. */ 681 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 682 return; 683 } 684 putnext(connp->conn_rq, mp); 685 putnext(connp->conn_rq, mp1); 686 } 687 } 688 689 static int 690 rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 691 cred_t *cr) 692 { 693 icmp_t *icmp; 694 sin_t *sin; 695 sin6_t *sin6; 696 mblk_t *ire_mp; 697 int error; 698 ipaddr_t v4dst; 699 in6_addr_t v6dst; 700 701 icmp = connp->conn_icmp; 702 703 if (sa == NULL || !OK_32PTR((char *)sa)) { 704 return (EINVAL); 705 } 706 707 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 708 if (ire_mp == NULL) 709 return (ENOMEM); 710 DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE; 711 712 713 ASSERT(sa != NULL && len != 0); 714 715 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 716 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 717 rw_exit(&icmp->icmp_rwlock); 718 freeb(ire_mp); 719 return (-TOUTSTATE); 720 } 721 722 switch (len) { 723 case sizeof (sin_t): 724 sin = (sin_t *)sa; 725 726 ASSERT(icmp->icmp_family == AF_INET); 727 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 728 729 v4dst = sin->sin_addr.s_addr; 730 /* 731 * Interpret a zero destination to mean loopback. 732 * Update the T_CONN_REQ (sin/sin6) since it is used to 733 * generate the T_CONN_CON. 
734 */ 735 if (v4dst == INADDR_ANY) { 736 v4dst = htonl(INADDR_LOOPBACK); 737 } 738 739 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 740 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 741 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 742 icmp->icmp_ip_snd_options_len; 743 icmp->icmp_v6dst.sin6_addr = v6dst; 744 icmp->icmp_v6dst.sin6_family = AF_INET6; 745 icmp->icmp_v6dst.sin6_flowinfo = 0; 746 icmp->icmp_v6dst.sin6_port = 0; 747 748 /* 749 * If the destination address is multicast and 750 * an outgoing multicast interface has been set, 751 * use the address of that interface as our 752 * source address if no source address has been set. 753 */ 754 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 755 CLASSD(v4dst) && 756 icmp->icmp_multicast_if_addr != INADDR_ANY) { 757 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 758 &icmp->icmp_v6src); 759 } 760 break; 761 case sizeof (sin6_t): 762 sin6 = (sin6_t *)sa; 763 764 /* No support for mapped addresses on raw sockets */ 765 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 766 rw_exit(&icmp->icmp_rwlock); 767 freeb(ire_mp); 768 return (EADDRNOTAVAIL); 769 } 770 771 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 772 ASSERT(icmp->icmp_family == AF_INET6); 773 774 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 775 776 icmp->icmp_v6dst = *sin6; 777 icmp->icmp_v6dst.sin6_port = 0; 778 779 /* 780 * Interpret a zero destination to mean loopback. 781 * Update the T_CONN_REQ (sin/sin6) since it is used to 782 * generate the T_CONN_CON. 783 */ 784 if (IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6dst.sin6_addr)) { 785 icmp->icmp_v6dst.sin6_addr = ipv6_loopback; 786 } 787 /* 788 * If the destination address is multicast and 789 * an outgoing multicast interface has been set, 790 * then the ip bind logic will pick the correct source 791 * address (i.e. matching the outgoing multicast interface). 
792 */ 793 break; 794 } 795 796 icmp->icmp_pending_op = T_CONN_REQ; 797 798 if (icmp->icmp_state == TS_DATA_XFER) { 799 /* Already connected - clear out state */ 800 icmp->icmp_v6src = icmp->icmp_bound_v6src; 801 icmp->icmp_state = TS_IDLE; 802 } 803 804 icmp->icmp_state = TS_DATA_XFER; 805 rw_exit(&icmp->icmp_rwlock); 806 807 if (icmp->icmp_family == AF_INET6) { 808 error = ip_proto_bind_connected_v6(connp, &ire_mp, 809 icmp->icmp_proto, &icmp->icmp_v6src, 0, 810 &icmp->icmp_v6dst.sin6_addr, 811 NULL, sin6->sin6_port, B_TRUE, B_TRUE, cr); 812 } else { 813 error = ip_proto_bind_connected_v4(connp, &ire_mp, 814 icmp->icmp_proto, &V4_PART_OF_V6(icmp->icmp_v6src), 0, 815 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr), sin->sin_port, 816 B_TRUE, B_TRUE, cr); 817 } 818 rawip_post_ip_bind_connect(icmp, ire_mp, error); 819 return (error); 820 } 821 822 static void 823 icmp_close_free(conn_t *connp) 824 { 825 icmp_t *icmp = connp->conn_icmp; 826 827 /* If there are any options associated with the stream, free them. */ 828 if (icmp->icmp_ip_snd_options != NULL) { 829 mi_free((char *)icmp->icmp_ip_snd_options); 830 icmp->icmp_ip_snd_options = NULL; 831 icmp->icmp_ip_snd_options_len = 0; 832 } 833 834 if (icmp->icmp_filter != NULL) { 835 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 836 icmp->icmp_filter = NULL; 837 } 838 839 /* Free memory associated with sticky options */ 840 if (icmp->icmp_sticky_hdrs_len != 0) { 841 kmem_free(icmp->icmp_sticky_hdrs, 842 icmp->icmp_sticky_hdrs_len); 843 icmp->icmp_sticky_hdrs = NULL; 844 icmp->icmp_sticky_hdrs_len = 0; 845 } 846 ip6_pkt_free(&icmp->icmp_sticky_ipp); 847 848 /* 849 * Clear any fields which the kmem_cache constructor clears. 850 * Only icmp_connp needs to be preserved. 851 * TBD: We should make this more efficient to avoid clearing 852 * everything. 
853 */ 854 ASSERT(icmp->icmp_connp == connp); 855 bzero(icmp, sizeof (icmp_t)); 856 icmp->icmp_connp = connp; 857 } 858 859 static int 860 rawip_do_close(conn_t *connp) 861 { 862 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 863 864 ip_quiesce_conn(connp); 865 866 if (!IPCL_IS_NONSTR(connp)) { 867 qprocsoff(connp->conn_rq); 868 } 869 870 ASSERT(connp->conn_icmp->icmp_fallback_queue_head == NULL && 871 connp->conn_icmp->icmp_fallback_queue_tail == NULL); 872 icmp_close_free(connp); 873 874 /* 875 * Now we are truly single threaded on this stream, and can 876 * delete the things hanging off the connp, and finally the connp. 877 * We removed this connp from the fanout list, it cannot be 878 * accessed thru the fanouts, and we already waited for the 879 * conn_ref to drop to 0. We are already in close, so 880 * there cannot be any other thread from the top. qprocsoff 881 * has completed, and service has completed or won't run in 882 * future. 883 */ 884 ASSERT(connp->conn_ref == 1); 885 886 if (!IPCL_IS_NONSTR(connp)) { 887 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 888 } else { 889 ip_free_helper_stream(connp); 890 } 891 892 connp->conn_ref--; 893 ipcl_conn_destroy(connp); 894 895 return (0); 896 } 897 898 static int 899 icmp_close(queue_t *q, int flags) 900 { 901 conn_t *connp; 902 903 if (flags & SO_FALLBACK) { 904 /* 905 * stream is being closed while in fallback 906 * simply free the resources that were allocated 907 */ 908 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 909 qprocsoff(q); 910 goto done; 911 } 912 913 connp = Q_TO_CONN(q); 914 (void) rawip_do_close(connp); 915 done: 916 q->q_ptr = WR(q)->q_ptr = NULL; 917 return (0); 918 } 919 920 /* 921 * This routine handles each T_DISCON_REQ message passed to icmp 922 * as an indicating that ICMP is no longer connected. This results 923 * in sending a T_BIND_REQ to IP to restore the binding to just 924 * the local address. 925 * 926 * The disconnect completes in rawip_post_ip_bind_connect. 
927 */ 928 static int 929 icmp_do_disconnect(conn_t *connp) 930 { 931 icmp_t *icmp; 932 mblk_t *ire_mp; 933 int error; 934 935 icmp = connp->conn_icmp; 936 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 937 if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) { 938 rw_exit(&icmp->icmp_rwlock); 939 return (-TOUTSTATE); 940 } 941 icmp->icmp_pending_op = T_DISCON_REQ; 942 icmp->icmp_v6src = icmp->icmp_bound_v6src; 943 icmp->icmp_state = TS_IDLE; 944 945 946 if (icmp->icmp_family == AF_INET6) { 947 /* Rebuild the header template */ 948 error = icmp_build_hdrs(icmp); 949 if (error != 0) { 950 icmp->icmp_pending_op = -1; 951 rw_exit(&icmp->icmp_rwlock); 952 return (error); 953 } 954 } 955 956 rw_exit(&icmp->icmp_rwlock); 957 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 958 if (ire_mp == NULL) { 959 return (ENOMEM); 960 } 961 962 if (icmp->icmp_family == AF_INET6) { 963 error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto, 964 &icmp->icmp_bound_v6src, 0, B_TRUE); 965 } else { 966 967 error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto, 968 V4_PART_OF_V6(icmp->icmp_bound_v6src), 0, B_TRUE); 969 } 970 971 rawip_post_ip_bind_connect(icmp, ire_mp, error); 972 973 return (error); 974 } 975 976 static void 977 icmp_tpi_disconnect(queue_t *q, mblk_t *mp) 978 { 979 conn_t *connp = Q_TO_CONN(q); 980 int error; 981 982 /* 983 * Allocate the largest primitive we need to send back 984 * T_error_ack is > than T_ok_ack 985 */ 986 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 987 if (mp == NULL) { 988 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 989 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 990 return; 991 } 992 993 error = icmp_do_disconnect(connp); 994 995 if (error != 0) { 996 if (error > 0) { 997 icmp_err_ack(q, mp, 0, error); 998 } else { 999 icmp_err_ack(q, mp, -error, 0); 1000 } 1001 } else { 1002 mp = mi_tpi_ok_ack_alloc(mp); 1003 ASSERT(mp != NULL); 1004 qreply(q, mp); 1005 } 1006 1007 } 1008 1009 static int 1010 icmp_disconnect(conn_t *connp) 1011 { 1012 int error; 1013 icmp_t *icmp = connp->conn_icmp; 1014 1015 icmp->icmp_dgram_errind = B_FALSE; 1016 1017 error = icmp_do_disconnect(connp); 1018 1019 if (error < 0) 1020 error = proto_tlitosyserr(-error); 1021 return (error); 1022 } 1023 1024 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 1025 static void 1026 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1027 { 1028 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1029 qreply(q, mp); 1030 } 1031 1032 /* Shorthand to generate and send TPI error acks to our client */ 1033 static void 1034 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 1035 t_scalar_t t_error, int sys_error) 1036 { 1037 struct T_error_ack *teackp; 1038 1039 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1040 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1041 teackp = (struct T_error_ack *)mp->b_rptr; 1042 teackp->ERROR_prim = primitive; 1043 teackp->TLI_error = t_error; 1044 teackp->UNIX_error = sys_error; 1045 qreply(q, mp); 1046 } 1047 } 1048 1049 /* 1050 * icmp_icmp_error is called by icmp_input to process ICMP 1051 * messages passed up by IP. 1052 * Generates the appropriate permanent (non-transient) errors. 1053 * Assumes that IP has pulled up everything up to and including 1054 * the ICMP header. 
1055 */ 1056 static void 1057 icmp_icmp_error(conn_t *connp, mblk_t *mp) 1058 { 1059 icmph_t *icmph; 1060 ipha_t *ipha; 1061 int iph_hdr_length; 1062 sin_t sin; 1063 mblk_t *mp1; 1064 int error = 0; 1065 icmp_t *icmp = connp->conn_icmp; 1066 1067 ipha = (ipha_t *)mp->b_rptr; 1068 1069 ASSERT(OK_32PTR(mp->b_rptr)); 1070 1071 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1072 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1073 icmp_icmp_error_ipv6(connp, mp); 1074 return; 1075 } 1076 1077 /* 1078 * icmp does not support v4 mapped addresses 1079 * so we can never be here for a V6 socket 1080 * i.e. icmp_family == AF_INET6 1081 */ 1082 ASSERT((IPH_HDR_VERSION(ipha) == IPV4_VERSION) && 1083 (icmp->icmp_family == AF_INET)); 1084 1085 ASSERT(icmp->icmp_family == AF_INET); 1086 1087 /* Skip past the outer IP and ICMP headers */ 1088 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1089 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 1090 ipha = (ipha_t *)&icmph[1]; 1091 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1092 1093 switch (icmph->icmph_type) { 1094 case ICMP_DEST_UNREACHABLE: 1095 switch (icmph->icmph_code) { 1096 case ICMP_FRAGMENTATION_NEEDED: 1097 /* 1098 * IP has already adjusted the path MTU. 1099 */ 1100 break; 1101 case ICMP_PORT_UNREACHABLE: 1102 case ICMP_PROTOCOL_UNREACHABLE: 1103 error = ECONNREFUSED; 1104 break; 1105 default: 1106 /* Transient errors */ 1107 break; 1108 } 1109 break; 1110 default: 1111 /* Transient errors */ 1112 break; 1113 } 1114 if (error == 0) { 1115 freemsg(mp); 1116 return; 1117 } 1118 1119 /* 1120 * Deliver T_UDERROR_IND when the application has asked for it. 1121 * The socket layer enables this automatically when connected. 
1122 */ 1123 if (!icmp->icmp_dgram_errind) { 1124 freemsg(mp); 1125 return; 1126 } 1127 1128 sin = sin_null; 1129 sin.sin_family = AF_INET; 1130 sin.sin_addr.s_addr = ipha->ipha_dst; 1131 1132 if (IPCL_IS_NONSTR(connp)) { 1133 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1134 if (icmp->icmp_state == TS_DATA_XFER) { 1135 if (sin.sin_addr.s_addr == 1136 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr)) { 1137 rw_exit(&icmp->icmp_rwlock); 1138 (*connp->conn_upcalls->su_set_error) 1139 (connp->conn_upper_handle, error); 1140 goto done; 1141 } 1142 } else { 1143 icmp->icmp_delayed_error = error; 1144 *((sin_t *)&icmp->icmp_delayed_addr) = sin; 1145 } 1146 rw_exit(&icmp->icmp_rwlock); 1147 } else { 1148 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 1149 0, error); 1150 if (mp1 != NULL) 1151 putnext(connp->conn_rq, mp1); 1152 } 1153 done: 1154 ASSERT(!RW_ISWRITER(&icmp->icmp_rwlock)); 1155 freemsg(mp); 1156 } 1157 1158 /* 1159 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 1160 * for IPv6 packets. 1161 * Send permanent (non-transient) errors upstream. 1162 * Assumes that IP has pulled up all the extension headers as well 1163 * as the ICMPv6 header. 
1164 */ 1165 static void 1166 icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1167 { 1168 icmp6_t *icmp6; 1169 ip6_t *ip6h, *outer_ip6h; 1170 uint16_t iph_hdr_length; 1171 uint8_t *nexthdrp; 1172 sin6_t sin6; 1173 mblk_t *mp1; 1174 int error = 0; 1175 icmp_t *icmp = connp->conn_icmp; 1176 1177 outer_ip6h = (ip6_t *)mp->b_rptr; 1178 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1179 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1180 else 1181 iph_hdr_length = IPV6_HDR_LEN; 1182 1183 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1184 ip6h = (ip6_t *)&icmp6[1]; 1185 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1186 freemsg(mp); 1187 return; 1188 } 1189 1190 switch (icmp6->icmp6_type) { 1191 case ICMP6_DST_UNREACH: 1192 switch (icmp6->icmp6_code) { 1193 case ICMP6_DST_UNREACH_NOPORT: 1194 error = ECONNREFUSED; 1195 break; 1196 case ICMP6_DST_UNREACH_ADMIN: 1197 case ICMP6_DST_UNREACH_NOROUTE: 1198 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1199 case ICMP6_DST_UNREACH_ADDR: 1200 /* Transient errors */ 1201 break; 1202 default: 1203 break; 1204 } 1205 break; 1206 case ICMP6_PACKET_TOO_BIG: { 1207 struct T_unitdata_ind *tudi; 1208 struct T_opthdr *toh; 1209 size_t udi_size; 1210 mblk_t *newmp; 1211 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1212 sizeof (struct ip6_mtuinfo); 1213 sin6_t *sin6; 1214 struct ip6_mtuinfo *mtuinfo; 1215 1216 /* 1217 * If the application has requested to receive path mtu 1218 * information, send up an empty message containing an 1219 * IPV6_PATHMTU ancillary data item. 1220 */ 1221 if (!icmp->icmp_ipv6_recvpathmtu) 1222 break; 1223 1224 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1225 opt_length; 1226 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1227 BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors); 1228 break; 1229 } 1230 1231 /* 1232 * newmp->b_cont is left to NULL on purpose. This is an 1233 * empty message containing only ancillary data. 
1234 */ 1235 newmp->b_datap->db_type = M_PROTO; 1236 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1237 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1238 tudi->PRIM_type = T_UNITDATA_IND; 1239 tudi->SRC_length = sizeof (sin6_t); 1240 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1241 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1242 tudi->OPT_length = opt_length; 1243 1244 sin6 = (sin6_t *)&tudi[1]; 1245 bzero(sin6, sizeof (sin6_t)); 1246 sin6->sin6_family = AF_INET6; 1247 sin6->sin6_addr = icmp->icmp_v6dst.sin6_addr; 1248 1249 toh = (struct T_opthdr *)&sin6[1]; 1250 toh->level = IPPROTO_IPV6; 1251 toh->name = IPV6_PATHMTU; 1252 toh->len = opt_length; 1253 toh->status = 0; 1254 1255 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1256 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1257 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1258 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1259 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1260 /* 1261 * We've consumed everything we need from the original 1262 * message. Free it, then send our empty message. 1263 */ 1264 freemsg(mp); 1265 icmp_ulp_recv(connp, newmp); 1266 1267 return; 1268 } 1269 case ICMP6_TIME_EXCEEDED: 1270 /* Transient errors */ 1271 break; 1272 case ICMP6_PARAM_PROB: 1273 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1274 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1275 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1276 (uchar_t *)nexthdrp) { 1277 error = ECONNREFUSED; 1278 break; 1279 } 1280 break; 1281 } 1282 if (error == 0) { 1283 freemsg(mp); 1284 return; 1285 } 1286 1287 /* 1288 * Deliver T_UDERROR_IND when the application has asked for it. 1289 * The socket layer enables this automatically when connected. 
1290 */ 1291 if (!icmp->icmp_dgram_errind) { 1292 freemsg(mp); 1293 return; 1294 } 1295 1296 sin6 = sin6_null; 1297 sin6.sin6_family = AF_INET6; 1298 sin6.sin6_addr = ip6h->ip6_dst; 1299 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1300 1301 if (IPCL_IS_NONSTR(connp)) { 1302 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1303 if (icmp->icmp_state == TS_DATA_XFER) { 1304 if (IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1305 &icmp->icmp_v6dst.sin6_addr)) { 1306 rw_exit(&icmp->icmp_rwlock); 1307 (*connp->conn_upcalls->su_set_error) 1308 (connp->conn_upper_handle, error); 1309 goto done; 1310 } 1311 } else { 1312 icmp->icmp_delayed_error = error; 1313 *((sin6_t *)&icmp->icmp_delayed_addr) = sin6; 1314 } 1315 rw_exit(&icmp->icmp_rwlock); 1316 } else { 1317 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1318 NULL, 0, error); 1319 if (mp1 != NULL) 1320 putnext(connp->conn_rq, mp1); 1321 } 1322 done: 1323 ASSERT(!RW_ISWRITER(&icmp->icmp_rwlock)); 1324 freemsg(mp); 1325 } 1326 1327 /* 1328 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1329 * The local address is filled in if endpoint is bound. The remote address 1330 * is filled in if remote address has been precified ("connected endpoint") 1331 * (The concept of connected CLTS sockets is alien to published TPI 1332 * but we support it anyway). 
1333 */ 1334 static void 1335 icmp_addr_req(queue_t *q, mblk_t *mp) 1336 { 1337 icmp_t *icmp = Q_TO_ICMP(q); 1338 mblk_t *ackmp; 1339 struct T_addr_ack *taa; 1340 1341 /* Make it large enough for worst case */ 1342 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1343 2 * sizeof (sin6_t), 1); 1344 if (ackmp == NULL) { 1345 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1346 return; 1347 } 1348 taa = (struct T_addr_ack *)ackmp->b_rptr; 1349 1350 bzero(taa, sizeof (struct T_addr_ack)); 1351 ackmp->b_wptr = (uchar_t *)&taa[1]; 1352 1353 taa->PRIM_type = T_ADDR_ACK; 1354 ackmp->b_datap->db_type = M_PCPROTO; 1355 rw_enter(&icmp->icmp_rwlock, RW_READER); 1356 /* 1357 * Note: Following code assumes 32 bit alignment of basic 1358 * data structures like sin_t and struct T_addr_ack. 1359 */ 1360 if (icmp->icmp_state != TS_UNBND) { 1361 /* 1362 * Fill in local address 1363 */ 1364 taa->LOCADDR_offset = sizeof (*taa); 1365 if (icmp->icmp_family == AF_INET) { 1366 sin_t *sin; 1367 1368 taa->LOCADDR_length = sizeof (sin_t); 1369 sin = (sin_t *)&taa[1]; 1370 /* Fill zeroes and then intialize non-zero fields */ 1371 *sin = sin_null; 1372 sin->sin_family = AF_INET; 1373 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1374 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1375 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1376 sin->sin_addr.s_addr); 1377 } else { 1378 /* 1379 * INADDR_ANY 1380 * icmp_v6src is not set, we might be bound to 1381 * broadcast/multicast. 
Use icmp_bound_v6src as 1382 * local address instead (that could 1383 * also still be INADDR_ANY) 1384 */ 1385 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1386 sin->sin_addr.s_addr); 1387 } 1388 ackmp->b_wptr = (uchar_t *)&sin[1]; 1389 } else { 1390 sin6_t *sin6; 1391 1392 ASSERT(icmp->icmp_family == AF_INET6); 1393 taa->LOCADDR_length = sizeof (sin6_t); 1394 sin6 = (sin6_t *)&taa[1]; 1395 /* Fill zeroes and then intialize non-zero fields */ 1396 *sin6 = sin6_null; 1397 sin6->sin6_family = AF_INET6; 1398 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1399 sin6->sin6_addr = icmp->icmp_v6src; 1400 } else { 1401 /* 1402 * UNSPECIFIED 1403 * icmp_v6src is not set, we might be bound to 1404 * broadcast/multicast. Use icmp_bound_v6src as 1405 * local address instead (that could 1406 * also still be UNSPECIFIED) 1407 */ 1408 sin6->sin6_addr = icmp->icmp_bound_v6src; 1409 } 1410 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1411 } 1412 } 1413 rw_exit(&icmp->icmp_rwlock); 1414 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1415 qreply(q, ackmp); 1416 } 1417 1418 static void 1419 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1420 { 1421 *tap = icmp_g_t_info_ack; 1422 1423 if (icmp->icmp_family == AF_INET6) 1424 tap->ADDR_size = sizeof (sin6_t); 1425 else 1426 tap->ADDR_size = sizeof (sin_t); 1427 tap->CURRENT_state = icmp->icmp_state; 1428 tap->OPT_size = icmp_max_optsize; 1429 } 1430 1431 static void 1432 icmp_do_capability_ack(icmp_t *icmp, struct T_capability_ack *tcap, 1433 t_uscalar_t cap_bits1) 1434 { 1435 tcap->CAP_bits1 = 0; 1436 1437 if (cap_bits1 & TC1_INFO) { 1438 icmp_copy_info(&tcap->INFO_ack, icmp); 1439 tcap->CAP_bits1 |= TC1_INFO; 1440 } 1441 } 1442 1443 /* 1444 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1445 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1446 * icmp_g_t_info_ack. The current state of the stream is copied from 1447 * icmp_state. 
1448 */ 1449 static void 1450 icmp_capability_req(queue_t *q, mblk_t *mp) 1451 { 1452 icmp_t *icmp = Q_TO_ICMP(q); 1453 t_uscalar_t cap_bits1; 1454 struct T_capability_ack *tcap; 1455 1456 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1457 1458 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1459 mp->b_datap->db_type, T_CAPABILITY_ACK); 1460 if (!mp) 1461 return; 1462 1463 tcap = (struct T_capability_ack *)mp->b_rptr; 1464 1465 icmp_do_capability_ack(icmp, tcap, cap_bits1); 1466 1467 qreply(q, mp); 1468 } 1469 1470 /* 1471 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1472 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1473 * The current state of the stream is copied from icmp_state. 1474 */ 1475 static void 1476 icmp_info_req(queue_t *q, mblk_t *mp) 1477 { 1478 icmp_t *icmp = Q_TO_ICMP(q); 1479 1480 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1481 T_INFO_ACK); 1482 if (!mp) 1483 return; 1484 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1485 qreply(q, mp); 1486 } 1487 1488 /* For /dev/icmp aka AF_INET open */ 1489 static int 1490 icmp_tpi_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1491 int family) 1492 { 1493 conn_t *connp; 1494 dev_t conn_dev; 1495 icmp_stack_t *is; 1496 int error; 1497 1498 conn_dev = NULL; 1499 1500 /* If the stream is already open, return immediately. */ 1501 if (q->q_ptr != NULL) 1502 return (0); 1503 1504 if (sflag == MODOPEN) 1505 return (EINVAL); 1506 1507 /* 1508 * Since ICMP is not used so heavily, allocating from the small 1509 * arena should be sufficient. 
1510 */ 1511 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 1512 return (EBUSY); 1513 } 1514 1515 if (flag & SO_FALLBACK) { 1516 /* 1517 * Non streams socket needs a stream to fallback to 1518 */ 1519 RD(q)->q_ptr = (void *)conn_dev; 1520 WR(q)->q_qinfo = &icmp_fallback_sock_winit; 1521 WR(q)->q_ptr = (void *)ip_minor_arena_sa; 1522 qprocson(q); 1523 return (0); 1524 } 1525 1526 connp = icmp_open(family, credp, &error, KM_SLEEP); 1527 if (connp == NULL) { 1528 ASSERT(error != NULL); 1529 inet_minor_free(ip_minor_arena_sa, connp->conn_dev); 1530 return (error); 1531 } 1532 1533 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1534 connp->conn_dev = conn_dev; 1535 connp->conn_minor_arena = ip_minor_arena_sa; 1536 1537 is = connp->conn_icmp->icmp_is; 1538 1539 /* 1540 * Initialize the icmp_t structure for this stream. 1541 */ 1542 q->q_ptr = connp; 1543 WR(q)->q_ptr = connp; 1544 connp->conn_rq = q; 1545 connp->conn_wq = WR(q); 1546 1547 if (connp->conn_icmp->icmp_family == AF_INET6) { 1548 /* Build initial header template for transmit */ 1549 rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER); 1550 if ((error = icmp_build_hdrs(connp->conn_icmp)) != 0) { 1551 rw_exit(&connp->conn_icmp->icmp_rwlock); 1552 inet_minor_free(ip_minor_arena_sa, connp->conn_dev); 1553 ipcl_conn_destroy(connp); 1554 return (error); 1555 } 1556 rw_exit(&connp->conn_icmp->icmp_rwlock); 1557 } 1558 1559 1560 q->q_hiwat = is->is_recv_hiwat; 1561 WR(q)->q_hiwat = is->is_xmit_hiwat; 1562 WR(q)->q_lowat = is->is_xmit_lowat; 1563 1564 qprocson(q); 1565 1566 /* Set the Stream head write offset. 
*/ 1567 (void) proto_set_tx_wroff(q, connp, 1568 connp->conn_icmp->icmp_max_hdr_len + is->is_wroff_extra); 1569 (void) proto_set_rx_hiwat(connp->conn_rq, connp, q->q_hiwat); 1570 1571 mutex_enter(&connp->conn_lock); 1572 connp->conn_state_flags &= ~CONN_INCIPIENT; 1573 mutex_exit(&connp->conn_lock); 1574 1575 return (0); 1576 } 1577 1578 /* For /dev/icmp4 aka AF_INET open */ 1579 static int 1580 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1581 { 1582 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET)); 1583 } 1584 1585 /* For /dev/icmp6 aka AF_INET6 open */ 1586 static int 1587 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1588 { 1589 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET6)); 1590 } 1591 1592 /* 1593 * This is the open routine for icmp. It allocates a icmp_t structure for 1594 * the stream and, on the first open of the module, creates an ND table. 1595 */ 1596 /* ARGSUSED */ 1597 static conn_t * 1598 icmp_open(int family, cred_t *credp, int *err, int flags) 1599 { 1600 icmp_t *icmp; 1601 conn_t *connp; 1602 zoneid_t zoneid; 1603 netstack_t *ns; 1604 icmp_stack_t *is; 1605 boolean_t isv6 = B_FALSE; 1606 1607 *err = secpolicy_net_icmpaccess(credp); 1608 if (*err != 0) 1609 return (NULL); 1610 1611 if (family == AF_INET6) 1612 isv6 = B_TRUE; 1613 ns = netstack_find_by_cred(credp); 1614 ASSERT(ns != NULL); 1615 is = ns->netstack_icmp; 1616 ASSERT(is != NULL); 1617 1618 /* 1619 * For exclusive stacks we set the zoneid to zero 1620 * to make ICMP operate as if in the global zone. 1621 */ 1622 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 1623 zoneid = GLOBAL_ZONEID; 1624 else 1625 zoneid = crgetzoneid(credp); 1626 1627 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 1628 1629 connp = ipcl_conn_create(IPCL_RAWIPCONN, flags, ns); 1630 icmp = connp->conn_icmp; 1631 icmp->icmp_v6dst = sin6_null; 1632 1633 /* 1634 * ipcl_conn_create did a netstack_hold. 
Undo the hold that was 1635 * done by netstack_find_by_cred() 1636 */ 1637 netstack_rele(ns); 1638 1639 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1640 ASSERT(connp->conn_ulp == IPPROTO_ICMP); 1641 ASSERT(connp->conn_icmp == icmp); 1642 ASSERT(icmp->icmp_connp == connp); 1643 1644 /* Set the initial state of the stream and the privilege status. */ 1645 icmp->icmp_state = TS_UNBND; 1646 if (isv6) { 1647 icmp->icmp_ipversion = IPV6_VERSION; 1648 icmp->icmp_family = AF_INET6; 1649 connp->conn_ulp = IPPROTO_ICMPV6; 1650 /* May be changed by a SO_PROTOTYPE socket option. */ 1651 icmp->icmp_proto = IPPROTO_ICMPV6; 1652 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1653 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1654 icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; 1655 connp->conn_af_isv6 = B_TRUE; 1656 connp->conn_flags |= IPCL_ISV6; 1657 } else { 1658 icmp->icmp_ipversion = IPV4_VERSION; 1659 icmp->icmp_family = AF_INET; 1660 /* May be changed by a SO_PROTOTYPE socket option. */ 1661 icmp->icmp_proto = IPPROTO_ICMP; 1662 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1663 icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; 1664 connp->conn_af_isv6 = B_FALSE; 1665 connp->conn_flags &= ~IPCL_ISV6; 1666 } 1667 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1668 icmp->icmp_pending_op = -1; 1669 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1670 connp->conn_zoneid = zoneid; 1671 1672 /* 1673 * If the caller has the process-wide flag set, then default to MAC 1674 * exempt mode. This allows read-down to unlabeled hosts. 
1675 */ 1676 if (getpflags(NET_MAC_AWARE, credp) != 0) 1677 connp->conn_mac_exempt = B_TRUE; 1678 1679 connp->conn_ulp_labeled = is_system_labeled(); 1680 1681 icmp->icmp_is = is; 1682 1683 connp->conn_recv = icmp_input; 1684 crhold(credp); 1685 connp->conn_cred = credp; 1686 1687 rw_exit(&icmp->icmp_rwlock); 1688 1689 connp->conn_flow_cntrld = B_FALSE; 1690 return (connp); 1691 } 1692 1693 /* 1694 * Which ICMP options OK to set through T_UNITDATA_REQ... 1695 */ 1696 /* ARGSUSED */ 1697 static boolean_t 1698 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1699 { 1700 return (B_TRUE); 1701 } 1702 1703 /* 1704 * This routine gets default values of certain options whose default 1705 * values are maintained by protcol specific code 1706 */ 1707 /* ARGSUSED */ 1708 int 1709 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1710 { 1711 icmp_t *icmp = Q_TO_ICMP(q); 1712 icmp_stack_t *is = icmp->icmp_is; 1713 int *i1 = (int *)ptr; 1714 1715 switch (level) { 1716 case IPPROTO_IP: 1717 switch (name) { 1718 case IP_MULTICAST_TTL: 1719 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1720 return (sizeof (uchar_t)); 1721 case IP_MULTICAST_LOOP: 1722 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1723 return (sizeof (uchar_t)); 1724 } 1725 break; 1726 case IPPROTO_IPV6: 1727 switch (name) { 1728 case IPV6_MULTICAST_HOPS: 1729 *i1 = IP_DEFAULT_MULTICAST_TTL; 1730 return (sizeof (int)); 1731 case IPV6_MULTICAST_LOOP: 1732 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1733 return (sizeof (int)); 1734 case IPV6_UNICAST_HOPS: 1735 *i1 = is->is_ipv6_hoplimit; 1736 return (sizeof (int)); 1737 } 1738 break; 1739 case IPPROTO_ICMPV6: 1740 switch (name) { 1741 case ICMP6_FILTER: 1742 /* Make it look like "pass all" */ 1743 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1744 return (sizeof (icmp6_filter_t)); 1745 } 1746 break; 1747 } 1748 return (-1); 1749 } 1750 1751 /* 1752 * This routine retrieves the current status of socket options. 1753 * It returns the size of the option retrieved. 
1754 */ 1755 int 1756 icmp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 1757 { 1758 icmp_t *icmp = connp->conn_icmp; 1759 icmp_stack_t *is = icmp->icmp_is; 1760 int *i1 = (int *)ptr; 1761 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1762 int ret = 0; 1763 1764 ASSERT(RW_READ_HELD(&icmp->icmp_rwlock)); 1765 switch (level) { 1766 case SOL_SOCKET: 1767 switch (name) { 1768 case SO_DEBUG: 1769 *i1 = icmp->icmp_debug; 1770 break; 1771 case SO_TYPE: 1772 *i1 = SOCK_RAW; 1773 break; 1774 case SO_PROTOTYPE: 1775 *i1 = icmp->icmp_proto; 1776 break; 1777 case SO_REUSEADDR: 1778 *i1 = icmp->icmp_reuseaddr; 1779 break; 1780 1781 /* 1782 * The following three items are available here, 1783 * but are only meaningful to IP. 1784 */ 1785 case SO_DONTROUTE: 1786 *i1 = icmp->icmp_dontroute; 1787 break; 1788 case SO_USELOOPBACK: 1789 *i1 = icmp->icmp_useloopback; 1790 break; 1791 case SO_BROADCAST: 1792 *i1 = icmp->icmp_broadcast; 1793 break; 1794 1795 case SO_SNDBUF: 1796 ASSERT(icmp->icmp_xmit_hiwat <= INT_MAX); 1797 *i1 = icmp->icmp_xmit_hiwat; 1798 break; 1799 case SO_RCVBUF: 1800 ASSERT(icmp->icmp_recv_hiwat <= INT_MAX); 1801 *i1 = icmp->icmp_recv_hiwat; 1802 break; 1803 case SO_DGRAM_ERRIND: 1804 *i1 = icmp->icmp_dgram_errind; 1805 break; 1806 case SO_TIMESTAMP: 1807 *i1 = icmp->icmp_timestamp; 1808 break; 1809 case SO_MAC_EXEMPT: 1810 *i1 = connp->conn_mac_exempt; 1811 break; 1812 case SO_DOMAIN: 1813 *i1 = icmp->icmp_family; 1814 break; 1815 1816 /* 1817 * Following four not meaningful for icmp 1818 * Action is same as "default" to which we fallthrough 1819 * so we keep them in comments. 1820 * case SO_LINGER: 1821 * case SO_KEEPALIVE: 1822 * case SO_OOBINLINE: 1823 * case SO_ALLZONES: 1824 */ 1825 default: 1826 ret = -1; 1827 goto done; 1828 } 1829 break; 1830 case IPPROTO_IP: 1831 /* 1832 * Only allow IPv4 option processing on IPv4 sockets. 
1833 */ 1834 if (icmp->icmp_family != AF_INET) { 1835 ret = -1; 1836 goto done; 1837 } 1838 1839 switch (name) { 1840 case IP_OPTIONS: 1841 case T_IP_OPTIONS: 1842 /* Options are passed up with each packet */ 1843 ret = 0; 1844 goto done; 1845 case IP_HDRINCL: 1846 *i1 = (int)icmp->icmp_hdrincl; 1847 break; 1848 case IP_TOS: 1849 case T_IP_TOS: 1850 *i1 = (int)icmp->icmp_type_of_service; 1851 break; 1852 case IP_TTL: 1853 *i1 = (int)icmp->icmp_ttl; 1854 break; 1855 case IP_MULTICAST_IF: 1856 /* 0 address if not set */ 1857 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1858 ret = sizeof (ipaddr_t); 1859 goto done; 1860 case IP_MULTICAST_TTL: 1861 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1862 ret = sizeof (uchar_t); 1863 goto done; 1864 case IP_MULTICAST_LOOP: 1865 *ptr = connp->conn_multicast_loop; 1866 ret = sizeof (uint8_t); 1867 goto done; 1868 case IP_BOUND_IF: 1869 /* Zero if not set */ 1870 *i1 = icmp->icmp_bound_if; 1871 break; /* goto sizeof (int) option return */ 1872 case IP_UNSPEC_SRC: 1873 *ptr = icmp->icmp_unspec_source; 1874 break; /* goto sizeof (int) option return */ 1875 case IP_RECVIF: 1876 *ptr = icmp->icmp_recvif; 1877 break; /* goto sizeof (int) option return */ 1878 case IP_BROADCAST_TTL: 1879 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 1880 return (sizeof (uchar_t)); 1881 case IP_RECVPKTINFO: 1882 /* 1883 * This also handles IP_PKTINFO. 1884 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1885 * Differentiation is based on the size of the argument 1886 * passed in. 1887 * This option is handled in IP which will return an 1888 * error for IP_PKTINFO as it's not supported as a 1889 * sticky option. 1890 */ 1891 ret = -EINVAL; 1892 goto done; 1893 /* 1894 * Cannot "get" the value of following options 1895 * at this level. Action is same as "default" to 1896 * which we fallthrough so we keep them in comments. 
1897 * 1898 * case IP_ADD_MEMBERSHIP: 1899 * case IP_DROP_MEMBERSHIP: 1900 * case IP_BLOCK_SOURCE: 1901 * case IP_UNBLOCK_SOURCE: 1902 * case IP_ADD_SOURCE_MEMBERSHIP: 1903 * case IP_DROP_SOURCE_MEMBERSHIP: 1904 * case MCAST_JOIN_GROUP: 1905 * case MCAST_LEAVE_GROUP: 1906 * case MCAST_BLOCK_SOURCE: 1907 * case MCAST_UNBLOCK_SOURCE: 1908 * case MCAST_JOIN_SOURCE_GROUP: 1909 * case MCAST_LEAVE_SOURCE_GROUP: 1910 * case MRT_INIT: 1911 * case MRT_DONE: 1912 * case MRT_ADD_VIF: 1913 * case MRT_DEL_VIF: 1914 * case MRT_ADD_MFC: 1915 * case MRT_DEL_MFC: 1916 * case MRT_VERSION: 1917 * case MRT_ASSERT: 1918 * case IP_SEC_OPT: 1919 * case IP_NEXTHOP: 1920 */ 1921 default: 1922 ret = -1; 1923 goto done; 1924 } 1925 break; 1926 case IPPROTO_IPV6: 1927 /* 1928 * Only allow IPv6 option processing on native IPv6 sockets. 1929 */ 1930 if (icmp->icmp_family != AF_INET6) { 1931 ret = -1; 1932 goto done; 1933 } 1934 switch (name) { 1935 case IPV6_UNICAST_HOPS: 1936 *i1 = (unsigned int)icmp->icmp_ttl; 1937 break; 1938 case IPV6_MULTICAST_IF: 1939 /* 0 index if not set */ 1940 *i1 = icmp->icmp_multicast_if_index; 1941 break; 1942 case IPV6_MULTICAST_HOPS: 1943 *i1 = icmp->icmp_multicast_ttl; 1944 break; 1945 case IPV6_MULTICAST_LOOP: 1946 *i1 = connp->conn_multicast_loop; 1947 break; 1948 case IPV6_BOUND_IF: 1949 /* Zero if not set */ 1950 *i1 = icmp->icmp_bound_if; 1951 break; 1952 case IPV6_UNSPEC_SRC: 1953 *i1 = icmp->icmp_unspec_source; 1954 break; 1955 case IPV6_CHECKSUM: 1956 /* 1957 * Return offset or -1 if no checksum offset. 
1958 * Does not apply to IPPROTO_ICMPV6 1959 */ 1960 if (icmp->icmp_proto == IPPROTO_ICMPV6) { 1961 ret = -1; 1962 goto done; 1963 } 1964 1965 if (icmp->icmp_raw_checksum) { 1966 *i1 = icmp->icmp_checksum_off; 1967 } else { 1968 *i1 = -1; 1969 } 1970 break; 1971 case IPV6_JOIN_GROUP: 1972 case IPV6_LEAVE_GROUP: 1973 case MCAST_JOIN_GROUP: 1974 case MCAST_LEAVE_GROUP: 1975 case MCAST_BLOCK_SOURCE: 1976 case MCAST_UNBLOCK_SOURCE: 1977 case MCAST_JOIN_SOURCE_GROUP: 1978 case MCAST_LEAVE_SOURCE_GROUP: 1979 /* cannot "get" the value for these */ 1980 ret = -1; 1981 goto done; 1982 case IPV6_RECVPKTINFO: 1983 *i1 = icmp->icmp_ip_recvpktinfo; 1984 break; 1985 case IPV6_RECVTCLASS: 1986 *i1 = icmp->icmp_ipv6_recvtclass; 1987 break; 1988 case IPV6_RECVPATHMTU: 1989 *i1 = icmp->icmp_ipv6_recvpathmtu; 1990 break; 1991 case IPV6_V6ONLY: 1992 *i1 = 1; 1993 break; 1994 case IPV6_RECVHOPLIMIT: 1995 *i1 = icmp->icmp_ipv6_recvhoplimit; 1996 break; 1997 case IPV6_RECVHOPOPTS: 1998 *i1 = icmp->icmp_ipv6_recvhopopts; 1999 break; 2000 case IPV6_RECVDSTOPTS: 2001 *i1 = icmp->icmp_ipv6_recvdstopts; 2002 break; 2003 case _OLD_IPV6_RECVDSTOPTS: 2004 *i1 = icmp->icmp_old_ipv6_recvdstopts; 2005 break; 2006 case IPV6_RECVRTHDRDSTOPTS: 2007 *i1 = icmp->icmp_ipv6_recvrtdstopts; 2008 break; 2009 case IPV6_RECVRTHDR: 2010 *i1 = icmp->icmp_ipv6_recvrthdr; 2011 break; 2012 case IPV6_PKTINFO: { 2013 /* XXX assumes that caller has room for max size! 
*/ 2014 struct in6_pktinfo *pkti; 2015 2016 pkti = (struct in6_pktinfo *)ptr; 2017 if (ipp->ipp_fields & IPPF_IFINDEX) 2018 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2019 else 2020 pkti->ipi6_ifindex = 0; 2021 if (ipp->ipp_fields & IPPF_ADDR) 2022 pkti->ipi6_addr = ipp->ipp_addr; 2023 else 2024 pkti->ipi6_addr = ipv6_all_zeros; 2025 ret = sizeof (struct in6_pktinfo); 2026 goto done; 2027 } 2028 case IPV6_NEXTHOP: { 2029 sin6_t *sin6 = (sin6_t *)ptr; 2030 2031 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2032 return (0); 2033 *sin6 = sin6_null; 2034 sin6->sin6_family = AF_INET6; 2035 sin6->sin6_addr = ipp->ipp_nexthop; 2036 ret = (sizeof (sin6_t)); 2037 goto done; 2038 } 2039 case IPV6_HOPOPTS: 2040 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2041 return (0); 2042 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 2043 return (0); 2044 bcopy((char *)ipp->ipp_hopopts + 2045 icmp->icmp_label_len_v6, ptr, 2046 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 2047 if (icmp->icmp_label_len_v6 > 0) { 2048 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2049 ptr[1] = (ipp->ipp_hopoptslen - 2050 icmp->icmp_label_len_v6 + 7) / 8 - 1; 2051 } 2052 ret = (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 2053 goto done; 2054 case IPV6_RTHDRDSTOPTS: 2055 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2056 return (0); 2057 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2058 ret = ipp->ipp_rtdstoptslen; 2059 goto done; 2060 case IPV6_RTHDR: 2061 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2062 return (0); 2063 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2064 ret = ipp->ipp_rthdrlen; 2065 goto done; 2066 case IPV6_DSTOPTS: 2067 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 2068 ret = 0; 2069 goto done; 2070 } 2071 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2072 ret = ipp->ipp_dstoptslen; 2073 goto done; 2074 case IPV6_PATHMTU: 2075 if (!(ipp->ipp_fields & IPPF_PATHMTU)) { 2076 ret = 0; 2077 } else { 2078 ret = ip_fill_mtuinfo( 2079 &icmp->icmp_v6dst.sin6_addr, 0, 2080 (struct ip6_mtuinfo *)ptr, 2081 
is->is_netstack); 2082 } 2083 goto done; 2084 case IPV6_TCLASS: 2085 if (ipp->ipp_fields & IPPF_TCLASS) 2086 *i1 = ipp->ipp_tclass; 2087 else 2088 *i1 = IPV6_FLOW_TCLASS( 2089 IPV6_DEFAULT_VERS_AND_FLOW); 2090 break; 2091 default: 2092 ret = -1; 2093 goto done; 2094 } 2095 break; 2096 case IPPROTO_ICMPV6: 2097 /* 2098 * Only allow IPv6 option processing on native IPv6 sockets. 2099 */ 2100 if (icmp->icmp_family != AF_INET6) { 2101 ret = -1; 2102 } 2103 2104 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2105 ret = -1; 2106 } 2107 2108 switch (name) { 2109 case ICMP6_FILTER: 2110 if (icmp->icmp_filter == NULL) { 2111 /* Make it look like "pass all" */ 2112 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 2113 } else { 2114 (void) bcopy(icmp->icmp_filter, ptr, 2115 sizeof (icmp6_filter_t)); 2116 } 2117 ret = sizeof (icmp6_filter_t); 2118 goto done; 2119 default: 2120 ret = -1; 2121 goto done; 2122 } 2123 default: 2124 ret = -1; 2125 goto done; 2126 } 2127 ret = sizeof (int); 2128 done: 2129 return (ret); 2130 } 2131 2132 /* 2133 * This routine retrieves the current status of socket options. 2134 * It returns the size of the option retrieved. 2135 */ 2136 int 2137 icmp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 2138 { 2139 conn_t *connp = Q_TO_CONN(q); 2140 icmp_t *icmp = connp->conn_icmp; 2141 int err; 2142 2143 rw_enter(&icmp->icmp_rwlock, RW_READER); 2144 err = icmp_opt_get(connp, level, name, ptr); 2145 rw_exit(&icmp->icmp_rwlock); 2146 return (err); 2147 } 2148 2149 int 2150 icmp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2151 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2152 void *thisdg_attrs, boolean_t checkonly) 2153 { 2154 2155 int *i1 = (int *)invalp; 2156 boolean_t onoff = (*i1 == 0) ? 
0 : 1; 2157 icmp_t *icmp = connp->conn_icmp; 2158 icmp_stack_t *is = icmp->icmp_is; 2159 int error; 2160 2161 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 2162 /* 2163 * For fixed length options, no sanity check 2164 * of passed in length is done. It is assumed *_optcom_req() 2165 * routines do the right thing. 2166 */ 2167 switch (level) { 2168 case SOL_SOCKET: 2169 switch (name) { 2170 case SO_DEBUG: 2171 if (!checkonly) 2172 icmp->icmp_debug = onoff; 2173 break; 2174 case SO_PROTOTYPE: 2175 if ((*i1 & 0xFF) != IPPROTO_ICMP && 2176 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 2177 secpolicy_net_rawaccess(cr) != 0) { 2178 *outlenp = 0; 2179 return (EACCES); 2180 } 2181 /* Can't use IPPROTO_RAW with IPv6 */ 2182 if ((*i1 & 0xFF) == IPPROTO_RAW && 2183 icmp->icmp_family == AF_INET6) { 2184 *outlenp = 0; 2185 return (EPROTONOSUPPORT); 2186 } 2187 if (checkonly) { 2188 /* T_CHECK case */ 2189 *(int *)outvalp = (*i1 & 0xFF); 2190 break; 2191 } 2192 icmp->icmp_proto = *i1 & 0xFF; 2193 if ((icmp->icmp_proto == IPPROTO_RAW || 2194 icmp->icmp_proto == IPPROTO_IGMP) && 2195 icmp->icmp_family == AF_INET) 2196 icmp->icmp_hdrincl = 1; 2197 else 2198 icmp->icmp_hdrincl = 0; 2199 2200 if (icmp->icmp_family == AF_INET6 && 2201 icmp->icmp_proto == IPPROTO_ICMPV6) { 2202 /* Set offset for icmp6_cksum */ 2203 icmp->icmp_raw_checksum = 0; 2204 icmp->icmp_checksum_off = 2; 2205 } 2206 if (icmp->icmp_proto == IPPROTO_UDP || 2207 icmp->icmp_proto == IPPROTO_TCP || 2208 icmp->icmp_proto == IPPROTO_SCTP) { 2209 icmp->icmp_no_tp_cksum = 1; 2210 icmp->icmp_sticky_ipp.ipp_fields |= 2211 IPPF_NO_CKSUM; 2212 } else { 2213 icmp->icmp_no_tp_cksum = 0; 2214 icmp->icmp_sticky_ipp.ipp_fields &= 2215 ~IPPF_NO_CKSUM; 2216 } 2217 2218 if (icmp->icmp_filter != NULL && 2219 icmp->icmp_proto != IPPROTO_ICMPV6) { 2220 kmem_free(icmp->icmp_filter, 2221 sizeof (icmp6_filter_t)); 2222 icmp->icmp_filter = NULL; 2223 } 2224 2225 /* Rebuild the header template */ 2226 error = icmp_build_hdrs(icmp); 2227 if (error != 0) { 
2228 *outlenp = 0; 2229 return (error); 2230 } 2231 2232 /* 2233 * For SCTP, we don't use icmp_bind_proto() for 2234 * raw socket binding. Note that we do not need 2235 * to set *outlenp. 2236 * FIXME: how does SCTP work? 2237 */ 2238 if (icmp->icmp_proto == IPPROTO_SCTP) 2239 return (0); 2240 2241 *outlenp = sizeof (int); 2242 *(int *)outvalp = *i1 & 0xFF; 2243 2244 /* Drop lock across the bind operation */ 2245 rw_exit(&icmp->icmp_rwlock); 2246 (void) icmp_bind_proto(connp); 2247 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2248 return (0); 2249 case SO_REUSEADDR: 2250 if (!checkonly) { 2251 icmp->icmp_reuseaddr = onoff; 2252 PASS_OPT_TO_IP(connp); 2253 } 2254 break; 2255 2256 /* 2257 * The following three items are available here, 2258 * but are only meaningful to IP. 2259 */ 2260 case SO_DONTROUTE: 2261 if (!checkonly) { 2262 icmp->icmp_dontroute = onoff; 2263 PASS_OPT_TO_IP(connp); 2264 } 2265 break; 2266 case SO_USELOOPBACK: 2267 if (!checkonly) { 2268 icmp->icmp_useloopback = onoff; 2269 PASS_OPT_TO_IP(connp); 2270 } 2271 break; 2272 case SO_BROADCAST: 2273 if (!checkonly) { 2274 icmp->icmp_broadcast = onoff; 2275 PASS_OPT_TO_IP(connp); 2276 } 2277 break; 2278 2279 case SO_SNDBUF: 2280 if (*i1 > is->is_max_buf) { 2281 *outlenp = 0; 2282 return (ENOBUFS); 2283 } 2284 if (!checkonly) { 2285 if (!IPCL_IS_NONSTR(connp)) { 2286 connp->conn_wq->q_hiwat = *i1; 2287 } 2288 icmp->icmp_xmit_hiwat = *i1; 2289 } 2290 break; 2291 case SO_RCVBUF: 2292 if (*i1 > is->is_max_buf) { 2293 *outlenp = 0; 2294 return (ENOBUFS); 2295 } 2296 if (!checkonly) { 2297 icmp->icmp_recv_hiwat = *i1; 2298 rw_exit(&icmp->icmp_rwlock); 2299 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2300 *i1); 2301 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2302 } 2303 break; 2304 case SO_DGRAM_ERRIND: 2305 if (!checkonly) 2306 icmp->icmp_dgram_errind = onoff; 2307 break; 2308 case SO_ALLZONES: 2309 /* 2310 * "soft" error (negative) 2311 * option not handled at this level 2312 * Note: Do not modify *outlenp 
2313 */ 2314 return (-EINVAL); 2315 case SO_TIMESTAMP: 2316 if (!checkonly) { 2317 icmp->icmp_timestamp = onoff; 2318 } 2319 break; 2320 case SO_MAC_EXEMPT: 2321 /* 2322 * "soft" error (negative) 2323 * option not handled at this level 2324 * Note: Do not modify *outlenp 2325 */ 2326 return (-EINVAL); 2327 case SO_RCVTIMEO: 2328 case SO_SNDTIMEO: 2329 /* 2330 * Pass these two options in order for third part 2331 * protocol usage. Here just return directly. 2332 */ 2333 return (0); 2334 /* 2335 * Following three not meaningful for icmp 2336 * Action is same as "default" so we keep them 2337 * in comments. 2338 * case SO_LINGER: 2339 * case SO_KEEPALIVE: 2340 * case SO_OOBINLINE: 2341 */ 2342 default: 2343 *outlenp = 0; 2344 return (EINVAL); 2345 } 2346 break; 2347 case IPPROTO_IP: 2348 /* 2349 * Only allow IPv4 option processing on IPv4 sockets. 2350 */ 2351 if (icmp->icmp_family != AF_INET) { 2352 *outlenp = 0; 2353 return (ENOPROTOOPT); 2354 } 2355 switch (name) { 2356 case IP_OPTIONS: 2357 case T_IP_OPTIONS: 2358 /* Save options for use by IP. */ 2359 if ((inlen & 0x3) || 2360 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2361 *outlenp = 0; 2362 return (EINVAL); 2363 } 2364 if (checkonly) 2365 break; 2366 2367 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2368 &icmp->icmp_ip_snd_options_len, 2369 icmp->icmp_label_len, invalp, inlen)) { 2370 *outlenp = 0; 2371 return (ENOMEM); 2372 } 2373 2374 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2375 icmp->icmp_ip_snd_options_len; 2376 rw_exit(&icmp->icmp_rwlock); 2377 (void) proto_set_tx_wroff(connp->conn_rq == NULL ? 
NULL: 2378 RD(connp->conn_rq), connp, 2379 icmp->icmp_max_hdr_len + is->is_wroff_extra); 2380 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2381 break; 2382 case IP_HDRINCL: 2383 if (!checkonly) 2384 icmp->icmp_hdrincl = onoff; 2385 break; 2386 case IP_TOS: 2387 case T_IP_TOS: 2388 if (!checkonly) { 2389 icmp->icmp_type_of_service = (uint8_t)*i1; 2390 } 2391 break; 2392 case IP_TTL: 2393 if (!checkonly) { 2394 icmp->icmp_ttl = (uint8_t)*i1; 2395 } 2396 break; 2397 case IP_MULTICAST_IF: 2398 /* 2399 * TODO should check OPTMGMT reply and undo this if 2400 * there is an error. 2401 */ 2402 if (!checkonly) { 2403 icmp->icmp_multicast_if_addr = *i1; 2404 PASS_OPT_TO_IP(connp); 2405 } 2406 break; 2407 case IP_MULTICAST_TTL: 2408 if (!checkonly) 2409 icmp->icmp_multicast_ttl = *invalp; 2410 break; 2411 case IP_MULTICAST_LOOP: 2412 if (!checkonly) { 2413 connp->conn_multicast_loop = 2414 (*invalp == 0) ? 0 : 1; 2415 PASS_OPT_TO_IP(connp); 2416 } 2417 break; 2418 case IP_BOUND_IF: 2419 if (!checkonly) { 2420 icmp->icmp_bound_if = *i1; 2421 PASS_OPT_TO_IP(connp); 2422 } 2423 break; 2424 case IP_UNSPEC_SRC: 2425 if (!checkonly) { 2426 icmp->icmp_unspec_source = onoff; 2427 PASS_OPT_TO_IP(connp); 2428 } 2429 break; 2430 case IP_BROADCAST_TTL: 2431 if (!checkonly) 2432 connp->conn_broadcast_ttl = *invalp; 2433 break; 2434 case IP_RECVIF: 2435 if (!checkonly) { 2436 icmp->icmp_recvif = onoff; 2437 } 2438 /* 2439 * pass to ip 2440 */ 2441 return (-EINVAL); 2442 case IP_PKTINFO: { 2443 /* 2444 * This also handles IP_RECVPKTINFO. 2445 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2446 * Differentiation is based on the size of the argument 2447 * passed in. 2448 */ 2449 struct in_pktinfo *pktinfop; 2450 ip4_pkt_t *attr_pktinfop; 2451 2452 if (checkonly) 2453 break; 2454 2455 if (inlen == sizeof (int)) { 2456 /* 2457 * This is IP_RECVPKTINFO option. 2458 * Keep a local copy of wether this option is 2459 * set or not and pass it down to IP for 2460 * processing. 
2461 */ 2462 icmp->icmp_ip_recvpktinfo = onoff; 2463 return (-EINVAL); 2464 } 2465 2466 2467 if (inlen != sizeof (struct in_pktinfo)) { 2468 return (EINVAL); 2469 } 2470 2471 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2472 == NULL) { 2473 /* 2474 * sticky option is not supported 2475 */ 2476 return (EINVAL); 2477 } 2478 2479 pktinfop = (struct in_pktinfo *)invalp; 2480 2481 /* 2482 * Atleast one of the values should be specified 2483 */ 2484 if (pktinfop->ipi_ifindex == 0 && 2485 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2486 return (EINVAL); 2487 } 2488 2489 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2490 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2491 } 2492 break; 2493 case IP_ADD_MEMBERSHIP: 2494 case IP_DROP_MEMBERSHIP: 2495 case IP_BLOCK_SOURCE: 2496 case IP_UNBLOCK_SOURCE: 2497 case IP_ADD_SOURCE_MEMBERSHIP: 2498 case IP_DROP_SOURCE_MEMBERSHIP: 2499 case MCAST_JOIN_GROUP: 2500 case MCAST_LEAVE_GROUP: 2501 case MCAST_BLOCK_SOURCE: 2502 case MCAST_UNBLOCK_SOURCE: 2503 case MCAST_JOIN_SOURCE_GROUP: 2504 case MCAST_LEAVE_SOURCE_GROUP: 2505 case MRT_INIT: 2506 case MRT_DONE: 2507 case MRT_ADD_VIF: 2508 case MRT_DEL_VIF: 2509 case MRT_ADD_MFC: 2510 case MRT_DEL_MFC: 2511 case MRT_VERSION: 2512 case MRT_ASSERT: 2513 case IP_SEC_OPT: 2514 case IP_NEXTHOP: 2515 /* 2516 * "soft" error (negative) 2517 * option not handled at this level 2518 * Note: Do not modify *outlenp 2519 */ 2520 return (-EINVAL); 2521 default: 2522 *outlenp = 0; 2523 return (EINVAL); 2524 } 2525 break; 2526 case IPPROTO_IPV6: { 2527 ip6_pkt_t *ipp; 2528 boolean_t sticky; 2529 2530 if (icmp->icmp_family != AF_INET6) { 2531 *outlenp = 0; 2532 return (ENOPROTOOPT); 2533 } 2534 /* 2535 * Deal with both sticky options and ancillary data 2536 */ 2537 if (thisdg_attrs == NULL) { 2538 /* sticky options, or none */ 2539 ipp = &icmp->icmp_sticky_ipp; 2540 sticky = B_TRUE; 2541 } else { 2542 /* ancillary data */ 2543 ipp = (ip6_pkt_t *)thisdg_attrs; 2544 sticky = B_FALSE; 
2545 } 2546 2547 switch (name) { 2548 case IPV6_MULTICAST_IF: 2549 if (!checkonly) { 2550 icmp->icmp_multicast_if_index = *i1; 2551 PASS_OPT_TO_IP(connp); 2552 } 2553 break; 2554 case IPV6_UNICAST_HOPS: 2555 /* -1 means use default */ 2556 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2557 *outlenp = 0; 2558 return (EINVAL); 2559 } 2560 if (!checkonly) { 2561 if (*i1 == -1) { 2562 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2563 is->is_ipv6_hoplimit; 2564 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2565 /* Pass modified value to IP. */ 2566 *i1 = ipp->ipp_hoplimit; 2567 } else { 2568 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2569 (uint8_t)*i1; 2570 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2571 } 2572 /* Rebuild the header template */ 2573 error = icmp_build_hdrs(icmp); 2574 if (error != 0) { 2575 *outlenp = 0; 2576 return (error); 2577 } 2578 } 2579 break; 2580 case IPV6_MULTICAST_HOPS: 2581 /* -1 means use default */ 2582 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2583 *outlenp = 0; 2584 return (EINVAL); 2585 } 2586 if (!checkonly) { 2587 if (*i1 == -1) { 2588 icmp->icmp_multicast_ttl = 2589 ipp->ipp_multicast_hops = 2590 IP_DEFAULT_MULTICAST_TTL; 2591 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2592 /* Pass modified value to IP. */ 2593 *i1 = icmp->icmp_multicast_ttl; 2594 } else { 2595 icmp->icmp_multicast_ttl = 2596 ipp->ipp_multicast_hops = 2597 (uint8_t)*i1; 2598 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2599 } 2600 } 2601 break; 2602 case IPV6_MULTICAST_LOOP: 2603 if (*i1 != 0 && *i1 != 1) { 2604 *outlenp = 0; 2605 return (EINVAL); 2606 } 2607 if (!checkonly) { 2608 connp->conn_multicast_loop = *i1; 2609 PASS_OPT_TO_IP(connp); 2610 } 2611 break; 2612 case IPV6_CHECKSUM: 2613 /* 2614 * Integer offset into the user data of where the 2615 * checksum is located. 2616 * Offset of -1 disables option. 2617 * Does not apply to IPPROTO_ICMPV6. 
2618 */ 2619 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2620 *outlenp = 0; 2621 return (EINVAL); 2622 } 2623 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2624 /* Negative or not 16 bit aligned offset */ 2625 *outlenp = 0; 2626 return (EINVAL); 2627 } 2628 if (checkonly) 2629 break; 2630 2631 if (*i1 == -1) { 2632 icmp->icmp_raw_checksum = 0; 2633 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2634 } else { 2635 icmp->icmp_raw_checksum = 1; 2636 icmp->icmp_checksum_off = *i1; 2637 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2638 } 2639 /* Rebuild the header template */ 2640 error = icmp_build_hdrs(icmp); 2641 if (error != 0) { 2642 *outlenp = 0; 2643 return (error); 2644 } 2645 break; 2646 case IPV6_JOIN_GROUP: 2647 case IPV6_LEAVE_GROUP: 2648 case MCAST_JOIN_GROUP: 2649 case MCAST_LEAVE_GROUP: 2650 case MCAST_BLOCK_SOURCE: 2651 case MCAST_UNBLOCK_SOURCE: 2652 case MCAST_JOIN_SOURCE_GROUP: 2653 case MCAST_LEAVE_SOURCE_GROUP: 2654 /* 2655 * "soft" error (negative) 2656 * option not handled at this level 2657 * Note: Do not modify *outlenp 2658 */ 2659 return (-EINVAL); 2660 case IPV6_BOUND_IF: 2661 if (!checkonly) { 2662 icmp->icmp_bound_if = *i1; 2663 PASS_OPT_TO_IP(connp); 2664 } 2665 break; 2666 case IPV6_UNSPEC_SRC: 2667 if (!checkonly) { 2668 icmp->icmp_unspec_source = onoff; 2669 PASS_OPT_TO_IP(connp); 2670 } 2671 break; 2672 case IPV6_RECVTCLASS: 2673 if (!checkonly) { 2674 icmp->icmp_ipv6_recvtclass = onoff; 2675 PASS_OPT_TO_IP(connp); 2676 } 2677 break; 2678 /* 2679 * Set boolean switches for ancillary data delivery 2680 */ 2681 case IPV6_RECVPKTINFO: 2682 if (!checkonly) { 2683 icmp->icmp_ip_recvpktinfo = onoff; 2684 PASS_OPT_TO_IP(connp); 2685 } 2686 break; 2687 case IPV6_RECVPATHMTU: 2688 if (!checkonly) { 2689 icmp->icmp_ipv6_recvpathmtu = onoff; 2690 PASS_OPT_TO_IP(connp); 2691 } 2692 break; 2693 case IPV6_RECVHOPLIMIT: 2694 if (!checkonly) { 2695 icmp->icmp_ipv6_recvhoplimit = onoff; 2696 PASS_OPT_TO_IP(connp); 2697 } 2698 break; 2699 case 
IPV6_RECVHOPOPTS: 2700 if (!checkonly) { 2701 icmp->icmp_ipv6_recvhopopts = onoff; 2702 PASS_OPT_TO_IP(connp); 2703 } 2704 break; 2705 case IPV6_RECVDSTOPTS: 2706 if (!checkonly) { 2707 icmp->icmp_ipv6_recvdstopts = onoff; 2708 PASS_OPT_TO_IP(connp); 2709 } 2710 break; 2711 case _OLD_IPV6_RECVDSTOPTS: 2712 if (!checkonly) 2713 icmp->icmp_old_ipv6_recvdstopts = onoff; 2714 break; 2715 case IPV6_RECVRTHDRDSTOPTS: 2716 if (!checkonly) { 2717 icmp->icmp_ipv6_recvrtdstopts = onoff; 2718 PASS_OPT_TO_IP(connp); 2719 } 2720 break; 2721 case IPV6_RECVRTHDR: 2722 if (!checkonly) { 2723 icmp->icmp_ipv6_recvrthdr = onoff; 2724 PASS_OPT_TO_IP(connp); 2725 } 2726 break; 2727 /* 2728 * Set sticky options or ancillary data. 2729 * If sticky options, (re)build any extension headers 2730 * that might be needed as a result. 2731 */ 2732 case IPV6_PKTINFO: 2733 /* 2734 * The source address and ifindex are verified 2735 * in ip_opt_set(). For ancillary data the 2736 * source address is checked in ip_wput_v6. 2737 */ 2738 if (inlen != 0 && inlen != 2739 sizeof (struct in6_pktinfo)) { 2740 return (EINVAL); 2741 } 2742 if (checkonly) 2743 break; 2744 2745 if (inlen == 0) { 2746 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2747 ipp->ipp_sticky_ignored |= 2748 (IPPF_IFINDEX|IPPF_ADDR); 2749 } else { 2750 struct in6_pktinfo *pkti; 2751 2752 pkti = (struct in6_pktinfo *)invalp; 2753 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2754 ipp->ipp_addr = pkti->ipi6_addr; 2755 if (ipp->ipp_ifindex != 0) 2756 ipp->ipp_fields |= IPPF_IFINDEX; 2757 else 2758 ipp->ipp_fields &= ~IPPF_IFINDEX; 2759 if (!IN6_IS_ADDR_UNSPECIFIED( 2760 &ipp->ipp_addr)) 2761 ipp->ipp_fields |= IPPF_ADDR; 2762 else 2763 ipp->ipp_fields &= ~IPPF_ADDR; 2764 } 2765 if (sticky) { 2766 error = icmp_build_hdrs(icmp); 2767 if (error != 0) 2768 return (error); 2769 PASS_OPT_TO_IP(connp); 2770 } 2771 break; 2772 case IPV6_HOPLIMIT: 2773 /* This option can only be used as ancillary data. 
*/ 2774 if (sticky) 2775 return (EINVAL); 2776 if (inlen != 0 && inlen != sizeof (int)) 2777 return (EINVAL); 2778 if (checkonly) 2779 break; 2780 2781 if (inlen == 0) { 2782 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2783 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2784 } else { 2785 if (*i1 > 255 || *i1 < -1) 2786 return (EINVAL); 2787 if (*i1 == -1) 2788 ipp->ipp_hoplimit = 2789 is->is_ipv6_hoplimit; 2790 else 2791 ipp->ipp_hoplimit = *i1; 2792 ipp->ipp_fields |= IPPF_HOPLIMIT; 2793 } 2794 break; 2795 case IPV6_TCLASS: 2796 /* 2797 * IPV6_RECVTCLASS accepts -1 as use kernel default 2798 * and [0, 255] as the actualy traffic class. 2799 */ 2800 if (inlen != 0 && inlen != sizeof (int)) { 2801 return (EINVAL); 2802 } 2803 if (checkonly) 2804 break; 2805 2806 if (inlen == 0) { 2807 ipp->ipp_fields &= ~IPPF_TCLASS; 2808 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2809 } else { 2810 if (*i1 >= 256 || *i1 < -1) 2811 return (EINVAL); 2812 if (*i1 == -1) { 2813 ipp->ipp_tclass = 2814 IPV6_FLOW_TCLASS( 2815 IPV6_DEFAULT_VERS_AND_FLOW); 2816 } else { 2817 ipp->ipp_tclass = *i1; 2818 } 2819 ipp->ipp_fields |= IPPF_TCLASS; 2820 } 2821 if (sticky) { 2822 error = icmp_build_hdrs(icmp); 2823 if (error != 0) 2824 return (error); 2825 } 2826 break; 2827 case IPV6_NEXTHOP: 2828 /* 2829 * IP will verify that the nexthop is reachable 2830 * and fail for sticky options. 
2831 */ 2832 if (inlen != 0 && inlen != sizeof (sin6_t)) { 2833 return (EINVAL); 2834 } 2835 if (checkonly) 2836 break; 2837 2838 if (inlen == 0) { 2839 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2840 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2841 } else { 2842 sin6_t *sin6 = (sin6_t *)invalp; 2843 2844 if (sin6->sin6_family != AF_INET6) { 2845 return (EAFNOSUPPORT); 2846 } 2847 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 2848 return (EADDRNOTAVAIL); 2849 } 2850 ipp->ipp_nexthop = sin6->sin6_addr; 2851 if (!IN6_IS_ADDR_UNSPECIFIED( 2852 &ipp->ipp_nexthop)) 2853 ipp->ipp_fields |= IPPF_NEXTHOP; 2854 else 2855 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2856 } 2857 if (sticky) { 2858 error = icmp_build_hdrs(icmp); 2859 if (error != 0) 2860 return (error); 2861 PASS_OPT_TO_IP(connp); 2862 } 2863 break; 2864 case IPV6_HOPOPTS: { 2865 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2866 /* 2867 * Sanity checks - minimum size, size a multiple of 2868 * eight bytes, and matching size passed in. 2869 */ 2870 if (inlen != 0 && 2871 inlen != (8 * (hopts->ip6h_len + 1))) { 2872 return (EINVAL); 2873 } 2874 2875 if (checkonly) 2876 break; 2877 error = optcom_pkt_set(invalp, inlen, sticky, 2878 (uchar_t **)&ipp->ipp_hopopts, 2879 &ipp->ipp_hopoptslen, 2880 sticky ? icmp->icmp_label_len_v6 : 0); 2881 if (error != 0) 2882 return (error); 2883 if (ipp->ipp_hopoptslen == 0) { 2884 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2885 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2886 } else { 2887 ipp->ipp_fields |= IPPF_HOPOPTS; 2888 } 2889 if (sticky) { 2890 error = icmp_build_hdrs(icmp); 2891 if (error != 0) 2892 return (error); 2893 } 2894 break; 2895 } 2896 case IPV6_RTHDRDSTOPTS: { 2897 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2898 2899 /* 2900 * Sanity checks - minimum size, size a multiple of 2901 * eight bytes, and matching size passed in. 
2902 */ 2903 if (inlen != 0 && 2904 inlen != (8 * (dopts->ip6d_len + 1))) 2905 return (EINVAL); 2906 2907 if (checkonly) 2908 break; 2909 2910 if (inlen == 0) { 2911 if (sticky && 2912 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2913 kmem_free(ipp->ipp_rtdstopts, 2914 ipp->ipp_rtdstoptslen); 2915 ipp->ipp_rtdstopts = NULL; 2916 ipp->ipp_rtdstoptslen = 0; 2917 } 2918 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2919 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2920 } else { 2921 error = optcom_pkt_set(invalp, inlen, sticky, 2922 (uchar_t **)&ipp->ipp_rtdstopts, 2923 &ipp->ipp_rtdstoptslen, 0); 2924 if (error != 0) 2925 return (error); 2926 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2927 } 2928 if (sticky) { 2929 error = icmp_build_hdrs(icmp); 2930 if (error != 0) 2931 return (error); 2932 } 2933 break; 2934 } 2935 case IPV6_DSTOPTS: { 2936 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2937 2938 /* 2939 * Sanity checks - minimum size, size a multiple of 2940 * eight bytes, and matching size passed in. 2941 */ 2942 if (inlen != 0 && 2943 inlen != (8 * (dopts->ip6d_len + 1))) 2944 return (EINVAL); 2945 2946 if (checkonly) 2947 break; 2948 2949 if (inlen == 0) { 2950 if (sticky && 2951 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2952 kmem_free(ipp->ipp_dstopts, 2953 ipp->ipp_dstoptslen); 2954 ipp->ipp_dstopts = NULL; 2955 ipp->ipp_dstoptslen = 0; 2956 } 2957 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2958 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2959 } else { 2960 error = optcom_pkt_set(invalp, inlen, sticky, 2961 (uchar_t **)&ipp->ipp_dstopts, 2962 &ipp->ipp_dstoptslen, 0); 2963 if (error != 0) 2964 return (error); 2965 ipp->ipp_fields |= IPPF_DSTOPTS; 2966 } 2967 if (sticky) { 2968 error = icmp_build_hdrs(icmp); 2969 if (error != 0) 2970 return (error); 2971 } 2972 break; 2973 } 2974 case IPV6_RTHDR: { 2975 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2976 2977 /* 2978 * Sanity checks - minimum size, size a multiple of 2979 * eight bytes, and matching size passed in. 
2980 */ 2981 if (inlen != 0 && 2982 inlen != (8 * (rt->ip6r_len + 1))) 2983 return (EINVAL); 2984 2985 if (checkonly) 2986 break; 2987 2988 if (inlen == 0) { 2989 if (sticky && 2990 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2991 kmem_free(ipp->ipp_rthdr, 2992 ipp->ipp_rthdrlen); 2993 ipp->ipp_rthdr = NULL; 2994 ipp->ipp_rthdrlen = 0; 2995 } 2996 ipp->ipp_fields &= ~IPPF_RTHDR; 2997 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2998 } else { 2999 error = optcom_pkt_set(invalp, inlen, sticky, 3000 (uchar_t **)&ipp->ipp_rthdr, 3001 &ipp->ipp_rthdrlen, 0); 3002 if (error != 0) 3003 return (error); 3004 ipp->ipp_fields |= IPPF_RTHDR; 3005 } 3006 if (sticky) { 3007 error = icmp_build_hdrs(icmp); 3008 if (error != 0) 3009 return (error); 3010 } 3011 break; 3012 } 3013 3014 case IPV6_DONTFRAG: 3015 if (checkonly) 3016 break; 3017 3018 if (onoff) { 3019 ipp->ipp_fields |= IPPF_DONTFRAG; 3020 } else { 3021 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3022 } 3023 break; 3024 3025 case IPV6_USE_MIN_MTU: 3026 if (inlen != sizeof (int)) 3027 return (EINVAL); 3028 3029 if (*i1 < -1 || *i1 > 1) 3030 return (EINVAL); 3031 3032 if (checkonly) 3033 break; 3034 3035 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3036 ipp->ipp_use_min_mtu = *i1; 3037 break; 3038 3039 /* 3040 * This option can't be set. Its only returned via 3041 * getsockopt() or ancillary data. 3042 */ 3043 case IPV6_PATHMTU: 3044 return (EINVAL); 3045 3046 case IPV6_SEC_OPT: 3047 case IPV6_SRC_PREFERENCES: 3048 case IPV6_V6ONLY: 3049 /* Handled at IP level */ 3050 return (-EINVAL); 3051 default: 3052 *outlenp = 0; 3053 return (EINVAL); 3054 } 3055 break; 3056 } /* end IPPROTO_IPV6 */ 3057 3058 case IPPROTO_ICMPV6: 3059 /* 3060 * Only allow IPv6 option processing on IPv6 sockets. 
3061 */ 3062 if (icmp->icmp_family != AF_INET6) { 3063 *outlenp = 0; 3064 return (ENOPROTOOPT); 3065 } 3066 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 3067 *outlenp = 0; 3068 return (ENOPROTOOPT); 3069 } 3070 switch (name) { 3071 case ICMP6_FILTER: 3072 if (!checkonly) { 3073 if ((inlen != 0) && 3074 (inlen != sizeof (icmp6_filter_t))) 3075 return (EINVAL); 3076 3077 if (inlen == 0) { 3078 if (icmp->icmp_filter != NULL) { 3079 kmem_free(icmp->icmp_filter, 3080 sizeof (icmp6_filter_t)); 3081 icmp->icmp_filter = NULL; 3082 } 3083 } else { 3084 if (icmp->icmp_filter == NULL) { 3085 icmp->icmp_filter = kmem_alloc( 3086 sizeof (icmp6_filter_t), 3087 KM_NOSLEEP); 3088 if (icmp->icmp_filter == NULL) { 3089 *outlenp = 0; 3090 return (ENOBUFS); 3091 } 3092 } 3093 (void) bcopy(invalp, icmp->icmp_filter, 3094 inlen); 3095 } 3096 } 3097 break; 3098 3099 default: 3100 *outlenp = 0; 3101 return (EINVAL); 3102 } 3103 break; 3104 default: 3105 *outlenp = 0; 3106 return (EINVAL); 3107 } 3108 /* 3109 * Common case of OK return with outval same as inval. 3110 */ 3111 if (invalp != outvalp) { 3112 /* don't trust bcopy for identical src/dst */ 3113 (void) bcopy(invalp, outvalp, inlen); 3114 } 3115 *outlenp = inlen; 3116 return (0); 3117 } 3118 3119 /* This routine sets socket options. */ 3120 /* ARGSUSED */ 3121 int 3122 icmp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3123 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3124 void *thisdg_attrs, cred_t *cr) 3125 { 3126 boolean_t checkonly; 3127 int error; 3128 3129 error = 0; 3130 switch (optset_context) { 3131 case SETFN_OPTCOM_CHECKONLY: 3132 checkonly = B_TRUE; 3133 /* 3134 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3135 * inlen != 0 implies value supplied and 3136 * we have to "pretend" to set it. 3137 * inlen == 0 implies that there is no 3138 * value part in T_CHECK request and just validation 3139 * done elsewhere should be enough, we just return here. 
3140 */ 3141 if (inlen == 0) { 3142 *outlenp = 0; 3143 error = 0; 3144 goto done; 3145 } 3146 break; 3147 case SETFN_OPTCOM_NEGOTIATE: 3148 checkonly = B_FALSE; 3149 break; 3150 case SETFN_UD_NEGOTIATE: 3151 case SETFN_CONN_NEGOTIATE: 3152 checkonly = B_FALSE; 3153 /* 3154 * Negotiating local and "association-related" options 3155 * through T_UNITDATA_REQ. 3156 * 3157 * Following routine can filter out ones we do not 3158 * want to be "set" this way. 3159 */ 3160 if (!icmp_opt_allow_udr_set(level, name)) { 3161 *outlenp = 0; 3162 error = EINVAL; 3163 goto done; 3164 } 3165 break; 3166 default: 3167 /* 3168 * We should never get here 3169 */ 3170 *outlenp = 0; 3171 error = EINVAL; 3172 goto done; 3173 } 3174 3175 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3176 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3177 error = icmp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3178 outvalp, cr, thisdg_attrs, checkonly); 3179 3180 done: 3181 return (error); 3182 } 3183 3184 /* This routine sets socket options. */ 3185 /* ARGSUSED */ 3186 int 3187 icmp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3188 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3189 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3190 { 3191 conn_t *connp = Q_TO_CONN(q); 3192 icmp_t *icmp; 3193 int error; 3194 3195 icmp = connp->conn_icmp; 3196 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3197 error = icmp_opt_set(connp, optset_context, level, name, inlen, invalp, 3198 outlenp, outvalp, thisdg_attrs, cr); 3199 rw_exit(&icmp->icmp_rwlock); 3200 return (error); 3201 } 3202 3203 /* 3204 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 3205 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 3206 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 3207 * headers. 3208 * Returns failure if can't allocate memory. 
3209 */ 3210 static int 3211 icmp_build_hdrs(icmp_t *icmp) 3212 { 3213 icmp_stack_t *is = icmp->icmp_is; 3214 uchar_t *hdrs; 3215 uint_t hdrs_len; 3216 ip6_t *ip6h; 3217 ip6i_t *ip6i; 3218 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 3219 3220 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 3221 hdrs_len = ip_total_hdrs_len_v6(ipp); 3222 ASSERT(hdrs_len != 0); 3223 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 3224 /* Need to reallocate */ 3225 if (hdrs_len != 0) { 3226 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3227 if (hdrs == NULL) 3228 return (ENOMEM); 3229 } else { 3230 hdrs = NULL; 3231 } 3232 if (icmp->icmp_sticky_hdrs_len != 0) { 3233 kmem_free(icmp->icmp_sticky_hdrs, 3234 icmp->icmp_sticky_hdrs_len); 3235 } 3236 icmp->icmp_sticky_hdrs = hdrs; 3237 icmp->icmp_sticky_hdrs_len = hdrs_len; 3238 } 3239 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 3240 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 3241 3242 /* Set header fields not in ipp */ 3243 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3244 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 3245 ip6h = (ip6_t *)&ip6i[1]; 3246 3247 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 3248 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 3249 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 3250 } 3251 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 3252 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 3253 } 3254 } else { 3255 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 3256 } 3257 3258 if (!(ipp->ipp_fields & IPPF_ADDR)) 3259 ip6h->ip6_src = icmp->icmp_v6src; 3260 3261 /* Try to get everything in a single mblk */ 3262 if (hdrs_len > icmp->icmp_max_hdr_len) { 3263 icmp->icmp_max_hdr_len = hdrs_len; 3264 rw_exit(&icmp->icmp_rwlock); 3265 (void) proto_set_tx_wroff(icmp->icmp_connp->conn_rq, 3266 icmp->icmp_connp, 3267 icmp->icmp_max_hdr_len + is->is_wroff_extra); 3268 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3269 } 3270 return (0); 3271 } 3272 3273 /* 3274 * This routine retrieves the value of an ND variable in a icmpparam_t 3275 * structure. 
It is called through nd_getset when a user reads the 3276 * variable. 3277 */ 3278 /* ARGSUSED */ 3279 static int 3280 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3281 { 3282 icmpparam_t *icmppa = (icmpparam_t *)cp; 3283 3284 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 3285 return (0); 3286 } 3287 3288 /* 3289 * Walk through the param array specified registering each element with the 3290 * named dispatch (ND) handler. 3291 */ 3292 static boolean_t 3293 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) 3294 { 3295 for (; cnt-- > 0; icmppa++) { 3296 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 3297 if (!nd_load(ndp, icmppa->icmp_param_name, 3298 icmp_param_get, icmp_param_set, 3299 (caddr_t)icmppa)) { 3300 nd_free(ndp); 3301 return (B_FALSE); 3302 } 3303 } 3304 } 3305 return (B_TRUE); 3306 } 3307 3308 /* This routine sets an ND variable in a icmpparam_t structure. */ 3309 /* ARGSUSED */ 3310 static int 3311 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3312 { 3313 long new_value; 3314 icmpparam_t *icmppa = (icmpparam_t *)cp; 3315 3316 /* 3317 * Fail the request if the new value does not lie within the 3318 * required bounds. 
3319 */ 3320 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3321 new_value < icmppa->icmp_param_min || 3322 new_value > icmppa->icmp_param_max) { 3323 return (EINVAL); 3324 } 3325 /* Set the new value */ 3326 icmppa->icmp_param_value = new_value; 3327 return (0); 3328 } 3329 3330 static mblk_t * 3331 icmp_queue_fallback(icmp_t *icmp, mblk_t *mp) 3332 { 3333 ASSERT(MUTEX_HELD(&icmp->icmp_recv_lock)); 3334 if (IPCL_IS_NONSTR(icmp->icmp_connp)) { 3335 /* 3336 * fallback has started but messages have not been moved yet 3337 */ 3338 if (icmp->icmp_fallback_queue_head == NULL) { 3339 ASSERT(icmp->icmp_fallback_queue_tail == NULL); 3340 icmp->icmp_fallback_queue_head = mp; 3341 icmp->icmp_fallback_queue_tail = mp; 3342 } else { 3343 ASSERT(icmp->icmp_fallback_queue_tail != NULL); 3344 icmp->icmp_fallback_queue_tail->b_next = mp; 3345 icmp->icmp_fallback_queue_tail = mp; 3346 } 3347 return (NULL); 3348 } else { 3349 /* 3350 * Fallback completed, let the caller putnext() the mblk. 3351 */ 3352 return (mp); 3353 } 3354 } 3355 3356 /* 3357 * Deliver data to ULP. In case we have a socket, and it's falling back to 3358 * TPI, then we'll queue the mp for later processing. 
3359 */ 3360 static void 3361 icmp_ulp_recv(conn_t *connp, mblk_t *mp) 3362 { 3363 3364 if (IPCL_IS_NONSTR(connp)) { 3365 icmp_t *icmp = connp->conn_icmp; 3366 int error; 3367 3368 if ((*connp->conn_upcalls->su_recv) 3369 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 3370 NULL) < 0) { 3371 mutex_enter(&icmp->icmp_recv_lock); 3372 if (error == ENOSPC) { 3373 /* 3374 * let's confirm while holding the lock 3375 */ 3376 if ((*connp->conn_upcalls->su_recv) 3377 (connp->conn_upper_handle, NULL, 0, 0, 3378 &error, NULL) < 0) { 3379 ASSERT(error == ENOSPC); 3380 if (error == ENOSPC) { 3381 connp->conn_flow_cntrld = 3382 B_TRUE; 3383 } 3384 } 3385 mutex_exit(&icmp->icmp_recv_lock); 3386 } else { 3387 ASSERT(error == EOPNOTSUPP); 3388 mp = icmp_queue_fallback(icmp, mp); 3389 mutex_exit(&icmp->icmp_recv_lock); 3390 if (mp != NULL) 3391 putnext(connp->conn_rq, mp); 3392 } 3393 } 3394 ASSERT(MUTEX_NOT_HELD(&icmp->icmp_recv_lock)); 3395 } else { 3396 putnext(connp->conn_rq, mp); 3397 } 3398 } 3399 3400 /*ARGSUSED2*/ 3401 static void 3402 icmp_input(void *arg1, mblk_t *mp, void *arg2) 3403 { 3404 conn_t *connp = (conn_t *)arg1; 3405 struct T_unitdata_ind *tudi; 3406 uchar_t *rptr; 3407 icmp_t *icmp; 3408 icmp_stack_t *is; 3409 sin_t *sin; 3410 sin6_t *sin6; 3411 ip6_t *ip6h; 3412 ip6i_t *ip6i; 3413 mblk_t *mp1; 3414 int hdr_len; 3415 ipha_t *ipha; 3416 int udi_size; /* Size of T_unitdata_ind */ 3417 uint_t ipvers; 3418 ip6_pkt_t ipp; 3419 uint8_t nexthdr; 3420 ip_pktinfo_t *pinfo = NULL; 3421 mblk_t *options_mp = NULL; 3422 uint_t icmp_opt = 0; 3423 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 3424 uint_t hopstrip; 3425 3426 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 3427 3428 icmp = connp->conn_icmp; 3429 is = icmp->icmp_is; 3430 rptr = mp->b_rptr; 3431 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3432 ASSERT(OK_32PTR(rptr)); 3433 3434 /* 3435 * IP should have prepended the options data in an M_CTL 3436 * Check M_CTL "type" to make sure are not here bcos of 
3437 * a valid ICMP message 3438 */ 3439 if (DB_TYPE(mp) == M_CTL) { 3440 /* 3441 * FIXME: does IP still do this? 3442 * IP sends up the IPSEC_IN message for handling IPSEC 3443 * policy at the TCP level. We don't need it here. 3444 */ 3445 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 3446 mp1 = mp->b_cont; 3447 freeb(mp); 3448 mp = mp1; 3449 rptr = mp->b_rptr; 3450 } else if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3451 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3452 IN_PKTINFO) { 3453 /* 3454 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3455 * has been prepended to the packet by IP. We need to 3456 * extract the mblk and adjust the rptr 3457 */ 3458 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3459 options_mp = mp; 3460 mp = mp->b_cont; 3461 rptr = mp->b_rptr; 3462 } else { 3463 /* 3464 * ICMP messages. 3465 */ 3466 icmp_icmp_error(connp, mp); 3467 return; 3468 } 3469 } 3470 3471 /* 3472 * Discard message if it is misaligned or smaller than the IP header. 3473 */ 3474 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3475 freemsg(mp); 3476 if (options_mp != NULL) 3477 freeb(options_mp); 3478 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3479 return; 3480 } 3481 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3482 3483 /* Handle M_DATA messages containing IP packets messages */ 3484 if (ipvers == IPV4_VERSION) { 3485 /* 3486 * Special case where IP attaches 3487 * the IRE needs to be handled so that we don't send up 3488 * IRE to the user land. 
3489 */ 3490 ipha = (ipha_t *)rptr; 3491 hdr_len = IPH_HDR_LENGTH(ipha); 3492 3493 if (ipha->ipha_protocol == IPPROTO_TCP) { 3494 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3495 3496 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3497 TH_SYN) && mp->b_cont != NULL) { 3498 mp1 = mp->b_cont; 3499 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3500 freeb(mp1); 3501 mp->b_cont = NULL; 3502 } 3503 } 3504 } 3505 if (is->is_bsd_compat) { 3506 ushort_t len; 3507 len = ntohs(ipha->ipha_length); 3508 3509 if (mp->b_datap->db_ref > 1) { 3510 /* 3511 * Allocate a new IP header so that we can 3512 * modify ipha_length. 3513 */ 3514 mblk_t *mp1; 3515 3516 mp1 = allocb(hdr_len, BPRI_MED); 3517 if (!mp1) { 3518 freemsg(mp); 3519 if (options_mp != NULL) 3520 freeb(options_mp); 3521 BUMP_MIB(&is->is_rawip_mib, 3522 rawipInErrors); 3523 return; 3524 } 3525 bcopy(rptr, mp1->b_rptr, hdr_len); 3526 mp->b_rptr = rptr + hdr_len; 3527 rptr = mp1->b_rptr; 3528 ipha = (ipha_t *)rptr; 3529 mp1->b_cont = mp; 3530 mp1->b_wptr = rptr + hdr_len; 3531 mp = mp1; 3532 } 3533 len -= hdr_len; 3534 ipha->ipha_length = htons(len); 3535 } 3536 } 3537 3538 /* 3539 * This is the inbound data path. Packets are passed upstream as 3540 * T_UNITDATA_IND messages with full IP headers still attached. 3541 */ 3542 if (icmp->icmp_family == AF_INET) { 3543 ASSERT(ipvers == IPV4_VERSION); 3544 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3545 if (icmp->icmp_recvif && (pinfo != NULL) && 3546 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3547 udi_size += sizeof (struct T_opthdr) + 3548 sizeof (uint_t); 3549 } 3550 3551 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3552 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3553 udi_size += sizeof (struct T_opthdr) + 3554 sizeof (struct in_pktinfo); 3555 } 3556 3557 /* 3558 * If SO_TIMESTAMP is set allocate the appropriate sized 3559 * buffer. 
Since gethrestime() expects a pointer aligned 3560 * argument, we allocate space necessary for extra 3561 * alignment (even though it might not be used). 3562 */ 3563 if (icmp->icmp_timestamp) { 3564 udi_size += sizeof (struct T_opthdr) + 3565 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3566 } 3567 mp1 = allocb(udi_size, BPRI_MED); 3568 if (mp1 == NULL) { 3569 freemsg(mp); 3570 if (options_mp != NULL) 3571 freeb(options_mp); 3572 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3573 return; 3574 } 3575 mp1->b_cont = mp; 3576 mp = mp1; 3577 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3578 mp->b_datap->db_type = M_PROTO; 3579 mp->b_wptr = (uchar_t *)tudi + udi_size; 3580 tudi->PRIM_type = T_UNITDATA_IND; 3581 tudi->SRC_length = sizeof (sin_t); 3582 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3583 sin = (sin_t *)&tudi[1]; 3584 *sin = sin_null; 3585 sin->sin_family = AF_INET; 3586 sin->sin_addr.s_addr = ipha->ipha_src; 3587 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3588 sizeof (sin_t); 3589 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3590 tudi->OPT_length = udi_size; 3591 3592 /* 3593 * Add options if IP_RECVIF is set 3594 */ 3595 if (udi_size != 0) { 3596 char *dstopt; 3597 3598 dstopt = (char *)&sin[1]; 3599 if (icmp->icmp_recvif && (pinfo != NULL) && 3600 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3601 3602 struct T_opthdr *toh; 3603 uint_t *dstptr; 3604 3605 toh = (struct T_opthdr *)dstopt; 3606 toh->level = IPPROTO_IP; 3607 toh->name = IP_RECVIF; 3608 toh->len = sizeof (struct T_opthdr) + 3609 sizeof (uint_t); 3610 toh->status = 0; 3611 dstopt += sizeof (struct T_opthdr); 3612 dstptr = (uint_t *)dstopt; 3613 *dstptr = pinfo->ip_pkt_ifindex; 3614 dstopt += sizeof (uint_t); 3615 udi_size -= toh->len; 3616 } 3617 if (icmp->icmp_timestamp) { 3618 struct T_opthdr *toh; 3619 3620 toh = (struct T_opthdr *)dstopt; 3621 toh->level = SOL_SOCKET; 3622 toh->name = SCM_TIMESTAMP; 3623 toh->len = sizeof (struct T_opthdr) + 3624 sizeof (timestruc_t) + 
_POINTER_ALIGNMENT; 3625 toh->status = 0; 3626 dstopt += sizeof (struct T_opthdr); 3627 /* Align for gethrestime() */ 3628 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 3629 sizeof (intptr_t)); 3630 gethrestime((timestruc_t *)dstopt); 3631 dstopt = (char *)toh + toh->len; 3632 udi_size -= toh->len; 3633 } 3634 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3635 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3636 struct T_opthdr *toh; 3637 struct in_pktinfo *pktinfop; 3638 3639 toh = (struct T_opthdr *)dstopt; 3640 toh->level = IPPROTO_IP; 3641 toh->name = IP_PKTINFO; 3642 toh->len = sizeof (struct T_opthdr) + 3643 sizeof (in_pktinfo_t); 3644 toh->status = 0; 3645 dstopt += sizeof (struct T_opthdr); 3646 pktinfop = (struct in_pktinfo *)dstopt; 3647 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3648 pktinfop->ipi_spec_dst = 3649 pinfo->ip_pkt_match_addr; 3650 3651 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3652 3653 dstopt += sizeof (struct in_pktinfo); 3654 udi_size -= toh->len; 3655 } 3656 3657 /* Consumed all of allocated space */ 3658 ASSERT(udi_size == 0); 3659 } 3660 3661 if (options_mp != NULL) 3662 freeb(options_mp); 3663 3664 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3665 goto deliver; 3666 } 3667 3668 /* 3669 * We don't need options_mp in the IPv6 path. 3670 */ 3671 if (options_mp != NULL) { 3672 freeb(options_mp); 3673 options_mp = NULL; 3674 } 3675 3676 /* 3677 * Discard message if it is smaller than the IPv6 header 3678 * or if the header is malformed. 3679 */ 3680 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3681 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3682 icmp->icmp_family != AF_INET6) { 3683 freemsg(mp); 3684 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3685 return; 3686 } 3687 3688 /* Initialize */ 3689 ipp.ipp_fields = 0; 3690 hopstrip = 0; 3691 3692 ip6h = (ip6_t *)rptr; 3693 /* 3694 * Call on ip_find_hdr_v6 which gets the total hdr len 3695 * as well as individual lenghts of ext hdrs (and ptrs to 3696 * them). 
3697 */ 3698 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3699 /* Look for ifindex information */ 3700 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3701 ip6i = (ip6i_t *)ip6h; 3702 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3703 ASSERT(ip6i->ip6i_ifindex != 0); 3704 ipp.ipp_fields |= IPPF_IFINDEX; 3705 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3706 } 3707 rptr = (uchar_t *)&ip6i[1]; 3708 mp->b_rptr = rptr; 3709 if (rptr == mp->b_wptr) { 3710 mp1 = mp->b_cont; 3711 freeb(mp); 3712 mp = mp1; 3713 rptr = mp->b_rptr; 3714 } 3715 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3716 ip6h = (ip6_t *)rptr; 3717 } 3718 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3719 3720 /* 3721 * We need to lie a bit to the user because users inside 3722 * labeled compartments should not see their own labels. We 3723 * assume that in all other respects IP has checked the label, 3724 * and that the label is always first among the options. (If 3725 * it's not first, then this code won't see it, and the option 3726 * will be passed along to the user.) 3727 * 3728 * If we had multilevel ICMP sockets, then the following code 3729 * should be skipped for them to allow the user to see the 3730 * label. 3731 * 3732 * Alignment restrictions in the definition of IP options 3733 * (namely, the requirement that the 4-octet DOI goes on a 3734 * 4-octet boundary) mean that we know exactly where the option 3735 * should start, but we're lenient for other hosts. 3736 * 3737 * Note that there are no multilevel ICMP or raw IP sockets 3738 * yet, thus nobody ever sees the IP6OPT_LS option. 
3739 */ 3740 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3741 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3742 const uchar_t *ucp = 3743 (const uchar_t *)ipp.ipp_hopopts + 2; 3744 int remlen = ipp.ipp_hopoptslen - 2; 3745 3746 while (remlen > 0) { 3747 if (*ucp == IP6OPT_PAD1) { 3748 remlen--; 3749 ucp++; 3750 } else if (*ucp == IP6OPT_PADN) { 3751 remlen -= ucp[1] + 2; 3752 ucp += ucp[1] + 2; 3753 } else if (*ucp == ip6opt_ls) { 3754 hopstrip = (ucp - 3755 (const uchar_t *)ipp.ipp_hopopts) + 3756 ucp[1] + 2; 3757 hopstrip = (hopstrip + 7) & ~7; 3758 break; 3759 } else { 3760 /* label option must be first */ 3761 break; 3762 } 3763 } 3764 } 3765 } else { 3766 hdr_len = IPV6_HDR_LEN; 3767 ip6i = NULL; 3768 nexthdr = ip6h->ip6_nxt; 3769 } 3770 /* 3771 * One special case where IP attaches the IRE needs to 3772 * be handled so that we don't send up IRE to the user land. 3773 */ 3774 if (nexthdr == IPPROTO_TCP) { 3775 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3776 3777 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3778 mp->b_cont != NULL) { 3779 mp1 = mp->b_cont; 3780 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3781 freeb(mp1); 3782 mp->b_cont = NULL; 3783 } 3784 } 3785 } 3786 /* 3787 * Check a filter for ICMPv6 types if needed. 3788 * Verify raw checksums if needed. 
3789 */ 3790 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3791 if (icmp->icmp_filter != NULL) { 3792 int type; 3793 3794 /* Assumes that IP has done the pullupmsg */ 3795 type = mp->b_rptr[hdr_len]; 3796 3797 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3798 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3799 freemsg(mp); 3800 return; 3801 } 3802 } else { 3803 /* Checksum */ 3804 uint16_t *up; 3805 uint32_t sum; 3806 int remlen; 3807 3808 up = (uint16_t *)&ip6h->ip6_src; 3809 3810 remlen = msgdsize(mp) - hdr_len; 3811 sum = htons(icmp->icmp_proto + remlen) 3812 + up[0] + up[1] + up[2] + up[3] 3813 + up[4] + up[5] + up[6] + up[7] 3814 + up[8] + up[9] + up[10] + up[11] 3815 + up[12] + up[13] + up[14] + up[15]; 3816 sum = (sum & 0xffff) + (sum >> 16); 3817 sum = IP_CSUM(mp, hdr_len, sum); 3818 if (sum != 0) { 3819 /* IPv6 RAW checksum failed */ 3820 ip0dbg(("icmp_rput: RAW checksum " 3821 "failed %x\n", sum)); 3822 freemsg(mp); 3823 BUMP_MIB(&is->is_rawip_mib, 3824 rawipInCksumErrs); 3825 return; 3826 } 3827 } 3828 } 3829 /* Skip all the IPv6 headers per API */ 3830 mp->b_rptr += hdr_len; 3831 3832 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3833 3834 /* 3835 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3836 * maintain state information, instead of relying on icmp_t 3837 * structure, since there arent any locks protecting these members 3838 * and there is a window where there might be a race between a 3839 * thread setting options on the write side and a thread reading 3840 * these options on the read size. 
3841 */ 3842 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3843 IPPF_RTHDR|IPPF_IFINDEX)) { 3844 if (icmp->icmp_ipv6_recvhopopts && 3845 (ipp.ipp_fields & IPPF_HOPOPTS) && 3846 ipp.ipp_hopoptslen > hopstrip) { 3847 udi_size += sizeof (struct T_opthdr) + 3848 ipp.ipp_hopoptslen - hopstrip; 3849 icmp_opt |= IPPF_HOPOPTS; 3850 } 3851 if ((icmp->icmp_ipv6_recvdstopts || 3852 icmp->icmp_old_ipv6_recvdstopts) && 3853 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3854 udi_size += sizeof (struct T_opthdr) + 3855 ipp.ipp_dstoptslen; 3856 icmp_opt |= IPPF_DSTOPTS; 3857 } 3858 if (((icmp->icmp_ipv6_recvdstopts && 3859 icmp->icmp_ipv6_recvrthdr && 3860 (ipp.ipp_fields & IPPF_RTHDR)) || 3861 icmp->icmp_ipv6_recvrtdstopts) && 3862 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3863 udi_size += sizeof (struct T_opthdr) + 3864 ipp.ipp_rtdstoptslen; 3865 icmp_opt |= IPPF_RTDSTOPTS; 3866 } 3867 if (icmp->icmp_ipv6_recvrthdr && 3868 (ipp.ipp_fields & IPPF_RTHDR)) { 3869 udi_size += sizeof (struct T_opthdr) + 3870 ipp.ipp_rthdrlen; 3871 icmp_opt |= IPPF_RTHDR; 3872 } 3873 if (icmp->icmp_ip_recvpktinfo && 3874 (ipp.ipp_fields & IPPF_IFINDEX)) { 3875 udi_size += sizeof (struct T_opthdr) + 3876 sizeof (struct in6_pktinfo); 3877 icmp_opt |= IPPF_IFINDEX; 3878 } 3879 } 3880 if (icmp->icmp_ipv6_recvhoplimit) { 3881 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3882 icmp_ipv6_recvhoplimit = B_TRUE; 3883 } 3884 3885 if (icmp->icmp_ipv6_recvtclass) 3886 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3887 3888 /* 3889 * If SO_TIMESTAMP is set allocate the appropriate sized 3890 * buffer. Since gethrestime() expects a pointer aligned 3891 * argument, we allocate space necessary for extra 3892 * alignment (even though it might not be used). 
3893 */ 3894 if (icmp->icmp_timestamp) { 3895 udi_size += sizeof (struct T_opthdr) + 3896 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3897 } 3898 3899 mp1 = allocb(udi_size, BPRI_MED); 3900 if (mp1 == NULL) { 3901 freemsg(mp); 3902 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3903 return; 3904 } 3905 mp1->b_cont = mp; 3906 mp = mp1; 3907 mp->b_datap->db_type = M_PROTO; 3908 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3909 mp->b_wptr = (uchar_t *)tudi + udi_size; 3910 tudi->PRIM_type = T_UNITDATA_IND; 3911 tudi->SRC_length = sizeof (sin6_t); 3912 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3913 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3914 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3915 tudi->OPT_length = udi_size; 3916 sin6 = (sin6_t *)&tudi[1]; 3917 sin6->sin6_port = 0; 3918 sin6->sin6_family = AF_INET6; 3919 3920 sin6->sin6_addr = ip6h->ip6_src; 3921 /* No sin6_flowinfo per API */ 3922 sin6->sin6_flowinfo = 0; 3923 /* For link-scope source pass up scope id */ 3924 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3925 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3926 sin6->sin6_scope_id = ipp.ipp_ifindex; 3927 else 3928 sin6->sin6_scope_id = 0; 3929 3930 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3931 icmp->icmp_zoneid, is->is_netstack); 3932 3933 if (udi_size != 0) { 3934 uchar_t *dstopt; 3935 3936 dstopt = (uchar_t *)&sin6[1]; 3937 if (icmp_opt & IPPF_IFINDEX) { 3938 struct T_opthdr *toh; 3939 struct in6_pktinfo *pkti; 3940 3941 toh = (struct T_opthdr *)dstopt; 3942 toh->level = IPPROTO_IPV6; 3943 toh->name = IPV6_PKTINFO; 3944 toh->len = sizeof (struct T_opthdr) + 3945 sizeof (*pkti); 3946 toh->status = 0; 3947 dstopt += sizeof (struct T_opthdr); 3948 pkti = (struct in6_pktinfo *)dstopt; 3949 pkti->ipi6_addr = ip6h->ip6_dst; 3950 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3951 dstopt += sizeof (*pkti); 3952 udi_size -= toh->len; 3953 } 3954 if (icmp_ipv6_recvhoplimit) { 3955 struct T_opthdr *toh; 3956 3957 toh = 
(struct T_opthdr *)dstopt; 3958 toh->level = IPPROTO_IPV6; 3959 toh->name = IPV6_HOPLIMIT; 3960 toh->len = sizeof (struct T_opthdr) + 3961 sizeof (uint_t); 3962 toh->status = 0; 3963 dstopt += sizeof (struct T_opthdr); 3964 *(uint_t *)dstopt = ip6h->ip6_hops; 3965 dstopt += sizeof (uint_t); 3966 udi_size -= toh->len; 3967 } 3968 if (icmp->icmp_ipv6_recvtclass) { 3969 struct T_opthdr *toh; 3970 3971 toh = (struct T_opthdr *)dstopt; 3972 toh->level = IPPROTO_IPV6; 3973 toh->name = IPV6_TCLASS; 3974 toh->len = sizeof (struct T_opthdr) + 3975 sizeof (uint_t); 3976 toh->status = 0; 3977 dstopt += sizeof (struct T_opthdr); 3978 *(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3979 dstopt += sizeof (uint_t); 3980 udi_size -= toh->len; 3981 } 3982 if (icmp->icmp_timestamp) { 3983 struct T_opthdr *toh; 3984 3985 toh = (struct T_opthdr *)dstopt; 3986 toh->level = SOL_SOCKET; 3987 toh->name = SCM_TIMESTAMP; 3988 toh->len = sizeof (struct T_opthdr) + 3989 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3990 toh->status = 0; 3991 dstopt += sizeof (struct T_opthdr); 3992 /* Align for gethrestime() */ 3993 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 3994 sizeof (intptr_t)); 3995 gethrestime((timestruc_t *)dstopt); 3996 dstopt = (uchar_t *)toh + toh->len; 3997 udi_size -= toh->len; 3998 } 3999 4000 if (icmp_opt & IPPF_HOPOPTS) { 4001 struct T_opthdr *toh; 4002 4003 toh = (struct T_opthdr *)dstopt; 4004 toh->level = IPPROTO_IPV6; 4005 toh->name = IPV6_HOPOPTS; 4006 toh->len = sizeof (struct T_opthdr) + 4007 ipp.ipp_hopoptslen - hopstrip; 4008 toh->status = 0; 4009 dstopt += sizeof (struct T_opthdr); 4010 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 4011 ipp.ipp_hopoptslen - hopstrip); 4012 if (hopstrip > 0) { 4013 /* copy next header value and fake length */ 4014 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 4015 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 4016 hopstrip / 8; 4017 } 4018 dstopt += ipp.ipp_hopoptslen - hopstrip; 4019 udi_size -= toh->len; 4020 } 4021 if 
(icmp_opt & IPPF_RTDSTOPTS) { 4022 struct T_opthdr *toh; 4023 4024 toh = (struct T_opthdr *)dstopt; 4025 toh->level = IPPROTO_IPV6; 4026 toh->name = IPV6_DSTOPTS; 4027 toh->len = sizeof (struct T_opthdr) + 4028 ipp.ipp_rtdstoptslen; 4029 toh->status = 0; 4030 dstopt += sizeof (struct T_opthdr); 4031 bcopy(ipp.ipp_rtdstopts, dstopt, 4032 ipp.ipp_rtdstoptslen); 4033 dstopt += ipp.ipp_rtdstoptslen; 4034 udi_size -= toh->len; 4035 } 4036 if (icmp_opt & IPPF_RTHDR) { 4037 struct T_opthdr *toh; 4038 4039 toh = (struct T_opthdr *)dstopt; 4040 toh->level = IPPROTO_IPV6; 4041 toh->name = IPV6_RTHDR; 4042 toh->len = sizeof (struct T_opthdr) + 4043 ipp.ipp_rthdrlen; 4044 toh->status = 0; 4045 dstopt += sizeof (struct T_opthdr); 4046 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4047 dstopt += ipp.ipp_rthdrlen; 4048 udi_size -= toh->len; 4049 } 4050 if (icmp_opt & IPPF_DSTOPTS) { 4051 struct T_opthdr *toh; 4052 4053 toh = (struct T_opthdr *)dstopt; 4054 toh->level = IPPROTO_IPV6; 4055 toh->name = IPV6_DSTOPTS; 4056 toh->len = sizeof (struct T_opthdr) + 4057 ipp.ipp_dstoptslen; 4058 toh->status = 0; 4059 dstopt += sizeof (struct T_opthdr); 4060 bcopy(ipp.ipp_dstopts, dstopt, 4061 ipp.ipp_dstoptslen); 4062 dstopt += ipp.ipp_dstoptslen; 4063 udi_size -= toh->len; 4064 } 4065 /* Consumed all of allocated space */ 4066 ASSERT(udi_size == 0); 4067 } 4068 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 4069 4070 deliver: 4071 icmp_ulp_recv(connp, mp); 4072 4073 } 4074 4075 /* 4076 * return SNMP stuff in buffer in mpdata 4077 */ 4078 mblk_t * 4079 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 4080 { 4081 mblk_t *mpdata; 4082 struct opthdr *optp; 4083 conn_t *connp = Q_TO_CONN(q); 4084 icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 4085 mblk_t *mp2ctl; 4086 4087 /* 4088 * make a copy of the original message 4089 */ 4090 mp2ctl = copymsg(mpctl); 4091 4092 if (mpctl == NULL || 4093 (mpdata = mpctl->b_cont) == NULL) { 4094 freemsg(mpctl); 4095 freemsg(mp2ctl); 4096 return (0); 4097 } 
4098 4099 /* fixed length structure for IPv4 and IPv6 counters */ 4100 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4101 optp->level = EXPER_RAWIP; 4102 optp->name = 0; 4103 (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, 4104 sizeof (is->is_rawip_mib)); 4105 optp->len = msgdsize(mpdata); 4106 qreply(q, mpctl); 4107 4108 return (mp2ctl); 4109 } 4110 4111 /* 4112 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 4113 * TODO: If this ever actually tries to set anything, it needs to be 4114 * to do the appropriate locking. 4115 */ 4116 /* ARGSUSED */ 4117 int 4118 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4119 uchar_t *ptr, int len) 4120 { 4121 switch (level) { 4122 case EXPER_RAWIP: 4123 return (0); 4124 default: 4125 return (1); 4126 } 4127 } 4128 4129 /* 4130 * This routine creates a T_UDERROR_IND message and passes it upstream. 4131 * The address and options are copied from the T_UNITDATA_REQ message 4132 * passed in mp. This message is freed. 4133 */ 4134 static void 4135 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 4136 { 4137 mblk_t *mp1; 4138 uchar_t *rptr = mp->b_rptr; 4139 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 4140 4141 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 4142 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 4143 tudr->OPT_length, err); 4144 if (mp1) 4145 qreply(q, mp1); 4146 freemsg(mp); 4147 } 4148 4149 4150 static int 4151 rawip_do_unbind(conn_t *connp) 4152 { 4153 icmp_t *icmp = connp->conn_icmp; 4154 4155 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4156 /* If a bind has not been done, we can't unbind. 
*/ 4157 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 4158 rw_exit(&icmp->icmp_rwlock); 4159 return (-TOUTSTATE); 4160 } 4161 icmp->icmp_pending_op = T_UNBIND_REQ; 4162 rw_exit(&icmp->icmp_rwlock); 4163 4164 /* 4165 * Call ip to unbind 4166 */ 4167 4168 ip_unbind(connp); 4169 4170 /* 4171 * Once we're unbound from IP, the pending operation may be cleared 4172 * here. 4173 */ 4174 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4175 V6_SET_ZERO(icmp->icmp_v6src); 4176 V6_SET_ZERO(icmp->icmp_bound_v6src); 4177 icmp->icmp_pending_op = -1; 4178 icmp->icmp_state = TS_UNBND; 4179 if (icmp->icmp_family == AF_INET6) 4180 (void) icmp_build_hdrs(icmp); 4181 rw_exit(&icmp->icmp_rwlock); 4182 return (0); 4183 } 4184 4185 /* 4186 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 4187 * After some error checking, the message is passed downstream to ip. 4188 */ 4189 static void 4190 icmp_tpi_unbind(queue_t *q, mblk_t *mp) 4191 { 4192 conn_t *connp = Q_TO_CONN(q); 4193 int error; 4194 4195 ASSERT(mp->b_cont == NULL); 4196 error = rawip_do_unbind(connp); 4197 if (error) { 4198 if (error < 0) { 4199 icmp_err_ack(q, mp, -error, 0); 4200 } else { 4201 icmp_err_ack(q, mp, 0, error); 4202 } 4203 return; 4204 } 4205 4206 /* 4207 * Convert mp into a T_OK_ACK 4208 */ 4209 4210 mp = mi_tpi_ok_ack_alloc(mp); 4211 4212 /* 4213 * should not happen in practice... T_OK_ACK is smaller than the 4214 * original message. 4215 */ 4216 ASSERT(mp != NULL); 4217 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4218 qreply(q, mp); 4219 } 4220 4221 4222 /* 4223 * Process IPv4 packets that already include an IP header. 4224 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 4225 * IPPROTO_IGMP). 
4226 */ 4227 static int 4228 icmp_wput_hdrincl(queue_t *q, conn_t *connp, mblk_t *mp, icmp_t *icmp, 4229 ip4_pkt_t *pktinfop) 4230 { 4231 icmp_stack_t *is = icmp->icmp_is; 4232 ipha_t *ipha; 4233 int ip_hdr_length; 4234 int tp_hdr_len; 4235 mblk_t *mp1; 4236 uint_t pkt_len; 4237 ip_opt_info_t optinfo; 4238 4239 optinfo.ip_opt_flags = 0; 4240 optinfo.ip_opt_ill_index = 0; 4241 ipha = (ipha_t *)mp->b_rptr; 4242 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4243 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 4244 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4245 ASSERT(icmp != NULL); 4246 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4247 freemsg(mp); 4248 return (0); 4249 } 4250 ipha = (ipha_t *)mp->b_rptr; 4251 } 4252 ipha->ipha_version_and_hdr_length = 4253 (IP_VERSION<<4) | (ip_hdr_length>>2); 4254 4255 /* 4256 * For the socket of SOCK_RAW type, the checksum is provided in the 4257 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4258 * tell IP that the application has sent a complete IP header and not 4259 * to compute the transport checksum nor change the DF flag. 4260 */ 4261 ipha->ipha_ident = IP_HDR_INCLUDED; 4262 ipha->ipha_hdr_checksum = 0; 4263 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 4264 /* Insert options if any */ 4265 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4266 /* 4267 * Put the IP header plus any transport header that is 4268 * checksumed by ip_wput into the first mblk. (ip_wput assumes 4269 * that at least the checksum field is in the first mblk.) 4270 */ 4271 switch (ipha->ipha_protocol) { 4272 case IPPROTO_UDP: 4273 tp_hdr_len = 8; 4274 break; 4275 case IPPROTO_TCP: 4276 tp_hdr_len = 20; 4277 break; 4278 default: 4279 tp_hdr_len = 0; 4280 break; 4281 } 4282 /* 4283 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 4284 * tp_hdr_len bytes will be in a single mblk. 
4285 */ 4286 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 4287 tp_hdr_len)) { 4288 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 4289 tp_hdr_len)) { 4290 BUMP_MIB(&is->is_rawip_mib, 4291 rawipOutErrors); 4292 freemsg(mp); 4293 return (0); 4294 } 4295 ipha = (ipha_t *)mp->b_rptr; 4296 } 4297 4298 /* 4299 * if the length is larger then the max allowed IP packet, 4300 * then send an error and abort the processing. 4301 */ 4302 pkt_len = ntohs(ipha->ipha_length) 4303 + icmp->icmp_ip_snd_options_len; 4304 if (pkt_len > IP_MAXPACKET) { 4305 return (EMSGSIZE); 4306 } 4307 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra + 4308 tp_hdr_len, BPRI_LO))) { 4309 return (ENOMEM); 4310 } 4311 mp1->b_rptr += is->is_wroff_extra; 4312 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4313 4314 ipha->ipha_length = htons((uint16_t)pkt_len); 4315 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4316 4317 /* Copy transport header if any */ 4318 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4319 mp1->b_wptr += tp_hdr_len; 4320 4321 /* Add options */ 4322 ipha = (ipha_t *)mp1->b_rptr; 4323 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 4324 icmp->icmp_ip_snd_options_len); 4325 4326 /* Drop IP header and transport header from original */ 4327 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4328 4329 mp1->b_cont = mp; 4330 mp = mp1; 4331 /* 4332 * Massage source route putting first source 4333 * route in ipha_dst. 
4334 */ 4335 (void) ip_massage_options(ipha, is->is_netstack); 4336 } 4337 4338 if (pktinfop != NULL) { 4339 /* 4340 * Over write the source address provided in the header 4341 */ 4342 if (pktinfop->ip4_addr != INADDR_ANY) { 4343 ipha->ipha_src = pktinfop->ip4_addr; 4344 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4345 } 4346 4347 if (pktinfop->ip4_ill_index != 0) { 4348 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4349 } 4350 } 4351 4352 ip_output_options(connp, mp, q, IP_WPUT, &optinfo); 4353 return (0); 4354 } 4355 4356 static int 4357 icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4358 { 4359 int err; 4360 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4361 icmp_stack_t *is = icmp->icmp_is; 4362 conn_t *connp = icmp->icmp_connp; 4363 cred_t *cr; 4364 4365 /* 4366 * All Solaris components should pass a db_credp 4367 * for this message, hence we ASSERT. 4368 * On production kernels we return an error to be robust against 4369 * random streams modules sitting on top of us. 4370 */ 4371 cr = msg_getcred(mp, NULL); 4372 ASSERT(cr != NULL); 4373 if (cr == NULL) 4374 return (EINVAL); 4375 4376 err = tsol_compute_label(cr, dst, 4377 opt_storage, connp->conn_mac_exempt, 4378 is->is_netstack->netstack_ip); 4379 if (err == 0) { 4380 err = tsol_update_options(&icmp->icmp_ip_snd_options, 4381 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4382 opt_storage); 4383 } 4384 if (err != 0) { 4385 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4386 DTRACE_PROBE4( 4387 tx__ip__log__drop__updatelabel__icmp, 4388 char *, "icmp(1) failed to update options(2) on mp(3)", 4389 icmp_t *, icmp, char *, opt_storage, mblk_t *, mp); 4390 return (err); 4391 } 4392 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4393 return (0); 4394 } 4395 4396 /* 4397 * This routine handles all messages passed downstream. It either 4398 * consumes the message or passes it downstream; it never queues a 4399 * a message. 
4400 */ 4401 static void 4402 icmp_wput(queue_t *q, mblk_t *mp) 4403 { 4404 uchar_t *rptr = mp->b_rptr; 4405 ipha_t *ipha; 4406 mblk_t *mp1; 4407 #define tudr ((struct T_unitdata_req *)rptr) 4408 size_t ip_len; 4409 conn_t *connp = Q_TO_CONN(q); 4410 icmp_t *icmp = connp->conn_icmp; 4411 icmp_stack_t *is = icmp->icmp_is; 4412 sin6_t *sin6; 4413 sin_t *sin; 4414 ipaddr_t v4dst; 4415 ip4_pkt_t pktinfo; 4416 ip4_pkt_t *pktinfop = &pktinfo; 4417 ip6_pkt_t ipp_s; /* For ancillary data options */ 4418 ip6_pkt_t *ipp = &ipp_s; 4419 int error; 4420 4421 ipp->ipp_fields = 0; 4422 ipp->ipp_sticky_ignored = 0; 4423 4424 switch (mp->b_datap->db_type) { 4425 case M_DATA: 4426 if (icmp->icmp_hdrincl) { 4427 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4428 ipha = (ipha_t *)mp->b_rptr; 4429 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4430 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4431 BUMP_MIB(&is->is_rawip_mib, 4432 rawipOutErrors); 4433 freemsg(mp); 4434 return; 4435 } 4436 ipha = (ipha_t *)mp->b_rptr; 4437 } 4438 /* 4439 * If this connection was used for v6 (inconceivable!) 4440 * or if we have a new destination, then it's time to 4441 * figure a new label. 
4442 */ 4443 if (is_system_labeled() && 4444 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4445 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4446 ipha->ipha_dst)) { 4447 error = icmp_update_label(icmp, mp, 4448 ipha->ipha_dst); 4449 if (error != 0) { 4450 icmp_ud_err(q, mp, error); 4451 return; 4452 } 4453 } 4454 error = icmp_wput_hdrincl(q, connp, mp, icmp, NULL); 4455 if (error != 0) 4456 icmp_ud_err(q, mp, error); 4457 return; 4458 } 4459 freemsg(mp); 4460 return; 4461 case M_PROTO: 4462 case M_PCPROTO: 4463 ip_len = mp->b_wptr - rptr; 4464 if (ip_len >= sizeof (struct T_unitdata_req)) { 4465 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4466 if (((union T_primitives *)rptr)->type 4467 == T_UNITDATA_REQ) 4468 break; 4469 } 4470 /* FALLTHRU */ 4471 default: 4472 icmp_wput_other(q, mp); 4473 return; 4474 } 4475 4476 /* Handle T_UNITDATA_REQ messages here. */ 4477 4478 mp1 = mp->b_cont; 4479 if (mp1 == NULL) { 4480 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4481 icmp_ud_err(q, mp, EPROTO); 4482 return; 4483 } 4484 4485 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4486 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4487 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4488 return; 4489 } 4490 4491 switch (icmp->icmp_family) { 4492 case AF_INET6: 4493 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4494 if (!OK_32PTR((char *)sin6) || 4495 tudr->DEST_length != sizeof (sin6_t) || 4496 sin6->sin6_family != AF_INET6) { 4497 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4498 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4499 return; 4500 } 4501 4502 /* No support for mapped addresses on raw sockets */ 4503 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4504 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4505 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4506 return; 4507 } 4508 4509 /* 4510 * Destination is a native IPv6 address. 4511 * Send out an IPv6 format packet. 
4512 */ 4513 if (tudr->OPT_length != 0) { 4514 int error; 4515 4516 error = 0; 4517 if (icmp_unitdata_opt_process(q, mp, &error, 4518 (void *)ipp) < 0) { 4519 /* failure */ 4520 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4521 icmp_ud_err(q, mp, error); 4522 return; 4523 } 4524 ASSERT(error == 0); 4525 } 4526 4527 error = raw_ip_send_data_v6(q, connp, mp1, sin6, ipp); 4528 goto done; 4529 4530 case AF_INET: 4531 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4532 if (!OK_32PTR((char *)sin) || 4533 tudr->DEST_length != sizeof (sin_t) || 4534 sin->sin_family != AF_INET) { 4535 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4536 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4537 return; 4538 } 4539 /* Extract and ipaddr */ 4540 v4dst = sin->sin_addr.s_addr; 4541 break; 4542 4543 default: 4544 ASSERT(0); 4545 } 4546 4547 pktinfop->ip4_ill_index = 0; 4548 pktinfop->ip4_addr = INADDR_ANY; 4549 4550 /* 4551 * If options passed in, feed it for verification and handling 4552 */ 4553 if (tudr->OPT_length != 0) { 4554 int error; 4555 4556 error = 0; 4557 if (icmp_unitdata_opt_process(q, mp, &error, 4558 (void *)pktinfop) < 0) { 4559 /* failure */ 4560 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4561 icmp_ud_err(q, mp, error); 4562 return; 4563 } 4564 ASSERT(error == 0); 4565 /* 4566 * Note: Success in processing options. 
4567 * mp option buffer represented by 4568 * OPT_length/offset now potentially modified 4569 * and contain option setting results 4570 */ 4571 } 4572 4573 error = raw_ip_send_data_v4(q, connp, mp1, v4dst, pktinfop); 4574 done: 4575 if (error != 0) { 4576 icmp_ud_err(q, mp, error); 4577 return; 4578 } else { 4579 mp->b_cont = NULL; 4580 freeb(mp); 4581 } 4582 } 4583 4584 4585 /* ARGSUSED */ 4586 static void 4587 icmp_wput_fallback(queue_t *q, mblk_t *mp) 4588 { 4589 #ifdef DEBUG 4590 cmn_err(CE_CONT, "icmp_wput_fallback: Message during fallback \n"); 4591 #endif 4592 freemsg(mp); 4593 } 4594 4595 static int 4596 raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp, ipaddr_t v4dst, 4597 ip4_pkt_t *pktinfop) 4598 { 4599 ipha_t *ipha; 4600 size_t ip_len; 4601 icmp_t *icmp = connp->conn_icmp; 4602 icmp_stack_t *is = icmp->icmp_is; 4603 int ip_hdr_length; 4604 ip_opt_info_t optinfo; 4605 4606 optinfo.ip_opt_flags = 0; 4607 optinfo.ip_opt_ill_index = 0; 4608 4609 if (icmp->icmp_state == TS_UNBND) { 4610 /* If a port has not been bound to the stream, fail. 
*/ 4611 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4612 return (EPROTO); 4613 } 4614 4615 if (v4dst == INADDR_ANY) 4616 v4dst = htonl(INADDR_LOOPBACK); 4617 4618 /* Check if our saved options are valid; update if not */ 4619 if (is_system_labeled() && 4620 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4621 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst)) { 4622 int error = icmp_update_label(icmp, mp, v4dst); 4623 4624 if (error != 0) 4625 return (error); 4626 } 4627 4628 /* Protocol 255 contains full IP headers */ 4629 if (icmp->icmp_hdrincl) 4630 return (icmp_wput_hdrincl(q, connp, mp, icmp, pktinfop)); 4631 4632 /* Add an IP header */ 4633 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4634 ipha = (ipha_t *)&mp->b_rptr[-ip_hdr_length]; 4635 if ((uchar_t *)ipha < mp->b_datap->db_base || 4636 mp->b_datap->db_ref != 1 || 4637 !OK_32PTR(ipha)) { 4638 mblk_t *mp1; 4639 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, 4640 BPRI_LO))) { 4641 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4642 return (ENOMEM); 4643 } 4644 mp1->b_cont = mp; 4645 ipha = (ipha_t *)mp1->b_datap->db_lim; 4646 mp1->b_wptr = (uchar_t *)ipha; 4647 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4648 mp = mp1; 4649 } 4650 #ifdef _BIG_ENDIAN 4651 /* Set version, header length, and tos */ 4652 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4653 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4654 icmp->icmp_type_of_service); 4655 /* Set ttl and protocol */ 4656 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4657 #else 4658 /* Set version, header length, and tos */ 4659 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4660 ((icmp->icmp_type_of_service << 8) | 4661 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4662 /* Set ttl and protocol */ 4663 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4664 #endif 4665 if (pktinfop->ip4_addr != INADDR_ANY) { 4666 ipha->ipha_src = pktinfop->ip4_addr; 4667 
optinfo.ip_opt_flags = IP_VERIFY_SRC; 4668 } else { 4669 4670 /* 4671 * Copy our address into the packet. If this is zero, 4672 * ip will fill in the real source address. 4673 */ 4674 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4675 } 4676 4677 ipha->ipha_fragment_offset_and_flags = 0; 4678 4679 if (pktinfop->ip4_ill_index != 0) { 4680 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4681 } 4682 4683 4684 /* 4685 * For the socket of SOCK_RAW type, the checksum is provided in the 4686 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4687 * tell IP that the application has sent a complete IP header and not 4688 * to compute the transport checksum nor change the DF flag. 4689 */ 4690 ipha->ipha_ident = IP_HDR_INCLUDED; 4691 4692 /* Finish common formatting of the packet. */ 4693 mp->b_rptr = (uchar_t *)ipha; 4694 4695 ip_len = mp->b_wptr - (uchar_t *)ipha; 4696 if (mp->b_cont != NULL) 4697 ip_len += msgdsize(mp->b_cont); 4698 4699 /* 4700 * Set the length into the IP header. 4701 * If the length is greater than the maximum allowed by IP, 4702 * then free the message and return. Do not try and send it 4703 * as this can cause problems in layers below. 4704 */ 4705 if (ip_len > IP_MAXPACKET) { 4706 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4707 return (EMSGSIZE); 4708 } 4709 ipha->ipha_length = htons((uint16_t)ip_len); 4710 /* 4711 * Copy in the destination address request 4712 */ 4713 ipha->ipha_dst = v4dst; 4714 4715 /* 4716 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4717 */ 4718 if (CLASSD(v4dst)) 4719 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4720 4721 /* Copy in options if any */ 4722 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4723 bcopy(icmp->icmp_ip_snd_options, 4724 &ipha[1], icmp->icmp_ip_snd_options_len); 4725 /* 4726 * Massage source route putting first source route in ipha_dst. 4727 * Ignore the destination in the T_unitdata_req. 
4728 */ 4729 (void) ip_massage_options(ipha, is->is_netstack); 4730 } 4731 4732 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 4733 ip_output_options(connp, mp, q, IP_WPUT, &optinfo); 4734 return (0); 4735 } 4736 4737 static int 4738 icmp_update_label_v6(icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4739 { 4740 int err; 4741 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4742 icmp_stack_t *is = icmp->icmp_is; 4743 conn_t *connp = icmp->icmp_connp; 4744 cred_t *cr; 4745 4746 /* 4747 * All Solaris components should pass a db_credp 4748 * for this message, hence we ASSERT. 4749 * On production kernels we return an error to be robust against 4750 * random streams modules sitting on top of us. 4751 */ 4752 cr = msg_getcred(mp, NULL); 4753 ASSERT(cr != NULL); 4754 if (cr == NULL) 4755 return (EINVAL); 4756 4757 err = tsol_compute_label_v6(cr, dst, 4758 opt_storage, connp->conn_mac_exempt, 4759 is->is_netstack->netstack_ip); 4760 if (err == 0) { 4761 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4762 &icmp->icmp_label_len_v6, opt_storage); 4763 } 4764 if (err != 0) { 4765 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4766 DTRACE_PROBE4( 4767 tx__ip__log__drop__updatelabel__icmp6, 4768 char *, "icmp(1) failed to update options(2) on mp(3)", 4769 icmp_t *, icmp, char *, opt_storage, mblk_t *, mp); 4770 return (err); 4771 } 4772 4773 icmp->icmp_v6lastdst = *dst; 4774 return (0); 4775 } 4776 4777 /* 4778 * raw_ip_send_data_v6(): 4779 * Assumes that icmp_wput did some sanity checking on the destination 4780 * address, but that the label may not yet be correct. 
*/
/*
 * Summary of what the code below does:
 *  - works out, per option, whether it comes from the ancillary data in
 *    ipp or from icmp_sticky_ipp (option_exists / is_sticky masks);
 *  - makes room in front of the payload (allocating a new leading mblk when
 *    the existing one cannot be used) for an optional ip6i_t, the IPv6
 *    header and any extension headers, and fills them in;
 *  - seeds the checksum field for ICMPv6 / raw-checksum sockets;
 *  - passes the packet to ip_output_v6().
 *
 * Returns 0 once the packet has been handed off (or freed after a failed
 * pullupmsg()), otherwise EADDRNOTAVAIL, ENOMEM, EPROTO, EMSGSIZE or the
 * error from icmp_update_label_v6().  Apart from the pullupmsg() case no
 * error path in this function frees the message.
 */
static int
raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp, sin6_t *sin6,
    ip6_pkt_t *ipp)
{
	ip6_t			*ip6h;
	ip6i_t			*ip6i;	/* mp->b_rptr even if no ip6i_t */
	int			ip_hdr_len = IPV6_HDR_LEN;
	size_t			ip_len;
	icmp_t			*icmp = connp->conn_icmp;
	icmp_stack_t		*is = icmp->icmp_is;
	ip6_pkt_t		*tipp;
	uint32_t		csum = 0;
	uint_t			ignore = 0;
	uint_t			option_exists = 0, is_sticky = 0;
	uint8_t			*cp;
	uint8_t			*nxthdr_ptr;
	in6_addr_t		ip6_dst;

	/*
	 * If the local address is a mapped address return
	 * an error.
	 * It would be possible to send an IPv6 packet but the
	 * response would never make it back to the application
	 * since it is bound to a mapped address.
	 */
	if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) {
		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
		return (EADDRNOTAVAIL);
	}

	ignore = ipp->ipp_sticky_ignored;
	if (sin6->sin6_scope_id != 0 &&
	    IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
		/*
		 * IPPF_SCOPE_ID is special.  It's neither a sticky
		 * option nor ancillary data.  It needs to be
		 * explicitly set in option_exists.
		 */
		option_exists |= IPPF_SCOPE_ID;
	}

	/*
	 * Compute the destination address; an unspecified destination is
	 * replaced by the loopback address.
	 */
	ip6_dst = sin6->sin6_addr;
	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
		ip6_dst = ipv6_loopback;

	/*
	 * If we're not going to the same destination as last time, then
	 * recompute the label required.  This is done in a separate routine to
	 * avoid blowing up our stack here.
	 */
	if (is_system_labeled() &&
	    !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst)) {
		int error = 0;

		error = icmp_update_label_v6(icmp, mp, &ip6_dst);
		if (error != 0)
			return (error);
	}

	/*
	 * If there's a security label here, then we ignore any options the
	 * user may try to set.  We keep the peer's label as a hidden sticky
	 * option.
	 */
	if (icmp->icmp_label_len_v6 > 0) {
		ignore &= ~IPPF_HOPOPTS;
		ipp->ipp_fields &= ~IPPF_HOPOPTS;
	}

	if ((icmp->icmp_sticky_ipp.ipp_fields == 0) &&
	    (ipp->ipp_fields == 0)) {
		/* No sticky options nor ancillary data. */
		goto no_options;
	}

	/*
	 * Go through the options figuring out where each is going to
	 * come from and build two masks.  The first mask indicates if
	 * the option exists at all.  The second mask indicates if the
	 * option is sticky or ancillary.
	 *
	 * For every option that may come from either place, ancillary data
	 * (ipp) is checked first and wins over the sticky setting.  The
	 * options that become extension headers also grow ip_hdr_len.
	 */
	if (!(ignore & IPPF_HOPOPTS)) {
		if (ipp->ipp_fields & IPPF_HOPOPTS) {
			option_exists |= IPPF_HOPOPTS;
			ip_hdr_len += ipp->ipp_hopoptslen;
		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) {
			option_exists |= IPPF_HOPOPTS;
			is_sticky |= IPPF_HOPOPTS;
			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen;
		}
	}

	if (!(ignore & IPPF_RTHDR)) {
		if (ipp->ipp_fields & IPPF_RTHDR) {
			option_exists |= IPPF_RTHDR;
			ip_hdr_len += ipp->ipp_rthdrlen;
		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) {
			option_exists |= IPPF_RTHDR;
			is_sticky |= IPPF_RTHDR;
			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen;
		}
	}

	if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) {
		/*
		 * Need to have a routing header to use these.
		 */
		if (ipp->ipp_fields & IPPF_RTDSTOPTS) {
			option_exists |= IPPF_RTDSTOPTS;
			ip_hdr_len += ipp->ipp_rtdstoptslen;
		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) {
			option_exists |= IPPF_RTDSTOPTS;
			is_sticky |= IPPF_RTDSTOPTS;
			ip_hdr_len +=
			    icmp->icmp_sticky_ipp.ipp_rtdstoptslen;
		}
	}

	if (!(ignore & IPPF_DSTOPTS)) {
		if (ipp->ipp_fields & IPPF_DSTOPTS) {
			option_exists |= IPPF_DSTOPTS;
			ip_hdr_len += ipp->ipp_dstoptslen;
		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) {
			option_exists |= IPPF_DSTOPTS;
			is_sticky |= IPPF_DSTOPTS;
			ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen;
		}
	}

	if (!(ignore & IPPF_IFINDEX)) {
		if (ipp->ipp_fields & IPPF_IFINDEX) {
			option_exists |= IPPF_IFINDEX;
		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) {
			option_exists |= IPPF_IFINDEX;
			is_sticky |= IPPF_IFINDEX;
		}
	}

	if (!(ignore & IPPF_ADDR)) {
		if (ipp->ipp_fields & IPPF_ADDR) {
			option_exists |= IPPF_ADDR;
		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) {
			option_exists |= IPPF_ADDR;
			is_sticky |= IPPF_ADDR;
		}
	}

	if (!(ignore & IPPF_DONTFRAG)) {
		if (ipp->ipp_fields & IPPF_DONTFRAG) {
			option_exists |= IPPF_DONTFRAG;
		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) {
			option_exists |= IPPF_DONTFRAG;
			is_sticky |= IPPF_DONTFRAG;
		}
	}

	if (!(ignore & IPPF_USE_MIN_MTU)) {
		if (ipp->ipp_fields & IPPF_USE_MIN_MTU) {
			option_exists |= IPPF_USE_MIN_MTU;
		} else if (icmp->icmp_sticky_ipp.ipp_fields &
		    IPPF_USE_MIN_MTU) {
			option_exists |= IPPF_USE_MIN_MTU;
			is_sticky |= IPPF_USE_MIN_MTU;
		}
	}

	if (!(ignore & IPPF_NEXTHOP)) {
		if (ipp->ipp_fields & IPPF_NEXTHOP) {
			option_exists |= IPPF_NEXTHOP;
		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NEXTHOP) {
			option_exists |= IPPF_NEXTHOP;
			is_sticky |= IPPF_NEXTHOP;
		}
	}

	if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT))
		option_exists |= IPPF_HOPLIMIT;
	/* IPV6_HOPLIMIT can never be sticky */
	ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT));

	if (!(ignore & IPPF_UNICAST_HOPS) &&
	    (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) {
		option_exists |= IPPF_UNICAST_HOPS;
		is_sticky |= IPPF_UNICAST_HOPS;
	}

	if (!(ignore & IPPF_MULTICAST_HOPS) &&
	    (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) {
		option_exists |= IPPF_MULTICAST_HOPS;
		is_sticky |= IPPF_MULTICAST_HOPS;
	}

	if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) {
		/* This is a sticky socket option only */
		option_exists |= IPPF_NO_CKSUM;
		is_sticky |= IPPF_NO_CKSUM;
	}

	if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) {
		/* This is a sticky socket option only */
		option_exists |= IPPF_RAW_CKSUM;
		is_sticky |= IPPF_RAW_CKSUM;
	}

	if (!(ignore & IPPF_TCLASS)) {
		if (ipp->ipp_fields & IPPF_TCLASS) {
			option_exists |= IPPF_TCLASS;
		} else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) {
			option_exists |= IPPF_TCLASS;
			is_sticky |= IPPF_TCLASS;
		}
	}

no_options:

	/*
	 * If any options carried in the ip6i_t were specified, we
	 * need to account for the ip6i_t in the data we'll be sending
	 * down.
	 */
	if (option_exists & IPPF_HAS_IP6I)
		ip_hdr_len += sizeof (ip6i_t);

	/*
	 * check/fix buffer config, setup pointers into it
	 *
	 * The headers are built in place in front of the payload when the
	 * data block is unshared, has ip_hdr_len bytes of room before
	 * b_rptr and the resulting pointer passes OK_32PTR(); otherwise a
	 * new mblk is allocated and linked in front of the message.
	 */
	ip6h = (ip6_t *)&mp->b_rptr[-ip_hdr_len];
	if ((mp->b_datap->db_ref != 1) ||
	    ((unsigned char *)ip6h < mp->b_datap->db_base) ||
	    !OK_32PTR(ip6h)) {
		mblk_t	*mp1;

		/* Try to get everything in a single mblk next time */
		if (ip_hdr_len > icmp->icmp_max_hdr_len) {
			icmp->icmp_max_hdr_len = ip_hdr_len;

			(void) proto_set_tx_wroff(q == NULL ? NULL:RD(q), connp,
			    icmp->icmp_max_hdr_len + is->is_wroff_extra);
		}
		mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO);
		if (!mp1) {
			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
			return (ENOMEM);
		}
		mp1->b_cont = mp;
		/* Headers occupy the last ip_hdr_len bytes of the new block. */
		mp1->b_wptr = mp1->b_datap->db_lim;
		ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len);
		mp = mp1;
	}
	mp->b_rptr = (unsigned char *)ip6h;
	ip6i = (ip6i_t *)ip6h;

/* Selects the sticky or the ancillary ip6_pkt_t for option flag f. */
#define	ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &icmp->icmp_sticky_ipp : ipp)
	if (option_exists & IPPF_HAS_IP6I) {
		/* The real IPv6 header follows the ip6i_t. */
		ip6h = (ip6_t *)&ip6i[1];
		ip6i->ip6i_flags = 0;
		ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;

		/* sin6_scope_id takes precedence over IPPF_IFINDEX */
		if (option_exists & IPPF_SCOPE_ID) {
			ip6i->ip6i_flags |= IP6I_IFINDEX;
			ip6i->ip6i_ifindex = sin6->sin6_scope_id;
		} else if (option_exists & IPPF_IFINDEX) {
			tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX);
			ASSERT(tipp->ipp_ifindex != 0);
			ip6i->ip6i_flags |= IP6I_IFINDEX;
			ip6i->ip6i_ifindex = tipp->ipp_ifindex;
		}

		if (option_exists & IPPF_RAW_CKSUM) {
			ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM;
			ip6i->ip6i_checksum_off = icmp->icmp_checksum_off;
		}

		if (option_exists & IPPF_NO_CKSUM) {
			ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM;
		}

		if (option_exists & IPPF_ADDR) {
			/*
			 * Enable per-packet source address verification if
			 * IPV6_PKTINFO specified the source address.
			 * ip6_src is set in the transport's _wput function.
			 */
			ip6i->ip6i_flags |= IP6I_VERIFY_SRC;
		}

		if (option_exists & IPPF_DONTFRAG) {
			ip6i->ip6i_flags |= IP6I_DONTFRAG;
		}

		if (option_exists & IPPF_USE_MIN_MTU) {
			/*
			 * NOTE(review): unlike the other dual-source options
			 * this reads ipp directly instead of going through
			 * ANCIL_OR_STICKY_PTR(IPPF_USE_MIN_MTU), so a sticky
			 * setting appears to take its value from the
			 * ancillary structure -- confirm against the caller.
			 */
			ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU(
			    ip6i->ip6i_flags, ipp->ipp_use_min_mtu);
		}

		if (option_exists & IPPF_NEXTHOP) {
			tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP);
			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop));
			ip6i->ip6i_flags |= IP6I_NEXTHOP;
			ip6i->ip6i_nexthop = tipp->ipp_nexthop;
		}

		/*
		 * tell IP this is an ip6i_t private header
		 */
		ip6i->ip6i_nxt = IPPROTO_RAW;
	}

	/* Initialize IPv6 header */
	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src));

	/*
	 * Set the hoplimit of the outgoing packet.
	 * NOTE(review): the ip6i_flags updates below presumably rely on
	 * IPPF_HAS_IP6I covering the hop-limit flags, so that an ip6i_t is
	 * present whenever they are set -- the mask definition is not
	 * visible here.
	 */
	if (option_exists & IPPF_HOPLIMIT) {
		/* IPV6_HOPLIMIT ancillary data overrides all other settings. */
		ip6h->ip6_hops = ipp->ipp_hoplimit;
		ip6i->ip6i_flags |= IP6I_HOPLIMIT;
	} else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
		ip6h->ip6_hops = icmp->icmp_multicast_ttl;
		if (option_exists & IPPF_MULTICAST_HOPS)
			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
	} else {
		ip6h->ip6_hops = icmp->icmp_ttl;
		if (option_exists & IPPF_UNICAST_HOPS)
			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
	}

	if (option_exists & IPPF_ADDR) {
		tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR);
		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr));
		ip6h->ip6_src = tipp->ipp_addr;
	} else {
		/*
		 * The source address was not set using IPV6_PKTINFO.
		 * First look at the bound source.
		 * If unspecified fallback to __sin6_src_id.
		 */
		ip6h->ip6_src = icmp->icmp_v6src;
		if (sin6->__sin6_src_id != 0 &&
		    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
			ip_srcid_find_id(sin6->__sin6_src_id,
			    &ip6h->ip6_src, icmp->icmp_zoneid,
			    is->is_netstack);
		}
	}

	/*
	 * nxthdr_ptr always points at the "next header" byte of the most
	 * recently written header; cp is where the next header will go.
	 */
	nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
	cp = (uint8_t *)&ip6h[1];

	/*
	 * Here's where we have to start stringing together
	 * any extension headers in the right order:
	 * Hop-by-hop, destination, routing, and final destination opts.
	 */
	if (option_exists & IPPF_HOPOPTS) {
		/* Hop-by-hop options */
		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
		tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS);

		*nxthdr_ptr = IPPROTO_HOPOPTS;
		nxthdr_ptr = &hbh->ip6h_nxt;

		bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen);
		cp += tipp->ipp_hopoptslen;
	}
	/*
	 * En-route destination options
	 * Only do them if there's a routing header as well
	 */
	if (option_exists & IPPF_RTDSTOPTS) {
		ip6_dest_t *dst = (ip6_dest_t *)cp;
		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS);

		*nxthdr_ptr = IPPROTO_DSTOPTS;
		nxthdr_ptr = &dst->ip6d_nxt;

		bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen);
		cp += tipp->ipp_rtdstoptslen;
	}
	/*
	 * Routing header next
	 */
	if (option_exists & IPPF_RTHDR) {
		ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR);

		*nxthdr_ptr = IPPROTO_ROUTING;
		nxthdr_ptr = &rt->ip6r_nxt;

		bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen);
		cp += tipp->ipp_rthdrlen;
	}
	/*
	 * Do ultimate destination options
	 */
	if (option_exists & IPPF_DSTOPTS) {
		ip6_dest_t *dest = (ip6_dest_t *)cp;
		tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS);

		*nxthdr_ptr = IPPROTO_DSTOPTS;
		nxthdr_ptr = &dest->ip6d_nxt;

		bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen);
		cp += tipp->ipp_dstoptslen;
	}

	/*
	 * Now set the last header pointer to the proto passed in
	 */
	ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len);
	*nxthdr_ptr = icmp->icmp_proto;

	/*
	 * Copy in the destination address
	 */
	ip6h->ip6_dst = ip6_dst;

	/* Version bits from the default, the rest from sin6_flowinfo. */
	ip6h->ip6_vcf =
	    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
	    (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK);

	if (option_exists & IPPF_TCLASS) {
		tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS);
		ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
		    tipp->ipp_tclass);
	}
	if (option_exists & IPPF_RTHDR) {
		ip6_rthdr_t	*rth;

		/*
		 * Perform any processing needed for source routing.
		 * We know that all extension headers will be in the same mblk
		 * as the IPv6 header.
		 */
		rth = ip_find_rthdr_v6(ip6h, mp->b_wptr);
		if (rth != NULL && rth->ip6r_segleft != 0) {
			if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) {
				/*
				 * Drop packet - only support Type 0 routing.
				 * Notify the application as well.
				 */
				BUMP_MIB(&is->is_rawip_mib,
				    rawipOutErrors);
				return (EPROTO);
			}
			/*
			 * rth->ip6r_len is twice the number of
			 * addresses in the header
			 */
			if (rth->ip6r_len & 0x1) {
				BUMP_MIB(&is->is_rawip_mib,
				    rawipOutErrors);
				return (EPROTO);
			}
			/*
			 * Shuffle the routing header and ip6_dst
			 * addresses, and get the checksum difference
			 * between the first hop (in ip6_dst) and
			 * the destination (in the last routing hdr entry).
			 */
			csum = ip_massage_options_v6(ip6h, rth,
			    is->is_netstack);
			/*
			 * Verify that the first hop isn't a mapped address.
			 * Routers along the path need to do this verification
			 * for subsequent hops.
			 */
			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
				BUMP_MIB(&is->is_rawip_mib,
				    rawipOutErrors);
				return (EADDRNOTAVAIL);
			}
		}
	}

	/* Payload length: everything after the fixed IPv6 header. */
	ip_len = mp->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN;
	if (mp->b_cont != NULL)
		ip_len += msgdsize(mp->b_cont);

	/*
	 * Set the length into the IP header.
	 * If the length is greater than the maximum allowed by IP,
	 * then count an output error and return EMSGSIZE.  Do not try and
	 * send it as this can cause problems in layers below.
	 */
	if (ip_len > IP_MAXPACKET) {
		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
		return (EMSGSIZE);
	}
	if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) {
		uint_t	cksum_off;	/* From ip6i == mp->b_rptr */
		uint16_t *cksum_ptr;
		uint_t	ext_hdrs_len;

		/* ICMPv6 must have an offset matching icmp6_cksum offset */
		ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 ||
		    icmp->icmp_checksum_off == 2);

		/*
		 * We make it easy for IP to include our pseudo header
		 * by putting our length in the checksum field, modified (if
		 * we have a routing header) by the checksum difference
		 * between the ultimate destination and first hop addresses.
		 * Note: ICMPv6 must always checksum the packet.
		 */
		cksum_off = ip_hdr_len + icmp->icmp_checksum_off;
		if (cksum_off + sizeof (uint16_t) > mp->b_wptr - mp->b_rptr) {
			/*
			 * The checksum field is not in the first mblk.
			 * On pullup failure the message is freed here and
			 * 0 is returned -- presumably so that the caller
			 * does not free it a second time; TODO confirm.
			 */
			if (!pullupmsg(mp, cksum_off + sizeof (uint16_t))) {
				BUMP_MIB(&is->is_rawip_mib,
				    rawipOutErrors);
				freemsg(mp);
				return (0);
			}
			/* Header pointers are recomputed after the pullup. */
			ip6i = (ip6i_t *)mp->b_rptr;
			if (ip6i->ip6i_nxt == IPPROTO_RAW)
				ip6h = (ip6_t *)&ip6i[1];
			else
				ip6h = (ip6_t *)ip6i;
		}
		/* Add payload length to checksum */
		ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN -
		    (int)((uchar_t *)ip6h - (uchar_t *)ip6i);
		csum += htons(ip_len - ext_hdrs_len);

		cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off);
		/* Fold the carry back into the low 16 bits. */
		csum = (csum & 0xFFFF) + (csum >> 16);
		*cksum_ptr = (uint16_t)csum;
	}

#ifdef _LITTLE_ENDIAN
	ip_len = htons(ip_len);
#endif
	ip6h->ip6_plen = (uint16_t)ip_len;

	/* We're done.
Pass the packet to IP */ 5319 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 5320 ip_output_v6(icmp->icmp_connp, mp, q, IP_WPUT); 5321 return (0); 5322 } 5323 5324 static void 5325 icmp_wput_other(queue_t *q, mblk_t *mp) 5326 { 5327 uchar_t *rptr = mp->b_rptr; 5328 struct iocblk *iocp; 5329 #define tudr ((struct T_unitdata_req *)rptr) 5330 conn_t *connp = Q_TO_CONN(q); 5331 icmp_t *icmp = connp->conn_icmp; 5332 icmp_stack_t *is = icmp->icmp_is; 5333 cred_t *cr; 5334 5335 switch (mp->b_datap->db_type) { 5336 case M_PROTO: 5337 case M_PCPROTO: 5338 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5339 /* 5340 * If the message does not contain a PRIM_type, 5341 * throw it away. 5342 */ 5343 freemsg(mp); 5344 return; 5345 } 5346 switch (((union T_primitives *)rptr)->type) { 5347 case T_ADDR_REQ: 5348 icmp_addr_req(q, mp); 5349 return; 5350 case O_T_BIND_REQ: 5351 case T_BIND_REQ: 5352 icmp_tpi_bind(q, mp); 5353 return; 5354 case T_CONN_REQ: 5355 icmp_tpi_connect(q, mp); 5356 return; 5357 case T_CAPABILITY_REQ: 5358 icmp_capability_req(q, mp); 5359 return; 5360 case T_INFO_REQ: 5361 icmp_info_req(q, mp); 5362 return; 5363 case T_UNITDATA_REQ: 5364 /* 5365 * If a T_UNITDATA_REQ gets here, the address must 5366 * be bad. Valid T_UNITDATA_REQs are found above 5367 * and break to below this switch. 5368 */ 5369 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5370 return; 5371 case T_UNBIND_REQ: 5372 icmp_tpi_unbind(q, mp); 5373 return; 5374 5375 case T_SVR4_OPTMGMT_REQ: 5376 /* 5377 * All Solaris components should pass a db_credp 5378 * for this TPI message, hence we ASSERT. 5379 * But in case there is some other M_PROTO that looks 5380 * like a TPI message sent by some other kernel 5381 * component, we check and return an error. 
5382 */ 5383 cr = msg_getcred(mp, NULL); 5384 ASSERT(cr != NULL); 5385 if (cr == NULL) { 5386 icmp_err_ack(q, mp, TSYSERR, EINVAL); 5387 return; 5388 } 5389 5390 if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get, 5391 cr)) { 5392 /* Only IP can return anything meaningful */ 5393 (void) svr4_optcom_req(q, mp, cr, 5394 &icmp_opt_obj, B_TRUE); 5395 } 5396 return; 5397 5398 case T_OPTMGMT_REQ: 5399 /* 5400 * All Solaris components should pass a db_credp 5401 * for this TPI message, hence we ASSERT. 5402 * But in case there is some other M_PROTO that looks 5403 * like a TPI message sent by some other kernel 5404 * component, we check and return an error. 5405 */ 5406 cr = msg_getcred(mp, NULL); 5407 ASSERT(cr != NULL); 5408 if (cr == NULL) { 5409 icmp_err_ack(q, mp, TSYSERR, EINVAL); 5410 return; 5411 } 5412 /* Only IP can return anything meaningful */ 5413 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE); 5414 return; 5415 5416 case T_DISCON_REQ: 5417 icmp_tpi_disconnect(q, mp); 5418 return; 5419 5420 /* The following TPI message is not supported by icmp. */ 5421 case O_T_CONN_RES: 5422 case T_CONN_RES: 5423 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5424 return; 5425 5426 /* The following 3 TPI requests are illegal for icmp. */ 5427 case T_DATA_REQ: 5428 case T_EXDATA_REQ: 5429 case T_ORDREL_REQ: 5430 freemsg(mp); 5431 (void) putctl1(RD(q), M_ERROR, EPROTO); 5432 return; 5433 default: 5434 break; 5435 } 5436 break; 5437 case M_IOCTL: 5438 iocp = (struct iocblk *)mp->b_rptr; 5439 switch (iocp->ioc_cmd) { 5440 case TI_GETPEERNAME: 5441 if (icmp->icmp_state != TS_DATA_XFER) { 5442 /* 5443 * If a default destination address has not 5444 * been associated with the stream, then we 5445 * don't know the peer's name. 
5446 */ 5447 iocp->ioc_error = ENOTCONN; 5448 err_ret:; 5449 iocp->ioc_count = 0; 5450 mp->b_datap->db_type = M_IOCACK; 5451 qreply(q, mp); 5452 return; 5453 } 5454 /* FALLTHRU */ 5455 case TI_GETMYNAME: 5456 /* 5457 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5458 * need to copyin the user's strbuf structure. 5459 * Processing will continue in the M_IOCDATA case 5460 * below. 5461 */ 5462 mi_copyin(q, mp, NULL, 5463 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5464 return; 5465 case ND_SET: 5466 /* nd_getset performs the necessary error checking */ 5467 case ND_GET: 5468 if (nd_getset(q, is->is_nd, mp)) { 5469 qreply(q, mp); 5470 return; 5471 } 5472 break; 5473 case _SIOCSOCKFALLBACK: 5474 /* 5475 * socket is falling back to be a 5476 * streams socket. Nothing to do 5477 */ 5478 iocp->ioc_count = 0; 5479 iocp->ioc_rval = 0; 5480 qreply(q, mp); 5481 return; 5482 default: 5483 break; 5484 } 5485 break; 5486 case M_IOCDATA: 5487 icmp_wput_iocdata(q, mp); 5488 return; 5489 default: 5490 break; 5491 } 5492 ip_wput(q, mp); 5493 } 5494 5495 /* 5496 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5497 * messages. 5498 */ 5499 static void 5500 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5501 { 5502 mblk_t *mp1; 5503 STRUCT_HANDLE(strbuf, sb); 5504 icmp_t *icmp; 5505 uint_t addrlen; 5506 uint_t error; 5507 5508 /* Make sure it is one of ours. */ 5509 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5510 case TI_GETMYNAME: 5511 case TI_GETPEERNAME: 5512 break; 5513 default: 5514 icmp = Q_TO_ICMP(q); 5515 ip_output(icmp->icmp_connp, mp, q, IP_WPUT); 5516 return; 5517 } 5518 switch (mi_copy_state(q, mp, &mp1)) { 5519 case -1: 5520 return; 5521 case MI_COPY_CASE(MI_COPY_IN, 1): 5522 break; 5523 case MI_COPY_CASE(MI_COPY_OUT, 1): 5524 /* 5525 * The address has been copied out, so now 5526 * copyout the strbuf. 5527 */ 5528 mi_copyout(q, mp); 5529 return; 5530 case MI_COPY_CASE(MI_COPY_OUT, 2): 5531 /* 5532 * The address and strbuf have been copied out. 
5533 * We're done, so just acknowledge the original 5534 * M_IOCTL. 5535 */ 5536 mi_copy_done(q, mp, 0); 5537 return; 5538 default: 5539 /* 5540 * Something strange has happened, so acknowledge 5541 * the original M_IOCTL with an EPROTO error. 5542 */ 5543 mi_copy_done(q, mp, EPROTO); 5544 return; 5545 } 5546 /* 5547 * Now we have the strbuf structure for TI_GETMYNAME 5548 * and TI_GETPEERNAME. Next we copyout the requested 5549 * address and then we'll copyout the strbuf. 5550 */ 5551 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5552 (void *)mp1->b_rptr); 5553 icmp = Q_TO_ICMP(q); 5554 if (icmp->icmp_family == AF_INET) 5555 addrlen = sizeof (sin_t); 5556 else 5557 addrlen = sizeof (sin6_t); 5558 5559 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5560 mi_copy_done(q, mp, EINVAL); 5561 return; 5562 } 5563 5564 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5565 5566 if (mp1 == NULL) 5567 return; 5568 5569 rw_enter(&icmp->icmp_rwlock, RW_READER); 5570 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5571 case TI_GETMYNAME: 5572 error = rawip_do_getsockname(icmp, (void *)mp1->b_rptr, 5573 &addrlen); 5574 break; 5575 case TI_GETPEERNAME: 5576 error = rawip_do_getpeername(icmp, (void *)mp1->b_rptr, 5577 &addrlen); 5578 break; 5579 } 5580 rw_exit(&icmp->icmp_rwlock); 5581 5582 if (error != 0) { 5583 mi_copy_done(q, mp, error); 5584 } else { 5585 mp1->b_wptr += addrlen; 5586 STRUCT_FSET(sb, len, addrlen); 5587 5588 /* Copy out the address */ 5589 mi_copyout(q, mp); 5590 } 5591 } 5592 5593 static int 5594 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5595 void *thisdg_attrs) 5596 { 5597 struct T_unitdata_req *udreqp; 5598 int is_absreq_failure; 5599 cred_t *cr; 5600 5601 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5602 *errorp = 0; 5603 5604 /* 5605 * All Solaris components should pass a db_credp 5606 * for this TPI message, hence we ASSERT. 
5607 * But in case there is some other M_PROTO that looks 5608 * like a TPI message sent by some other kernel 5609 * component, we check and return an error. 5610 */ 5611 cr = msg_getcred(mp, NULL); 5612 ASSERT(cr != NULL); 5613 if (cr == NULL) 5614 return (-1); 5615 5616 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5617 udreqp->OPT_offset, cr, &icmp_opt_obj, 5618 thisdg_attrs, &is_absreq_failure); 5619 5620 if (*errorp != 0) { 5621 /* 5622 * Note: No special action needed in this 5623 * module for "is_absreq_failure" 5624 */ 5625 return (-1); /* failure */ 5626 } 5627 ASSERT(is_absreq_failure == 0); 5628 return (0); /* success */ 5629 } 5630 5631 void 5632 icmp_ddi_g_init(void) 5633 { 5634 icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5635 icmp_opt_obj.odb_opt_arr_cnt); 5636 5637 /* 5638 * We want to be informed each time a stack is created or 5639 * destroyed in the kernel, so we can maintain the 5640 * set of icmp_stack_t's. 5641 */ 5642 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); 5643 } 5644 5645 void 5646 icmp_ddi_g_destroy(void) 5647 { 5648 netstack_unregister(NS_ICMP); 5649 } 5650 5651 #define INET_NAME "ip" 5652 5653 /* 5654 * Initialize the ICMP stack instance. 
5655 */ 5656 static void * 5657 rawip_stack_init(netstackid_t stackid, netstack_t *ns) 5658 { 5659 icmp_stack_t *is; 5660 icmpparam_t *pa; 5661 int error = 0; 5662 major_t major; 5663 5664 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); 5665 is->is_netstack = ns; 5666 5667 pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); 5668 is->is_param_arr = pa; 5669 bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); 5670 5671 (void) icmp_param_register(&is->is_nd, 5672 is->is_param_arr, A_CNT(icmp_param_arr)); 5673 is->is_ksp = rawip_kstat_init(stackid); 5674 5675 major = mod_name_to_major(INET_NAME); 5676 error = ldi_ident_from_major(major, &is->is_ldi_ident); 5677 ASSERT(error == 0); 5678 return (is); 5679 } 5680 5681 /* 5682 * Free the ICMP stack instance. 5683 */ 5684 static void 5685 rawip_stack_fini(netstackid_t stackid, void *arg) 5686 { 5687 icmp_stack_t *is = (icmp_stack_t *)arg; 5688 5689 nd_free(&is->is_nd); 5690 kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); 5691 is->is_param_arr = NULL; 5692 5693 rawip_kstat_fini(stackid, is->is_ksp); 5694 is->is_ksp = NULL; 5695 ldi_ident_release(is->is_ldi_ident); 5696 kmem_free(is, sizeof (*is)); 5697 } 5698 5699 static void * 5700 rawip_kstat_init(netstackid_t stackid) { 5701 kstat_t *ksp; 5702 5703 rawip_named_kstat_t template = { 5704 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5705 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5706 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5707 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5708 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5709 }; 5710 5711 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", 5712 KSTAT_TYPE_NAMED, 5713 NUM_OF_FIELDS(rawip_named_kstat_t), 5714 0, stackid); 5715 if (ksp == NULL || ksp->ks_data == NULL) 5716 return (NULL); 5717 5718 bcopy(&template, ksp->ks_data, sizeof (template)); 5719 ksp->ks_update = rawip_kstat_update; 5720 ksp->ks_private = (void *)(uintptr_t)stackid; 5721 5722 kstat_install(ksp); 5723 return (ksp); 5724 } 
5725 5726 static void 5727 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) 5728 { 5729 if (ksp != NULL) { 5730 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 5731 kstat_delete_netstack(ksp, stackid); 5732 } 5733 } 5734 5735 static int 5736 rawip_kstat_update(kstat_t *ksp, int rw) 5737 { 5738 rawip_named_kstat_t *rawipkp; 5739 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; 5740 netstack_t *ns; 5741 icmp_stack_t *is; 5742 5743 if ((ksp == NULL) || (ksp->ks_data == NULL)) 5744 return (EIO); 5745 5746 if (rw == KSTAT_WRITE) 5747 return (EACCES); 5748 5749 rawipkp = (rawip_named_kstat_t *)ksp->ks_data; 5750 5751 ns = netstack_find_by_stackid(stackid); 5752 if (ns == NULL) 5753 return (-1); 5754 is = ns->netstack_icmp; 5755 if (is == NULL) { 5756 netstack_rele(ns); 5757 return (-1); 5758 } 5759 rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; 5760 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; 5761 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; 5762 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; 5763 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; 5764 netstack_rele(ns); 5765 return (0); 5766 } 5767 5768 /* ARGSUSED */ 5769 int 5770 rawip_accept(sock_lower_handle_t lproto_handle, 5771 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 5772 cred_t *cr) 5773 { 5774 return (EOPNOTSUPP); 5775 } 5776 5777 /* ARGSUSED */ 5778 int 5779 rawip_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 5780 socklen_t len, cred_t *cr) 5781 { 5782 conn_t *connp = (conn_t *)proto_handle; 5783 int error; 5784 5785 /* All Solaris components should pass a cred for this operation. 
*/ 5786 ASSERT(cr != NULL); 5787 5788 /* Binding to a NULL address really means unbind */ 5789 if (sa == NULL) 5790 error = rawip_do_unbind(connp); 5791 else 5792 error = rawip_do_bind(connp, sa, len); 5793 5794 if (error < 0) { 5795 if (error == -TOUTSTATE) 5796 error = EINVAL; 5797 else 5798 error = proto_tlitosyserr(-error); 5799 } 5800 return (error); 5801 } 5802 5803 static int 5804 rawip_implicit_bind(conn_t *connp) 5805 { 5806 sin6_t sin6addr; 5807 sin_t *sin; 5808 sin6_t *sin6; 5809 socklen_t len; 5810 int error; 5811 5812 if (connp->conn_icmp->icmp_family == AF_INET) { 5813 len = sizeof (struct sockaddr_in); 5814 sin = (sin_t *)&sin6addr; 5815 *sin = sin_null; 5816 sin->sin_family = AF_INET; 5817 sin->sin_addr.s_addr = INADDR_ANY; 5818 } else { 5819 ASSERT(connp->conn_icmp->icmp_family == AF_INET6); 5820 len = sizeof (sin6_t); 5821 sin6 = (sin6_t *)&sin6addr; 5822 *sin6 = sin6_null; 5823 sin6->sin6_family = AF_INET6; 5824 V6_SET_ZERO(sin6->sin6_addr); 5825 } 5826 5827 error = rawip_do_bind(connp, (struct sockaddr *)&sin6addr, len); 5828 5829 return ((error < 0) ? proto_tlitosyserr(-error) : error); 5830 } 5831 5832 static int 5833 rawip_unbind(conn_t *connp) 5834 { 5835 int error; 5836 5837 error = rawip_do_unbind(connp); 5838 if (error < 0) { 5839 error = proto_tlitosyserr(-error); 5840 } 5841 return (error); 5842 } 5843 5844 /* ARGSUSED */ 5845 int 5846 rawip_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 5847 { 5848 return (EOPNOTSUPP); 5849 } 5850 5851 /* ARGSUSED */ 5852 int 5853 rawip_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 5854 socklen_t len, sock_connid_t *id, cred_t *cr) 5855 { 5856 conn_t *connp = (conn_t *)proto_handle; 5857 icmp_t *icmp = connp->conn_icmp; 5858 int error; 5859 boolean_t did_bind = B_FALSE; 5860 5861 /* All Solaris components should pass a cred for this operation. 
*/ 5862 ASSERT(cr != NULL); 5863 5864 if (sa == NULL) { 5865 /* 5866 * Disconnect 5867 * Make sure we are connected 5868 */ 5869 if (icmp->icmp_state != TS_DATA_XFER) 5870 return (EINVAL); 5871 5872 error = icmp_disconnect(connp); 5873 return (error); 5874 } 5875 5876 error = proto_verify_ip_addr(icmp->icmp_family, sa, len); 5877 if (error != 0) 5878 return (error); 5879 5880 /* do an implicit bind if necessary */ 5881 if (icmp->icmp_state == TS_UNBND) { 5882 error = rawip_implicit_bind(connp); 5883 /* 5884 * We could be racing with an actual bind, in which case 5885 * we would see EPROTO. We cross our fingers and try 5886 * to connect. 5887 */ 5888 if (!(error == 0 || error == EPROTO)) 5889 return (error); 5890 did_bind = B_TRUE; 5891 } 5892 5893 /* 5894 * set SO_DGRAM_ERRIND 5895 */ 5896 icmp->icmp_dgram_errind = B_TRUE; 5897 5898 error = rawip_do_connect(connp, sa, len, cr); 5899 5900 if (error != 0 && did_bind) { 5901 int unbind_err; 5902 5903 unbind_err = rawip_unbind(connp); 5904 ASSERT(unbind_err == 0); 5905 } 5906 5907 if (error == 0) { 5908 *id = 0; 5909 (*connp->conn_upcalls->su_connected) 5910 (connp->conn_upper_handle, 0, NULL, -1); 5911 } else if (error < 0) { 5912 error = proto_tlitosyserr(-error); 5913 } 5914 return (error); 5915 } 5916 5917 /* ARGSUSED */ 5918 int 5919 rawip_fallback(sock_lower_handle_t proto_handle, queue_t *q, 5920 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb) 5921 { 5922 conn_t *connp = (conn_t *)proto_handle; 5923 icmp_t *icmp; 5924 struct T_capability_ack tca; 5925 struct sockaddr_in6 laddr, faddr; 5926 socklen_t laddrlen, faddrlen; 5927 short opts; 5928 struct stroptions *stropt; 5929 mblk_t *stropt_mp; 5930 int error; 5931 5932 icmp = connp->conn_icmp; 5933 5934 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 5935 5936 /* 5937 * setup the fallback stream that was allocated 5938 */ 5939 connp->conn_dev = (dev_t)RD(q)->q_ptr; 5940 connp->conn_minor_arena = WR(q)->q_ptr; 5941 5942 RD(q)->q_ptr 
= WR(q)->q_ptr = connp; 5943 5944 WR(q)->q_qinfo = &icmpwinit; 5945 5946 connp->conn_rq = RD(q); 5947 connp->conn_wq = WR(q); 5948 5949 /* Notify stream head about options before sending up data */ 5950 stropt_mp->b_datap->db_type = M_SETOPTS; 5951 stropt_mp->b_wptr += sizeof (*stropt); 5952 stropt = (struct stroptions *)stropt_mp->b_rptr; 5953 stropt->so_flags = SO_WROFF | SO_HIWAT; 5954 stropt->so_wroff = 5955 (ushort_t)(icmp->icmp_max_hdr_len + icmp->icmp_is->is_wroff_extra); 5956 stropt->so_hiwat = icmp->icmp_recv_hiwat; 5957 putnext(RD(q), stropt_mp); 5958 5959 /* 5960 * free helper stream 5961 */ 5962 ip_free_helper_stream(connp); 5963 5964 /* 5965 * Collect the information needed to sync with the sonode 5966 */ 5967 icmp_do_capability_ack(icmp, &tca, TC1_INFO); 5968 5969 laddrlen = faddrlen = sizeof (sin6_t); 5970 (void) rawip_getsockname((sock_lower_handle_t)connp, 5971 (struct sockaddr *)&laddr, &laddrlen, CRED()); 5972 error = rawip_getpeername((sock_lower_handle_t)connp, 5973 (struct sockaddr *)&faddr, &faddrlen, CRED()); 5974 if (error != 0) 5975 faddrlen = 0; 5976 opts = 0; 5977 if (icmp->icmp_dgram_errind) 5978 opts |= SO_DGRAM_ERRIND; 5979 if (icmp->icmp_dontroute) 5980 opts |= SO_DONTROUTE; 5981 5982 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 5983 (struct sockaddr *)&laddr, laddrlen, 5984 (struct sockaddr *)&faddr, faddrlen, opts); 5985 5986 /* 5987 * Attempts to send data up during fallback will result in it being 5988 * queued in udp_t. Now we push up any queued packets. 
5989 */ 5990 mutex_enter(&icmp->icmp_recv_lock); 5991 while (icmp->icmp_fallback_queue_head != NULL) { 5992 mblk_t *mp; 5993 5994 mp = icmp->icmp_fallback_queue_head; 5995 icmp->icmp_fallback_queue_head = mp->b_next; 5996 mp->b_next = NULL; 5997 mutex_exit(&icmp->icmp_recv_lock); 5998 putnext(RD(q), mp); 5999 mutex_enter(&icmp->icmp_recv_lock); 6000 } 6001 icmp->icmp_fallback_queue_tail = icmp->icmp_fallback_queue_head; 6002 6003 /* 6004 * No longer a streams less socket 6005 */ 6006 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6007 connp->conn_flags &= ~IPCL_NONSTR; 6008 rw_exit(&icmp->icmp_rwlock); 6009 6010 mutex_exit(&icmp->icmp_recv_lock); 6011 6012 ASSERT(icmp->icmp_fallback_queue_head == NULL && 6013 icmp->icmp_fallback_queue_tail == NULL); 6014 6015 ASSERT(connp->conn_ref >= 1); 6016 6017 return (0); 6018 } 6019 6020 /* ARGSUSED */ 6021 sock_lower_handle_t 6022 rawip_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 6023 uint_t *smodep, int *errorp, int flags, cred_t *credp) 6024 { 6025 conn_t *connp; 6026 6027 if (type != SOCK_RAW || (family != AF_INET && family != AF_INET6)) { 6028 *errorp = EPROTONOSUPPORT; 6029 return (NULL); 6030 } 6031 6032 connp = icmp_open(family, credp, errorp, flags); 6033 if (connp != NULL) { 6034 icmp_stack_t *is; 6035 6036 is = connp->conn_icmp->icmp_is; 6037 connp->conn_flags |= IPCL_NONSTR; 6038 6039 if (connp->conn_icmp->icmp_family == AF_INET6) { 6040 /* Build initial header template for transmit */ 6041 rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER); 6042 if ((*errorp = 6043 icmp_build_hdrs(connp->conn_icmp)) != 0) { 6044 rw_exit(&connp->conn_icmp->icmp_rwlock); 6045 ipcl_conn_destroy(connp); 6046 return (NULL); 6047 } 6048 rw_exit(&connp->conn_icmp->icmp_rwlock); 6049 } 6050 6051 connp->conn_icmp->icmp_recv_hiwat = is->is_recv_hiwat; 6052 connp->conn_icmp->icmp_xmit_hiwat = is->is_xmit_hiwat; 6053 6054 if ((*errorp = ip_create_helper_stream(connp, 6055 is->is_ldi_ident)) != 0) { 6056 
cmn_err(CE_CONT, "create of IP helper stream failed\n"); 6057 (void) rawip_do_close(connp); 6058 return (NULL); 6059 } 6060 6061 mutex_enter(&connp->conn_lock); 6062 connp->conn_state_flags &= ~CONN_INCIPIENT; 6063 mutex_exit(&connp->conn_lock); 6064 *sock_downcalls = &sock_rawip_downcalls; 6065 *smodep = SM_ATOMIC; 6066 } else { 6067 ASSERT(*errorp != 0); 6068 } 6069 6070 return ((sock_lower_handle_t)connp); 6071 } 6072 6073 /* ARGSUSED */ 6074 void 6075 rawip_activate(sock_lower_handle_t proto_handle, 6076 sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, int flags, 6077 cred_t *cr) 6078 { 6079 conn_t *connp = (conn_t *)proto_handle; 6080 icmp_stack_t *is = connp->conn_icmp->icmp_is; 6081 struct sock_proto_props sopp; 6082 6083 /* All Solaris components should pass a cred for this operation. */ 6084 ASSERT(cr != NULL); 6085 6086 connp->conn_upcalls = sock_upcalls; 6087 connp->conn_upper_handle = sock_handle; 6088 6089 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 6090 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 6091 sopp.sopp_wroff = connp->conn_icmp->icmp_max_hdr_len + 6092 is->is_wroff_extra; 6093 sopp.sopp_rxhiwat = is->is_recv_hiwat; 6094 sopp.sopp_rxlowat = icmp_mod_info.mi_lowat; 6095 sopp.sopp_maxblk = INFPSZ; 6096 sopp.sopp_maxpsz = IP_MAXPACKET; 6097 sopp.sopp_minpsz = (icmp_mod_info.mi_minpsz == 1) ? 
0 : 6098 icmp_mod_info.mi_minpsz; 6099 6100 (*connp->conn_upcalls->su_set_proto_props) 6101 (connp->conn_upper_handle, &sopp); 6102 } 6103 6104 static int 6105 rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp) 6106 { 6107 sin_t *sin = (sin_t *)sa; 6108 sin6_t *sin6 = (sin6_t *)sa; 6109 6110 ASSERT(icmp != NULL); 6111 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 6112 6113 switch (icmp->icmp_family) { 6114 case AF_INET: 6115 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 6116 if (*salenp < sizeof (sin_t)) 6117 return (EINVAL); 6118 6119 *salenp = sizeof (sin_t); 6120 *sin = sin_null; 6121 sin->sin_family = AF_INET; 6122 if (icmp->icmp_state == TS_UNBND) { 6123 break; 6124 } 6125 6126 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 6127 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 6128 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_v6src); 6129 } else { 6130 /* 6131 * INADDR_ANY 6132 * icmp_v6src is not set, we might be bound to 6133 * broadcast/multicast. Use icmp_bound_v6src as 6134 * local address instead (that could 6135 * also still be INADDR_ANY) 6136 */ 6137 sin->sin_addr.s_addr = 6138 V4_PART_OF_V6(icmp->icmp_bound_v6src); 6139 } 6140 break; 6141 case AF_INET6: 6142 6143 if (*salenp < sizeof (sin6_t)) 6144 return (EINVAL); 6145 6146 *salenp = sizeof (sin6_t); 6147 *sin6 = sin6_null; 6148 sin6->sin6_family = AF_INET6; 6149 if (icmp->icmp_state == TS_UNBND) { 6150 break; 6151 } 6152 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 6153 sin6->sin6_addr = icmp->icmp_v6src; 6154 } else { 6155 /* 6156 * UNSPECIFIED 6157 * icmp_v6src is not set, we might be bound to 6158 * broadcast/multicast. 
Use icmp_bound_v6src as 6159 * local address instead (that could 6160 * also still be UNSPECIFIED) 6161 */ 6162 6163 sin6->sin6_addr = icmp->icmp_bound_v6src; 6164 } 6165 break; 6166 } 6167 return (0); 6168 } 6169 6170 static int 6171 rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp) 6172 { 6173 sin_t *sin = (sin_t *)sa; 6174 sin6_t *sin6 = (sin6_t *)sa; 6175 6176 ASSERT(icmp != NULL); 6177 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 6178 6179 if (icmp->icmp_state != TS_DATA_XFER) 6180 return (ENOTCONN); 6181 6182 sa->sa_family = icmp->icmp_family; 6183 switch (icmp->icmp_family) { 6184 case AF_INET: 6185 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 6186 6187 if (*salenp < sizeof (sin_t)) 6188 return (EINVAL); 6189 6190 *salenp = sizeof (sin_t); 6191 *sin = sin_null; 6192 sin->sin_family = AF_INET; 6193 sin->sin_addr.s_addr = 6194 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr); 6195 break; 6196 case AF_INET6: 6197 if (*salenp < sizeof (sin6_t)) 6198 return (EINVAL); 6199 6200 *salenp = sizeof (sin6_t); 6201 *sin6 = sin6_null; 6202 *sin6 = icmp->icmp_v6dst; 6203 break; 6204 } 6205 return (0); 6206 } 6207 6208 /* ARGSUSED */ 6209 int 6210 rawip_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6211 socklen_t *salenp, cred_t *cr) 6212 { 6213 conn_t *connp = (conn_t *)proto_handle; 6214 icmp_t *icmp = connp->conn_icmp; 6215 int error; 6216 6217 /* All Solaris components should pass a cred for this operation. 
*/ 6218 ASSERT(cr != NULL); 6219 6220 ASSERT(icmp != NULL); 6221 6222 rw_enter(&icmp->icmp_rwlock, RW_READER); 6223 6224 error = rawip_do_getpeername(icmp, sa, salenp); 6225 6226 rw_exit(&icmp->icmp_rwlock); 6227 6228 return (error); 6229 } 6230 6231 /* ARGSUSED */ 6232 int 6233 rawip_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6234 socklen_t *salenp, cred_t *cr) 6235 { 6236 conn_t *connp = (conn_t *)proto_handle; 6237 icmp_t *icmp = connp->conn_icmp; 6238 int error; 6239 6240 /* All Solaris components should pass a cred for this operation. */ 6241 ASSERT(cr != NULL); 6242 6243 ASSERT(icmp != NULL); 6244 rw_enter(&icmp->icmp_rwlock, RW_READER); 6245 6246 error = rawip_do_getsockname(icmp, sa, salenp); 6247 6248 rw_exit(&icmp->icmp_rwlock); 6249 6250 return (error); 6251 } 6252 6253 int 6254 rawip_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6255 const void *optvalp, socklen_t optlen, cred_t *cr) 6256 { 6257 conn_t *connp = (conn_t *)proto_handle; 6258 icmp_t *icmp = connp->conn_icmp; 6259 int error; 6260 6261 /* All Solaris components should pass a cred for this operation. 
*/ 6262 ASSERT(cr != NULL); 6263 6264 error = proto_opt_check(level, option_name, optlen, NULL, 6265 icmp_opt_obj.odb_opt_des_arr, 6266 icmp_opt_obj.odb_opt_arr_cnt, 6267 icmp_opt_obj.odb_topmost_tpiprovider, 6268 B_TRUE, B_FALSE, cr); 6269 6270 if (error != 0) { 6271 /* 6272 * option not recognized 6273 */ 6274 if (error < 0) { 6275 error = proto_tlitosyserr(-error); 6276 } 6277 return (error); 6278 } 6279 6280 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6281 error = icmp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, 6282 option_name, optlen, (uchar_t *)optvalp, (uint_t *)&optlen, 6283 (uchar_t *)optvalp, NULL, cr); 6284 rw_exit(&icmp->icmp_rwlock); 6285 6286 if (error < 0) { 6287 /* 6288 * Pass on to ip 6289 */ 6290 error = ip_set_options(connp, level, option_name, optvalp, 6291 optlen, cr); 6292 } 6293 6294 ASSERT(error >= 0); 6295 6296 return (error); 6297 } 6298 6299 int 6300 rawip_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6301 void *optvalp, socklen_t *optlen, cred_t *cr) 6302 { 6303 int error; 6304 conn_t *connp = (conn_t *)proto_handle; 6305 icmp_t *icmp = connp->conn_icmp; 6306 t_uscalar_t max_optbuf_len; 6307 void *optvalp_buf; 6308 int len; 6309 6310 /* All Solaris components should pass a cred for this operation. 
*/ 6311 ASSERT(cr != NULL); 6312 6313 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6314 icmp_opt_obj.odb_opt_des_arr, 6315 icmp_opt_obj.odb_opt_arr_cnt, 6316 icmp_opt_obj.odb_topmost_tpiprovider, 6317 B_FALSE, B_TRUE, cr); 6318 6319 if (error != 0) { 6320 if (error < 0) { 6321 error = proto_tlitosyserr(-error); 6322 } 6323 return (error); 6324 } 6325 6326 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6327 rw_enter(&icmp->icmp_rwlock, RW_READER); 6328 len = icmp_opt_get(connp, level, option_name, optvalp_buf); 6329 rw_exit(&icmp->icmp_rwlock); 6330 6331 if (len < 0) { 6332 /* 6333 * Pass on to IP 6334 */ 6335 kmem_free(optvalp_buf, max_optbuf_len); 6336 return (ip_get_options(connp, level, option_name, optvalp, 6337 optlen, cr)); 6338 } else { 6339 /* 6340 * update optlen and copy option value 6341 */ 6342 t_uscalar_t size = MIN(len, *optlen); 6343 bcopy(optvalp_buf, optvalp, size); 6344 bcopy(&size, optlen, sizeof (size)); 6345 6346 kmem_free(optvalp_buf, max_optbuf_len); 6347 return (0); 6348 } 6349 } 6350 6351 /* ARGSUSED */ 6352 int 6353 rawip_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 6354 { 6355 conn_t *connp = (conn_t *)proto_handle; 6356 6357 /* All Solaris components should pass a cred for this operation. */ 6358 ASSERT(cr != NULL); 6359 6360 (void) rawip_do_close(connp); 6361 return (0); 6362 } 6363 6364 /* ARGSUSED */ 6365 int 6366 rawip_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6367 { 6368 conn_t *connp = (conn_t *)proto_handle; 6369 6370 /* All Solaris components should pass a cred for this operation. 
*/ 6371 ASSERT(cr != NULL); 6372 6373 /* shut down the send side */ 6374 if (how != SHUT_RD) 6375 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6376 SOCK_OPCTL_SHUT_SEND, 0); 6377 /* shut down the recv side */ 6378 if (how != SHUT_WR) 6379 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6380 SOCK_OPCTL_SHUT_RECV, 0); 6381 return (0); 6382 } 6383 6384 void 6385 rawip_clr_flowctrl(sock_lower_handle_t proto_handle) 6386 { 6387 conn_t *connp = (conn_t *)proto_handle; 6388 icmp_t *icmp = connp->conn_icmp; 6389 6390 mutex_enter(&icmp->icmp_recv_lock); 6391 connp->conn_flow_cntrld = B_FALSE; 6392 mutex_exit(&icmp->icmp_recv_lock); 6393 } 6394 6395 int 6396 rawip_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6397 int mode, int32_t *rvalp, cred_t *cr) 6398 { 6399 conn_t *connp = (conn_t *)proto_handle; 6400 int error; 6401 6402 /* All Solaris components should pass a cred for this operation. */ 6403 ASSERT(cr != NULL); 6404 6405 switch (cmd) { 6406 case ND_SET: 6407 case ND_GET: 6408 case _SIOCSOCKFALLBACK: 6409 case TI_GETPEERNAME: 6410 case TI_GETMYNAME: 6411 #ifdef DEBUG 6412 cmn_err(CE_CONT, "icmp_ioctl cmd 0x%x on non streams" 6413 " socket", cmd); 6414 #endif 6415 error = EINVAL; 6416 break; 6417 default: 6418 /* 6419 * Pass on to IP using helper stream 6420 */ 6421 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6422 cmd, arg, mode, cr, rvalp); 6423 break; 6424 } 6425 return (error); 6426 } 6427 6428 /* ARGSUSED */ 6429 int 6430 rawip_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 6431 cred_t *cr) 6432 { 6433 conn_t *connp = (conn_t *)proto_handle; 6434 icmp_t *icmp = connp->conn_icmp; 6435 icmp_stack_t *is = icmp->icmp_is; 6436 int error = 0; 6437 boolean_t bypass_dgram_errind = B_FALSE; 6438 6439 ASSERT(DB_TYPE(mp) == M_DATA); 6440 6441 /* All Solaris components should pass a cred for this operation. 
*/ 6442 ASSERT(cr != NULL); 6443 6444 /* If labeled then sockfs should have already set db_credp */ 6445 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 6446 6447 /* do an implicit bind if necessary */ 6448 if (icmp->icmp_state == TS_UNBND) { 6449 error = rawip_implicit_bind(connp); 6450 /* 6451 * We could be racing with an actual bind, in which case 6452 * we would see EPROTO. We cross our fingers and try 6453 * to connect. 6454 */ 6455 if (!(error == 0 || error == EPROTO)) { 6456 freemsg(mp); 6457 return (error); 6458 } 6459 } 6460 6461 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6462 6463 if (msg->msg_name != NULL && icmp->icmp_state == TS_DATA_XFER) { 6464 error = EISCONN; 6465 goto done_lock; 6466 } 6467 6468 switch (icmp->icmp_family) { 6469 case AF_INET6: { 6470 sin6_t *sin6; 6471 ip6_pkt_t ipp_s; /* For ancillary data options */ 6472 ip6_pkt_t *ipp = &ipp_s; 6473 6474 sin6 = (sin6_t *)msg->msg_name; 6475 if (sin6 != NULL) { 6476 error = proto_verify_ip_addr(icmp->icmp_family, 6477 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6478 if (error != 0) { 6479 bypass_dgram_errind = B_TRUE; 6480 goto done_lock; 6481 } 6482 if (icmp->icmp_delayed_error != 0) { 6483 sin6_t *sin1 = (sin6_t *)msg->msg_name; 6484 sin6_t *sin2 = (sin6_t *) 6485 &icmp->icmp_delayed_addr; 6486 6487 error = icmp->icmp_delayed_error; 6488 icmp->icmp_delayed_error = 0; 6489 6490 /* Compare IP address and port */ 6491 6492 if (sin1->sin6_port == sin2->sin6_port && 6493 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 6494 &sin2->sin6_addr)) { 6495 goto done_lock; 6496 } 6497 } 6498 } else { 6499 /* 6500 * Use connected address 6501 */ 6502 if (icmp->icmp_state != TS_DATA_XFER) { 6503 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6504 error = EDESTADDRREQ; 6505 bypass_dgram_errind = B_TRUE; 6506 goto done_lock; 6507 } 6508 sin6 = &icmp->icmp_v6dst; 6509 } 6510 6511 /* No support for mapped addresses on raw sockets */ 6512 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6513 
BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6514 error = EADDRNOTAVAIL; 6515 goto done_lock; 6516 } 6517 6518 ipp->ipp_fields = 0; 6519 ipp->ipp_sticky_ignored = 0; 6520 6521 /* 6522 * If options passed in, feed it for verification and handling 6523 */ 6524 if (msg->msg_controllen != 0) { 6525 error = process_auxiliary_options(connp, 6526 msg->msg_control, msg->msg_controllen, 6527 ipp, &icmp_opt_obj, icmp_opt_set, cr); 6528 if (error != 0) { 6529 goto done_lock; 6530 } 6531 } 6532 6533 rw_exit(&icmp->icmp_rwlock); 6534 6535 /* 6536 * Destination is a native IPv6 address. 6537 * Send out an IPv6 format packet. 6538 */ 6539 6540 error = raw_ip_send_data_v6(connp->conn_wq, connp, mp, sin6, 6541 ipp); 6542 } 6543 break; 6544 case AF_INET: { 6545 sin_t *sin; 6546 ip4_pkt_t pktinfo; 6547 ip4_pkt_t *pktinfop = &pktinfo; 6548 ipaddr_t v4dst; 6549 6550 sin = (sin_t *)msg->msg_name; 6551 if (sin != NULL) { 6552 error = proto_verify_ip_addr(icmp->icmp_family, 6553 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6554 if (error != 0) { 6555 bypass_dgram_errind = B_TRUE; 6556 goto done_lock; 6557 } 6558 v4dst = sin->sin_addr.s_addr; 6559 if (icmp->icmp_delayed_error != 0) { 6560 sin_t *sin1 = (sin_t *)msg->msg_name; 6561 sin_t *sin2 = (sin_t *)&icmp->icmp_delayed_addr; 6562 6563 error = icmp->icmp_delayed_error; 6564 icmp->icmp_delayed_error = 0; 6565 6566 /* Compare IP address and port */ 6567 if (sin1->sin_port == sin2->sin_port && 6568 sin1->sin_addr.s_addr == 6569 sin2->sin_addr.s_addr) { 6570 goto done_lock; 6571 } 6572 6573 } 6574 } else { 6575 /* 6576 * Use connected address 6577 */ 6578 if (icmp->icmp_state != TS_DATA_XFER) { 6579 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6580 error = EDESTADDRREQ; 6581 bypass_dgram_errind = B_TRUE; 6582 goto done_lock; 6583 } 6584 v4dst = V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr); 6585 } 6586 6587 6588 pktinfop->ip4_ill_index = 0; 6589 pktinfop->ip4_addr = INADDR_ANY; 6590 6591 /* 6592 * If options passed in, feed it for 
verification and handling 6593 */ 6594 if (msg->msg_controllen != 0) { 6595 error = process_auxiliary_options(connp, 6596 msg->msg_control, msg->msg_controllen, 6597 pktinfop, &icmp_opt_obj, icmp_opt_set, cr); 6598 if (error != 0) { 6599 goto done_lock; 6600 } 6601 } 6602 rw_exit(&icmp->icmp_rwlock); 6603 6604 error = raw_ip_send_data_v4(connp->conn_wq, connp, mp, 6605 v4dst, pktinfop); 6606 break; 6607 } 6608 6609 default: 6610 ASSERT(0); 6611 } 6612 6613 goto done; 6614 6615 done_lock: 6616 rw_exit(&icmp->icmp_rwlock); 6617 if (error != 0) { 6618 ASSERT(mp != NULL); 6619 freemsg(mp); 6620 } 6621 done: 6622 if (bypass_dgram_errind) 6623 return (error); 6624 return (icmp->icmp_dgram_errind ? error : 0); 6625 } 6626 6627 sock_downcalls_t sock_rawip_downcalls = { 6628 rawip_activate, 6629 rawip_accept, 6630 rawip_bind, 6631 rawip_listen, 6632 rawip_connect, 6633 rawip_getpeername, 6634 rawip_getsockname, 6635 rawip_getsockopt, 6636 rawip_setsockopt, 6637 rawip_send, 6638 NULL, 6639 NULL, 6640 NULL, 6641 rawip_shutdown, 6642 rawip_clr_flowctrl, 6643 rawip_ioctl, 6644 rawip_close 6645 }; 6646