1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/stropts.h> 30 #include <sys/strlog.h> 31 #include <sys/strsun.h> 32 #define _SUN_TPI_VERSION 2 33 #include <sys/tihdr.h> 34 #include <sys/timod.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/kmem.h> 41 #include <sys/policy.h> 42 #include <sys/priv.h> 43 #include <sys/zone.h> 44 #include <sys/time.h> 45 46 #include <sys/sockio.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/isa_defs.h> 50 #include <sys/suntpi.h> 51 #include <sys/xti_inet.h> 52 #include <sys/netstack.h> 53 54 #include <net/route.h> 55 #include <net/if.h> 56 57 #include <netinet/in.h> 58 #include <netinet/ip6.h> 59 #include <netinet/icmp6.h> 60 #include <inet/common.h> 61 #include <inet/ip.h> 62 #include <inet/ip6.h> 63 #include <inet/proto_set.h> 64 #include <inet/nd.h> 65 #include <inet/optcom.h> 66 #include <inet/snmpcom.h> 67 #include <inet/kstatcom.h> 68 #include <inet/rawip_impl.h> 69 70 #include <netinet/ip_mroute.h> 71 #include <inet/tcp.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 #include <inet/ipclassifier.h> 75 76 #include <sys/tsol/label.h> 77 #include <sys/tsol/tnet.h> 78 79 #include <inet/ip_ire.h> 80 #include <inet/ip_if.h> 81 82 #include <inet/ip_impl.h> 83 #include <sys/disp.h> 84 85 /* 86 * Synchronization notes: 87 * 88 * RAWIP is MT and uses the usual kernel synchronization primitives. There is 89 * one lock, icmp_rwlock. We also use conn_lock when updating things 90 * which affect the IP classifier lookup. 91 * The lock order is icmp_rwlock -> conn_lock. 92 * 93 * The icmp_rwlock: 94 * This protects most of the other fields in the icmp_t. The exact list of 95 * fields which are protected by each of the above locks is documented in 96 * the icmp_t structure definition. 97 * 98 * Plumbing notes: 99 * ICMP is always a device driver. 
For compatibility with mibopen() code 100 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough 101 * dummy module. 102 */ 103 104 static void icmp_addr_req(queue_t *q, mblk_t *mp); 105 static void icmp_tpi_bind(queue_t *q, mblk_t *mp); 106 static int icmp_bind_proto(conn_t *connp); 107 static int icmp_build_hdrs(icmp_t *icmp); 108 static void icmp_capability_req(queue_t *q, mblk_t *mp); 109 static int icmp_close(queue_t *q, int flags); 110 static void icmp_tpi_connect(queue_t *q, mblk_t *mp); 111 static void icmp_tpi_disconnect(queue_t *q, mblk_t *mp); 112 static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 113 int sys_error); 114 static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 115 t_scalar_t t_error, int sys_error); 116 static void icmp_icmp_error(conn_t *connp, mblk_t *mp); 117 static void icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp); 118 static void icmp_info_req(queue_t *q, mblk_t *mp); 119 static void icmp_input(void *, mblk_t *, void *); 120 static conn_t *icmp_open(int family, cred_t *credp, int *err, int flags); 121 static int icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 122 cred_t *credp); 123 static int icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 124 cred_t *credp); 125 static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, 126 int *errorp, void *thisdg_attrs); 127 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 128 int icmp_opt_set(conn_t *connp, uint_t optset_context, 129 int level, int name, uint_t inlen, 130 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 131 void *thisdg_attrs, cred_t *cr); 132 int icmp_opt_get(conn_t *connp, int level, int name, 133 uchar_t *ptr); 134 static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 135 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt); 136 static int icmp_param_set(queue_t *q, mblk_t *mp, char *value, 137 caddr_t cp, cred_t 
*cr); 138 static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 139 uchar_t *ptr, int len); 140 static int icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 141 cred_t *cr); 142 static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 143 static void icmp_tpi_unbind(queue_t *q, mblk_t *mp); 144 static int icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst); 145 static void icmp_wput(queue_t *q, mblk_t *mp); 146 static void icmp_wput_fallback(queue_t *q, mblk_t *mp); 147 static int raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp, 148 sin6_t *sin6, ip6_pkt_t *ipp); 149 static int raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp, 150 ipaddr_t v4dst, ip4_pkt_t *pktinfop); 151 static void icmp_wput_other(queue_t *q, mblk_t *mp); 152 static void icmp_wput_iocdata(queue_t *q, mblk_t *mp); 153 static void icmp_wput_restricted(queue_t *q, mblk_t *mp); 154 155 static void *rawip_stack_init(netstackid_t stackid, netstack_t *ns); 156 static void rawip_stack_fini(netstackid_t stackid, void *arg); 157 158 static void *rawip_kstat_init(netstackid_t stackid); 159 static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp); 160 static int rawip_kstat_update(kstat_t *kp, int rw); 161 static void rawip_stack_shutdown(netstackid_t stackid, void *arg); 162 static int rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa, 163 uint_t *salenp); 164 static int rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa, 165 uint_t *salenp); 166 167 int rawip_getsockname(sock_lower_handle_t, struct sockaddr *, 168 socklen_t *, cred_t *); 169 int rawip_getpeername(sock_lower_handle_t, struct sockaddr *, 170 socklen_t *, cred_t *); 171 172 static struct module_info icmp_mod_info = { 173 5707, "icmp", 1, INFPSZ, 512, 128 174 }; 175 176 /* 177 * Entry points for ICMP as a device. 178 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices. 
179 */ 180 static struct qinit icmprinitv4 = { 181 NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info 182 }; 183 184 static struct qinit icmprinitv6 = { 185 NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info 186 }; 187 188 static struct qinit icmpwinit = { 189 (pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &icmp_mod_info 190 }; 191 192 /* ICMP entry point during fallback */ 193 static struct qinit icmp_fallback_sock_winit = { 194 (pfi_t)icmp_wput_fallback, NULL, NULL, NULL, NULL, &icmp_mod_info 195 }; 196 197 /* For AF_INET aka /dev/icmp */ 198 struct streamtab icmpinfov4 = { 199 &icmprinitv4, &icmpwinit 200 }; 201 202 /* For AF_INET6 aka /dev/icmp6 */ 203 struct streamtab icmpinfov6 = { 204 &icmprinitv6, &icmpwinit 205 }; 206 207 static sin_t sin_null; /* Zero address for quick clears */ 208 static sin6_t sin6_null; /* Zero address for quick clears */ 209 210 /* Default structure copied into T_INFO_ACK messages */ 211 static struct T_info_ack icmp_g_t_info_ack = { 212 T_INFO_ACK, 213 IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */ 214 T_INVALID, /* ETSDU_size. icmp does not support expedited data. */ 215 T_INVALID, /* CDATA_size. icmp does not support connect data. */ 216 T_INVALID, /* DDATA_size. icmp does not support disconnect data. */ 217 0, /* ADDR_size - filled in later. */ 218 0, /* OPT_size - not initialized here */ 219 IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */ 220 T_CLTS, /* SERV_type. icmp supports connection-less. */ 221 TS_UNBND, /* CURRENT_state. This is set from icmp_state. */ 222 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 223 }; 224 225 /* 226 * Table of ND variables supported by icmp. These are loaded into is_nd 227 * when the stack instance is created. 228 * All of these are alterable, within the min/max values given, at run time. 
229 */ 230 static icmpparam_t icmp_param_arr[] = { 231 /* min max value name */ 232 { 0, 128, 32, "icmp_wroff_extra" }, 233 { 1, 255, 255, "icmp_ipv4_ttl" }, 234 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"}, 235 { 0, 1, 1, "icmp_bsd_compat" }, 236 { 4096, 65536, 8192, "icmp_xmit_hiwat"}, 237 { 0, 65536, 1024, "icmp_xmit_lowat"}, 238 { 4096, 65536, 8192, "icmp_recv_hiwat"}, 239 { 65536, 1024*1024*1024, 256*1024, "icmp_max_buf"}, 240 }; 241 #define is_wroff_extra is_param_arr[0].icmp_param_value 242 #define is_ipv4_ttl is_param_arr[1].icmp_param_value 243 #define is_ipv6_hoplimit is_param_arr[2].icmp_param_value 244 #define is_bsd_compat is_param_arr[3].icmp_param_value 245 #define is_xmit_hiwat is_param_arr[4].icmp_param_value 246 #define is_xmit_lowat is_param_arr[5].icmp_param_value 247 #define is_recv_hiwat is_param_arr[6].icmp_param_value 248 #define is_max_buf is_param_arr[7].icmp_param_value 249 250 static int rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len); 251 static int rawip_do_connect(conn_t *connp, const struct sockaddr *sa, 252 socklen_t len, cred_t *cr); 253 static void rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error); 254 255 /* 256 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 257 * passed to icmp_wput. 258 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP 259 * protocol type placed in the message following the address. A T_BIND_ACK 260 * message is returned by ip_bind_v4/v6. 261 */ 262 static void 263 icmp_tpi_bind(queue_t *q, mblk_t *mp) 264 { 265 int error; 266 struct sockaddr *sa; 267 struct T_bind_req *tbr; 268 socklen_t len; 269 sin_t *sin; 270 sin6_t *sin6; 271 icmp_t *icmp; 272 conn_t *connp = Q_TO_CONN(q); 273 mblk_t *mp1; 274 cred_t *cr; 275 276 /* 277 * All Solaris components should pass a db_credp 278 * for this TPI message, hence we ASSERT. 
279 * But in case there is some other M_PROTO that looks 280 * like a TPI message sent by some other kernel 281 * component, we check and return an error. 282 */ 283 cr = msg_getcred(mp, NULL); 284 ASSERT(cr != NULL); 285 if (cr == NULL) { 286 icmp_err_ack(q, mp, TSYSERR, EINVAL); 287 return; 288 } 289 290 icmp = connp->conn_icmp; 291 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 292 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 293 "icmp_bind: bad req, len %u", 294 (uint_t)(mp->b_wptr - mp->b_rptr)); 295 icmp_err_ack(q, mp, TPROTO, 0); 296 return; 297 } 298 299 if (icmp->icmp_state != TS_UNBND) { 300 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 301 "icmp_bind: bad state, %d", icmp->icmp_state); 302 icmp_err_ack(q, mp, TOUTSTATE, 0); 303 return; 304 } 305 306 /* 307 * Reallocate the message to make sure we have enough room for an 308 * address and the protocol type. 309 */ 310 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 311 if (!mp1) { 312 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 313 return; 314 } 315 mp = mp1; 316 317 /* Reset the message type in preparation for shipping it back. 
*/ 318 DB_TYPE(mp) = M_PCPROTO; 319 tbr = (struct T_bind_req *)mp->b_rptr; 320 len = tbr->ADDR_length; 321 switch (len) { 322 case 0: /* request for a generic port */ 323 tbr->ADDR_offset = sizeof (struct T_bind_req); 324 if (icmp->icmp_family == AF_INET) { 325 tbr->ADDR_length = sizeof (sin_t); 326 sin = (sin_t *)&tbr[1]; 327 *sin = sin_null; 328 sin->sin_family = AF_INET; 329 mp->b_wptr = (uchar_t *)&sin[1]; 330 sa = (struct sockaddr *)sin; 331 len = sizeof (sin_t); 332 } else { 333 ASSERT(icmp->icmp_family == AF_INET6); 334 tbr->ADDR_length = sizeof (sin6_t); 335 sin6 = (sin6_t *)&tbr[1]; 336 *sin6 = sin6_null; 337 sin6->sin6_family = AF_INET6; 338 mp->b_wptr = (uchar_t *)&sin6[1]; 339 sa = (struct sockaddr *)sin6; 340 len = sizeof (sin6_t); 341 } 342 break; 343 344 case sizeof (sin_t): /* Complete IPv4 address */ 345 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 346 sizeof (sin_t)); 347 break; 348 349 case sizeof (sin6_t): /* Complete IPv6 address */ 350 sa = (struct sockaddr *)mi_offset_param(mp, 351 tbr->ADDR_offset, sizeof (sin6_t)); 352 break; 353 354 default: 355 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 356 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 357 icmp_err_ack(q, mp, TBADADDR, 0); 358 return; 359 } 360 361 error = rawip_do_bind(connp, sa, len); 362 done: 363 ASSERT(mp->b_cont == NULL); 364 if (error != 0) { 365 if (error > 0) { 366 icmp_err_ack(q, mp, TSYSERR, error); 367 } else { 368 icmp_err_ack(q, mp, -error, 0); 369 } 370 } else { 371 tbr->PRIM_type = T_BIND_ACK; 372 qreply(q, mp); 373 } 374 } 375 376 static int 377 rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len) 378 { 379 sin_t *sin; 380 sin6_t *sin6; 381 icmp_t *icmp; 382 int error = 0; 383 mblk_t *ire_mp; 384 385 386 icmp = connp->conn_icmp; 387 388 if (sa == NULL || !OK_32PTR((char *)sa)) { 389 return (EINVAL); 390 } 391 392 /* 393 * The state must be TS_UNBND. 
TPI mandates that users must send 394 * TPI primitives only 1 at a time and wait for the response before 395 * sending the next primitive. 396 */ 397 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 398 if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) { 399 error = -TOUTSTATE; 400 goto done; 401 } 402 403 ASSERT(len != 0); 404 switch (len) { 405 case sizeof (sin_t): /* Complete IPv4 address */ 406 sin = (sin_t *)sa; 407 if (sin->sin_family != AF_INET || 408 icmp->icmp_family != AF_INET) { 409 /* TSYSERR, EAFNOSUPPORT */ 410 error = EAFNOSUPPORT; 411 goto done; 412 } 413 break; 414 case sizeof (sin6_t): /* Complete IPv6 address */ 415 sin6 = (sin6_t *)sa; 416 if (sin6->sin6_family != AF_INET6 || 417 icmp->icmp_family != AF_INET6) { 418 /* TSYSERR, EAFNOSUPPORT */ 419 error = EAFNOSUPPORT; 420 goto done; 421 } 422 /* No support for mapped addresses on raw sockets */ 423 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 424 /* TSYSERR, EADDRNOTAVAIL */ 425 error = EADDRNOTAVAIL; 426 goto done; 427 } 428 break; 429 430 default: 431 /* TBADADDR */ 432 error = EADDRNOTAVAIL; 433 goto done; 434 } 435 436 icmp->icmp_pending_op = T_BIND_REQ; 437 icmp->icmp_state = TS_IDLE; 438 439 /* 440 * Copy the source address into our icmp structure. This address 441 * may still be zero; if so, ip will fill in the correct address 442 * each time an outbound packet is passed to it. 443 * If we are binding to a broadcast or multicast address then 444 * rawip_post_ip_bind_connect will clear the source address. 
445 */ 446 447 if (icmp->icmp_family == AF_INET) { 448 ASSERT(sin != NULL); 449 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 450 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 451 &icmp->icmp_v6src); 452 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 453 icmp->icmp_ip_snd_options_len; 454 icmp->icmp_bound_v6src = icmp->icmp_v6src; 455 } else { 456 int error; 457 458 ASSERT(sin6 != NULL); 459 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 460 icmp->icmp_v6src = sin6->sin6_addr; 461 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 462 icmp->icmp_bound_v6src = icmp->icmp_v6src; 463 464 /* Rebuild the header template */ 465 error = icmp_build_hdrs(icmp); 466 if (error != 0) { 467 icmp->icmp_pending_op = -1; 468 /* 469 * TSYSERR 470 */ 471 goto done; 472 } 473 } 474 475 ire_mp = NULL; 476 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 477 /* 478 * request an IRE if src not 0 (INADDR_ANY) 479 */ 480 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 481 if (ire_mp == NULL) { 482 icmp->icmp_pending_op = -1; 483 error = ENOMEM; 484 goto done; 485 } 486 DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE; 487 } 488 done: 489 rw_exit(&icmp->icmp_rwlock); 490 if (error != 0) 491 return (error); 492 493 if (icmp->icmp_family == AF_INET6) { 494 error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto, 495 &sin6->sin6_addr, sin6->sin6_port, B_TRUE); 496 } else { 497 error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto, 498 sin->sin_addr.s_addr, sin->sin_port, B_TRUE); 499 } 500 rawip_post_ip_bind_connect(icmp, ire_mp, error); 501 return (error); 502 } 503 504 static void 505 rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error) 506 { 507 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 508 if (icmp->icmp_state == TS_UNBND) { 509 /* 510 * not yet bound - bind sent by icmp_bind_proto. 
511 */ 512 rw_exit(&icmp->icmp_rwlock); 513 return; 514 } 515 ASSERT(icmp->icmp_pending_op != -1); 516 icmp->icmp_pending_op = -1; 517 518 if (error != 0) { 519 if (icmp->icmp_state == TS_DATA_XFER) { 520 /* Connect failed */ 521 /* Revert back to the bound source */ 522 icmp->icmp_v6src = icmp->icmp_bound_v6src; 523 icmp->icmp_state = TS_IDLE; 524 if (icmp->icmp_family == AF_INET6) 525 (void) icmp_build_hdrs(icmp); 526 } else { 527 V6_SET_ZERO(icmp->icmp_v6src); 528 V6_SET_ZERO(icmp->icmp_bound_v6src); 529 icmp->icmp_state = TS_UNBND; 530 if (icmp->icmp_family == AF_INET6) 531 (void) icmp_build_hdrs(icmp); 532 } 533 } else { 534 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 535 ire_t *ire; 536 537 ire = (ire_t *)ire_mp->b_rptr; 538 /* 539 * If a broadcast/multicast address was bound set 540 * the source address to 0. 541 * This ensures no datagrams with broadcast address 542 * as source address are emitted (which would violate 543 * RFC1122 - Hosts requirements) 544 * Note: we get IRE_BROADCAST for IPv6 545 * to "mark" a multicast local address. 546 */ 547 548 549 if (ire->ire_type == IRE_BROADCAST && 550 icmp->icmp_state != TS_DATA_XFER) { 551 /* 552 * This was just a local bind to a 553 * MC/broadcast addr 554 */ 555 V6_SET_ZERO(icmp->icmp_v6src); 556 if (icmp->icmp_family == AF_INET6) 557 (void) icmp_build_hdrs(icmp); 558 } 559 } 560 561 } 562 rw_exit(&icmp->icmp_rwlock); 563 if (ire_mp != NULL) 564 freeb(ire_mp); 565 } 566 567 /* 568 * Send message to IP to just bind to the protocol. 
569 */ 570 static int 571 icmp_bind_proto(conn_t *connp) 572 { 573 icmp_t *icmp; 574 int error; 575 576 icmp = connp->conn_icmp; 577 578 if (icmp->icmp_family == AF_INET6) 579 error = ip_proto_bind_laddr_v6(connp, NULL, icmp->icmp_proto, 580 &sin6_null.sin6_addr, 0, B_TRUE); 581 else 582 error = ip_proto_bind_laddr_v4(connp, NULL, icmp->icmp_proto, 583 sin_null.sin_addr.s_addr, 0, B_TRUE); 584 585 rawip_post_ip_bind_connect(icmp, NULL, error); 586 return (error); 587 } 588 589 static void 590 icmp_tpi_connect(queue_t *q, mblk_t *mp) 591 { 592 conn_t *connp = Q_TO_CONN(q); 593 struct T_conn_req *tcr; 594 icmp_t *icmp; 595 struct sockaddr *sa; 596 socklen_t len; 597 int error; 598 cred_t *cr; 599 600 /* 601 * All Solaris components should pass a db_credp 602 * for this TPI message, hence we ASSERT. 603 * But in case there is some other M_PROTO that looks 604 * like a TPI message sent by some other kernel 605 * component, we check and return an error. 606 */ 607 cr = msg_getcred(mp, NULL); 608 ASSERT(cr != NULL); 609 if (cr == NULL) { 610 icmp_err_ack(q, mp, TSYSERR, EINVAL); 611 return; 612 } 613 614 icmp = connp->conn_icmp; 615 tcr = (struct T_conn_req *)mp->b_rptr; 616 /* Sanity checks */ 617 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 618 icmp_err_ack(q, mp, TPROTO, 0); 619 return; 620 } 621 622 if (tcr->OPT_length != 0) { 623 icmp_err_ack(q, mp, TBADOPT, 0); 624 return; 625 } 626 627 len = tcr->DEST_length; 628 629 switch (len) { 630 default: 631 icmp_err_ack(q, mp, TBADADDR, 0); 632 return; 633 case sizeof (sin_t): 634 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 635 sizeof (sin_t)); 636 break; 637 case sizeof (sin6_t): 638 sa = (struct sockaddr *)mi_offset_param(mp, 639 tcr->DEST_offset, sizeof (sin6_t)); 640 break; 641 } 642 643 error = proto_verify_ip_addr(icmp->icmp_family, sa, len); 644 if (error != 0) { 645 icmp_err_ack(q, mp, TSYSERR, error); 646 return; 647 } 648 649 error = rawip_do_connect(connp, sa, len, cr); 650 if 
(error != 0) { 651 if (error < 0) { 652 icmp_err_ack(q, mp, -error, 0); 653 } else { 654 icmp_err_ack(q, mp, 0, error); 655 } 656 } else { 657 mblk_t *mp1; 658 659 /* 660 * We have to send a connection confirmation to 661 * keep TLI happy. 662 */ 663 if (icmp->icmp_family == AF_INET) { 664 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 665 sizeof (sin_t), NULL, 0); 666 } else { 667 ASSERT(icmp->icmp_family == AF_INET6); 668 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 669 sizeof (sin6_t), NULL, 0); 670 } 671 if (mp1 == NULL) { 672 rw_exit(&icmp->icmp_rwlock); 673 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 674 return; 675 } 676 677 /* 678 * Send ok_ack for T_CONN_REQ 679 */ 680 mp = mi_tpi_ok_ack_alloc(mp); 681 if (mp == NULL) { 682 /* Unable to reuse the T_CONN_REQ for the ack. */ 683 freemsg(mp1); 684 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 685 return; 686 } 687 putnext(connp->conn_rq, mp); 688 putnext(connp->conn_rq, mp1); 689 } 690 } 691 692 static int 693 rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 694 cred_t *cr) 695 { 696 icmp_t *icmp; 697 sin_t *sin; 698 sin6_t *sin6; 699 mblk_t *ire_mp; 700 int error; 701 ipaddr_t v4dst; 702 in6_addr_t v6dst; 703 704 icmp = connp->conn_icmp; 705 706 if (sa == NULL || !OK_32PTR((char *)sa)) { 707 return (EINVAL); 708 } 709 710 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 711 if (ire_mp == NULL) 712 return (ENOMEM); 713 DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE; 714 715 716 ASSERT(sa != NULL && len != 0); 717 718 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 719 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 720 rw_exit(&icmp->icmp_rwlock); 721 freeb(ire_mp); 722 return (-TOUTSTATE); 723 } 724 725 switch (len) { 726 case sizeof (sin_t): 727 sin = (sin_t *)sa; 728 729 ASSERT(icmp->icmp_family == AF_INET); 730 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 731 732 v4dst = sin->sin_addr.s_addr; 733 /* 734 * Interpret a zero destination to mean loopback. 
735 * Update the T_CONN_REQ (sin/sin6) since it is used to 736 * generate the T_CONN_CON. 737 */ 738 if (v4dst == INADDR_ANY) { 739 v4dst = htonl(INADDR_LOOPBACK); 740 } 741 742 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 743 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 744 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 745 icmp->icmp_ip_snd_options_len; 746 icmp->icmp_v6dst.sin6_addr = v6dst; 747 icmp->icmp_v6dst.sin6_family = AF_INET6; 748 icmp->icmp_v6dst.sin6_flowinfo = 0; 749 icmp->icmp_v6dst.sin6_port = 0; 750 751 /* 752 * If the destination address is multicast and 753 * an outgoing multicast interface has been set, 754 * use the address of that interface as our 755 * source address if no source address has been set. 756 */ 757 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 758 CLASSD(v4dst) && 759 icmp->icmp_multicast_if_addr != INADDR_ANY) { 760 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 761 &icmp->icmp_v6src); 762 } 763 break; 764 case sizeof (sin6_t): 765 sin6 = (sin6_t *)sa; 766 767 /* No support for mapped addresses on raw sockets */ 768 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 769 rw_exit(&icmp->icmp_rwlock); 770 freeb(ire_mp); 771 return (EADDRNOTAVAIL); 772 } 773 774 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 775 ASSERT(icmp->icmp_family == AF_INET6); 776 777 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 778 779 icmp->icmp_v6dst = *sin6; 780 icmp->icmp_v6dst.sin6_port = 0; 781 782 /* 783 * Interpret a zero destination to mean loopback. 784 * Update the T_CONN_REQ (sin/sin6) since it is used to 785 * generate the T_CONN_CON. 786 */ 787 if (IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6dst.sin6_addr)) { 788 icmp->icmp_v6dst.sin6_addr = ipv6_loopback; 789 } 790 /* 791 * If the destination address is multicast and 792 * an outgoing multicast interface has been set, 793 * then the ip bind logic will pick the correct source 794 * address (i.e. matching the outgoing multicast interface). 
795 */ 796 break; 797 } 798 799 icmp->icmp_pending_op = T_CONN_REQ; 800 801 if (icmp->icmp_state == TS_DATA_XFER) { 802 /* Already connected - clear out state */ 803 icmp->icmp_v6src = icmp->icmp_bound_v6src; 804 icmp->icmp_state = TS_IDLE; 805 } 806 807 icmp->icmp_state = TS_DATA_XFER; 808 rw_exit(&icmp->icmp_rwlock); 809 810 if (icmp->icmp_family == AF_INET6) { 811 error = ip_proto_bind_connected_v6(connp, &ire_mp, 812 icmp->icmp_proto, &icmp->icmp_v6src, 0, 813 &icmp->icmp_v6dst.sin6_addr, 814 NULL, sin6->sin6_port, B_TRUE, B_TRUE, cr); 815 } else { 816 error = ip_proto_bind_connected_v4(connp, &ire_mp, 817 icmp->icmp_proto, &V4_PART_OF_V6(icmp->icmp_v6src), 0, 818 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr), sin->sin_port, 819 B_TRUE, B_TRUE, cr); 820 } 821 rawip_post_ip_bind_connect(icmp, ire_mp, error); 822 return (error); 823 } 824 825 static void 826 icmp_close_free(conn_t *connp) 827 { 828 icmp_t *icmp = connp->conn_icmp; 829 830 /* If there are any options associated with the stream, free them. */ 831 if (icmp->icmp_ip_snd_options != NULL) { 832 mi_free((char *)icmp->icmp_ip_snd_options); 833 icmp->icmp_ip_snd_options = NULL; 834 icmp->icmp_ip_snd_options_len = 0; 835 } 836 837 if (icmp->icmp_filter != NULL) { 838 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 839 icmp->icmp_filter = NULL; 840 } 841 842 /* Free memory associated with sticky options */ 843 if (icmp->icmp_sticky_hdrs_len != 0) { 844 kmem_free(icmp->icmp_sticky_hdrs, 845 icmp->icmp_sticky_hdrs_len); 846 icmp->icmp_sticky_hdrs = NULL; 847 icmp->icmp_sticky_hdrs_len = 0; 848 } 849 ip6_pkt_free(&icmp->icmp_sticky_ipp); 850 851 /* 852 * Clear any fields which the kmem_cache constructor clears. 853 * Only icmp_connp needs to be preserved. 854 * TBD: We should make this more efficient to avoid clearing 855 * everything. 
856 */ 857 ASSERT(icmp->icmp_connp == connp); 858 bzero(icmp, sizeof (icmp_t)); 859 icmp->icmp_connp = connp; 860 } 861 862 static int 863 rawip_do_close(conn_t *connp) 864 { 865 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 866 867 ip_quiesce_conn(connp); 868 869 if (!IPCL_IS_NONSTR(connp)) { 870 qprocsoff(connp->conn_rq); 871 } 872 873 ASSERT(connp->conn_icmp->icmp_fallback_queue_head == NULL && 874 connp->conn_icmp->icmp_fallback_queue_tail == NULL); 875 icmp_close_free(connp); 876 877 /* 878 * Now we are truly single threaded on this stream, and can 879 * delete the things hanging off the connp, and finally the connp. 880 * We removed this connp from the fanout list, it cannot be 881 * accessed thru the fanouts, and we already waited for the 882 * conn_ref to drop to 0. We are already in close, so 883 * there cannot be any other thread from the top. qprocsoff 884 * has completed, and service has completed or won't run in 885 * future. 886 */ 887 ASSERT(connp->conn_ref == 1); 888 889 if (!IPCL_IS_NONSTR(connp)) { 890 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 891 } else { 892 ip_free_helper_stream(connp); 893 } 894 895 connp->conn_ref--; 896 ipcl_conn_destroy(connp); 897 898 return (0); 899 } 900 901 static int 902 icmp_close(queue_t *q, int flags) 903 { 904 conn_t *connp; 905 906 if (flags & SO_FALLBACK) { 907 /* 908 * stream is being closed while in fallback 909 * simply free the resources that were allocated 910 */ 911 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 912 qprocsoff(q); 913 goto done; 914 } 915 916 connp = Q_TO_CONN(q); 917 (void) rawip_do_close(connp); 918 done: 919 q->q_ptr = WR(q)->q_ptr = NULL; 920 return (0); 921 } 922 923 /* 924 * This routine handles each T_DISCON_REQ message passed to icmp 925 * as an indicating that ICMP is no longer connected. This results 926 * in sending a T_BIND_REQ to IP to restore the binding to just 927 * the local address. 928 * 929 * The disconnect completes in rawip_post_ip_bind_connect. 
930 */ 931 static int 932 icmp_do_disconnect(conn_t *connp) 933 { 934 icmp_t *icmp; 935 mblk_t *ire_mp; 936 int error; 937 938 icmp = connp->conn_icmp; 939 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 940 if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) { 941 rw_exit(&icmp->icmp_rwlock); 942 return (-TOUTSTATE); 943 } 944 icmp->icmp_pending_op = T_DISCON_REQ; 945 icmp->icmp_v6src = icmp->icmp_bound_v6src; 946 icmp->icmp_state = TS_IDLE; 947 948 949 if (icmp->icmp_family == AF_INET6) { 950 /* Rebuild the header template */ 951 error = icmp_build_hdrs(icmp); 952 if (error != 0) { 953 icmp->icmp_pending_op = -1; 954 rw_exit(&icmp->icmp_rwlock); 955 return (error); 956 } 957 } 958 959 rw_exit(&icmp->icmp_rwlock); 960 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 961 if (ire_mp == NULL) { 962 return (ENOMEM); 963 } 964 965 if (icmp->icmp_family == AF_INET6) { 966 error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto, 967 &icmp->icmp_bound_v6src, 0, B_TRUE); 968 } else { 969 970 error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto, 971 V4_PART_OF_V6(icmp->icmp_bound_v6src), 0, B_TRUE); 972 } 973 974 rawip_post_ip_bind_connect(icmp, ire_mp, error); 975 976 return (error); 977 } 978 979 static void 980 icmp_tpi_disconnect(queue_t *q, mblk_t *mp) 981 { 982 conn_t *connp = Q_TO_CONN(q); 983 int error; 984 985 /* 986 * Allocate the largest primitive we need to send back 987 * T_error_ack is > than T_ok_ack 988 */ 989 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 990 if (mp == NULL) { 991 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 992 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 993 return; 994 } 995 996 error = icmp_do_disconnect(connp); 997 998 if (error != 0) { 999 if (error > 0) { 1000 icmp_err_ack(q, mp, 0, error); 1001 } else { 1002 icmp_err_ack(q, mp, -error, 0); 1003 } 1004 } else { 1005 mp = mi_tpi_ok_ack_alloc(mp); 1006 ASSERT(mp != NULL); 1007 qreply(q, mp); 1008 } 1009 1010 } 1011 1012 static int 1013 icmp_disconnect(conn_t *connp) 1014 { 1015 int error; 1016 icmp_t *icmp = connp->conn_icmp; 1017 1018 icmp->icmp_dgram_errind = B_FALSE; 1019 1020 error = icmp_do_disconnect(connp); 1021 1022 if (error < 0) 1023 error = proto_tlitosyserr(-error); 1024 return (error); 1025 } 1026 1027 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 1028 static void 1029 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1030 { 1031 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1032 qreply(q, mp); 1033 } 1034 1035 /* Shorthand to generate and send TPI error acks to our client */ 1036 static void 1037 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 1038 t_scalar_t t_error, int sys_error) 1039 { 1040 struct T_error_ack *teackp; 1041 1042 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1043 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1044 teackp = (struct T_error_ack *)mp->b_rptr; 1045 teackp->ERROR_prim = primitive; 1046 teackp->TLI_error = t_error; 1047 teackp->UNIX_error = sys_error; 1048 qreply(q, mp); 1049 } 1050 } 1051 1052 /* 1053 * icmp_icmp_error is called by icmp_input to process ICMP 1054 * messages passed up by IP. 1055 * Generates the appropriate permanent (non-transient) errors. 1056 * Assumes that IP has pulled up everything up to and including 1057 * the ICMP header. 
1058 */ 1059 static void 1060 icmp_icmp_error(conn_t *connp, mblk_t *mp) 1061 { 1062 icmph_t *icmph; 1063 ipha_t *ipha; 1064 int iph_hdr_length; 1065 sin_t sin; 1066 mblk_t *mp1; 1067 int error = 0; 1068 icmp_t *icmp = connp->conn_icmp; 1069 1070 ipha = (ipha_t *)mp->b_rptr; 1071 1072 ASSERT(OK_32PTR(mp->b_rptr)); 1073 1074 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1075 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1076 icmp_icmp_error_ipv6(connp, mp); 1077 return; 1078 } 1079 1080 /* 1081 * icmp does not support v4 mapped addresses 1082 * so we can never be here for a V6 socket 1083 * i.e. icmp_family == AF_INET6 1084 */ 1085 ASSERT((IPH_HDR_VERSION(ipha) == IPV4_VERSION) && 1086 (icmp->icmp_family == AF_INET)); 1087 1088 ASSERT(icmp->icmp_family == AF_INET); 1089 1090 /* Skip past the outer IP and ICMP headers */ 1091 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1092 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 1093 ipha = (ipha_t *)&icmph[1]; 1094 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1095 1096 switch (icmph->icmph_type) { 1097 case ICMP_DEST_UNREACHABLE: 1098 switch (icmph->icmph_code) { 1099 case ICMP_FRAGMENTATION_NEEDED: 1100 /* 1101 * IP has already adjusted the path MTU. 1102 */ 1103 break; 1104 case ICMP_PORT_UNREACHABLE: 1105 case ICMP_PROTOCOL_UNREACHABLE: 1106 error = ECONNREFUSED; 1107 break; 1108 default: 1109 /* Transient errors */ 1110 break; 1111 } 1112 break; 1113 default: 1114 /* Transient errors */ 1115 break; 1116 } 1117 if (error == 0) { 1118 freemsg(mp); 1119 return; 1120 } 1121 1122 /* 1123 * Deliver T_UDERROR_IND when the application has asked for it. 1124 * The socket layer enables this automatically when connected. 
1125 */ 1126 if (!icmp->icmp_dgram_errind) { 1127 freemsg(mp); 1128 return; 1129 } 1130 1131 sin = sin_null; 1132 sin.sin_family = AF_INET; 1133 sin.sin_addr.s_addr = ipha->ipha_dst; 1134 if (IPCL_IS_NONSTR(connp)) { 1135 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1136 if (icmp->icmp_state == TS_DATA_XFER) { 1137 if (sin.sin_addr.s_addr == 1138 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr)) { 1139 rw_exit(&icmp->icmp_rwlock); 1140 (*connp->conn_upcalls->su_set_error) 1141 (connp->conn_upper_handle, error); 1142 goto done; 1143 } 1144 } else { 1145 icmp->icmp_delayed_error = error; 1146 *((sin_t *)&icmp->icmp_delayed_addr) = sin; 1147 } 1148 rw_exit(&icmp->icmp_rwlock); 1149 } else { 1150 1151 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 1152 0, error); 1153 if (mp1 != NULL) 1154 putnext(connp->conn_rq, mp1); 1155 } 1156 done: 1157 freemsg(mp); 1158 } 1159 1160 /* 1161 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 1162 * for IPv6 packets. 1163 * Send permanent (non-transient) errors upstream. 1164 * Assumes that IP has pulled up all the extension headers as well 1165 * as the ICMPv6 header. 
1166 */ 1167 static void 1168 icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1169 { 1170 icmp6_t *icmp6; 1171 ip6_t *ip6h, *outer_ip6h; 1172 uint16_t iph_hdr_length; 1173 uint8_t *nexthdrp; 1174 sin6_t sin6; 1175 mblk_t *mp1; 1176 int error = 0; 1177 icmp_t *icmp = connp->conn_icmp; 1178 1179 outer_ip6h = (ip6_t *)mp->b_rptr; 1180 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1181 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1182 else 1183 iph_hdr_length = IPV6_HDR_LEN; 1184 1185 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1186 ip6h = (ip6_t *)&icmp6[1]; 1187 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1188 freemsg(mp); 1189 return; 1190 } 1191 1192 switch (icmp6->icmp6_type) { 1193 case ICMP6_DST_UNREACH: 1194 switch (icmp6->icmp6_code) { 1195 case ICMP6_DST_UNREACH_NOPORT: 1196 error = ECONNREFUSED; 1197 break; 1198 case ICMP6_DST_UNREACH_ADMIN: 1199 case ICMP6_DST_UNREACH_NOROUTE: 1200 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1201 case ICMP6_DST_UNREACH_ADDR: 1202 /* Transient errors */ 1203 break; 1204 default: 1205 break; 1206 } 1207 break; 1208 case ICMP6_PACKET_TOO_BIG: { 1209 struct T_unitdata_ind *tudi; 1210 struct T_opthdr *toh; 1211 size_t udi_size; 1212 mblk_t *newmp; 1213 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1214 sizeof (struct ip6_mtuinfo); 1215 sin6_t *sin6; 1216 struct ip6_mtuinfo *mtuinfo; 1217 1218 /* 1219 * If the application has requested to receive path mtu 1220 * information, send up an empty message containing an 1221 * IPV6_PATHMTU ancillary data item. 1222 */ 1223 if (!icmp->icmp_ipv6_recvpathmtu) 1224 break; 1225 1226 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1227 opt_length; 1228 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1229 BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors); 1230 break; 1231 } 1232 1233 /* 1234 * newmp->b_cont is left to NULL on purpose. This is an 1235 * empty message containing only ancillary data. 
1236 */ 1237 newmp->b_datap->db_type = M_PROTO; 1238 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1239 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1240 tudi->PRIM_type = T_UNITDATA_IND; 1241 tudi->SRC_length = sizeof (sin6_t); 1242 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1243 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1244 tudi->OPT_length = opt_length; 1245 1246 sin6 = (sin6_t *)&tudi[1]; 1247 bzero(sin6, sizeof (sin6_t)); 1248 sin6->sin6_family = AF_INET6; 1249 sin6->sin6_addr = icmp->icmp_v6dst.sin6_addr; 1250 1251 toh = (struct T_opthdr *)&sin6[1]; 1252 toh->level = IPPROTO_IPV6; 1253 toh->name = IPV6_PATHMTU; 1254 toh->len = opt_length; 1255 toh->status = 0; 1256 1257 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1258 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1259 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1260 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1261 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1262 /* 1263 * We've consumed everything we need from the original 1264 * message. Free it, then send our empty message. 1265 */ 1266 freemsg(mp); 1267 if (!IPCL_IS_NONSTR(connp)) { 1268 putnext(connp->conn_rq, newmp); 1269 } else { 1270 (*connp->conn_upcalls->su_recv) 1271 (connp->conn_upper_handle, newmp, 0, 0, &error, 1272 NULL); 1273 ASSERT(error == 0); 1274 } 1275 return; 1276 } 1277 case ICMP6_TIME_EXCEEDED: 1278 /* Transient errors */ 1279 break; 1280 case ICMP6_PARAM_PROB: 1281 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1282 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1283 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1284 (uchar_t *)nexthdrp) { 1285 error = ECONNREFUSED; 1286 break; 1287 } 1288 break; 1289 } 1290 if (error == 0) { 1291 freemsg(mp); 1292 return; 1293 } 1294 1295 /* 1296 * Deliver T_UDERROR_IND when the application has asked for it. 1297 * The socket layer enables this automatically when connected. 
1298 */ 1299 if (!icmp->icmp_dgram_errind) { 1300 freemsg(mp); 1301 return; 1302 } 1303 1304 sin6 = sin6_null; 1305 sin6.sin6_family = AF_INET6; 1306 sin6.sin6_addr = ip6h->ip6_dst; 1307 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1308 1309 if (IPCL_IS_NONSTR(connp)) { 1310 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1311 if (icmp->icmp_state == TS_DATA_XFER) { 1312 if (IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1313 &icmp->icmp_v6dst.sin6_addr)) { 1314 rw_exit(&icmp->icmp_rwlock); 1315 (*connp->conn_upcalls->su_set_error) 1316 (connp->conn_upper_handle, error); 1317 goto done; 1318 } 1319 } else { 1320 icmp->icmp_delayed_error = error; 1321 *((sin6_t *)&icmp->icmp_delayed_addr) = sin6; 1322 } 1323 rw_exit(&icmp->icmp_rwlock); 1324 } else { 1325 1326 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1327 NULL, 0, error); 1328 if (mp1 != NULL) 1329 putnext(connp->conn_rq, mp1); 1330 } 1331 done: 1332 freemsg(mp); 1333 } 1334 1335 /* 1336 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1337 * The local address is filled in if endpoint is bound. The remote address 1338 * is filled in if remote address has been precified ("connected endpoint") 1339 * (The concept of connected CLTS sockets is alien to published TPI 1340 * but we support it anyway). 
1341 */ 1342 static void 1343 icmp_addr_req(queue_t *q, mblk_t *mp) 1344 { 1345 icmp_t *icmp = Q_TO_ICMP(q); 1346 mblk_t *ackmp; 1347 struct T_addr_ack *taa; 1348 1349 /* Make it large enough for worst case */ 1350 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1351 2 * sizeof (sin6_t), 1); 1352 if (ackmp == NULL) { 1353 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1354 return; 1355 } 1356 taa = (struct T_addr_ack *)ackmp->b_rptr; 1357 1358 bzero(taa, sizeof (struct T_addr_ack)); 1359 ackmp->b_wptr = (uchar_t *)&taa[1]; 1360 1361 taa->PRIM_type = T_ADDR_ACK; 1362 ackmp->b_datap->db_type = M_PCPROTO; 1363 rw_enter(&icmp->icmp_rwlock, RW_READER); 1364 /* 1365 * Note: Following code assumes 32 bit alignment of basic 1366 * data structures like sin_t and struct T_addr_ack. 1367 */ 1368 if (icmp->icmp_state != TS_UNBND) { 1369 /* 1370 * Fill in local address 1371 */ 1372 taa->LOCADDR_offset = sizeof (*taa); 1373 if (icmp->icmp_family == AF_INET) { 1374 sin_t *sin; 1375 1376 taa->LOCADDR_length = sizeof (sin_t); 1377 sin = (sin_t *)&taa[1]; 1378 /* Fill zeroes and then intialize non-zero fields */ 1379 *sin = sin_null; 1380 sin->sin_family = AF_INET; 1381 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1382 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1383 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1384 sin->sin_addr.s_addr); 1385 } else { 1386 /* 1387 * INADDR_ANY 1388 * icmp_v6src is not set, we might be bound to 1389 * broadcast/multicast. 
Use icmp_bound_v6src as 1390 * local address instead (that could 1391 * also still be INADDR_ANY) 1392 */ 1393 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1394 sin->sin_addr.s_addr); 1395 } 1396 ackmp->b_wptr = (uchar_t *)&sin[1]; 1397 } else { 1398 sin6_t *sin6; 1399 1400 ASSERT(icmp->icmp_family == AF_INET6); 1401 taa->LOCADDR_length = sizeof (sin6_t); 1402 sin6 = (sin6_t *)&taa[1]; 1403 /* Fill zeroes and then intialize non-zero fields */ 1404 *sin6 = sin6_null; 1405 sin6->sin6_family = AF_INET6; 1406 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1407 sin6->sin6_addr = icmp->icmp_v6src; 1408 } else { 1409 /* 1410 * UNSPECIFIED 1411 * icmp_v6src is not set, we might be bound to 1412 * broadcast/multicast. Use icmp_bound_v6src as 1413 * local address instead (that could 1414 * also still be UNSPECIFIED) 1415 */ 1416 sin6->sin6_addr = icmp->icmp_bound_v6src; 1417 } 1418 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1419 } 1420 } 1421 rw_exit(&icmp->icmp_rwlock); 1422 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1423 qreply(q, ackmp); 1424 } 1425 1426 static void 1427 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1428 { 1429 *tap = icmp_g_t_info_ack; 1430 1431 if (icmp->icmp_family == AF_INET6) 1432 tap->ADDR_size = sizeof (sin6_t); 1433 else 1434 tap->ADDR_size = sizeof (sin_t); 1435 tap->CURRENT_state = icmp->icmp_state; 1436 tap->OPT_size = icmp_max_optsize; 1437 } 1438 1439 static void 1440 icmp_do_capability_ack(icmp_t *icmp, struct T_capability_ack *tcap, 1441 t_uscalar_t cap_bits1) 1442 { 1443 tcap->CAP_bits1 = 0; 1444 1445 if (cap_bits1 & TC1_INFO) { 1446 icmp_copy_info(&tcap->INFO_ack, icmp); 1447 tcap->CAP_bits1 |= TC1_INFO; 1448 } 1449 } 1450 1451 /* 1452 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1453 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1454 * icmp_g_t_info_ack. The current state of the stream is copied from 1455 * icmp_state. 
1456 */ 1457 static void 1458 icmp_capability_req(queue_t *q, mblk_t *mp) 1459 { 1460 icmp_t *icmp = Q_TO_ICMP(q); 1461 t_uscalar_t cap_bits1; 1462 struct T_capability_ack *tcap; 1463 1464 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1465 1466 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1467 mp->b_datap->db_type, T_CAPABILITY_ACK); 1468 if (!mp) 1469 return; 1470 1471 tcap = (struct T_capability_ack *)mp->b_rptr; 1472 1473 icmp_do_capability_ack(icmp, tcap, cap_bits1); 1474 1475 qreply(q, mp); 1476 } 1477 1478 /* 1479 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1480 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1481 * The current state of the stream is copied from icmp_state. 1482 */ 1483 static void 1484 icmp_info_req(queue_t *q, mblk_t *mp) 1485 { 1486 icmp_t *icmp = Q_TO_ICMP(q); 1487 1488 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1489 T_INFO_ACK); 1490 if (!mp) 1491 return; 1492 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1493 qreply(q, mp); 1494 } 1495 1496 /* For /dev/icmp aka AF_INET open */ 1497 static int 1498 icmp_tpi_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1499 int family) 1500 { 1501 conn_t *connp; 1502 dev_t conn_dev; 1503 icmp_stack_t *is; 1504 int error; 1505 1506 conn_dev = NULL; 1507 1508 /* If the stream is already open, return immediately. */ 1509 if (q->q_ptr != NULL) 1510 return (0); 1511 1512 if (sflag == MODOPEN) 1513 return (EINVAL); 1514 1515 /* 1516 * Since ICMP is not used so heavily, allocating from the small 1517 * arena should be sufficient. 
1518 */ 1519 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 1520 return (EBUSY); 1521 } 1522 1523 if (flag & SO_FALLBACK) { 1524 /* 1525 * Non streams socket needs a stream to fallback to 1526 */ 1527 RD(q)->q_ptr = (void *)conn_dev; 1528 WR(q)->q_qinfo = &icmp_fallback_sock_winit; 1529 WR(q)->q_ptr = (void *)ip_minor_arena_sa; 1530 qprocson(q); 1531 return (0); 1532 } 1533 1534 connp = icmp_open(family, credp, &error, KM_SLEEP); 1535 if (connp == NULL) { 1536 ASSERT(error != NULL); 1537 inet_minor_free(ip_minor_arena_sa, connp->conn_dev); 1538 return (error); 1539 } 1540 1541 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1542 connp->conn_dev = conn_dev; 1543 connp->conn_minor_arena = ip_minor_arena_sa; 1544 1545 is = connp->conn_icmp->icmp_is; 1546 1547 /* 1548 * Initialize the icmp_t structure for this stream. 1549 */ 1550 q->q_ptr = connp; 1551 WR(q)->q_ptr = connp; 1552 connp->conn_rq = q; 1553 connp->conn_wq = WR(q); 1554 1555 if (connp->conn_icmp->icmp_family == AF_INET6) { 1556 /* Build initial header template for transmit */ 1557 rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER); 1558 if ((error = icmp_build_hdrs(connp->conn_icmp)) != 0) { 1559 rw_exit(&connp->conn_icmp->icmp_rwlock); 1560 inet_minor_free(ip_minor_arena_sa, connp->conn_dev); 1561 ipcl_conn_destroy(connp); 1562 return (error); 1563 } 1564 rw_exit(&connp->conn_icmp->icmp_rwlock); 1565 } 1566 1567 1568 q->q_hiwat = is->is_recv_hiwat; 1569 WR(q)->q_hiwat = is->is_xmit_hiwat; 1570 WR(q)->q_lowat = is->is_xmit_lowat; 1571 1572 qprocson(q); 1573 1574 /* Set the Stream head write offset. 
*/ 1575 (void) proto_set_tx_wroff(q, connp, 1576 connp->conn_icmp->icmp_max_hdr_len + is->is_wroff_extra); 1577 (void) proto_set_rx_hiwat(connp->conn_rq, connp, q->q_hiwat); 1578 1579 mutex_enter(&connp->conn_lock); 1580 connp->conn_state_flags &= ~CONN_INCIPIENT; 1581 mutex_exit(&connp->conn_lock); 1582 1583 return (0); 1584 } 1585 1586 /* For /dev/icmp4 aka AF_INET open */ 1587 static int 1588 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1589 { 1590 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET)); 1591 } 1592 1593 /* For /dev/icmp6 aka AF_INET6 open */ 1594 static int 1595 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1596 { 1597 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET6)); 1598 } 1599 1600 /* 1601 * This is the open routine for icmp. It allocates a icmp_t structure for 1602 * the stream and, on the first open of the module, creates an ND table. 1603 */ 1604 /* ARGSUSED */ 1605 static conn_t * 1606 icmp_open(int family, cred_t *credp, int *err, int flags) 1607 { 1608 icmp_t *icmp; 1609 conn_t *connp; 1610 zoneid_t zoneid; 1611 netstack_t *ns; 1612 icmp_stack_t *is; 1613 boolean_t isv6 = B_FALSE; 1614 1615 *err = secpolicy_net_icmpaccess(credp); 1616 if (*err != 0) 1617 return (NULL); 1618 1619 if (family == AF_INET6) 1620 isv6 = B_TRUE; 1621 ns = netstack_find_by_cred(credp); 1622 ASSERT(ns != NULL); 1623 is = ns->netstack_icmp; 1624 ASSERT(is != NULL); 1625 1626 /* 1627 * For exclusive stacks we set the zoneid to zero 1628 * to make ICMP operate as if in the global zone. 1629 */ 1630 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 1631 zoneid = GLOBAL_ZONEID; 1632 else 1633 zoneid = crgetzoneid(credp); 1634 1635 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 1636 1637 connp = ipcl_conn_create(IPCL_RAWIPCONN, flags, ns); 1638 icmp = connp->conn_icmp; 1639 icmp->icmp_v6dst = sin6_null; 1640 1641 /* 1642 * ipcl_conn_create did a netstack_hold. 
Undo the hold that was 1643 * done by netstack_find_by_cred() 1644 */ 1645 netstack_rele(ns); 1646 1647 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1648 ASSERT(connp->conn_ulp == IPPROTO_ICMP); 1649 ASSERT(connp->conn_icmp == icmp); 1650 ASSERT(icmp->icmp_connp == connp); 1651 1652 /* Set the initial state of the stream and the privilege status. */ 1653 icmp->icmp_state = TS_UNBND; 1654 if (isv6) { 1655 icmp->icmp_ipversion = IPV6_VERSION; 1656 icmp->icmp_family = AF_INET6; 1657 connp->conn_ulp = IPPROTO_ICMPV6; 1658 /* May be changed by a SO_PROTOTYPE socket option. */ 1659 icmp->icmp_proto = IPPROTO_ICMPV6; 1660 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1661 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1662 icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; 1663 connp->conn_af_isv6 = B_TRUE; 1664 connp->conn_flags |= IPCL_ISV6; 1665 } else { 1666 icmp->icmp_ipversion = IPV4_VERSION; 1667 icmp->icmp_family = AF_INET; 1668 /* May be changed by a SO_PROTOTYPE socket option. */ 1669 icmp->icmp_proto = IPPROTO_ICMP; 1670 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1671 icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; 1672 connp->conn_af_isv6 = B_FALSE; 1673 connp->conn_flags &= ~IPCL_ISV6; 1674 } 1675 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1676 icmp->icmp_pending_op = -1; 1677 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1678 connp->conn_zoneid = zoneid; 1679 1680 /* 1681 * If the caller has the process-wide flag set, then default to MAC 1682 * exempt mode. This allows read-down to unlabeled hosts. 
1683 */ 1684 if (getpflags(NET_MAC_AWARE, credp) != 0) 1685 connp->conn_mac_exempt = B_TRUE; 1686 1687 connp->conn_ulp_labeled = is_system_labeled(); 1688 1689 icmp->icmp_is = is; 1690 1691 connp->conn_recv = icmp_input; 1692 crhold(credp); 1693 connp->conn_cred = credp; 1694 1695 rw_exit(&icmp->icmp_rwlock); 1696 1697 connp->conn_flow_cntrld = B_FALSE; 1698 return (connp); 1699 } 1700 1701 /* 1702 * Which ICMP options OK to set through T_UNITDATA_REQ... 1703 */ 1704 /* ARGSUSED */ 1705 static boolean_t 1706 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1707 { 1708 return (B_TRUE); 1709 } 1710 1711 /* 1712 * This routine gets default values of certain options whose default 1713 * values are maintained by protcol specific code 1714 */ 1715 /* ARGSUSED */ 1716 int 1717 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1718 { 1719 icmp_t *icmp = Q_TO_ICMP(q); 1720 icmp_stack_t *is = icmp->icmp_is; 1721 int *i1 = (int *)ptr; 1722 1723 switch (level) { 1724 case IPPROTO_IP: 1725 switch (name) { 1726 case IP_MULTICAST_TTL: 1727 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1728 return (sizeof (uchar_t)); 1729 case IP_MULTICAST_LOOP: 1730 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1731 return (sizeof (uchar_t)); 1732 } 1733 break; 1734 case IPPROTO_IPV6: 1735 switch (name) { 1736 case IPV6_MULTICAST_HOPS: 1737 *i1 = IP_DEFAULT_MULTICAST_TTL; 1738 return (sizeof (int)); 1739 case IPV6_MULTICAST_LOOP: 1740 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1741 return (sizeof (int)); 1742 case IPV6_UNICAST_HOPS: 1743 *i1 = is->is_ipv6_hoplimit; 1744 return (sizeof (int)); 1745 } 1746 break; 1747 case IPPROTO_ICMPV6: 1748 switch (name) { 1749 case ICMP6_FILTER: 1750 /* Make it look like "pass all" */ 1751 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1752 return (sizeof (icmp6_filter_t)); 1753 } 1754 break; 1755 } 1756 return (-1); 1757 } 1758 1759 /* 1760 * This routine retrieves the current status of socket options. 1761 * It returns the size of the option retrieved. 
1762 */ 1763 int 1764 icmp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 1765 { 1766 icmp_t *icmp = connp->conn_icmp; 1767 icmp_stack_t *is = icmp->icmp_is; 1768 int *i1 = (int *)ptr; 1769 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1770 int ret = 0; 1771 1772 ASSERT(RW_READ_HELD(&icmp->icmp_rwlock)); 1773 switch (level) { 1774 case SOL_SOCKET: 1775 switch (name) { 1776 case SO_DEBUG: 1777 *i1 = icmp->icmp_debug; 1778 break; 1779 case SO_TYPE: 1780 *i1 = SOCK_RAW; 1781 break; 1782 case SO_PROTOTYPE: 1783 *i1 = icmp->icmp_proto; 1784 break; 1785 case SO_REUSEADDR: 1786 *i1 = icmp->icmp_reuseaddr; 1787 break; 1788 1789 /* 1790 * The following three items are available here, 1791 * but are only meaningful to IP. 1792 */ 1793 case SO_DONTROUTE: 1794 *i1 = icmp->icmp_dontroute; 1795 break; 1796 case SO_USELOOPBACK: 1797 *i1 = icmp->icmp_useloopback; 1798 break; 1799 case SO_BROADCAST: 1800 *i1 = icmp->icmp_broadcast; 1801 break; 1802 1803 case SO_SNDBUF: 1804 ASSERT(icmp->icmp_xmit_hiwat <= INT_MAX); 1805 *i1 = icmp->icmp_xmit_hiwat; 1806 break; 1807 case SO_RCVBUF: 1808 ASSERT(icmp->icmp_recv_hiwat <= INT_MAX); 1809 *i1 = icmp->icmp_recv_hiwat; 1810 break; 1811 case SO_DGRAM_ERRIND: 1812 *i1 = icmp->icmp_dgram_errind; 1813 break; 1814 case SO_TIMESTAMP: 1815 *i1 = icmp->icmp_timestamp; 1816 break; 1817 case SO_MAC_EXEMPT: 1818 *i1 = connp->conn_mac_exempt; 1819 break; 1820 case SO_DOMAIN: 1821 *i1 = icmp->icmp_family; 1822 break; 1823 1824 /* 1825 * Following four not meaningful for icmp 1826 * Action is same as "default" to which we fallthrough 1827 * so we keep them in comments. 1828 * case SO_LINGER: 1829 * case SO_KEEPALIVE: 1830 * case SO_OOBINLINE: 1831 * case SO_ALLZONES: 1832 */ 1833 default: 1834 ret = -1; 1835 goto done; 1836 } 1837 break; 1838 case IPPROTO_IP: 1839 /* 1840 * Only allow IPv4 option processing on IPv4 sockets. 
1841 */ 1842 if (icmp->icmp_family != AF_INET) { 1843 ret = -1; 1844 goto done; 1845 } 1846 1847 switch (name) { 1848 case IP_OPTIONS: 1849 case T_IP_OPTIONS: 1850 /* Options are passed up with each packet */ 1851 ret = 0; 1852 goto done; 1853 case IP_HDRINCL: 1854 *i1 = (int)icmp->icmp_hdrincl; 1855 break; 1856 case IP_TOS: 1857 case T_IP_TOS: 1858 *i1 = (int)icmp->icmp_type_of_service; 1859 break; 1860 case IP_TTL: 1861 *i1 = (int)icmp->icmp_ttl; 1862 break; 1863 case IP_MULTICAST_IF: 1864 /* 0 address if not set */ 1865 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1866 ret = sizeof (ipaddr_t); 1867 goto done; 1868 case IP_MULTICAST_TTL: 1869 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1870 ret = sizeof (uchar_t); 1871 goto done; 1872 case IP_MULTICAST_LOOP: 1873 *ptr = connp->conn_multicast_loop; 1874 ret = sizeof (uint8_t); 1875 goto done; 1876 case IP_BOUND_IF: 1877 /* Zero if not set */ 1878 *i1 = icmp->icmp_bound_if; 1879 break; /* goto sizeof (int) option return */ 1880 case IP_UNSPEC_SRC: 1881 *ptr = icmp->icmp_unspec_source; 1882 break; /* goto sizeof (int) option return */ 1883 case IP_RECVIF: 1884 *ptr = icmp->icmp_recvif; 1885 break; /* goto sizeof (int) option return */ 1886 case IP_BROADCAST_TTL: 1887 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 1888 return (sizeof (uchar_t)); 1889 case IP_RECVPKTINFO: 1890 /* 1891 * This also handles IP_PKTINFO. 1892 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1893 * Differentiation is based on the size of the argument 1894 * passed in. 1895 * This option is handled in IP which will return an 1896 * error for IP_PKTINFO as it's not supported as a 1897 * sticky option. 1898 */ 1899 ret = -EINVAL; 1900 goto done; 1901 /* 1902 * Cannot "get" the value of following options 1903 * at this level. Action is same as "default" to 1904 * which we fallthrough so we keep them in comments. 
1905 * 1906 * case IP_ADD_MEMBERSHIP: 1907 * case IP_DROP_MEMBERSHIP: 1908 * case IP_BLOCK_SOURCE: 1909 * case IP_UNBLOCK_SOURCE: 1910 * case IP_ADD_SOURCE_MEMBERSHIP: 1911 * case IP_DROP_SOURCE_MEMBERSHIP: 1912 * case MCAST_JOIN_GROUP: 1913 * case MCAST_LEAVE_GROUP: 1914 * case MCAST_BLOCK_SOURCE: 1915 * case MCAST_UNBLOCK_SOURCE: 1916 * case MCAST_JOIN_SOURCE_GROUP: 1917 * case MCAST_LEAVE_SOURCE_GROUP: 1918 * case MRT_INIT: 1919 * case MRT_DONE: 1920 * case MRT_ADD_VIF: 1921 * case MRT_DEL_VIF: 1922 * case MRT_ADD_MFC: 1923 * case MRT_DEL_MFC: 1924 * case MRT_VERSION: 1925 * case MRT_ASSERT: 1926 * case IP_SEC_OPT: 1927 * case IP_NEXTHOP: 1928 */ 1929 default: 1930 ret = -1; 1931 goto done; 1932 } 1933 break; 1934 case IPPROTO_IPV6: 1935 /* 1936 * Only allow IPv6 option processing on native IPv6 sockets. 1937 */ 1938 if (icmp->icmp_family != AF_INET6) { 1939 ret = -1; 1940 goto done; 1941 } 1942 switch (name) { 1943 case IPV6_UNICAST_HOPS: 1944 *i1 = (unsigned int)icmp->icmp_ttl; 1945 break; 1946 case IPV6_MULTICAST_IF: 1947 /* 0 index if not set */ 1948 *i1 = icmp->icmp_multicast_if_index; 1949 break; 1950 case IPV6_MULTICAST_HOPS: 1951 *i1 = icmp->icmp_multicast_ttl; 1952 break; 1953 case IPV6_MULTICAST_LOOP: 1954 *i1 = connp->conn_multicast_loop; 1955 break; 1956 case IPV6_BOUND_IF: 1957 /* Zero if not set */ 1958 *i1 = icmp->icmp_bound_if; 1959 break; 1960 case IPV6_UNSPEC_SRC: 1961 *i1 = icmp->icmp_unspec_source; 1962 break; 1963 case IPV6_CHECKSUM: 1964 /* 1965 * Return offset or -1 if no checksum offset. 
1966 * Does not apply to IPPROTO_ICMPV6 1967 */ 1968 if (icmp->icmp_proto == IPPROTO_ICMPV6) { 1969 ret = -1; 1970 goto done; 1971 } 1972 1973 if (icmp->icmp_raw_checksum) { 1974 *i1 = icmp->icmp_checksum_off; 1975 } else { 1976 *i1 = -1; 1977 } 1978 break; 1979 case IPV6_JOIN_GROUP: 1980 case IPV6_LEAVE_GROUP: 1981 case MCAST_JOIN_GROUP: 1982 case MCAST_LEAVE_GROUP: 1983 case MCAST_BLOCK_SOURCE: 1984 case MCAST_UNBLOCK_SOURCE: 1985 case MCAST_JOIN_SOURCE_GROUP: 1986 case MCAST_LEAVE_SOURCE_GROUP: 1987 /* cannot "get" the value for these */ 1988 ret = -1; 1989 goto done; 1990 case IPV6_RECVPKTINFO: 1991 *i1 = icmp->icmp_ip_recvpktinfo; 1992 break; 1993 case IPV6_RECVTCLASS: 1994 *i1 = icmp->icmp_ipv6_recvtclass; 1995 break; 1996 case IPV6_RECVPATHMTU: 1997 *i1 = icmp->icmp_ipv6_recvpathmtu; 1998 break; 1999 case IPV6_V6ONLY: 2000 *i1 = 1; 2001 break; 2002 case IPV6_RECVHOPLIMIT: 2003 *i1 = icmp->icmp_ipv6_recvhoplimit; 2004 break; 2005 case IPV6_RECVHOPOPTS: 2006 *i1 = icmp->icmp_ipv6_recvhopopts; 2007 break; 2008 case IPV6_RECVDSTOPTS: 2009 *i1 = icmp->icmp_ipv6_recvdstopts; 2010 break; 2011 case _OLD_IPV6_RECVDSTOPTS: 2012 *i1 = icmp->icmp_old_ipv6_recvdstopts; 2013 break; 2014 case IPV6_RECVRTHDRDSTOPTS: 2015 *i1 = icmp->icmp_ipv6_recvrtdstopts; 2016 break; 2017 case IPV6_RECVRTHDR: 2018 *i1 = icmp->icmp_ipv6_recvrthdr; 2019 break; 2020 case IPV6_PKTINFO: { 2021 /* XXX assumes that caller has room for max size! 
*/ 2022 struct in6_pktinfo *pkti; 2023 2024 pkti = (struct in6_pktinfo *)ptr; 2025 if (ipp->ipp_fields & IPPF_IFINDEX) 2026 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2027 else 2028 pkti->ipi6_ifindex = 0; 2029 if (ipp->ipp_fields & IPPF_ADDR) 2030 pkti->ipi6_addr = ipp->ipp_addr; 2031 else 2032 pkti->ipi6_addr = ipv6_all_zeros; 2033 ret = sizeof (struct in6_pktinfo); 2034 goto done; 2035 } 2036 case IPV6_NEXTHOP: { 2037 sin6_t *sin6 = (sin6_t *)ptr; 2038 2039 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2040 return (0); 2041 *sin6 = sin6_null; 2042 sin6->sin6_family = AF_INET6; 2043 sin6->sin6_addr = ipp->ipp_nexthop; 2044 ret = (sizeof (sin6_t)); 2045 goto done; 2046 } 2047 case IPV6_HOPOPTS: 2048 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2049 return (0); 2050 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 2051 return (0); 2052 bcopy((char *)ipp->ipp_hopopts + 2053 icmp->icmp_label_len_v6, ptr, 2054 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 2055 if (icmp->icmp_label_len_v6 > 0) { 2056 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2057 ptr[1] = (ipp->ipp_hopoptslen - 2058 icmp->icmp_label_len_v6 + 7) / 8 - 1; 2059 } 2060 ret = (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 2061 goto done; 2062 case IPV6_RTHDRDSTOPTS: 2063 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2064 return (0); 2065 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2066 ret = ipp->ipp_rtdstoptslen; 2067 goto done; 2068 case IPV6_RTHDR: 2069 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2070 return (0); 2071 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2072 ret = ipp->ipp_rthdrlen; 2073 goto done; 2074 case IPV6_DSTOPTS: 2075 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 2076 ret = 0; 2077 goto done; 2078 } 2079 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2080 ret = ipp->ipp_dstoptslen; 2081 goto done; 2082 case IPV6_PATHMTU: 2083 if (!(ipp->ipp_fields & IPPF_PATHMTU)) { 2084 ret = 0; 2085 } else { 2086 ret = ip_fill_mtuinfo( 2087 &icmp->icmp_v6dst.sin6_addr, 0, 2088 (struct ip6_mtuinfo *)ptr, 2089 
is->is_netstack); 2090 } 2091 goto done; 2092 case IPV6_TCLASS: 2093 if (ipp->ipp_fields & IPPF_TCLASS) 2094 *i1 = ipp->ipp_tclass; 2095 else 2096 *i1 = IPV6_FLOW_TCLASS( 2097 IPV6_DEFAULT_VERS_AND_FLOW); 2098 break; 2099 default: 2100 ret = -1; 2101 goto done; 2102 } 2103 break; 2104 case IPPROTO_ICMPV6: 2105 /* 2106 * Only allow IPv6 option processing on native IPv6 sockets. 2107 */ 2108 if (icmp->icmp_family != AF_INET6) { 2109 ret = -1; 2110 } 2111 2112 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2113 ret = -1; 2114 } 2115 2116 switch (name) { 2117 case ICMP6_FILTER: 2118 if (icmp->icmp_filter == NULL) { 2119 /* Make it look like "pass all" */ 2120 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 2121 } else { 2122 (void) bcopy(icmp->icmp_filter, ptr, 2123 sizeof (icmp6_filter_t)); 2124 } 2125 ret = sizeof (icmp6_filter_t); 2126 goto done; 2127 default: 2128 ret = -1; 2129 goto done; 2130 } 2131 default: 2132 ret = -1; 2133 goto done; 2134 } 2135 ret = sizeof (int); 2136 done: 2137 return (ret); 2138 } 2139 2140 /* 2141 * This routine retrieves the current status of socket options. 2142 * It returns the size of the option retrieved. 2143 */ 2144 int 2145 icmp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 2146 { 2147 conn_t *connp = Q_TO_CONN(q); 2148 icmp_t *icmp = connp->conn_icmp; 2149 int err; 2150 2151 rw_enter(&icmp->icmp_rwlock, RW_READER); 2152 err = icmp_opt_get(connp, level, name, ptr); 2153 rw_exit(&icmp->icmp_rwlock); 2154 return (err); 2155 } 2156 2157 int 2158 icmp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2159 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2160 void *thisdg_attrs, boolean_t checkonly) 2161 { 2162 2163 int *i1 = (int *)invalp; 2164 boolean_t onoff = (*i1 == 0) ? 
0 : 1; 2165 icmp_t *icmp = connp->conn_icmp; 2166 icmp_stack_t *is = icmp->icmp_is; 2167 int error; 2168 2169 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 2170 /* 2171 * For fixed length options, no sanity check 2172 * of passed in length is done. It is assumed *_optcom_req() 2173 * routines do the right thing. 2174 */ 2175 switch (level) { 2176 case SOL_SOCKET: 2177 switch (name) { 2178 case SO_DEBUG: 2179 if (!checkonly) 2180 icmp->icmp_debug = onoff; 2181 break; 2182 case SO_PROTOTYPE: 2183 if ((*i1 & 0xFF) != IPPROTO_ICMP && 2184 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 2185 secpolicy_net_rawaccess(cr) != 0) { 2186 *outlenp = 0; 2187 return (EACCES); 2188 } 2189 /* Can't use IPPROTO_RAW with IPv6 */ 2190 if ((*i1 & 0xFF) == IPPROTO_RAW && 2191 icmp->icmp_family == AF_INET6) { 2192 *outlenp = 0; 2193 return (EPROTONOSUPPORT); 2194 } 2195 if (checkonly) { 2196 /* T_CHECK case */ 2197 *(int *)outvalp = (*i1 & 0xFF); 2198 break; 2199 } 2200 icmp->icmp_proto = *i1 & 0xFF; 2201 if ((icmp->icmp_proto == IPPROTO_RAW || 2202 icmp->icmp_proto == IPPROTO_IGMP) && 2203 icmp->icmp_family == AF_INET) 2204 icmp->icmp_hdrincl = 1; 2205 else 2206 icmp->icmp_hdrincl = 0; 2207 2208 if (icmp->icmp_family == AF_INET6 && 2209 icmp->icmp_proto == IPPROTO_ICMPV6) { 2210 /* Set offset for icmp6_cksum */ 2211 icmp->icmp_raw_checksum = 0; 2212 icmp->icmp_checksum_off = 2; 2213 } 2214 if (icmp->icmp_proto == IPPROTO_UDP || 2215 icmp->icmp_proto == IPPROTO_TCP || 2216 icmp->icmp_proto == IPPROTO_SCTP) { 2217 icmp->icmp_no_tp_cksum = 1; 2218 icmp->icmp_sticky_ipp.ipp_fields |= 2219 IPPF_NO_CKSUM; 2220 } else { 2221 icmp->icmp_no_tp_cksum = 0; 2222 icmp->icmp_sticky_ipp.ipp_fields &= 2223 ~IPPF_NO_CKSUM; 2224 } 2225 2226 if (icmp->icmp_filter != NULL && 2227 icmp->icmp_proto != IPPROTO_ICMPV6) { 2228 kmem_free(icmp->icmp_filter, 2229 sizeof (icmp6_filter_t)); 2230 icmp->icmp_filter = NULL; 2231 } 2232 2233 /* Rebuild the header template */ 2234 error = icmp_build_hdrs(icmp); 2235 if (error != 0) { 
2236 *outlenp = 0; 2237 return (error); 2238 } 2239 2240 /* 2241 * For SCTP, we don't use icmp_bind_proto() for 2242 * raw socket binding. Note that we do not need 2243 * to set *outlenp. 2244 * FIXME: how does SCTP work? 2245 */ 2246 if (icmp->icmp_proto == IPPROTO_SCTP) 2247 return (0); 2248 2249 *outlenp = sizeof (int); 2250 *(int *)outvalp = *i1 & 0xFF; 2251 2252 /* Drop lock across the bind operation */ 2253 rw_exit(&icmp->icmp_rwlock); 2254 (void) icmp_bind_proto(connp); 2255 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2256 return (0); 2257 case SO_REUSEADDR: 2258 if (!checkonly) { 2259 icmp->icmp_reuseaddr = onoff; 2260 PASS_OPT_TO_IP(connp); 2261 } 2262 break; 2263 2264 /* 2265 * The following three items are available here, 2266 * but are only meaningful to IP. 2267 */ 2268 case SO_DONTROUTE: 2269 if (!checkonly) { 2270 icmp->icmp_dontroute = onoff; 2271 PASS_OPT_TO_IP(connp); 2272 } 2273 break; 2274 case SO_USELOOPBACK: 2275 if (!checkonly) { 2276 icmp->icmp_useloopback = onoff; 2277 PASS_OPT_TO_IP(connp); 2278 } 2279 break; 2280 case SO_BROADCAST: 2281 if (!checkonly) { 2282 icmp->icmp_broadcast = onoff; 2283 PASS_OPT_TO_IP(connp); 2284 } 2285 break; 2286 2287 case SO_SNDBUF: 2288 if (*i1 > is->is_max_buf) { 2289 *outlenp = 0; 2290 return (ENOBUFS); 2291 } 2292 if (!checkonly) { 2293 if (!IPCL_IS_NONSTR(connp)) { 2294 connp->conn_wq->q_hiwat = *i1; 2295 } 2296 icmp->icmp_xmit_hiwat = *i1; 2297 } 2298 break; 2299 case SO_RCVBUF: 2300 if (*i1 > is->is_max_buf) { 2301 *outlenp = 0; 2302 return (ENOBUFS); 2303 } 2304 if (!checkonly) { 2305 icmp->icmp_recv_hiwat = *i1; 2306 rw_exit(&icmp->icmp_rwlock); 2307 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2308 *i1); 2309 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2310 } 2311 break; 2312 case SO_DGRAM_ERRIND: 2313 if (!checkonly) 2314 icmp->icmp_dgram_errind = onoff; 2315 break; 2316 case SO_ALLZONES: 2317 /* 2318 * "soft" error (negative) 2319 * option not handled at this level 2320 * Note: Do not modify *outlenp 
2321 */ 2322 return (-EINVAL); 2323 case SO_TIMESTAMP: 2324 if (!checkonly) { 2325 icmp->icmp_timestamp = onoff; 2326 } 2327 break; 2328 case SO_MAC_EXEMPT: 2329 /* 2330 * "soft" error (negative) 2331 * option not handled at this level 2332 * Note: Do not modify *outlenp 2333 */ 2334 return (-EINVAL); 2335 case SO_RCVTIMEO: 2336 case SO_SNDTIMEO: 2337 /* 2338 * Pass these two options in order for third part 2339 * protocol usage. Here just return directly. 2340 */ 2341 return (0); 2342 /* 2343 * Following three not meaningful for icmp 2344 * Action is same as "default" so we keep them 2345 * in comments. 2346 * case SO_LINGER: 2347 * case SO_KEEPALIVE: 2348 * case SO_OOBINLINE: 2349 */ 2350 default: 2351 *outlenp = 0; 2352 return (EINVAL); 2353 } 2354 break; 2355 case IPPROTO_IP: 2356 /* 2357 * Only allow IPv4 option processing on IPv4 sockets. 2358 */ 2359 if (icmp->icmp_family != AF_INET) { 2360 *outlenp = 0; 2361 return (ENOPROTOOPT); 2362 } 2363 switch (name) { 2364 case IP_OPTIONS: 2365 case T_IP_OPTIONS: 2366 /* Save options for use by IP. */ 2367 if ((inlen & 0x3) || 2368 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2369 *outlenp = 0; 2370 return (EINVAL); 2371 } 2372 if (checkonly) 2373 break; 2374 2375 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2376 &icmp->icmp_ip_snd_options_len, 2377 icmp->icmp_label_len, invalp, inlen)) { 2378 *outlenp = 0; 2379 return (ENOMEM); 2380 } 2381 2382 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2383 icmp->icmp_ip_snd_options_len; 2384 rw_exit(&icmp->icmp_rwlock); 2385 (void) proto_set_tx_wroff(connp->conn_rq == NULL ? 
NULL: 2386 RD(connp->conn_rq), connp, 2387 icmp->icmp_max_hdr_len + is->is_wroff_extra); 2388 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2389 break; 2390 case IP_HDRINCL: 2391 if (!checkonly) 2392 icmp->icmp_hdrincl = onoff; 2393 break; 2394 case IP_TOS: 2395 case T_IP_TOS: 2396 if (!checkonly) { 2397 icmp->icmp_type_of_service = (uint8_t)*i1; 2398 } 2399 break; 2400 case IP_TTL: 2401 if (!checkonly) { 2402 icmp->icmp_ttl = (uint8_t)*i1; 2403 } 2404 break; 2405 case IP_MULTICAST_IF: 2406 /* 2407 * TODO should check OPTMGMT reply and undo this if 2408 * there is an error. 2409 */ 2410 if (!checkonly) { 2411 icmp->icmp_multicast_if_addr = *i1; 2412 PASS_OPT_TO_IP(connp); 2413 } 2414 break; 2415 case IP_MULTICAST_TTL: 2416 if (!checkonly) 2417 icmp->icmp_multicast_ttl = *invalp; 2418 break; 2419 case IP_MULTICAST_LOOP: 2420 if (!checkonly) { 2421 connp->conn_multicast_loop = 2422 (*invalp == 0) ? 0 : 1; 2423 PASS_OPT_TO_IP(connp); 2424 } 2425 break; 2426 case IP_BOUND_IF: 2427 if (!checkonly) { 2428 icmp->icmp_bound_if = *i1; 2429 PASS_OPT_TO_IP(connp); 2430 } 2431 break; 2432 case IP_UNSPEC_SRC: 2433 if (!checkonly) { 2434 icmp->icmp_unspec_source = onoff; 2435 PASS_OPT_TO_IP(connp); 2436 } 2437 break; 2438 case IP_BROADCAST_TTL: 2439 if (!checkonly) 2440 connp->conn_broadcast_ttl = *invalp; 2441 break; 2442 case IP_RECVIF: 2443 if (!checkonly) { 2444 icmp->icmp_recvif = onoff; 2445 } 2446 /* 2447 * pass to ip 2448 */ 2449 return (-EINVAL); 2450 case IP_PKTINFO: { 2451 /* 2452 * This also handles IP_RECVPKTINFO. 2453 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2454 * Differentiation is based on the size of the argument 2455 * passed in. 2456 */ 2457 struct in_pktinfo *pktinfop; 2458 ip4_pkt_t *attr_pktinfop; 2459 2460 if (checkonly) 2461 break; 2462 2463 if (inlen == sizeof (int)) { 2464 /* 2465 * This is IP_RECVPKTINFO option. 2466 * Keep a local copy of wether this option is 2467 * set or not and pass it down to IP for 2468 * processing. 
2469 */ 2470 icmp->icmp_ip_recvpktinfo = onoff; 2471 return (-EINVAL); 2472 } 2473 2474 2475 if (inlen != sizeof (struct in_pktinfo)) { 2476 return (EINVAL); 2477 } 2478 2479 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2480 == NULL) { 2481 /* 2482 * sticky option is not supported 2483 */ 2484 return (EINVAL); 2485 } 2486 2487 pktinfop = (struct in_pktinfo *)invalp; 2488 2489 /* 2490 * Atleast one of the values should be specified 2491 */ 2492 if (pktinfop->ipi_ifindex == 0 && 2493 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2494 return (EINVAL); 2495 } 2496 2497 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2498 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2499 } 2500 break; 2501 case IP_ADD_MEMBERSHIP: 2502 case IP_DROP_MEMBERSHIP: 2503 case IP_BLOCK_SOURCE: 2504 case IP_UNBLOCK_SOURCE: 2505 case IP_ADD_SOURCE_MEMBERSHIP: 2506 case IP_DROP_SOURCE_MEMBERSHIP: 2507 case MCAST_JOIN_GROUP: 2508 case MCAST_LEAVE_GROUP: 2509 case MCAST_BLOCK_SOURCE: 2510 case MCAST_UNBLOCK_SOURCE: 2511 case MCAST_JOIN_SOURCE_GROUP: 2512 case MCAST_LEAVE_SOURCE_GROUP: 2513 case MRT_INIT: 2514 case MRT_DONE: 2515 case MRT_ADD_VIF: 2516 case MRT_DEL_VIF: 2517 case MRT_ADD_MFC: 2518 case MRT_DEL_MFC: 2519 case MRT_VERSION: 2520 case MRT_ASSERT: 2521 case IP_SEC_OPT: 2522 case IP_NEXTHOP: 2523 /* 2524 * "soft" error (negative) 2525 * option not handled at this level 2526 * Note: Do not modify *outlenp 2527 */ 2528 return (-EINVAL); 2529 default: 2530 *outlenp = 0; 2531 return (EINVAL); 2532 } 2533 break; 2534 case IPPROTO_IPV6: { 2535 ip6_pkt_t *ipp; 2536 boolean_t sticky; 2537 2538 if (icmp->icmp_family != AF_INET6) { 2539 *outlenp = 0; 2540 return (ENOPROTOOPT); 2541 } 2542 /* 2543 * Deal with both sticky options and ancillary data 2544 */ 2545 if (thisdg_attrs == NULL) { 2546 /* sticky options, or none */ 2547 ipp = &icmp->icmp_sticky_ipp; 2548 sticky = B_TRUE; 2549 } else { 2550 /* ancillary data */ 2551 ipp = (ip6_pkt_t *)thisdg_attrs; 2552 sticky = B_FALSE; 
2553 } 2554 2555 switch (name) { 2556 case IPV6_MULTICAST_IF: 2557 if (!checkonly) { 2558 icmp->icmp_multicast_if_index = *i1; 2559 PASS_OPT_TO_IP(connp); 2560 } 2561 break; 2562 case IPV6_UNICAST_HOPS: 2563 /* -1 means use default */ 2564 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2565 *outlenp = 0; 2566 return (EINVAL); 2567 } 2568 if (!checkonly) { 2569 if (*i1 == -1) { 2570 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2571 is->is_ipv6_hoplimit; 2572 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2573 /* Pass modified value to IP. */ 2574 *i1 = ipp->ipp_hoplimit; 2575 } else { 2576 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2577 (uint8_t)*i1; 2578 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2579 } 2580 /* Rebuild the header template */ 2581 error = icmp_build_hdrs(icmp); 2582 if (error != 0) { 2583 *outlenp = 0; 2584 return (error); 2585 } 2586 } 2587 break; 2588 case IPV6_MULTICAST_HOPS: 2589 /* -1 means use default */ 2590 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2591 *outlenp = 0; 2592 return (EINVAL); 2593 } 2594 if (!checkonly) { 2595 if (*i1 == -1) { 2596 icmp->icmp_multicast_ttl = 2597 ipp->ipp_multicast_hops = 2598 IP_DEFAULT_MULTICAST_TTL; 2599 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2600 /* Pass modified value to IP. */ 2601 *i1 = icmp->icmp_multicast_ttl; 2602 } else { 2603 icmp->icmp_multicast_ttl = 2604 ipp->ipp_multicast_hops = 2605 (uint8_t)*i1; 2606 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2607 } 2608 } 2609 break; 2610 case IPV6_MULTICAST_LOOP: 2611 if (*i1 != 0 && *i1 != 1) { 2612 *outlenp = 0; 2613 return (EINVAL); 2614 } 2615 if (!checkonly) { 2616 connp->conn_multicast_loop = *i1; 2617 PASS_OPT_TO_IP(connp); 2618 } 2619 break; 2620 case IPV6_CHECKSUM: 2621 /* 2622 * Integer offset into the user data of where the 2623 * checksum is located. 2624 * Offset of -1 disables option. 2625 * Does not apply to IPPROTO_ICMPV6. 
2626 */ 2627 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2628 *outlenp = 0; 2629 return (EINVAL); 2630 } 2631 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2632 /* Negative or not 16 bit aligned offset */ 2633 *outlenp = 0; 2634 return (EINVAL); 2635 } 2636 if (checkonly) 2637 break; 2638 2639 if (*i1 == -1) { 2640 icmp->icmp_raw_checksum = 0; 2641 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2642 } else { 2643 icmp->icmp_raw_checksum = 1; 2644 icmp->icmp_checksum_off = *i1; 2645 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2646 } 2647 /* Rebuild the header template */ 2648 error = icmp_build_hdrs(icmp); 2649 if (error != 0) { 2650 *outlenp = 0; 2651 return (error); 2652 } 2653 break; 2654 case IPV6_JOIN_GROUP: 2655 case IPV6_LEAVE_GROUP: 2656 case MCAST_JOIN_GROUP: 2657 case MCAST_LEAVE_GROUP: 2658 case MCAST_BLOCK_SOURCE: 2659 case MCAST_UNBLOCK_SOURCE: 2660 case MCAST_JOIN_SOURCE_GROUP: 2661 case MCAST_LEAVE_SOURCE_GROUP: 2662 /* 2663 * "soft" error (negative) 2664 * option not handled at this level 2665 * Note: Do not modify *outlenp 2666 */ 2667 return (-EINVAL); 2668 case IPV6_BOUND_IF: 2669 if (!checkonly) { 2670 icmp->icmp_bound_if = *i1; 2671 PASS_OPT_TO_IP(connp); 2672 } 2673 break; 2674 case IPV6_UNSPEC_SRC: 2675 if (!checkonly) { 2676 icmp->icmp_unspec_source = onoff; 2677 PASS_OPT_TO_IP(connp); 2678 } 2679 break; 2680 case IPV6_RECVTCLASS: 2681 if (!checkonly) { 2682 icmp->icmp_ipv6_recvtclass = onoff; 2683 PASS_OPT_TO_IP(connp); 2684 } 2685 break; 2686 /* 2687 * Set boolean switches for ancillary data delivery 2688 */ 2689 case IPV6_RECVPKTINFO: 2690 if (!checkonly) { 2691 icmp->icmp_ip_recvpktinfo = onoff; 2692 PASS_OPT_TO_IP(connp); 2693 } 2694 break; 2695 case IPV6_RECVPATHMTU: 2696 if (!checkonly) { 2697 icmp->icmp_ipv6_recvpathmtu = onoff; 2698 PASS_OPT_TO_IP(connp); 2699 } 2700 break; 2701 case IPV6_RECVHOPLIMIT: 2702 if (!checkonly) { 2703 icmp->icmp_ipv6_recvhoplimit = onoff; 2704 PASS_OPT_TO_IP(connp); 2705 } 2706 break; 2707 case 
IPV6_RECVHOPOPTS: 2708 if (!checkonly) { 2709 icmp->icmp_ipv6_recvhopopts = onoff; 2710 PASS_OPT_TO_IP(connp); 2711 } 2712 break; 2713 case IPV6_RECVDSTOPTS: 2714 if (!checkonly) { 2715 icmp->icmp_ipv6_recvdstopts = onoff; 2716 PASS_OPT_TO_IP(connp); 2717 } 2718 break; 2719 case _OLD_IPV6_RECVDSTOPTS: 2720 if (!checkonly) 2721 icmp->icmp_old_ipv6_recvdstopts = onoff; 2722 break; 2723 case IPV6_RECVRTHDRDSTOPTS: 2724 if (!checkonly) { 2725 icmp->icmp_ipv6_recvrtdstopts = onoff; 2726 PASS_OPT_TO_IP(connp); 2727 } 2728 break; 2729 case IPV6_RECVRTHDR: 2730 if (!checkonly) { 2731 icmp->icmp_ipv6_recvrthdr = onoff; 2732 PASS_OPT_TO_IP(connp); 2733 } 2734 break; 2735 /* 2736 * Set sticky options or ancillary data. 2737 * If sticky options, (re)build any extension headers 2738 * that might be needed as a result. 2739 */ 2740 case IPV6_PKTINFO: 2741 /* 2742 * The source address and ifindex are verified 2743 * in ip_opt_set(). For ancillary data the 2744 * source address is checked in ip_wput_v6. 2745 */ 2746 if (inlen != 0 && inlen != 2747 sizeof (struct in6_pktinfo)) { 2748 return (EINVAL); 2749 } 2750 if (checkonly) 2751 break; 2752 2753 if (inlen == 0) { 2754 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2755 ipp->ipp_sticky_ignored |= 2756 (IPPF_IFINDEX|IPPF_ADDR); 2757 } else { 2758 struct in6_pktinfo *pkti; 2759 2760 pkti = (struct in6_pktinfo *)invalp; 2761 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2762 ipp->ipp_addr = pkti->ipi6_addr; 2763 if (ipp->ipp_ifindex != 0) 2764 ipp->ipp_fields |= IPPF_IFINDEX; 2765 else 2766 ipp->ipp_fields &= ~IPPF_IFINDEX; 2767 if (!IN6_IS_ADDR_UNSPECIFIED( 2768 &ipp->ipp_addr)) 2769 ipp->ipp_fields |= IPPF_ADDR; 2770 else 2771 ipp->ipp_fields &= ~IPPF_ADDR; 2772 } 2773 if (sticky) { 2774 error = icmp_build_hdrs(icmp); 2775 if (error != 0) 2776 return (error); 2777 PASS_OPT_TO_IP(connp); 2778 } 2779 break; 2780 case IPV6_HOPLIMIT: 2781 /* This option can only be used as ancillary data. 
*/ 2782 if (sticky) 2783 return (EINVAL); 2784 if (inlen != 0 && inlen != sizeof (int)) 2785 return (EINVAL); 2786 if (checkonly) 2787 break; 2788 2789 if (inlen == 0) { 2790 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2791 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2792 } else { 2793 if (*i1 > 255 || *i1 < -1) 2794 return (EINVAL); 2795 if (*i1 == -1) 2796 ipp->ipp_hoplimit = 2797 is->is_ipv6_hoplimit; 2798 else 2799 ipp->ipp_hoplimit = *i1; 2800 ipp->ipp_fields |= IPPF_HOPLIMIT; 2801 } 2802 break; 2803 case IPV6_TCLASS: 2804 /* 2805 * IPV6_RECVTCLASS accepts -1 as use kernel default 2806 * and [0, 255] as the actualy traffic class. 2807 */ 2808 if (inlen != 0 && inlen != sizeof (int)) { 2809 return (EINVAL); 2810 } 2811 if (checkonly) 2812 break; 2813 2814 if (inlen == 0) { 2815 ipp->ipp_fields &= ~IPPF_TCLASS; 2816 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2817 } else { 2818 if (*i1 >= 256 || *i1 < -1) 2819 return (EINVAL); 2820 if (*i1 == -1) { 2821 ipp->ipp_tclass = 2822 IPV6_FLOW_TCLASS( 2823 IPV6_DEFAULT_VERS_AND_FLOW); 2824 } else { 2825 ipp->ipp_tclass = *i1; 2826 } 2827 ipp->ipp_fields |= IPPF_TCLASS; 2828 } 2829 if (sticky) { 2830 error = icmp_build_hdrs(icmp); 2831 if (error != 0) 2832 return (error); 2833 } 2834 break; 2835 case IPV6_NEXTHOP: 2836 /* 2837 * IP will verify that the nexthop is reachable 2838 * and fail for sticky options. 
2839 */ 2840 if (inlen != 0 && inlen != sizeof (sin6_t)) { 2841 return (EINVAL); 2842 } 2843 if (checkonly) 2844 break; 2845 2846 if (inlen == 0) { 2847 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2848 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2849 } else { 2850 sin6_t *sin6 = (sin6_t *)invalp; 2851 2852 if (sin6->sin6_family != AF_INET6) { 2853 return (EAFNOSUPPORT); 2854 } 2855 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 2856 return (EADDRNOTAVAIL); 2857 } 2858 ipp->ipp_nexthop = sin6->sin6_addr; 2859 if (!IN6_IS_ADDR_UNSPECIFIED( 2860 &ipp->ipp_nexthop)) 2861 ipp->ipp_fields |= IPPF_NEXTHOP; 2862 else 2863 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2864 } 2865 if (sticky) { 2866 error = icmp_build_hdrs(icmp); 2867 if (error != 0) 2868 return (error); 2869 PASS_OPT_TO_IP(connp); 2870 } 2871 break; 2872 case IPV6_HOPOPTS: { 2873 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2874 /* 2875 * Sanity checks - minimum size, size a multiple of 2876 * eight bytes, and matching size passed in. 2877 */ 2878 if (inlen != 0 && 2879 inlen != (8 * (hopts->ip6h_len + 1))) { 2880 return (EINVAL); 2881 } 2882 2883 if (checkonly) 2884 break; 2885 error = optcom_pkt_set(invalp, inlen, sticky, 2886 (uchar_t **)&ipp->ipp_hopopts, 2887 &ipp->ipp_hopoptslen, 2888 sticky ? icmp->icmp_label_len_v6 : 0); 2889 if (error != 0) 2890 return (error); 2891 if (ipp->ipp_hopoptslen == 0) { 2892 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2893 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2894 } else { 2895 ipp->ipp_fields |= IPPF_HOPOPTS; 2896 } 2897 if (sticky) { 2898 error = icmp_build_hdrs(icmp); 2899 if (error != 0) 2900 return (error); 2901 } 2902 break; 2903 } 2904 case IPV6_RTHDRDSTOPTS: { 2905 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2906 2907 /* 2908 * Sanity checks - minimum size, size a multiple of 2909 * eight bytes, and matching size passed in. 
2910 */ 2911 if (inlen != 0 && 2912 inlen != (8 * (dopts->ip6d_len + 1))) 2913 return (EINVAL); 2914 2915 if (checkonly) 2916 break; 2917 2918 if (inlen == 0) { 2919 if (sticky && 2920 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2921 kmem_free(ipp->ipp_rtdstopts, 2922 ipp->ipp_rtdstoptslen); 2923 ipp->ipp_rtdstopts = NULL; 2924 ipp->ipp_rtdstoptslen = 0; 2925 } 2926 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2927 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2928 } else { 2929 error = optcom_pkt_set(invalp, inlen, sticky, 2930 (uchar_t **)&ipp->ipp_rtdstopts, 2931 &ipp->ipp_rtdstoptslen, 0); 2932 if (error != 0) 2933 return (error); 2934 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2935 } 2936 if (sticky) { 2937 error = icmp_build_hdrs(icmp); 2938 if (error != 0) 2939 return (error); 2940 } 2941 break; 2942 } 2943 case IPV6_DSTOPTS: { 2944 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2945 2946 /* 2947 * Sanity checks - minimum size, size a multiple of 2948 * eight bytes, and matching size passed in. 2949 */ 2950 if (inlen != 0 && 2951 inlen != (8 * (dopts->ip6d_len + 1))) 2952 return (EINVAL); 2953 2954 if (checkonly) 2955 break; 2956 2957 if (inlen == 0) { 2958 if (sticky && 2959 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2960 kmem_free(ipp->ipp_dstopts, 2961 ipp->ipp_dstoptslen); 2962 ipp->ipp_dstopts = NULL; 2963 ipp->ipp_dstoptslen = 0; 2964 } 2965 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2966 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2967 } else { 2968 error = optcom_pkt_set(invalp, inlen, sticky, 2969 (uchar_t **)&ipp->ipp_dstopts, 2970 &ipp->ipp_dstoptslen, 0); 2971 if (error != 0) 2972 return (error); 2973 ipp->ipp_fields |= IPPF_DSTOPTS; 2974 } 2975 if (sticky) { 2976 error = icmp_build_hdrs(icmp); 2977 if (error != 0) 2978 return (error); 2979 } 2980 break; 2981 } 2982 case IPV6_RTHDR: { 2983 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2984 2985 /* 2986 * Sanity checks - minimum size, size a multiple of 2987 * eight bytes, and matching size passed in. 
2988 */ 2989 if (inlen != 0 && 2990 inlen != (8 * (rt->ip6r_len + 1))) 2991 return (EINVAL); 2992 2993 if (checkonly) 2994 break; 2995 2996 if (inlen == 0) { 2997 if (sticky && 2998 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2999 kmem_free(ipp->ipp_rthdr, 3000 ipp->ipp_rthdrlen); 3001 ipp->ipp_rthdr = NULL; 3002 ipp->ipp_rthdrlen = 0; 3003 } 3004 ipp->ipp_fields &= ~IPPF_RTHDR; 3005 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3006 } else { 3007 error = optcom_pkt_set(invalp, inlen, sticky, 3008 (uchar_t **)&ipp->ipp_rthdr, 3009 &ipp->ipp_rthdrlen, 0); 3010 if (error != 0) 3011 return (error); 3012 ipp->ipp_fields |= IPPF_RTHDR; 3013 } 3014 if (sticky) { 3015 error = icmp_build_hdrs(icmp); 3016 if (error != 0) 3017 return (error); 3018 } 3019 break; 3020 } 3021 3022 case IPV6_DONTFRAG: 3023 if (checkonly) 3024 break; 3025 3026 if (onoff) { 3027 ipp->ipp_fields |= IPPF_DONTFRAG; 3028 } else { 3029 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3030 } 3031 break; 3032 3033 case IPV6_USE_MIN_MTU: 3034 if (inlen != sizeof (int)) 3035 return (EINVAL); 3036 3037 if (*i1 < -1 || *i1 > 1) 3038 return (EINVAL); 3039 3040 if (checkonly) 3041 break; 3042 3043 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3044 ipp->ipp_use_min_mtu = *i1; 3045 break; 3046 3047 /* 3048 * This option can't be set. Its only returned via 3049 * getsockopt() or ancillary data. 3050 */ 3051 case IPV6_PATHMTU: 3052 return (EINVAL); 3053 3054 case IPV6_SEC_OPT: 3055 case IPV6_SRC_PREFERENCES: 3056 case IPV6_V6ONLY: 3057 /* Handled at IP level */ 3058 return (-EINVAL); 3059 default: 3060 *outlenp = 0; 3061 return (EINVAL); 3062 } 3063 break; 3064 } /* end IPPROTO_IPV6 */ 3065 3066 case IPPROTO_ICMPV6: 3067 /* 3068 * Only allow IPv6 option processing on IPv6 sockets. 
3069 */ 3070 if (icmp->icmp_family != AF_INET6) { 3071 *outlenp = 0; 3072 return (ENOPROTOOPT); 3073 } 3074 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 3075 *outlenp = 0; 3076 return (ENOPROTOOPT); 3077 } 3078 switch (name) { 3079 case ICMP6_FILTER: 3080 if (!checkonly) { 3081 if ((inlen != 0) && 3082 (inlen != sizeof (icmp6_filter_t))) 3083 return (EINVAL); 3084 3085 if (inlen == 0) { 3086 if (icmp->icmp_filter != NULL) { 3087 kmem_free(icmp->icmp_filter, 3088 sizeof (icmp6_filter_t)); 3089 icmp->icmp_filter = NULL; 3090 } 3091 } else { 3092 if (icmp->icmp_filter == NULL) { 3093 icmp->icmp_filter = kmem_alloc( 3094 sizeof (icmp6_filter_t), 3095 KM_NOSLEEP); 3096 if (icmp->icmp_filter == NULL) { 3097 *outlenp = 0; 3098 return (ENOBUFS); 3099 } 3100 } 3101 (void) bcopy(invalp, icmp->icmp_filter, 3102 inlen); 3103 } 3104 } 3105 break; 3106 3107 default: 3108 *outlenp = 0; 3109 return (EINVAL); 3110 } 3111 break; 3112 default: 3113 *outlenp = 0; 3114 return (EINVAL); 3115 } 3116 /* 3117 * Common case of OK return with outval same as inval. 3118 */ 3119 if (invalp != outvalp) { 3120 /* don't trust bcopy for identical src/dst */ 3121 (void) bcopy(invalp, outvalp, inlen); 3122 } 3123 *outlenp = inlen; 3124 return (0); 3125 } 3126 3127 /* This routine sets socket options. */ 3128 /* ARGSUSED */ 3129 int 3130 icmp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3131 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3132 void *thisdg_attrs, cred_t *cr) 3133 { 3134 boolean_t checkonly; 3135 int error; 3136 3137 error = 0; 3138 switch (optset_context) { 3139 case SETFN_OPTCOM_CHECKONLY: 3140 checkonly = B_TRUE; 3141 /* 3142 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3143 * inlen != 0 implies value supplied and 3144 * we have to "pretend" to set it. 3145 * inlen == 0 implies that there is no 3146 * value part in T_CHECK request and just validation 3147 * done elsewhere should be enough, we just return here. 
3148 */ 3149 if (inlen == 0) { 3150 *outlenp = 0; 3151 error = 0; 3152 goto done; 3153 } 3154 break; 3155 case SETFN_OPTCOM_NEGOTIATE: 3156 checkonly = B_FALSE; 3157 break; 3158 case SETFN_UD_NEGOTIATE: 3159 case SETFN_CONN_NEGOTIATE: 3160 checkonly = B_FALSE; 3161 /* 3162 * Negotiating local and "association-related" options 3163 * through T_UNITDATA_REQ. 3164 * 3165 * Following routine can filter out ones we do not 3166 * want to be "set" this way. 3167 */ 3168 if (!icmp_opt_allow_udr_set(level, name)) { 3169 *outlenp = 0; 3170 error = EINVAL; 3171 goto done; 3172 } 3173 break; 3174 default: 3175 /* 3176 * We should never get here 3177 */ 3178 *outlenp = 0; 3179 error = EINVAL; 3180 goto done; 3181 } 3182 3183 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3184 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3185 error = icmp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3186 outvalp, cr, thisdg_attrs, checkonly); 3187 3188 done: 3189 return (error); 3190 } 3191 3192 /* This routine sets socket options. */ 3193 /* ARGSUSED */ 3194 int 3195 icmp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3196 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3197 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3198 { 3199 conn_t *connp = Q_TO_CONN(q); 3200 icmp_t *icmp; 3201 int error; 3202 3203 icmp = connp->conn_icmp; 3204 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3205 error = icmp_opt_set(connp, optset_context, level, name, inlen, invalp, 3206 outlenp, outvalp, thisdg_attrs, cr); 3207 rw_exit(&icmp->icmp_rwlock); 3208 return (error); 3209 } 3210 3211 /* 3212 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 3213 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 3214 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 3215 * headers. 3216 * Returns failure if can't allocate memory. 
3217 */ 3218 static int 3219 icmp_build_hdrs(icmp_t *icmp) 3220 { 3221 icmp_stack_t *is = icmp->icmp_is; 3222 uchar_t *hdrs; 3223 uint_t hdrs_len; 3224 ip6_t *ip6h; 3225 ip6i_t *ip6i; 3226 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 3227 3228 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 3229 hdrs_len = ip_total_hdrs_len_v6(ipp); 3230 ASSERT(hdrs_len != 0); 3231 if (hdrs_len != icmp->icmp_sticky_hdrs_len) { 3232 /* Need to reallocate */ 3233 if (hdrs_len != 0) { 3234 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3235 if (hdrs == NULL) 3236 return (ENOMEM); 3237 } else { 3238 hdrs = NULL; 3239 } 3240 if (icmp->icmp_sticky_hdrs_len != 0) { 3241 kmem_free(icmp->icmp_sticky_hdrs, 3242 icmp->icmp_sticky_hdrs_len); 3243 } 3244 icmp->icmp_sticky_hdrs = hdrs; 3245 icmp->icmp_sticky_hdrs_len = hdrs_len; 3246 } 3247 ip_build_hdrs_v6(icmp->icmp_sticky_hdrs, 3248 icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto); 3249 3250 /* Set header fields not in ipp */ 3251 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3252 ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs; 3253 ip6h = (ip6_t *)&ip6i[1]; 3254 3255 if (ipp->ipp_fields & IPPF_RAW_CKSUM) { 3256 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 3257 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 3258 } 3259 if (ipp->ipp_fields & IPPF_NO_CKSUM) { 3260 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 3261 } 3262 } else { 3263 ip6h = (ip6_t *)icmp->icmp_sticky_hdrs; 3264 } 3265 3266 if (!(ipp->ipp_fields & IPPF_ADDR)) 3267 ip6h->ip6_src = icmp->icmp_v6src; 3268 3269 /* Try to get everything in a single mblk */ 3270 if (hdrs_len > icmp->icmp_max_hdr_len) { 3271 icmp->icmp_max_hdr_len = hdrs_len; 3272 rw_exit(&icmp->icmp_rwlock); 3273 (void) proto_set_tx_wroff(icmp->icmp_connp->conn_rq, 3274 icmp->icmp_connp, 3275 icmp->icmp_max_hdr_len + is->is_wroff_extra); 3276 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3277 } 3278 return (0); 3279 } 3280 3281 /* 3282 * This routine retrieves the value of an ND variable in a icmpparam_t 3283 * structure. 
It is called through nd_getset when a user reads the 3284 * variable. 3285 */ 3286 /* ARGSUSED */ 3287 static int 3288 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3289 { 3290 icmpparam_t *icmppa = (icmpparam_t *)cp; 3291 3292 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 3293 return (0); 3294 } 3295 3296 /* 3297 * Walk through the param array specified registering each element with the 3298 * named dispatch (ND) handler. 3299 */ 3300 static boolean_t 3301 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) 3302 { 3303 for (; cnt-- > 0; icmppa++) { 3304 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 3305 if (!nd_load(ndp, icmppa->icmp_param_name, 3306 icmp_param_get, icmp_param_set, 3307 (caddr_t)icmppa)) { 3308 nd_free(ndp); 3309 return (B_FALSE); 3310 } 3311 } 3312 } 3313 if (!nd_load(ndp, "icmp_status", icmp_status_report, NULL, 3314 NULL)) { 3315 nd_free(ndp); 3316 return (B_FALSE); 3317 } 3318 return (B_TRUE); 3319 } 3320 3321 /* This routine sets an ND variable in a icmpparam_t structure. */ 3322 /* ARGSUSED */ 3323 static int 3324 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3325 { 3326 long new_value; 3327 icmpparam_t *icmppa = (icmpparam_t *)cp; 3328 3329 /* 3330 * Fail the request if the new value does not lie within the 3331 * required bounds. 
3332 */ 3333 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3334 new_value < icmppa->icmp_param_min || 3335 new_value > icmppa->icmp_param_max) { 3336 return (EINVAL); 3337 } 3338 /* Set the new value */ 3339 icmppa->icmp_param_value = new_value; 3340 return (0); 3341 } 3342 static void 3343 icmp_queue_fallback(icmp_t *icmp, mblk_t *mp) 3344 { 3345 ASSERT(MUTEX_HELD(&icmp->icmp_recv_lock)); 3346 if (IPCL_IS_NONSTR(icmp->icmp_connp)) { 3347 /* 3348 * fallback has started but messages have not been moved yet 3349 */ 3350 if (icmp->icmp_fallback_queue_head == NULL) { 3351 ASSERT(icmp->icmp_fallback_queue_tail == NULL); 3352 icmp->icmp_fallback_queue_head = mp; 3353 icmp->icmp_fallback_queue_tail = mp; 3354 } else { 3355 ASSERT(icmp->icmp_fallback_queue_tail != NULL); 3356 icmp->icmp_fallback_queue_tail->b_next = mp; 3357 icmp->icmp_fallback_queue_tail = mp; 3358 } 3359 mutex_exit(&icmp->icmp_recv_lock); 3360 } else { 3361 /* 3362 * no more fallbacks possible, ok to drop lock. 3363 */ 3364 mutex_exit(&icmp->icmp_recv_lock); 3365 putnext(icmp->icmp_connp->conn_rq, mp); 3366 } 3367 } 3368 3369 /*ARGSUSED2*/ 3370 static void 3371 icmp_input(void *arg1, mblk_t *mp, void *arg2) 3372 { 3373 conn_t *connp = (conn_t *)arg1; 3374 struct T_unitdata_ind *tudi; 3375 uchar_t *rptr; 3376 icmp_t *icmp; 3377 icmp_stack_t *is; 3378 sin_t *sin; 3379 sin6_t *sin6; 3380 ip6_t *ip6h; 3381 ip6i_t *ip6i; 3382 mblk_t *mp1; 3383 int hdr_len; 3384 ipha_t *ipha; 3385 int udi_size; /* Size of T_unitdata_ind */ 3386 uint_t ipvers; 3387 ip6_pkt_t ipp; 3388 uint8_t nexthdr; 3389 ip_pktinfo_t *pinfo = NULL; 3390 mblk_t *options_mp = NULL; 3391 uint_t icmp_opt = 0; 3392 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 3393 uint_t hopstrip; 3394 int error; 3395 3396 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 3397 3398 icmp = connp->conn_icmp; 3399 is = icmp->icmp_is; 3400 rptr = mp->b_rptr; 3401 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3402 ASSERT(OK_32PTR(rptr)); 3403 3404 /* 3405 * IP 
should have prepended the options data in an M_CTL 3406 * Check M_CTL "type" to make sure are not here bcos of 3407 * a valid ICMP message 3408 */ 3409 if (DB_TYPE(mp) == M_CTL) { 3410 /* 3411 * FIXME: does IP still do this? 3412 * IP sends up the IPSEC_IN message for handling IPSEC 3413 * policy at the TCP level. We don't need it here. 3414 */ 3415 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 3416 mp1 = mp->b_cont; 3417 freeb(mp); 3418 mp = mp1; 3419 rptr = mp->b_rptr; 3420 } else if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3421 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3422 IN_PKTINFO) { 3423 /* 3424 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3425 * has been prepended to the packet by IP. We need to 3426 * extract the mblk and adjust the rptr 3427 */ 3428 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3429 options_mp = mp; 3430 mp = mp->b_cont; 3431 rptr = mp->b_rptr; 3432 } else { 3433 /* 3434 * ICMP messages. 3435 */ 3436 icmp_icmp_error(connp, mp); 3437 return; 3438 } 3439 } 3440 3441 /* 3442 * Discard message if it is misaligned or smaller than the IP header. 3443 */ 3444 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3445 freemsg(mp); 3446 if (options_mp != NULL) 3447 freeb(options_mp); 3448 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3449 return; 3450 } 3451 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3452 3453 /* Handle M_DATA messages containing IP packets messages */ 3454 if (ipvers == IPV4_VERSION) { 3455 /* 3456 * Special case where IP attaches 3457 * the IRE needs to be handled so that we don't send up 3458 * IRE to the user land. 
3459 */ 3460 ipha = (ipha_t *)rptr; 3461 hdr_len = IPH_HDR_LENGTH(ipha); 3462 3463 if (ipha->ipha_protocol == IPPROTO_TCP) { 3464 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3465 3466 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3467 TH_SYN) && mp->b_cont != NULL) { 3468 mp1 = mp->b_cont; 3469 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3470 freeb(mp1); 3471 mp->b_cont = NULL; 3472 } 3473 } 3474 } 3475 if (is->is_bsd_compat) { 3476 ushort_t len; 3477 len = ntohs(ipha->ipha_length); 3478 3479 if (mp->b_datap->db_ref > 1) { 3480 /* 3481 * Allocate a new IP header so that we can 3482 * modify ipha_length. 3483 */ 3484 mblk_t *mp1; 3485 3486 mp1 = allocb(hdr_len, BPRI_MED); 3487 if (!mp1) { 3488 freemsg(mp); 3489 if (options_mp != NULL) 3490 freeb(options_mp); 3491 BUMP_MIB(&is->is_rawip_mib, 3492 rawipInErrors); 3493 return; 3494 } 3495 bcopy(rptr, mp1->b_rptr, hdr_len); 3496 mp->b_rptr = rptr + hdr_len; 3497 rptr = mp1->b_rptr; 3498 ipha = (ipha_t *)rptr; 3499 mp1->b_cont = mp; 3500 mp1->b_wptr = rptr + hdr_len; 3501 mp = mp1; 3502 } 3503 len -= hdr_len; 3504 ipha->ipha_length = htons(len); 3505 } 3506 } 3507 3508 /* 3509 * This is the inbound data path. Packets are passed upstream as 3510 * T_UNITDATA_IND messages with full IP headers still attached. 3511 */ 3512 if (icmp->icmp_family == AF_INET) { 3513 ASSERT(ipvers == IPV4_VERSION); 3514 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3515 if (icmp->icmp_recvif && (pinfo != NULL) && 3516 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3517 udi_size += sizeof (struct T_opthdr) + 3518 sizeof (uint_t); 3519 } 3520 3521 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3522 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3523 udi_size += sizeof (struct T_opthdr) + 3524 sizeof (struct in_pktinfo); 3525 } 3526 3527 /* 3528 * If SO_TIMESTAMP is set allocate the appropriate sized 3529 * buffer. 
Since gethrestime() expects a pointer aligned 3530 * argument, we allocate space necessary for extra 3531 * alignment (even though it might not be used). 3532 */ 3533 if (icmp->icmp_timestamp) { 3534 udi_size += sizeof (struct T_opthdr) + 3535 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3536 } 3537 mp1 = allocb(udi_size, BPRI_MED); 3538 if (mp1 == NULL) { 3539 freemsg(mp); 3540 if (options_mp != NULL) 3541 freeb(options_mp); 3542 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3543 return; 3544 } 3545 mp1->b_cont = mp; 3546 mp = mp1; 3547 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3548 mp->b_datap->db_type = M_PROTO; 3549 mp->b_wptr = (uchar_t *)tudi + udi_size; 3550 tudi->PRIM_type = T_UNITDATA_IND; 3551 tudi->SRC_length = sizeof (sin_t); 3552 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3553 sin = (sin_t *)&tudi[1]; 3554 *sin = sin_null; 3555 sin->sin_family = AF_INET; 3556 sin->sin_addr.s_addr = ipha->ipha_src; 3557 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3558 sizeof (sin_t); 3559 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3560 tudi->OPT_length = udi_size; 3561 3562 /* 3563 * Add options if IP_RECVIF is set 3564 */ 3565 if (udi_size != 0) { 3566 char *dstopt; 3567 3568 dstopt = (char *)&sin[1]; 3569 if (icmp->icmp_recvif && (pinfo != NULL) && 3570 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3571 3572 struct T_opthdr *toh; 3573 uint_t *dstptr; 3574 3575 toh = (struct T_opthdr *)dstopt; 3576 toh->level = IPPROTO_IP; 3577 toh->name = IP_RECVIF; 3578 toh->len = sizeof (struct T_opthdr) + 3579 sizeof (uint_t); 3580 toh->status = 0; 3581 dstopt += sizeof (struct T_opthdr); 3582 dstptr = (uint_t *)dstopt; 3583 *dstptr = pinfo->ip_pkt_ifindex; 3584 dstopt += sizeof (uint_t); 3585 udi_size -= toh->len; 3586 } 3587 if (icmp->icmp_timestamp) { 3588 struct T_opthdr *toh; 3589 3590 toh = (struct T_opthdr *)dstopt; 3591 toh->level = SOL_SOCKET; 3592 toh->name = SCM_TIMESTAMP; 3593 toh->len = sizeof (struct T_opthdr) + 3594 sizeof (timestruc_t) + 
_POINTER_ALIGNMENT; 3595 toh->status = 0; 3596 dstopt += sizeof (struct T_opthdr); 3597 /* Align for gethrestime() */ 3598 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 3599 sizeof (intptr_t)); 3600 gethrestime((timestruc_t *)dstopt); 3601 dstopt = (char *)toh + toh->len; 3602 udi_size -= toh->len; 3603 } 3604 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3605 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3606 struct T_opthdr *toh; 3607 struct in_pktinfo *pktinfop; 3608 3609 toh = (struct T_opthdr *)dstopt; 3610 toh->level = IPPROTO_IP; 3611 toh->name = IP_PKTINFO; 3612 toh->len = sizeof (struct T_opthdr) + 3613 sizeof (in_pktinfo_t); 3614 toh->status = 0; 3615 dstopt += sizeof (struct T_opthdr); 3616 pktinfop = (struct in_pktinfo *)dstopt; 3617 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3618 pktinfop->ipi_spec_dst = 3619 pinfo->ip_pkt_match_addr; 3620 3621 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3622 3623 dstopt += sizeof (struct in_pktinfo); 3624 udi_size -= toh->len; 3625 } 3626 3627 /* Consumed all of allocated space */ 3628 ASSERT(udi_size == 0); 3629 } 3630 3631 if (options_mp != NULL) 3632 freeb(options_mp); 3633 3634 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3635 goto deliver; 3636 } 3637 3638 /* 3639 * We don't need options_mp in the IPv6 path. 3640 */ 3641 if (options_mp != NULL) { 3642 freeb(options_mp); 3643 options_mp = NULL; 3644 } 3645 3646 /* 3647 * Discard message if it is smaller than the IPv6 header 3648 * or if the header is malformed. 3649 */ 3650 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3651 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3652 icmp->icmp_family != AF_INET6) { 3653 freemsg(mp); 3654 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3655 return; 3656 } 3657 3658 /* Initialize */ 3659 ipp.ipp_fields = 0; 3660 hopstrip = 0; 3661 3662 ip6h = (ip6_t *)rptr; 3663 /* 3664 * Call on ip_find_hdr_v6 which gets the total hdr len 3665 * as well as individual lenghts of ext hdrs (and ptrs to 3666 * them). 
3667 */ 3668 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3669 /* Look for ifindex information */ 3670 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3671 ip6i = (ip6i_t *)ip6h; 3672 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3673 ASSERT(ip6i->ip6i_ifindex != 0); 3674 ipp.ipp_fields |= IPPF_IFINDEX; 3675 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3676 } 3677 rptr = (uchar_t *)&ip6i[1]; 3678 mp->b_rptr = rptr; 3679 if (rptr == mp->b_wptr) { 3680 mp1 = mp->b_cont; 3681 freeb(mp); 3682 mp = mp1; 3683 rptr = mp->b_rptr; 3684 } 3685 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3686 ip6h = (ip6_t *)rptr; 3687 } 3688 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3689 3690 /* 3691 * We need to lie a bit to the user because users inside 3692 * labeled compartments should not see their own labels. We 3693 * assume that in all other respects IP has checked the label, 3694 * and that the label is always first among the options. (If 3695 * it's not first, then this code won't see it, and the option 3696 * will be passed along to the user.) 3697 * 3698 * If we had multilevel ICMP sockets, then the following code 3699 * should be skipped for them to allow the user to see the 3700 * label. 3701 * 3702 * Alignment restrictions in the definition of IP options 3703 * (namely, the requirement that the 4-octet DOI goes on a 3704 * 4-octet boundary) mean that we know exactly where the option 3705 * should start, but we're lenient for other hosts. 3706 * 3707 * Note that there are no multilevel ICMP or raw IP sockets 3708 * yet, thus nobody ever sees the IP6OPT_LS option. 
3709 */ 3710 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3711 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3712 const uchar_t *ucp = 3713 (const uchar_t *)ipp.ipp_hopopts + 2; 3714 int remlen = ipp.ipp_hopoptslen - 2; 3715 3716 while (remlen > 0) { 3717 if (*ucp == IP6OPT_PAD1) { 3718 remlen--; 3719 ucp++; 3720 } else if (*ucp == IP6OPT_PADN) { 3721 remlen -= ucp[1] + 2; 3722 ucp += ucp[1] + 2; 3723 } else if (*ucp == ip6opt_ls) { 3724 hopstrip = (ucp - 3725 (const uchar_t *)ipp.ipp_hopopts) + 3726 ucp[1] + 2; 3727 hopstrip = (hopstrip + 7) & ~7; 3728 break; 3729 } else { 3730 /* label option must be first */ 3731 break; 3732 } 3733 } 3734 } 3735 } else { 3736 hdr_len = IPV6_HDR_LEN; 3737 ip6i = NULL; 3738 nexthdr = ip6h->ip6_nxt; 3739 } 3740 /* 3741 * One special case where IP attaches the IRE needs to 3742 * be handled so that we don't send up IRE to the user land. 3743 */ 3744 if (nexthdr == IPPROTO_TCP) { 3745 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3746 3747 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3748 mp->b_cont != NULL) { 3749 mp1 = mp->b_cont; 3750 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3751 freeb(mp1); 3752 mp->b_cont = NULL; 3753 } 3754 } 3755 } 3756 /* 3757 * Check a filter for ICMPv6 types if needed. 3758 * Verify raw checksums if needed. 
3759 */ 3760 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3761 if (icmp->icmp_filter != NULL) { 3762 int type; 3763 3764 /* Assumes that IP has done the pullupmsg */ 3765 type = mp->b_rptr[hdr_len]; 3766 3767 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3768 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3769 freemsg(mp); 3770 return; 3771 } 3772 } else { 3773 /* Checksum */ 3774 uint16_t *up; 3775 uint32_t sum; 3776 int remlen; 3777 3778 up = (uint16_t *)&ip6h->ip6_src; 3779 3780 remlen = msgdsize(mp) - hdr_len; 3781 sum = htons(icmp->icmp_proto + remlen) 3782 + up[0] + up[1] + up[2] + up[3] 3783 + up[4] + up[5] + up[6] + up[7] 3784 + up[8] + up[9] + up[10] + up[11] 3785 + up[12] + up[13] + up[14] + up[15]; 3786 sum = (sum & 0xffff) + (sum >> 16); 3787 sum = IP_CSUM(mp, hdr_len, sum); 3788 if (sum != 0) { 3789 /* IPv6 RAW checksum failed */ 3790 ip0dbg(("icmp_rput: RAW checksum " 3791 "failed %x\n", sum)); 3792 freemsg(mp); 3793 BUMP_MIB(&is->is_rawip_mib, 3794 rawipInCksumErrs); 3795 return; 3796 } 3797 } 3798 } 3799 /* Skip all the IPv6 headers per API */ 3800 mp->b_rptr += hdr_len; 3801 3802 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3803 3804 /* 3805 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3806 * maintain state information, instead of relying on icmp_t 3807 * structure, since there arent any locks protecting these members 3808 * and there is a window where there might be a race between a 3809 * thread setting options on the write side and a thread reading 3810 * these options on the read size. 
3811 */ 3812 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3813 IPPF_RTHDR|IPPF_IFINDEX)) { 3814 if (icmp->icmp_ipv6_recvhopopts && 3815 (ipp.ipp_fields & IPPF_HOPOPTS) && 3816 ipp.ipp_hopoptslen > hopstrip) { 3817 udi_size += sizeof (struct T_opthdr) + 3818 ipp.ipp_hopoptslen - hopstrip; 3819 icmp_opt |= IPPF_HOPOPTS; 3820 } 3821 if ((icmp->icmp_ipv6_recvdstopts || 3822 icmp->icmp_old_ipv6_recvdstopts) && 3823 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3824 udi_size += sizeof (struct T_opthdr) + 3825 ipp.ipp_dstoptslen; 3826 icmp_opt |= IPPF_DSTOPTS; 3827 } 3828 if (((icmp->icmp_ipv6_recvdstopts && 3829 icmp->icmp_ipv6_recvrthdr && 3830 (ipp.ipp_fields & IPPF_RTHDR)) || 3831 icmp->icmp_ipv6_recvrtdstopts) && 3832 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3833 udi_size += sizeof (struct T_opthdr) + 3834 ipp.ipp_rtdstoptslen; 3835 icmp_opt |= IPPF_RTDSTOPTS; 3836 } 3837 if (icmp->icmp_ipv6_recvrthdr && 3838 (ipp.ipp_fields & IPPF_RTHDR)) { 3839 udi_size += sizeof (struct T_opthdr) + 3840 ipp.ipp_rthdrlen; 3841 icmp_opt |= IPPF_RTHDR; 3842 } 3843 if (icmp->icmp_ip_recvpktinfo && 3844 (ipp.ipp_fields & IPPF_IFINDEX)) { 3845 udi_size += sizeof (struct T_opthdr) + 3846 sizeof (struct in6_pktinfo); 3847 icmp_opt |= IPPF_IFINDEX; 3848 } 3849 } 3850 if (icmp->icmp_ipv6_recvhoplimit) { 3851 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3852 icmp_ipv6_recvhoplimit = B_TRUE; 3853 } 3854 3855 if (icmp->icmp_ipv6_recvtclass) 3856 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3857 3858 /* 3859 * If SO_TIMESTAMP is set allocate the appropriate sized 3860 * buffer. Since gethrestime() expects a pointer aligned 3861 * argument, we allocate space necessary for extra 3862 * alignment (even though it might not be used). 
3863 */ 3864 if (icmp->icmp_timestamp) { 3865 udi_size += sizeof (struct T_opthdr) + 3866 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3867 } 3868 3869 mp1 = allocb(udi_size, BPRI_MED); 3870 if (mp1 == NULL) { 3871 freemsg(mp); 3872 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3873 return; 3874 } 3875 mp1->b_cont = mp; 3876 mp = mp1; 3877 mp->b_datap->db_type = M_PROTO; 3878 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3879 mp->b_wptr = (uchar_t *)tudi + udi_size; 3880 tudi->PRIM_type = T_UNITDATA_IND; 3881 tudi->SRC_length = sizeof (sin6_t); 3882 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3883 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3884 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3885 tudi->OPT_length = udi_size; 3886 sin6 = (sin6_t *)&tudi[1]; 3887 sin6->sin6_port = 0; 3888 sin6->sin6_family = AF_INET6; 3889 3890 sin6->sin6_addr = ip6h->ip6_src; 3891 /* No sin6_flowinfo per API */ 3892 sin6->sin6_flowinfo = 0; 3893 /* For link-scope source pass up scope id */ 3894 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3895 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3896 sin6->sin6_scope_id = ipp.ipp_ifindex; 3897 else 3898 sin6->sin6_scope_id = 0; 3899 3900 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3901 icmp->icmp_zoneid, is->is_netstack); 3902 3903 if (udi_size != 0) { 3904 uchar_t *dstopt; 3905 3906 dstopt = (uchar_t *)&sin6[1]; 3907 if (icmp_opt & IPPF_IFINDEX) { 3908 struct T_opthdr *toh; 3909 struct in6_pktinfo *pkti; 3910 3911 toh = (struct T_opthdr *)dstopt; 3912 toh->level = IPPROTO_IPV6; 3913 toh->name = IPV6_PKTINFO; 3914 toh->len = sizeof (struct T_opthdr) + 3915 sizeof (*pkti); 3916 toh->status = 0; 3917 dstopt += sizeof (struct T_opthdr); 3918 pkti = (struct in6_pktinfo *)dstopt; 3919 pkti->ipi6_addr = ip6h->ip6_dst; 3920 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3921 dstopt += sizeof (*pkti); 3922 udi_size -= toh->len; 3923 } 3924 if (icmp_ipv6_recvhoplimit) { 3925 struct T_opthdr *toh; 3926 3927 toh = 
(struct T_opthdr *)dstopt; 3928 toh->level = IPPROTO_IPV6; 3929 toh->name = IPV6_HOPLIMIT; 3930 toh->len = sizeof (struct T_opthdr) + 3931 sizeof (uint_t); 3932 toh->status = 0; 3933 dstopt += sizeof (struct T_opthdr); 3934 *(uint_t *)dstopt = ip6h->ip6_hops; 3935 dstopt += sizeof (uint_t); 3936 udi_size -= toh->len; 3937 } 3938 if (icmp->icmp_ipv6_recvtclass) { 3939 struct T_opthdr *toh; 3940 3941 toh = (struct T_opthdr *)dstopt; 3942 toh->level = IPPROTO_IPV6; 3943 toh->name = IPV6_TCLASS; 3944 toh->len = sizeof (struct T_opthdr) + 3945 sizeof (uint_t); 3946 toh->status = 0; 3947 dstopt += sizeof (struct T_opthdr); 3948 *(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3949 dstopt += sizeof (uint_t); 3950 udi_size -= toh->len; 3951 } 3952 if (icmp->icmp_timestamp) { 3953 struct T_opthdr *toh; 3954 3955 toh = (struct T_opthdr *)dstopt; 3956 toh->level = SOL_SOCKET; 3957 toh->name = SCM_TIMESTAMP; 3958 toh->len = sizeof (struct T_opthdr) + 3959 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3960 toh->status = 0; 3961 dstopt += sizeof (struct T_opthdr); 3962 /* Align for gethrestime() */ 3963 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 3964 sizeof (intptr_t)); 3965 gethrestime((timestruc_t *)dstopt); 3966 dstopt = (uchar_t *)toh + toh->len; 3967 udi_size -= toh->len; 3968 } 3969 3970 if (icmp_opt & IPPF_HOPOPTS) { 3971 struct T_opthdr *toh; 3972 3973 toh = (struct T_opthdr *)dstopt; 3974 toh->level = IPPROTO_IPV6; 3975 toh->name = IPV6_HOPOPTS; 3976 toh->len = sizeof (struct T_opthdr) + 3977 ipp.ipp_hopoptslen - hopstrip; 3978 toh->status = 0; 3979 dstopt += sizeof (struct T_opthdr); 3980 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 3981 ipp.ipp_hopoptslen - hopstrip); 3982 if (hopstrip > 0) { 3983 /* copy next header value and fake length */ 3984 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 3985 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 3986 hopstrip / 8; 3987 } 3988 dstopt += ipp.ipp_hopoptslen - hopstrip; 3989 udi_size -= toh->len; 3990 } 3991 if 
(icmp_opt & IPPF_RTDSTOPTS) { 3992 struct T_opthdr *toh; 3993 3994 toh = (struct T_opthdr *)dstopt; 3995 toh->level = IPPROTO_IPV6; 3996 toh->name = IPV6_DSTOPTS; 3997 toh->len = sizeof (struct T_opthdr) + 3998 ipp.ipp_rtdstoptslen; 3999 toh->status = 0; 4000 dstopt += sizeof (struct T_opthdr); 4001 bcopy(ipp.ipp_rtdstopts, dstopt, 4002 ipp.ipp_rtdstoptslen); 4003 dstopt += ipp.ipp_rtdstoptslen; 4004 udi_size -= toh->len; 4005 } 4006 if (icmp_opt & IPPF_RTHDR) { 4007 struct T_opthdr *toh; 4008 4009 toh = (struct T_opthdr *)dstopt; 4010 toh->level = IPPROTO_IPV6; 4011 toh->name = IPV6_RTHDR; 4012 toh->len = sizeof (struct T_opthdr) + 4013 ipp.ipp_rthdrlen; 4014 toh->status = 0; 4015 dstopt += sizeof (struct T_opthdr); 4016 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4017 dstopt += ipp.ipp_rthdrlen; 4018 udi_size -= toh->len; 4019 } 4020 if (icmp_opt & IPPF_DSTOPTS) { 4021 struct T_opthdr *toh; 4022 4023 toh = (struct T_opthdr *)dstopt; 4024 toh->level = IPPROTO_IPV6; 4025 toh->name = IPV6_DSTOPTS; 4026 toh->len = sizeof (struct T_opthdr) + 4027 ipp.ipp_dstoptslen; 4028 toh->status = 0; 4029 dstopt += sizeof (struct T_opthdr); 4030 bcopy(ipp.ipp_dstopts, dstopt, 4031 ipp.ipp_dstoptslen); 4032 dstopt += ipp.ipp_dstoptslen; 4033 udi_size -= toh->len; 4034 } 4035 /* Consumed all of allocated space */ 4036 ASSERT(udi_size == 0); 4037 } 4038 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 4039 4040 deliver: 4041 if (IPCL_IS_NONSTR(connp)) { 4042 if ((*connp->conn_upcalls->su_recv) 4043 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 4044 NULL) < 0) { 4045 mutex_enter(&icmp->icmp_recv_lock); 4046 if (error == ENOSPC) { 4047 /* 4048 * let's confirm while holding the lock 4049 */ 4050 if ((*connp->conn_upcalls->su_recv) 4051 (connp->conn_upper_handle, NULL, 0, 0, 4052 &error, NULL) < 0) { 4053 if (error == ENOSPC) { 4054 connp->conn_flow_cntrld = 4055 B_TRUE; 4056 } else { 4057 ASSERT(error == EOPNOTSUPP); 4058 } 4059 } 4060 mutex_exit(&icmp->icmp_recv_lock); 4061 
} else { 4062 ASSERT(error == EOPNOTSUPP); 4063 icmp_queue_fallback(icmp, mp); 4064 } 4065 } 4066 } else { 4067 putnext(connp->conn_rq, mp); 4068 } 4069 ASSERT(MUTEX_NOT_HELD(&icmp->icmp_recv_lock)); 4070 } 4071 4072 /* 4073 * return SNMP stuff in buffer in mpdata 4074 */ 4075 mblk_t * 4076 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 4077 { 4078 mblk_t *mpdata; 4079 struct opthdr *optp; 4080 conn_t *connp = Q_TO_CONN(q); 4081 icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 4082 mblk_t *mp2ctl; 4083 4084 /* 4085 * make a copy of the original message 4086 */ 4087 mp2ctl = copymsg(mpctl); 4088 4089 if (mpctl == NULL || 4090 (mpdata = mpctl->b_cont) == NULL) { 4091 freemsg(mpctl); 4092 freemsg(mp2ctl); 4093 return (0); 4094 } 4095 4096 /* fixed length structure for IPv4 and IPv6 counters */ 4097 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4098 optp->level = EXPER_RAWIP; 4099 optp->name = 0; 4100 (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, 4101 sizeof (is->is_rawip_mib)); 4102 optp->len = msgdsize(mpdata); 4103 qreply(q, mpctl); 4104 4105 return (mp2ctl); 4106 } 4107 4108 /* 4109 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 4110 * TODO: If this ever actually tries to set anything, it needs to be 4111 * to do the appropriate locking. 
4112 */ 4113 /* ARGSUSED */ 4114 int 4115 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4116 uchar_t *ptr, int len) 4117 { 4118 switch (level) { 4119 case EXPER_RAWIP: 4120 return (0); 4121 default: 4122 return (1); 4123 } 4124 } 4125 4126 /* Report for ndd "icmp_status" */ 4127 /* ARGSUSED */ 4128 static int 4129 icmp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4130 { 4131 conn_t *connp; 4132 ip_stack_t *ipst; 4133 char laddrbuf[INET6_ADDRSTRLEN]; 4134 char faddrbuf[INET6_ADDRSTRLEN]; 4135 int i; 4136 4137 (void) mi_mpprintf(mp, 4138 "RAWIP " MI_COL_HDRPAD_STR 4139 /* 01234567[89ABCDEF] */ 4140 " src addr dest addr state"); 4141 /* xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx UNBOUND */ 4142 4143 connp = Q_TO_CONN(q); 4144 ipst = connp->conn_netstack->netstack_ip; 4145 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4146 connf_t *connfp; 4147 char *state; 4148 4149 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4150 connp = NULL; 4151 4152 while ((connp = ipcl_get_next_conn(connfp, connp, 4153 IPCL_RAWIPCONN)) != NULL) { 4154 icmp_t *icmp; 4155 4156 mutex_enter(&(connp)->conn_lock); 4157 icmp = connp->conn_icmp; 4158 4159 if (icmp->icmp_state == TS_UNBND) 4160 state = "UNBOUND"; 4161 else if (icmp->icmp_state == TS_IDLE) 4162 state = "IDLE"; 4163 else if (icmp->icmp_state == TS_DATA_XFER) 4164 state = "CONNECTED"; 4165 else 4166 state = "UnkState"; 4167 4168 (void) mi_mpprintf(mp, MI_COL_PTRFMT_STR "%s %s %s", 4169 (void *)icmp, 4170 inet_ntop(AF_INET6, &icmp->icmp_v6dst.sin6_addr, 4171 faddrbuf, 4172 sizeof (faddrbuf)), 4173 inet_ntop(AF_INET6, &icmp->icmp_v6src, laddrbuf, 4174 sizeof (laddrbuf)), 4175 state); 4176 mutex_exit(&(connp)->conn_lock); 4177 } 4178 } 4179 return (0); 4180 } 4181 4182 /* 4183 * This routine creates a T_UDERROR_IND message and passes it upstream. 4184 * The address and options are copied from the T_UNITDATA_REQ message 4185 * passed in mp. This message is freed. 
4186 */ 4187 static void 4188 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 4189 { 4190 mblk_t *mp1; 4191 uchar_t *rptr = mp->b_rptr; 4192 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 4193 4194 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 4195 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 4196 tudr->OPT_length, err); 4197 if (mp1) 4198 qreply(q, mp1); 4199 freemsg(mp); 4200 } 4201 4202 4203 static int 4204 rawip_do_unbind(conn_t *connp) 4205 { 4206 icmp_t *icmp = connp->conn_icmp; 4207 4208 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4209 /* If a bind has not been done, we can't unbind. */ 4210 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 4211 rw_exit(&icmp->icmp_rwlock); 4212 return (-TOUTSTATE); 4213 } 4214 icmp->icmp_pending_op = T_UNBIND_REQ; 4215 rw_exit(&icmp->icmp_rwlock); 4216 4217 /* 4218 * Call ip to unbind 4219 */ 4220 4221 ip_unbind(connp); 4222 4223 /* 4224 * Once we're unbound from IP, the pending operation may be cleared 4225 * here. 4226 */ 4227 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4228 V6_SET_ZERO(icmp->icmp_v6src); 4229 V6_SET_ZERO(icmp->icmp_bound_v6src); 4230 icmp->icmp_pending_op = -1; 4231 icmp->icmp_state = TS_UNBND; 4232 if (icmp->icmp_family == AF_INET6) 4233 (void) icmp_build_hdrs(icmp); 4234 rw_exit(&icmp->icmp_rwlock); 4235 return (0); 4236 } 4237 4238 /* 4239 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 4240 * After some error checking, the message is passed downstream to ip. 
4241 */ 4242 static void 4243 icmp_tpi_unbind(queue_t *q, mblk_t *mp) 4244 { 4245 conn_t *connp = Q_TO_CONN(q); 4246 int error; 4247 4248 ASSERT(mp->b_cont == NULL); 4249 error = rawip_do_unbind(connp); 4250 if (error) { 4251 if (error < 0) { 4252 icmp_err_ack(q, mp, -error, 0); 4253 } else { 4254 icmp_err_ack(q, mp, 0, error); 4255 } 4256 return; 4257 } 4258 4259 /* 4260 * Convert mp into a T_OK_ACK 4261 */ 4262 4263 mp = mi_tpi_ok_ack_alloc(mp); 4264 4265 /* 4266 * should not happen in practice... T_OK_ACK is smaller than the 4267 * original message. 4268 */ 4269 ASSERT(mp != NULL); 4270 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4271 qreply(q, mp); 4272 } 4273 4274 4275 /* 4276 * Process IPv4 packets that already include an IP header. 4277 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 4278 * IPPROTO_IGMP). 4279 */ 4280 static int 4281 icmp_wput_hdrincl(queue_t *q, conn_t *connp, mblk_t *mp, icmp_t *icmp, 4282 ip4_pkt_t *pktinfop) 4283 { 4284 icmp_stack_t *is = icmp->icmp_is; 4285 ipha_t *ipha; 4286 int ip_hdr_length; 4287 int tp_hdr_len; 4288 mblk_t *mp1; 4289 uint_t pkt_len; 4290 ip_opt_info_t optinfo; 4291 4292 optinfo.ip_opt_flags = 0; 4293 optinfo.ip_opt_ill_index = 0; 4294 ipha = (ipha_t *)mp->b_rptr; 4295 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4296 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 4297 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4298 ASSERT(icmp != NULL); 4299 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4300 freemsg(mp); 4301 return (0); 4302 } 4303 ipha = (ipha_t *)mp->b_rptr; 4304 } 4305 ipha->ipha_version_and_hdr_length = 4306 (IP_VERSION<<4) | (ip_hdr_length>>2); 4307 4308 /* 4309 * For the socket of SOCK_RAW type, the checksum is provided in the 4310 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4311 * tell IP that the application has sent a complete IP header and not 4312 * to compute the transport checksum nor change the DF flag. 
4313 */ 4314 ipha->ipha_ident = IP_HDR_INCLUDED; 4315 ipha->ipha_hdr_checksum = 0; 4316 ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF); 4317 /* Insert options if any */ 4318 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4319 /* 4320 * Put the IP header plus any transport header that is 4321 * checksumed by ip_wput into the first mblk. (ip_wput assumes 4322 * that at least the checksum field is in the first mblk.) 4323 */ 4324 switch (ipha->ipha_protocol) { 4325 case IPPROTO_UDP: 4326 tp_hdr_len = 8; 4327 break; 4328 case IPPROTO_TCP: 4329 tp_hdr_len = 20; 4330 break; 4331 default: 4332 tp_hdr_len = 0; 4333 break; 4334 } 4335 /* 4336 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus 4337 * tp_hdr_len bytes will be in a single mblk. 4338 */ 4339 if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH + 4340 tp_hdr_len)) { 4341 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH + 4342 tp_hdr_len)) { 4343 BUMP_MIB(&is->is_rawip_mib, 4344 rawipOutErrors); 4345 freemsg(mp); 4346 return (0); 4347 } 4348 ipha = (ipha_t *)mp->b_rptr; 4349 } 4350 4351 /* 4352 * if the length is larger then the max allowed IP packet, 4353 * then send an error and abort the processing. 
4354 */ 4355 pkt_len = ntohs(ipha->ipha_length) 4356 + icmp->icmp_ip_snd_options_len; 4357 if (pkt_len > IP_MAXPACKET) { 4358 return (EMSGSIZE); 4359 } 4360 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra + 4361 tp_hdr_len, BPRI_LO))) { 4362 return (ENOMEM); 4363 } 4364 mp1->b_rptr += is->is_wroff_extra; 4365 mp1->b_wptr = mp1->b_rptr + ip_hdr_length; 4366 4367 ipha->ipha_length = htons((uint16_t)pkt_len); 4368 bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH); 4369 4370 /* Copy transport header if any */ 4371 bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len); 4372 mp1->b_wptr += tp_hdr_len; 4373 4374 /* Add options */ 4375 ipha = (ipha_t *)mp1->b_rptr; 4376 bcopy(icmp->icmp_ip_snd_options, &ipha[1], 4377 icmp->icmp_ip_snd_options_len); 4378 4379 /* Drop IP header and transport header from original */ 4380 (void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len); 4381 4382 mp1->b_cont = mp; 4383 mp = mp1; 4384 /* 4385 * Massage source route putting first source 4386 * route in ipha_dst. 4387 */ 4388 (void) ip_massage_options(ipha, is->is_netstack); 4389 } 4390 4391 if (pktinfop != NULL) { 4392 /* 4393 * Over write the source address provided in the header 4394 */ 4395 if (pktinfop->ip4_addr != INADDR_ANY) { 4396 ipha->ipha_src = pktinfop->ip4_addr; 4397 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4398 } 4399 4400 if (pktinfop->ip4_ill_index != 0) { 4401 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4402 } 4403 } 4404 4405 ip_output_options(connp, mp, q, IP_WPUT, &optinfo); 4406 return (0); 4407 } 4408 4409 static int 4410 icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst) 4411 { 4412 int err; 4413 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4414 icmp_stack_t *is = icmp->icmp_is; 4415 conn_t *connp = icmp->icmp_connp; 4416 cred_t *cr; 4417 4418 /* 4419 * All Solaris components should pass a db_credp 4420 * for this message, hence we ASSERT. 4421 * On production kernels we return an error to be robust against 4422 * random streams modules sitting on top of us. 
4423 */ 4424 cr = msg_getcred(mp, NULL); 4425 ASSERT(cr != NULL); 4426 if (cr == NULL) 4427 return (EINVAL); 4428 4429 err = tsol_compute_label(cr, dst, 4430 opt_storage, connp->conn_mac_exempt, 4431 is->is_netstack->netstack_ip); 4432 if (err == 0) { 4433 err = tsol_update_options(&icmp->icmp_ip_snd_options, 4434 &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len, 4435 opt_storage); 4436 } 4437 if (err != 0) { 4438 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4439 DTRACE_PROBE4( 4440 tx__ip__log__drop__updatelabel__icmp, 4441 char *, "icmp(1) failed to update options(2) on mp(3)", 4442 icmp_t *, icmp, char *, opt_storage, mblk_t *, mp); 4443 return (err); 4444 } 4445 IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst); 4446 return (0); 4447 } 4448 4449 /* 4450 * This routine handles all messages passed downstream. It either 4451 * consumes the message or passes it downstream; it never queues a 4452 * a message. 4453 */ 4454 static void 4455 icmp_wput(queue_t *q, mblk_t *mp) 4456 { 4457 uchar_t *rptr = mp->b_rptr; 4458 ipha_t *ipha; 4459 mblk_t *mp1; 4460 #define tudr ((struct T_unitdata_req *)rptr) 4461 size_t ip_len; 4462 conn_t *connp = Q_TO_CONN(q); 4463 icmp_t *icmp = connp->conn_icmp; 4464 icmp_stack_t *is = icmp->icmp_is; 4465 sin6_t *sin6; 4466 sin_t *sin; 4467 ipaddr_t v4dst; 4468 ip4_pkt_t pktinfo; 4469 ip4_pkt_t *pktinfop = &pktinfo; 4470 ip6_pkt_t ipp_s; /* For ancillary data options */ 4471 ip6_pkt_t *ipp = &ipp_s; 4472 int error; 4473 4474 ipp->ipp_fields = 0; 4475 ipp->ipp_sticky_ignored = 0; 4476 4477 switch (mp->b_datap->db_type) { 4478 case M_DATA: 4479 if (icmp->icmp_hdrincl) { 4480 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4481 ipha = (ipha_t *)mp->b_rptr; 4482 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4483 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4484 BUMP_MIB(&is->is_rawip_mib, 4485 rawipOutErrors); 4486 freemsg(mp); 4487 return; 4488 } 4489 ipha = (ipha_t *)mp->b_rptr; 4490 } 4491 /* 4492 * If this connection was used for 
v6 (inconceivable!) 4493 * or if we have a new destination, then it's time to 4494 * figure a new label. 4495 */ 4496 if (is_system_labeled() && 4497 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4498 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4499 ipha->ipha_dst)) { 4500 error = icmp_update_label(icmp, mp, 4501 ipha->ipha_dst); 4502 if (error != 0) { 4503 icmp_ud_err(q, mp, error); 4504 return; 4505 } 4506 } 4507 error = icmp_wput_hdrincl(q, connp, mp, icmp, NULL); 4508 if (error != 0) 4509 icmp_ud_err(q, mp, error); 4510 return; 4511 } 4512 freemsg(mp); 4513 return; 4514 case M_PROTO: 4515 case M_PCPROTO: 4516 ip_len = mp->b_wptr - rptr; 4517 if (ip_len >= sizeof (struct T_unitdata_req)) { 4518 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4519 if (((union T_primitives *)rptr)->type 4520 == T_UNITDATA_REQ) 4521 break; 4522 } 4523 /* FALLTHRU */ 4524 default: 4525 icmp_wput_other(q, mp); 4526 return; 4527 } 4528 4529 /* Handle T_UNITDATA_REQ messages here. */ 4530 4531 mp1 = mp->b_cont; 4532 if (mp1 == NULL) { 4533 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4534 icmp_ud_err(q, mp, EPROTO); 4535 return; 4536 } 4537 4538 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4539 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4540 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4541 return; 4542 } 4543 4544 switch (icmp->icmp_family) { 4545 case AF_INET6: 4546 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4547 if (!OK_32PTR((char *)sin6) || 4548 tudr->DEST_length != sizeof (sin6_t) || 4549 sin6->sin6_family != AF_INET6) { 4550 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4551 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4552 return; 4553 } 4554 4555 /* No support for mapped addresses on raw sockets */ 4556 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4557 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4558 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4559 return; 4560 } 4561 4562 /* 4563 * Destination is a native IPv6 address. 4564 * Send out an IPv6 format packet. 
4565 */ 4566 if (tudr->OPT_length != 0) { 4567 int error; 4568 4569 error = 0; 4570 if (icmp_unitdata_opt_process(q, mp, &error, 4571 (void *)ipp) < 0) { 4572 /* failure */ 4573 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4574 icmp_ud_err(q, mp, error); 4575 return; 4576 } 4577 ASSERT(error == 0); 4578 } 4579 4580 error = raw_ip_send_data_v6(q, connp, mp1, sin6, ipp); 4581 goto done; 4582 4583 case AF_INET: 4584 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4585 if (!OK_32PTR((char *)sin) || 4586 tudr->DEST_length != sizeof (sin_t) || 4587 sin->sin_family != AF_INET) { 4588 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4589 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4590 return; 4591 } 4592 /* Extract and ipaddr */ 4593 v4dst = sin->sin_addr.s_addr; 4594 break; 4595 4596 default: 4597 ASSERT(0); 4598 } 4599 4600 pktinfop->ip4_ill_index = 0; 4601 pktinfop->ip4_addr = INADDR_ANY; 4602 4603 /* 4604 * If options passed in, feed it for verification and handling 4605 */ 4606 if (tudr->OPT_length != 0) { 4607 int error; 4608 4609 error = 0; 4610 if (icmp_unitdata_opt_process(q, mp, &error, 4611 (void *)pktinfop) < 0) { 4612 /* failure */ 4613 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4614 icmp_ud_err(q, mp, error); 4615 return; 4616 } 4617 ASSERT(error == 0); 4618 /* 4619 * Note: Success in processing options. 
4620 * mp option buffer represented by 4621 * OPT_length/offset now potentially modified 4622 * and contain option setting results 4623 */ 4624 } 4625 4626 error = raw_ip_send_data_v4(q, connp, mp1, v4dst, pktinfop); 4627 done: 4628 if (error != 0) { 4629 icmp_ud_err(q, mp, error); 4630 return; 4631 } else { 4632 mp->b_cont = NULL; 4633 freeb(mp); 4634 } 4635 } 4636 4637 4638 /* ARGSUSED */ 4639 static void 4640 icmp_wput_fallback(queue_t *q, mblk_t *mp) 4641 { 4642 #ifdef DEBUG 4643 cmn_err(CE_CONT, "icmp_wput_fallback: Message during fallback \n"); 4644 #endif 4645 freemsg(mp); 4646 } 4647 4648 static int 4649 raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp, ipaddr_t v4dst, 4650 ip4_pkt_t *pktinfop) 4651 { 4652 ipha_t *ipha; 4653 size_t ip_len; 4654 icmp_t *icmp = connp->conn_icmp; 4655 icmp_stack_t *is = icmp->icmp_is; 4656 int ip_hdr_length; 4657 ip_opt_info_t optinfo; 4658 4659 optinfo.ip_opt_flags = 0; 4660 optinfo.ip_opt_ill_index = 0; 4661 4662 if (icmp->icmp_state == TS_UNBND) { 4663 /* If a port has not been bound to the stream, fail. 
*/ 4664 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4665 return (EPROTO); 4666 } 4667 4668 if (v4dst == INADDR_ANY) 4669 v4dst = htonl(INADDR_LOOPBACK); 4670 4671 /* Check if our saved options are valid; update if not */ 4672 if (is_system_labeled() && 4673 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4674 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst)) { 4675 int error = icmp_update_label(icmp, mp, v4dst); 4676 4677 if (error != 0) 4678 return (error); 4679 } 4680 4681 /* Protocol 255 contains full IP headers */ 4682 if (icmp->icmp_hdrincl) 4683 return (icmp_wput_hdrincl(q, connp, mp, icmp, pktinfop)); 4684 4685 /* Add an IP header */ 4686 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4687 ipha = (ipha_t *)&mp->b_rptr[-ip_hdr_length]; 4688 if ((uchar_t *)ipha < mp->b_datap->db_base || 4689 mp->b_datap->db_ref != 1 || 4690 !OK_32PTR(ipha)) { 4691 mblk_t *mp1; 4692 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, 4693 BPRI_LO))) { 4694 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4695 return (ENOMEM); 4696 } 4697 mp1->b_cont = mp; 4698 ipha = (ipha_t *)mp1->b_datap->db_lim; 4699 mp1->b_wptr = (uchar_t *)ipha; 4700 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4701 mp = mp1; 4702 } 4703 #ifdef _BIG_ENDIAN 4704 /* Set version, header length, and tos */ 4705 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4706 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4707 icmp->icmp_type_of_service); 4708 /* Set ttl and protocol */ 4709 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4710 #else 4711 /* Set version, header length, and tos */ 4712 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4713 ((icmp->icmp_type_of_service << 8) | 4714 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4715 /* Set ttl and protocol */ 4716 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4717 #endif 4718 if (pktinfop->ip4_addr != INADDR_ANY) { 4719 ipha->ipha_src = pktinfop->ip4_addr; 4720 
optinfo.ip_opt_flags = IP_VERIFY_SRC; 4721 } else { 4722 4723 /* 4724 * Copy our address into the packet. If this is zero, 4725 * ip will fill in the real source address. 4726 */ 4727 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4728 } 4729 4730 ipha->ipha_fragment_offset_and_flags = 0; 4731 4732 if (pktinfop->ip4_ill_index != 0) { 4733 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4734 } 4735 4736 4737 /* 4738 * For the socket of SOCK_RAW type, the checksum is provided in the 4739 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4740 * tell IP that the application has sent a complete IP header and not 4741 * to compute the transport checksum nor change the DF flag. 4742 */ 4743 ipha->ipha_ident = IP_HDR_INCLUDED; 4744 4745 /* Finish common formatting of the packet. */ 4746 mp->b_rptr = (uchar_t *)ipha; 4747 4748 ip_len = mp->b_wptr - (uchar_t *)ipha; 4749 if (mp->b_cont != NULL) 4750 ip_len += msgdsize(mp->b_cont); 4751 4752 /* 4753 * Set the length into the IP header. 4754 * If the length is greater than the maximum allowed by IP, 4755 * then free the message and return. Do not try and send it 4756 * as this can cause problems in layers below. 4757 */ 4758 if (ip_len > IP_MAXPACKET) { 4759 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4760 return (EMSGSIZE); 4761 } 4762 ipha->ipha_length = htons((uint16_t)ip_len); 4763 /* 4764 * Copy in the destination address request 4765 */ 4766 ipha->ipha_dst = v4dst; 4767 4768 /* 4769 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4770 */ 4771 if (CLASSD(v4dst)) 4772 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4773 4774 /* Copy in options if any */ 4775 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4776 bcopy(icmp->icmp_ip_snd_options, 4777 &ipha[1], icmp->icmp_ip_snd_options_len); 4778 /* 4779 * Massage source route putting first source route in ipha_dst. 4780 * Ignore the destination in the T_unitdata_req. 
4781 */ 4782 (void) ip_massage_options(ipha, is->is_netstack); 4783 } 4784 4785 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 4786 ip_output_options(connp, mp, q, IP_WPUT, &optinfo); 4787 return (0); 4788 } 4789 4790 static int 4791 icmp_update_label_v6(icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4792 { 4793 int err; 4794 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4795 icmp_stack_t *is = icmp->icmp_is; 4796 conn_t *connp = icmp->icmp_connp; 4797 cred_t *cr; 4798 4799 /* 4800 * All Solaris components should pass a db_credp 4801 * for this message, hence we ASSERT. 4802 * On production kernels we return an error to be robust against 4803 * random streams modules sitting on top of us. 4804 */ 4805 cr = msg_getcred(mp, NULL); 4806 ASSERT(cr != NULL); 4807 if (cr == NULL) 4808 return (EINVAL); 4809 4810 err = tsol_compute_label_v6(cr, dst, 4811 opt_storage, connp->conn_mac_exempt, 4812 is->is_netstack->netstack_ip); 4813 if (err == 0) { 4814 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4815 &icmp->icmp_label_len_v6, opt_storage); 4816 } 4817 if (err != 0) { 4818 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4819 DTRACE_PROBE4( 4820 tx__ip__log__drop__updatelabel__icmp6, 4821 char *, "icmp(1) failed to update options(2) on mp(3)", 4822 icmp_t *, icmp, char *, opt_storage, mblk_t *, mp); 4823 return (err); 4824 } 4825 4826 icmp->icmp_v6lastdst = *dst; 4827 return (0); 4828 } 4829 4830 /* 4831 * raw_ip_send_data_v6(): 4832 * Assumes that icmp_wput did some sanity checking on the destination 4833 * address, but that the label may not yet be correct. 
4834 */ 4835 static int 4836 raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp, sin6_t *sin6, 4837 ip6_pkt_t *ipp) 4838 { 4839 ip6_t *ip6h; 4840 ip6i_t *ip6i; /* mp->b_rptr even if no ip6i_t */ 4841 int ip_hdr_len = IPV6_HDR_LEN; 4842 size_t ip_len; 4843 icmp_t *icmp = connp->conn_icmp; 4844 icmp_stack_t *is = icmp->icmp_is; 4845 ip6_pkt_t *tipp; 4846 uint32_t csum = 0; 4847 uint_t ignore = 0; 4848 uint_t option_exists = 0, is_sticky = 0; 4849 uint8_t *cp; 4850 uint8_t *nxthdr_ptr; 4851 in6_addr_t ip6_dst; 4852 4853 /* 4854 * If the local address is a mapped address return 4855 * an error. 4856 * It would be possible to send an IPv6 packet but the 4857 * response would never make it back to the application 4858 * since it is bound to a mapped address. 4859 */ 4860 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4861 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4862 return (EADDRNOTAVAIL); 4863 } 4864 4865 ignore = ipp->ipp_sticky_ignored; 4866 if (sin6->sin6_scope_id != 0 && 4867 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4868 /* 4869 * IPPF_SCOPE_ID is special. It's neither a sticky 4870 * option nor ancillary data. It needs to be 4871 * explicitly set in options_exists. 4872 */ 4873 option_exists |= IPPF_SCOPE_ID; 4874 } 4875 4876 /* 4877 * Compute the destination address 4878 */ 4879 ip6_dst = sin6->sin6_addr; 4880 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4881 ip6_dst = ipv6_loopback; 4882 4883 /* 4884 * If we're not going to the same destination as last time, then 4885 * recompute the label required. This is done in a separate routine to 4886 * avoid blowing up our stack here. 4887 */ 4888 if (is_system_labeled() && 4889 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst)) { 4890 int error = 0; 4891 4892 error = icmp_update_label_v6(icmp, mp, &ip6_dst); 4893 if (error != 0) 4894 return (error); 4895 } 4896 4897 /* 4898 * If there's a security label here, then we ignore any options the 4899 * user may try to set. 
We keep the peer's label as a hidden sticky 4900 * option. 4901 */ 4902 if (icmp->icmp_label_len_v6 > 0) { 4903 ignore &= ~IPPF_HOPOPTS; 4904 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4905 } 4906 4907 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4908 (ipp->ipp_fields == 0)) { 4909 /* No sticky options nor ancillary data. */ 4910 goto no_options; 4911 } 4912 4913 /* 4914 * Go through the options figuring out where each is going to 4915 * come from and build two masks. The first mask indicates if 4916 * the option exists at all. The second mask indicates if the 4917 * option is sticky or ancillary. 4918 */ 4919 if (!(ignore & IPPF_HOPOPTS)) { 4920 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4921 option_exists |= IPPF_HOPOPTS; 4922 ip_hdr_len += ipp->ipp_hopoptslen; 4923 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4924 option_exists |= IPPF_HOPOPTS; 4925 is_sticky |= IPPF_HOPOPTS; 4926 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4927 } 4928 } 4929 4930 if (!(ignore & IPPF_RTHDR)) { 4931 if (ipp->ipp_fields & IPPF_RTHDR) { 4932 option_exists |= IPPF_RTHDR; 4933 ip_hdr_len += ipp->ipp_rthdrlen; 4934 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4935 option_exists |= IPPF_RTHDR; 4936 is_sticky |= IPPF_RTHDR; 4937 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4938 } 4939 } 4940 4941 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4942 /* 4943 * Need to have a router header to use these. 
4944 */ 4945 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4946 option_exists |= IPPF_RTDSTOPTS; 4947 ip_hdr_len += ipp->ipp_rtdstoptslen; 4948 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4949 option_exists |= IPPF_RTDSTOPTS; 4950 is_sticky |= IPPF_RTDSTOPTS; 4951 ip_hdr_len += 4952 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4953 } 4954 } 4955 4956 if (!(ignore & IPPF_DSTOPTS)) { 4957 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4958 option_exists |= IPPF_DSTOPTS; 4959 ip_hdr_len += ipp->ipp_dstoptslen; 4960 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4961 option_exists |= IPPF_DSTOPTS; 4962 is_sticky |= IPPF_DSTOPTS; 4963 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4964 } 4965 } 4966 4967 if (!(ignore & IPPF_IFINDEX)) { 4968 if (ipp->ipp_fields & IPPF_IFINDEX) { 4969 option_exists |= IPPF_IFINDEX; 4970 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4971 option_exists |= IPPF_IFINDEX; 4972 is_sticky |= IPPF_IFINDEX; 4973 } 4974 } 4975 4976 if (!(ignore & IPPF_ADDR)) { 4977 if (ipp->ipp_fields & IPPF_ADDR) { 4978 option_exists |= IPPF_ADDR; 4979 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4980 option_exists |= IPPF_ADDR; 4981 is_sticky |= IPPF_ADDR; 4982 } 4983 } 4984 4985 if (!(ignore & IPPF_DONTFRAG)) { 4986 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4987 option_exists |= IPPF_DONTFRAG; 4988 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4989 option_exists |= IPPF_DONTFRAG; 4990 is_sticky |= IPPF_DONTFRAG; 4991 } 4992 } 4993 4994 if (!(ignore & IPPF_USE_MIN_MTU)) { 4995 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4996 option_exists |= IPPF_USE_MIN_MTU; 4997 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4998 IPPF_USE_MIN_MTU) { 4999 option_exists |= IPPF_USE_MIN_MTU; 5000 is_sticky |= IPPF_USE_MIN_MTU; 5001 } 5002 } 5003 5004 if (!(ignore & IPPF_NEXTHOP)) { 5005 if (ipp->ipp_fields & IPPF_NEXTHOP) { 5006 option_exists |= IPPF_NEXTHOP; 5007 } else if (icmp->icmp_sticky_ipp.ipp_fields & 
IPPF_NEXTHOP) { 5008 option_exists |= IPPF_NEXTHOP; 5009 is_sticky |= IPPF_NEXTHOP; 5010 } 5011 } 5012 5013 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 5014 option_exists |= IPPF_HOPLIMIT; 5015 /* IPV6_HOPLIMIT can never be sticky */ 5016 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 5017 5018 if (!(ignore & IPPF_UNICAST_HOPS) && 5019 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 5020 option_exists |= IPPF_UNICAST_HOPS; 5021 is_sticky |= IPPF_UNICAST_HOPS; 5022 } 5023 5024 if (!(ignore & IPPF_MULTICAST_HOPS) && 5025 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 5026 option_exists |= IPPF_MULTICAST_HOPS; 5027 is_sticky |= IPPF_MULTICAST_HOPS; 5028 } 5029 5030 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 5031 /* This is a sticky socket option only */ 5032 option_exists |= IPPF_NO_CKSUM; 5033 is_sticky |= IPPF_NO_CKSUM; 5034 } 5035 5036 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 5037 /* This is a sticky socket option only */ 5038 option_exists |= IPPF_RAW_CKSUM; 5039 is_sticky |= IPPF_RAW_CKSUM; 5040 } 5041 5042 if (!(ignore & IPPF_TCLASS)) { 5043 if (ipp->ipp_fields & IPPF_TCLASS) { 5044 option_exists |= IPPF_TCLASS; 5045 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 5046 option_exists |= IPPF_TCLASS; 5047 is_sticky |= IPPF_TCLASS; 5048 } 5049 } 5050 5051 no_options: 5052 5053 /* 5054 * If any options carried in the ip6i_t were specified, we 5055 * need to account for the ip6i_t in the data we'll be sending 5056 * down. 
5057 */ 5058 if (option_exists & IPPF_HAS_IP6I) 5059 ip_hdr_len += sizeof (ip6i_t); 5060 5061 /* check/fix buffer config, setup pointers into it */ 5062 ip6h = (ip6_t *)&mp->b_rptr[-ip_hdr_len]; 5063 if ((mp->b_datap->db_ref != 1) || 5064 ((unsigned char *)ip6h < mp->b_datap->db_base) || 5065 !OK_32PTR(ip6h)) { 5066 mblk_t *mp1; 5067 5068 /* Try to get everything in a single mblk next time */ 5069 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 5070 icmp->icmp_max_hdr_len = ip_hdr_len; 5071 5072 (void) proto_set_tx_wroff(q == NULL ? NULL:RD(q), connp, 5073 icmp->icmp_max_hdr_len + is->is_wroff_extra); 5074 } 5075 mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); 5076 if (!mp1) { 5077 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5078 return (ENOMEM); 5079 } 5080 mp1->b_cont = mp; 5081 mp1->b_wptr = mp1->b_datap->db_lim; 5082 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 5083 mp = mp1; 5084 } 5085 mp->b_rptr = (unsigned char *)ip6h; 5086 ip6i = (ip6i_t *)ip6h; 5087 5088 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 5089 if (option_exists & IPPF_HAS_IP6I) { 5090 ip6h = (ip6_t *)&ip6i[1]; 5091 ip6i->ip6i_flags = 0; 5092 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5093 5094 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 5095 if (option_exists & IPPF_SCOPE_ID) { 5096 ip6i->ip6i_flags |= IP6I_IFINDEX; 5097 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 5098 } else if (option_exists & IPPF_IFINDEX) { 5099 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 5100 ASSERT(tipp->ipp_ifindex != 0); 5101 ip6i->ip6i_flags |= IP6I_IFINDEX; 5102 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 5103 } 5104 5105 if (option_exists & IPPF_RAW_CKSUM) { 5106 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 5107 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 5108 } 5109 5110 if (option_exists & IPPF_NO_CKSUM) { 5111 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 5112 } 5113 5114 if (option_exists & IPPF_ADDR) { 5115 /* 5116 * Enable per-packet source address verification if 5117 * IPV6_PKTINFO specified the source address. 5118 * ip6_src is set in the transport's _wput function. 5119 */ 5120 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 5121 } 5122 5123 if (option_exists & IPPF_DONTFRAG) { 5124 ip6i->ip6i_flags |= IP6I_DONTFRAG; 5125 } 5126 5127 if (option_exists & IPPF_USE_MIN_MTU) { 5128 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 5129 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 5130 } 5131 5132 if (option_exists & IPPF_NEXTHOP) { 5133 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 5134 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 5135 ip6i->ip6i_flags |= IP6I_NEXTHOP; 5136 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 5137 } 5138 5139 /* 5140 * tell IP this is an ip6i_t private header 5141 */ 5142 ip6i->ip6i_nxt = IPPROTO_RAW; 5143 } 5144 5145 /* Initialize IPv6 header */ 5146 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5147 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 5148 5149 /* Set the hoplimit of the outgoing packet. 
*/ 5150 if (option_exists & IPPF_HOPLIMIT) { 5151 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 5152 ip6h->ip6_hops = ipp->ipp_hoplimit; 5153 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5154 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 5155 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 5156 if (option_exists & IPPF_MULTICAST_HOPS) 5157 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5158 } else { 5159 ip6h->ip6_hops = icmp->icmp_ttl; 5160 if (option_exists & IPPF_UNICAST_HOPS) 5161 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5162 } 5163 5164 if (option_exists & IPPF_ADDR) { 5165 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 5166 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 5167 ip6h->ip6_src = tipp->ipp_addr; 5168 } else { 5169 /* 5170 * The source address was not set using IPV6_PKTINFO. 5171 * First look at the bound source. 5172 * If unspecified fallback to __sin6_src_id. 5173 */ 5174 ip6h->ip6_src = icmp->icmp_v6src; 5175 if (sin6->__sin6_src_id != 0 && 5176 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 5177 ip_srcid_find_id(sin6->__sin6_src_id, 5178 &ip6h->ip6_src, icmp->icmp_zoneid, 5179 is->is_netstack); 5180 } 5181 } 5182 5183 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 5184 cp = (uint8_t *)&ip6h[1]; 5185 5186 /* 5187 * Here's where we have to start stringing together 5188 * any extension headers in the right order: 5189 * Hop-by-hop, destination, routing, and final destination opts. 
5190 */ 5191 if (option_exists & IPPF_HOPOPTS) { 5192 /* Hop-by-hop options */ 5193 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 5194 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 5195 5196 *nxthdr_ptr = IPPROTO_HOPOPTS; 5197 nxthdr_ptr = &hbh->ip6h_nxt; 5198 5199 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 5200 cp += tipp->ipp_hopoptslen; 5201 } 5202 /* 5203 * En-route destination options 5204 * Only do them if there's a routing header as well 5205 */ 5206 if (option_exists & IPPF_RTDSTOPTS) { 5207 ip6_dest_t *dst = (ip6_dest_t *)cp; 5208 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 5209 5210 *nxthdr_ptr = IPPROTO_DSTOPTS; 5211 nxthdr_ptr = &dst->ip6d_nxt; 5212 5213 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 5214 cp += tipp->ipp_rtdstoptslen; 5215 } 5216 /* 5217 * Routing header next 5218 */ 5219 if (option_exists & IPPF_RTHDR) { 5220 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 5221 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 5222 5223 *nxthdr_ptr = IPPROTO_ROUTING; 5224 nxthdr_ptr = &rt->ip6r_nxt; 5225 5226 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 5227 cp += tipp->ipp_rthdrlen; 5228 } 5229 /* 5230 * Do ultimate destination options 5231 */ 5232 if (option_exists & IPPF_DSTOPTS) { 5233 ip6_dest_t *dest = (ip6_dest_t *)cp; 5234 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 5235 5236 *nxthdr_ptr = IPPROTO_DSTOPTS; 5237 nxthdr_ptr = &dest->ip6d_nxt; 5238 5239 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 5240 cp += tipp->ipp_dstoptslen; 5241 } 5242 5243 /* 5244 * Now set the last header pointer to the proto passed in 5245 */ 5246 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 5247 *nxthdr_ptr = icmp->icmp_proto; 5248 5249 /* 5250 * Copy in the destination address 5251 */ 5252 ip6h->ip6_dst = ip6_dst; 5253 5254 ip6h->ip6_vcf = 5255 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 5256 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 5257 5258 if (option_exists & IPPF_TCLASS) { 5259 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 5260 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 5261 tipp->ipp_tclass); 5262 } 5263 if (option_exists & IPPF_RTHDR) { 5264 ip6_rthdr_t *rth; 5265 5266 /* 5267 * Perform any processing needed for source routing. 5268 * We know that all extension headers will be in the same mblk 5269 * as the IPv6 header. 5270 */ 5271 rth = ip_find_rthdr_v6(ip6h, mp->b_wptr); 5272 if (rth != NULL && rth->ip6r_segleft != 0) { 5273 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 5274 /* 5275 * Drop packet - only support Type 0 routing. 5276 * Notify the application as well. 5277 */ 5278 BUMP_MIB(&is->is_rawip_mib, 5279 rawipOutErrors); 5280 return (EPROTO); 5281 } 5282 /* 5283 * rth->ip6r_len is twice the number of 5284 * addresses in the header 5285 */ 5286 if (rth->ip6r_len & 0x1) { 5287 BUMP_MIB(&is->is_rawip_mib, 5288 rawipOutErrors); 5289 return (EPROTO); 5290 } 5291 /* 5292 * Shuffle the routing header and ip6_dst 5293 * addresses, and get the checksum difference 5294 * between the first hop (in ip6_dst) and 5295 * the destination (in the last routing hdr entry). 5296 */ 5297 csum = ip_massage_options_v6(ip6h, rth, 5298 is->is_netstack); 5299 /* 5300 * Verify that the first hop isn't a mapped address. 5301 * Routers along the path need to do this verification 5302 * for subsequent hops. 5303 */ 5304 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 5305 BUMP_MIB(&is->is_rawip_mib, 5306 rawipOutErrors); 5307 return (EADDRNOTAVAIL); 5308 } 5309 } 5310 } 5311 5312 ip_len = mp->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 5313 if (mp->b_cont != NULL) 5314 ip_len += msgdsize(mp->b_cont); 5315 5316 /* 5317 * Set the length into the IP header. 5318 * If the length is greater than the maximum allowed by IP, 5319 * then free the message and return. Do not try and send it 5320 * as this can cause problems in layers below. 
5321 */ 5322 if (ip_len > IP_MAXPACKET) { 5323 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5324 return (EMSGSIZE); 5325 } 5326 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 5327 uint_t cksum_off; /* From ip6i == mp->b_rptr */ 5328 uint16_t *cksum_ptr; 5329 uint_t ext_hdrs_len; 5330 5331 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 5332 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 5333 icmp->icmp_checksum_off == 2); 5334 5335 /* 5336 * We make it easy for IP to include our pseudo header 5337 * by putting our length in uh_checksum, modified (if 5338 * we have a routing header) by the checksum difference 5339 * between the ultimate destination and first hop addresses. 5340 * Note: ICMPv6 must always checksum the packet. 5341 */ 5342 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 5343 if (cksum_off + sizeof (uint16_t) > mp->b_wptr - mp->b_rptr) { 5344 if (!pullupmsg(mp, cksum_off + sizeof (uint16_t))) { 5345 BUMP_MIB(&is->is_rawip_mib, 5346 rawipOutErrors); 5347 freemsg(mp); 5348 return (0); 5349 } 5350 ip6i = (ip6i_t *)mp->b_rptr; 5351 if (ip6i->ip6i_nxt == IPPROTO_RAW) 5352 ip6h = (ip6_t *)&ip6i[1]; 5353 else 5354 ip6h = (ip6_t *)ip6i; 5355 } 5356 /* Add payload length to checksum */ 5357 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 5358 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 5359 csum += htons(ip_len - ext_hdrs_len); 5360 5361 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 5362 csum = (csum & 0xFFFF) + (csum >> 16); 5363 *cksum_ptr = (uint16_t)csum; 5364 } 5365 5366 #ifdef _LITTLE_ENDIAN 5367 ip_len = htons(ip_len); 5368 #endif 5369 ip6h->ip6_plen = (uint16_t)ip_len; 5370 5371 /* We're done. 
Pass the packet to IP */ 5372 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 5373 ip_output_v6(icmp->icmp_connp, mp, q, IP_WPUT); 5374 return (0); 5375 } 5376 5377 static void 5378 icmp_wput_other(queue_t *q, mblk_t *mp) 5379 { 5380 uchar_t *rptr = mp->b_rptr; 5381 struct iocblk *iocp; 5382 #define tudr ((struct T_unitdata_req *)rptr) 5383 conn_t *connp = Q_TO_CONN(q); 5384 icmp_t *icmp = connp->conn_icmp; 5385 icmp_stack_t *is = icmp->icmp_is; 5386 cred_t *cr; 5387 5388 switch (mp->b_datap->db_type) { 5389 case M_PROTO: 5390 case M_PCPROTO: 5391 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5392 /* 5393 * If the message does not contain a PRIM_type, 5394 * throw it away. 5395 */ 5396 freemsg(mp); 5397 return; 5398 } 5399 switch (((union T_primitives *)rptr)->type) { 5400 case T_ADDR_REQ: 5401 icmp_addr_req(q, mp); 5402 return; 5403 case O_T_BIND_REQ: 5404 case T_BIND_REQ: 5405 icmp_tpi_bind(q, mp); 5406 return; 5407 case T_CONN_REQ: 5408 icmp_tpi_connect(q, mp); 5409 return; 5410 case T_CAPABILITY_REQ: 5411 icmp_capability_req(q, mp); 5412 return; 5413 case T_INFO_REQ: 5414 icmp_info_req(q, mp); 5415 return; 5416 case T_UNITDATA_REQ: 5417 /* 5418 * If a T_UNITDATA_REQ gets here, the address must 5419 * be bad. Valid T_UNITDATA_REQs are found above 5420 * and break to below this switch. 5421 */ 5422 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5423 return; 5424 case T_UNBIND_REQ: 5425 icmp_tpi_unbind(q, mp); 5426 return; 5427 5428 case T_SVR4_OPTMGMT_REQ: 5429 /* 5430 * All Solaris components should pass a db_credp 5431 * for this TPI message, hence we ASSERT. 5432 * But in case there is some other M_PROTO that looks 5433 * like a TPI message sent by some other kernel 5434 * component, we check and return an error. 
5435 */ 5436 cr = msg_getcred(mp, NULL); 5437 ASSERT(cr != NULL); 5438 if (cr == NULL) { 5439 icmp_err_ack(q, mp, TSYSERR, EINVAL); 5440 return; 5441 } 5442 5443 if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get, 5444 cr)) { 5445 /* Only IP can return anything meaningful */ 5446 (void) svr4_optcom_req(q, mp, cr, 5447 &icmp_opt_obj, B_TRUE); 5448 } 5449 return; 5450 5451 case T_OPTMGMT_REQ: 5452 /* 5453 * All Solaris components should pass a db_credp 5454 * for this TPI message, hence we ASSERT. 5455 * But in case there is some other M_PROTO that looks 5456 * like a TPI message sent by some other kernel 5457 * component, we check and return an error. 5458 */ 5459 cr = msg_getcred(mp, NULL); 5460 ASSERT(cr != NULL); 5461 if (cr == NULL) { 5462 icmp_err_ack(q, mp, TSYSERR, EINVAL); 5463 return; 5464 } 5465 /* Only IP can return anything meaningful */ 5466 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE); 5467 return; 5468 5469 case T_DISCON_REQ: 5470 icmp_tpi_disconnect(q, mp); 5471 return; 5472 5473 /* The following TPI message is not supported by icmp. */ 5474 case O_T_CONN_RES: 5475 case T_CONN_RES: 5476 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5477 return; 5478 5479 /* The following 3 TPI requests are illegal for icmp. */ 5480 case T_DATA_REQ: 5481 case T_EXDATA_REQ: 5482 case T_ORDREL_REQ: 5483 freemsg(mp); 5484 (void) putctl1(RD(q), M_ERROR, EPROTO); 5485 return; 5486 default: 5487 break; 5488 } 5489 break; 5490 case M_IOCTL: 5491 iocp = (struct iocblk *)mp->b_rptr; 5492 switch (iocp->ioc_cmd) { 5493 case TI_GETPEERNAME: 5494 if (icmp->icmp_state != TS_DATA_XFER) { 5495 /* 5496 * If a default destination address has not 5497 * been associated with the stream, then we 5498 * don't know the peer's name. 
5499 */ 5500 iocp->ioc_error = ENOTCONN; 5501 err_ret:; 5502 iocp->ioc_count = 0; 5503 mp->b_datap->db_type = M_IOCACK; 5504 qreply(q, mp); 5505 return; 5506 } 5507 /* FALLTHRU */ 5508 case TI_GETMYNAME: 5509 /* 5510 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5511 * need to copyin the user's strbuf structure. 5512 * Processing will continue in the M_IOCDATA case 5513 * below. 5514 */ 5515 mi_copyin(q, mp, NULL, 5516 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5517 return; 5518 case ND_SET: 5519 /* nd_getset performs the necessary error checking */ 5520 case ND_GET: 5521 if (nd_getset(q, is->is_nd, mp)) { 5522 qreply(q, mp); 5523 return; 5524 } 5525 break; 5526 case _SIOCSOCKFALLBACK: 5527 /* 5528 * socket is falling back to be a 5529 * streams socket. Nothing to do 5530 */ 5531 iocp->ioc_count = 0; 5532 iocp->ioc_rval = 0; 5533 qreply(q, mp); 5534 return; 5535 default: 5536 break; 5537 } 5538 break; 5539 case M_IOCDATA: 5540 icmp_wput_iocdata(q, mp); 5541 return; 5542 default: 5543 break; 5544 } 5545 ip_wput(q, mp); 5546 } 5547 5548 /* 5549 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5550 * messages. 5551 */ 5552 static void 5553 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5554 { 5555 mblk_t *mp1; 5556 STRUCT_HANDLE(strbuf, sb); 5557 icmp_t *icmp; 5558 uint_t addrlen; 5559 uint_t error; 5560 5561 /* Make sure it is one of ours. */ 5562 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5563 case TI_GETMYNAME: 5564 case TI_GETPEERNAME: 5565 break; 5566 default: 5567 icmp = Q_TO_ICMP(q); 5568 ip_output(icmp->icmp_connp, mp, q, IP_WPUT); 5569 return; 5570 } 5571 switch (mi_copy_state(q, mp, &mp1)) { 5572 case -1: 5573 return; 5574 case MI_COPY_CASE(MI_COPY_IN, 1): 5575 break; 5576 case MI_COPY_CASE(MI_COPY_OUT, 1): 5577 /* 5578 * The address has been copied out, so now 5579 * copyout the strbuf. 5580 */ 5581 mi_copyout(q, mp); 5582 return; 5583 case MI_COPY_CASE(MI_COPY_OUT, 2): 5584 /* 5585 * The address and strbuf have been copied out. 
5586 * We're done, so just acknowledge the original 5587 * M_IOCTL. 5588 */ 5589 mi_copy_done(q, mp, 0); 5590 return; 5591 default: 5592 /* 5593 * Something strange has happened, so acknowledge 5594 * the original M_IOCTL with an EPROTO error. 5595 */ 5596 mi_copy_done(q, mp, EPROTO); 5597 return; 5598 } 5599 /* 5600 * Now we have the strbuf structure for TI_GETMYNAME 5601 * and TI_GETPEERNAME. Next we copyout the requested 5602 * address and then we'll copyout the strbuf. 5603 */ 5604 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5605 (void *)mp1->b_rptr); 5606 icmp = Q_TO_ICMP(q); 5607 if (icmp->icmp_family == AF_INET) 5608 addrlen = sizeof (sin_t); 5609 else 5610 addrlen = sizeof (sin6_t); 5611 5612 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5613 mi_copy_done(q, mp, EINVAL); 5614 return; 5615 } 5616 5617 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5618 5619 if (mp1 == NULL) 5620 return; 5621 5622 rw_enter(&icmp->icmp_rwlock, RW_READER); 5623 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5624 case TI_GETMYNAME: 5625 error = rawip_do_getsockname(icmp, (void *)mp1->b_rptr, 5626 &addrlen); 5627 break; 5628 case TI_GETPEERNAME: 5629 error = rawip_do_getpeername(icmp, (void *)mp1->b_rptr, 5630 &addrlen); 5631 break; 5632 } 5633 rw_exit(&icmp->icmp_rwlock); 5634 5635 if (error != 0) { 5636 mi_copy_done(q, mp, error); 5637 } else { 5638 mp1->b_wptr += addrlen; 5639 STRUCT_FSET(sb, len, addrlen); 5640 5641 /* Copy out the address */ 5642 mi_copyout(q, mp); 5643 } 5644 } 5645 5646 static int 5647 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5648 void *thisdg_attrs) 5649 { 5650 struct T_unitdata_req *udreqp; 5651 int is_absreq_failure; 5652 cred_t *cr; 5653 5654 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5655 *errorp = 0; 5656 5657 /* 5658 * All Solaris components should pass a db_credp 5659 * for this TPI message, hence we ASSERT. 
5660 * But in case there is some other M_PROTO that looks 5661 * like a TPI message sent by some other kernel 5662 * component, we check and return an error. 5663 */ 5664 cr = msg_getcred(mp, NULL); 5665 ASSERT(cr != NULL); 5666 if (cr == NULL) 5667 return (-1); 5668 5669 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5670 udreqp->OPT_offset, cr, &icmp_opt_obj, 5671 thisdg_attrs, &is_absreq_failure); 5672 5673 if (*errorp != 0) { 5674 /* 5675 * Note: No special action needed in this 5676 * module for "is_absreq_failure" 5677 */ 5678 return (-1); /* failure */ 5679 } 5680 ASSERT(is_absreq_failure == 0); 5681 return (0); /* success */ 5682 } 5683 5684 void 5685 icmp_ddi_g_init(void) 5686 { 5687 icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5688 icmp_opt_obj.odb_opt_arr_cnt); 5689 5690 /* 5691 * We want to be informed each time a stack is created or 5692 * destroyed in the kernel, so we can maintain the 5693 * set of icmp_stack_t's. 5694 */ 5695 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); 5696 } 5697 5698 void 5699 icmp_ddi_g_destroy(void) 5700 { 5701 netstack_unregister(NS_ICMP); 5702 } 5703 5704 #define INET_NAME "ip" 5705 5706 /* 5707 * Initialize the ICMP stack instance. 
5708 */ 5709 static void * 5710 rawip_stack_init(netstackid_t stackid, netstack_t *ns) 5711 { 5712 icmp_stack_t *is; 5713 icmpparam_t *pa; 5714 int error = 0; 5715 major_t major; 5716 5717 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); 5718 is->is_netstack = ns; 5719 5720 pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); 5721 is->is_param_arr = pa; 5722 bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); 5723 5724 (void) icmp_param_register(&is->is_nd, 5725 is->is_param_arr, A_CNT(icmp_param_arr)); 5726 is->is_ksp = rawip_kstat_init(stackid); 5727 5728 major = mod_name_to_major(INET_NAME); 5729 error = ldi_ident_from_major(major, &is->is_ldi_ident); 5730 ASSERT(error == 0); 5731 return (is); 5732 } 5733 5734 /* 5735 * Free the ICMP stack instance. 5736 */ 5737 static void 5738 rawip_stack_fini(netstackid_t stackid, void *arg) 5739 { 5740 icmp_stack_t *is = (icmp_stack_t *)arg; 5741 5742 nd_free(&is->is_nd); 5743 kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); 5744 is->is_param_arr = NULL; 5745 5746 rawip_kstat_fini(stackid, is->is_ksp); 5747 is->is_ksp = NULL; 5748 ldi_ident_release(is->is_ldi_ident); 5749 kmem_free(is, sizeof (*is)); 5750 } 5751 5752 static void * 5753 rawip_kstat_init(netstackid_t stackid) { 5754 kstat_t *ksp; 5755 5756 rawip_named_kstat_t template = { 5757 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5758 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5759 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5760 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5761 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5762 }; 5763 5764 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", 5765 KSTAT_TYPE_NAMED, 5766 NUM_OF_FIELDS(rawip_named_kstat_t), 5767 0, stackid); 5768 if (ksp == NULL || ksp->ks_data == NULL) 5769 return (NULL); 5770 5771 bcopy(&template, ksp->ks_data, sizeof (template)); 5772 ksp->ks_update = rawip_kstat_update; 5773 ksp->ks_private = (void *)(uintptr_t)stackid; 5774 5775 kstat_install(ksp); 5776 return (ksp); 5777 } 
5778 5779 static void 5780 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) 5781 { 5782 if (ksp != NULL) { 5783 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 5784 kstat_delete_netstack(ksp, stackid); 5785 } 5786 } 5787 5788 static int 5789 rawip_kstat_update(kstat_t *ksp, int rw) 5790 { 5791 rawip_named_kstat_t *rawipkp; 5792 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; 5793 netstack_t *ns; 5794 icmp_stack_t *is; 5795 5796 if ((ksp == NULL) || (ksp->ks_data == NULL)) 5797 return (EIO); 5798 5799 if (rw == KSTAT_WRITE) 5800 return (EACCES); 5801 5802 rawipkp = (rawip_named_kstat_t *)ksp->ks_data; 5803 5804 ns = netstack_find_by_stackid(stackid); 5805 if (ns == NULL) 5806 return (-1); 5807 is = ns->netstack_icmp; 5808 if (is == NULL) { 5809 netstack_rele(ns); 5810 return (-1); 5811 } 5812 rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; 5813 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; 5814 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; 5815 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; 5816 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; 5817 netstack_rele(ns); 5818 return (0); 5819 } 5820 5821 /* ARGSUSED */ 5822 int 5823 rawip_accept(sock_lower_handle_t lproto_handle, 5824 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 5825 cred_t *cr) 5826 { 5827 return (EOPNOTSUPP); 5828 } 5829 5830 /* ARGSUSED */ 5831 int 5832 rawip_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 5833 socklen_t len, cred_t *cr) 5834 { 5835 conn_t *connp = (conn_t *)proto_handle; 5836 int error; 5837 5838 /* All Solaris components should pass a cred for this operation. 
*/ 5839 ASSERT(cr != NULL); 5840 5841 /* Binding to a NULL address really means unbind */ 5842 if (sa == NULL) 5843 error = rawip_do_unbind(connp); 5844 else 5845 error = rawip_do_bind(connp, sa, len); 5846 5847 if (error < 0) { 5848 if (error == -TOUTSTATE) 5849 error = EINVAL; 5850 else 5851 error = proto_tlitosyserr(-error); 5852 } 5853 return (error); 5854 } 5855 5856 static int 5857 rawip_implicit_bind(conn_t *connp) 5858 { 5859 sin6_t sin6addr; 5860 sin_t *sin; 5861 sin6_t *sin6; 5862 socklen_t len; 5863 int error; 5864 5865 if (connp->conn_icmp->icmp_family == AF_INET) { 5866 len = sizeof (struct sockaddr_in); 5867 sin = (sin_t *)&sin6addr; 5868 *sin = sin_null; 5869 sin->sin_family = AF_INET; 5870 sin->sin_addr.s_addr = INADDR_ANY; 5871 } else { 5872 ASSERT(connp->conn_icmp->icmp_family == AF_INET6); 5873 len = sizeof (sin6_t); 5874 sin6 = (sin6_t *)&sin6addr; 5875 *sin6 = sin6_null; 5876 sin6->sin6_family = AF_INET6; 5877 V6_SET_ZERO(sin6->sin6_addr); 5878 } 5879 5880 error = rawip_do_bind(connp, (struct sockaddr *)&sin6addr, len); 5881 5882 return ((error < 0) ? proto_tlitosyserr(-error) : error); 5883 } 5884 5885 static int 5886 rawip_unbind(conn_t *connp) 5887 { 5888 int error; 5889 5890 error = rawip_do_unbind(connp); 5891 if (error < 0) { 5892 error = proto_tlitosyserr(-error); 5893 } 5894 return (error); 5895 } 5896 5897 /* ARGSUSED */ 5898 int 5899 rawip_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 5900 { 5901 return (EOPNOTSUPP); 5902 } 5903 5904 /* ARGSUSED */ 5905 int 5906 rawip_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 5907 socklen_t len, sock_connid_t *id, cred_t *cr) 5908 { 5909 conn_t *connp = (conn_t *)proto_handle; 5910 icmp_t *icmp = connp->conn_icmp; 5911 int error; 5912 boolean_t did_bind = B_FALSE; 5913 5914 /* All Solaris components should pass a cred for this operation. 
*/ 5915 ASSERT(cr != NULL); 5916 5917 if (sa == NULL) { 5918 /* 5919 * Disconnect 5920 * Make sure we are connected 5921 */ 5922 if (icmp->icmp_state != TS_DATA_XFER) 5923 return (EINVAL); 5924 5925 error = icmp_disconnect(connp); 5926 return (error); 5927 } 5928 5929 error = proto_verify_ip_addr(icmp->icmp_family, sa, len); 5930 if (error != 0) 5931 return (error); 5932 5933 /* do an implicit bind if necessary */ 5934 if (icmp->icmp_state == TS_UNBND) { 5935 error = rawip_implicit_bind(connp); 5936 /* 5937 * We could be racing with an actual bind, in which case 5938 * we would see EPROTO. We cross our fingers and try 5939 * to connect. 5940 */ 5941 if (!(error == 0 || error == EPROTO)) 5942 return (error); 5943 did_bind = B_TRUE; 5944 } 5945 5946 /* 5947 * set SO_DGRAM_ERRIND 5948 */ 5949 icmp->icmp_dgram_errind = B_TRUE; 5950 5951 error = rawip_do_connect(connp, sa, len, cr); 5952 5953 if (error != 0 && did_bind) { 5954 int unbind_err; 5955 5956 unbind_err = rawip_unbind(connp); 5957 ASSERT(unbind_err == 0); 5958 } 5959 5960 if (error == 0) { 5961 *id = 0; 5962 (*connp->conn_upcalls->su_connected) 5963 (connp->conn_upper_handle, 0, NULL, -1); 5964 } else if (error < 0) { 5965 error = proto_tlitosyserr(-error); 5966 } 5967 return (error); 5968 } 5969 5970 /* ARGSUSED */ 5971 void 5972 rawip_fallback(sock_lower_handle_t proto_handle, queue_t *q, 5973 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb) 5974 { 5975 conn_t *connp = (conn_t *)proto_handle; 5976 icmp_t *icmp; 5977 struct T_capability_ack tca; 5978 struct sockaddr_in6 laddr, faddr; 5979 socklen_t laddrlen, faddrlen; 5980 short opts; 5981 struct stroptions *stropt; 5982 mblk_t *stropt_mp; 5983 int error; 5984 5985 icmp = connp->conn_icmp; 5986 5987 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 5988 5989 /* 5990 * setup the fallback stream that was allocated 5991 */ 5992 connp->conn_dev = (dev_t)RD(q)->q_ptr; 5993 connp->conn_minor_arena = WR(q)->q_ptr; 5994 5995 
RD(q)->q_ptr = WR(q)->q_ptr = connp; 5996 5997 WR(q)->q_qinfo = &icmpwinit; 5998 5999 connp->conn_rq = RD(q); 6000 connp->conn_wq = WR(q); 6001 6002 /* Notify stream head about options before sending up data */ 6003 stropt_mp->b_datap->db_type = M_SETOPTS; 6004 stropt_mp->b_wptr += sizeof (*stropt); 6005 stropt = (struct stroptions *)stropt_mp->b_rptr; 6006 stropt->so_flags = SO_WROFF | SO_HIWAT; 6007 stropt->so_wroff = 6008 (ushort_t)(icmp->icmp_max_hdr_len + icmp->icmp_is->is_wroff_extra); 6009 stropt->so_hiwat = icmp->icmp_recv_hiwat; 6010 putnext(RD(q), stropt_mp); 6011 6012 /* 6013 * free helper stream 6014 */ 6015 ip_free_helper_stream(connp); 6016 6017 /* 6018 * Collect the information needed to sync with the sonode 6019 */ 6020 icmp_do_capability_ack(icmp, &tca, TC1_INFO); 6021 6022 laddrlen = faddrlen = sizeof (sin6_t); 6023 (void) rawip_getsockname((sock_lower_handle_t)connp, 6024 (struct sockaddr *)&laddr, &laddrlen, CRED()); 6025 error = rawip_getpeername((sock_lower_handle_t)connp, 6026 (struct sockaddr *)&faddr, &faddrlen, CRED()); 6027 if (error != 0) 6028 faddrlen = 0; 6029 opts = 0; 6030 if (icmp->icmp_dgram_errind) 6031 opts |= SO_DGRAM_ERRIND; 6032 if (icmp->icmp_dontroute) 6033 opts |= SO_DONTROUTE; 6034 6035 /* 6036 * Once we grab the drain lock, no data will be send up 6037 * to the socket. So we notify the socket that the endpoint 6038 * is quiescent and it's therefore safe move data from 6039 * the socket to the stream head. 
6040 */ 6041 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 6042 (struct sockaddr *)&laddr, laddrlen, 6043 (struct sockaddr *)&faddr, faddrlen, opts); 6044 6045 /* 6046 * push up any packets that were queued in icmp_t 6047 */ 6048 6049 mutex_enter(&icmp->icmp_recv_lock); 6050 while (icmp->icmp_fallback_queue_head != NULL) { 6051 mblk_t *mp; 6052 6053 mp = icmp->icmp_fallback_queue_head; 6054 icmp->icmp_fallback_queue_head = mp->b_next; 6055 mp->b_next = NULL; 6056 mutex_exit(&icmp->icmp_recv_lock); 6057 putnext(RD(q), mp); 6058 mutex_enter(&icmp->icmp_recv_lock); 6059 } 6060 icmp->icmp_fallback_queue_tail = icmp->icmp_fallback_queue_head; 6061 /* 6062 * No longer a streams less socket 6063 */ 6064 connp->conn_flags &= ~IPCL_NONSTR; 6065 mutex_exit(&icmp->icmp_recv_lock); 6066 ASSERT(icmp->icmp_fallback_queue_head == NULL && 6067 icmp->icmp_fallback_queue_tail == NULL); 6068 6069 ASSERT(connp->conn_ref >= 1); 6070 } 6071 6072 /* ARGSUSED */ 6073 sock_lower_handle_t 6074 rawip_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 6075 uint_t *smodep, int *errorp, int flags, cred_t *credp) 6076 { 6077 conn_t *connp; 6078 6079 if (type != SOCK_RAW || (family != AF_INET && family != AF_INET6)) { 6080 *errorp = EPROTONOSUPPORT; 6081 return (NULL); 6082 } 6083 6084 connp = icmp_open(family, credp, errorp, flags); 6085 if (connp != NULL) { 6086 icmp_stack_t *is; 6087 6088 is = connp->conn_icmp->icmp_is; 6089 connp->conn_flags |= IPCL_NONSTR; 6090 6091 if (connp->conn_icmp->icmp_family == AF_INET6) { 6092 /* Build initial header template for transmit */ 6093 rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER); 6094 if ((*errorp = 6095 icmp_build_hdrs(connp->conn_icmp)) != 0) { 6096 rw_exit(&connp->conn_icmp->icmp_rwlock); 6097 ipcl_conn_destroy(connp); 6098 return (NULL); 6099 } 6100 rw_exit(&connp->conn_icmp->icmp_rwlock); 6101 } 6102 6103 connp->conn_icmp->icmp_recv_hiwat = is->is_recv_hiwat; 6104 connp->conn_icmp->icmp_xmit_hiwat = 
is->is_xmit_hiwat; 6105 6106 if ((*errorp = ip_create_helper_stream(connp, 6107 is->is_ldi_ident)) != 0) { 6108 cmn_err(CE_CONT, "create of IP helper stream failed\n"); 6109 (void) rawip_do_close(connp); 6110 return (NULL); 6111 } 6112 6113 mutex_enter(&connp->conn_lock); 6114 connp->conn_state_flags &= ~CONN_INCIPIENT; 6115 mutex_exit(&connp->conn_lock); 6116 *sock_downcalls = &sock_rawip_downcalls; 6117 *smodep = SM_ATOMIC; 6118 } else { 6119 ASSERT(*errorp != 0); 6120 } 6121 6122 return ((sock_lower_handle_t)connp); 6123 } 6124 6125 /* ARGSUSED */ 6126 void 6127 rawip_activate(sock_lower_handle_t proto_handle, 6128 sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, int flags, 6129 cred_t *cr) 6130 { 6131 conn_t *connp = (conn_t *)proto_handle; 6132 icmp_stack_t *is = connp->conn_icmp->icmp_is; 6133 struct sock_proto_props sopp; 6134 6135 /* All Solaris components should pass a cred for this operation. */ 6136 ASSERT(cr != NULL); 6137 6138 connp->conn_upcalls = sock_upcalls; 6139 connp->conn_upper_handle = sock_handle; 6140 6141 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 6142 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 6143 sopp.sopp_wroff = connp->conn_icmp->icmp_max_hdr_len + 6144 is->is_wroff_extra; 6145 sopp.sopp_rxhiwat = is->is_recv_hiwat; 6146 sopp.sopp_rxlowat = icmp_mod_info.mi_lowat; 6147 sopp.sopp_maxblk = INFPSZ; 6148 sopp.sopp_maxpsz = IP_MAXPACKET; 6149 sopp.sopp_minpsz = (icmp_mod_info.mi_minpsz == 1) ? 
0 : 6150 icmp_mod_info.mi_minpsz; 6151 6152 (*connp->conn_upcalls->su_set_proto_props) 6153 (connp->conn_upper_handle, &sopp); 6154 } 6155 6156 static int 6157 rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp) 6158 { 6159 sin_t *sin = (sin_t *)sa; 6160 sin6_t *sin6 = (sin6_t *)sa; 6161 6162 ASSERT(icmp != NULL); 6163 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 6164 6165 switch (icmp->icmp_family) { 6166 case AF_INET: 6167 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 6168 if (*salenp < sizeof (sin_t)) 6169 return (EINVAL); 6170 6171 *salenp = sizeof (sin_t); 6172 *sin = sin_null; 6173 sin->sin_family = AF_INET; 6174 if (icmp->icmp_state == TS_UNBND) { 6175 break; 6176 } 6177 6178 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 6179 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 6180 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_v6src); 6181 } else { 6182 /* 6183 * INADDR_ANY 6184 * icmp_v6src is not set, we might be bound to 6185 * broadcast/multicast. Use icmp_bound_v6src as 6186 * local address instead (that could 6187 * also still be INADDR_ANY) 6188 */ 6189 sin->sin_addr.s_addr = 6190 V4_PART_OF_V6(icmp->icmp_bound_v6src); 6191 } 6192 break; 6193 case AF_INET6: 6194 6195 if (*salenp < sizeof (sin6_t)) 6196 return (EINVAL); 6197 6198 *salenp = sizeof (sin6_t); 6199 *sin6 = sin6_null; 6200 sin6->sin6_family = AF_INET6; 6201 if (icmp->icmp_state == TS_UNBND) { 6202 break; 6203 } 6204 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 6205 sin6->sin6_addr = icmp->icmp_v6src; 6206 } else { 6207 /* 6208 * UNSPECIFIED 6209 * icmp_v6src is not set, we might be bound to 6210 * broadcast/multicast. 
Use icmp_bound_v6src as 6211 * local address instead (that could 6212 * also still be UNSPECIFIED) 6213 */ 6214 6215 sin6->sin6_addr = icmp->icmp_bound_v6src; 6216 } 6217 break; 6218 } 6219 return (0); 6220 } 6221 6222 static int 6223 rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp) 6224 { 6225 sin_t *sin = (sin_t *)sa; 6226 sin6_t *sin6 = (sin6_t *)sa; 6227 6228 ASSERT(icmp != NULL); 6229 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 6230 6231 if (icmp->icmp_state != TS_DATA_XFER) 6232 return (ENOTCONN); 6233 6234 sa->sa_family = icmp->icmp_family; 6235 switch (icmp->icmp_family) { 6236 case AF_INET: 6237 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 6238 6239 if (*salenp < sizeof (sin_t)) 6240 return (EINVAL); 6241 6242 *salenp = sizeof (sin_t); 6243 *sin = sin_null; 6244 sin->sin_family = AF_INET; 6245 sin->sin_addr.s_addr = 6246 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr); 6247 break; 6248 case AF_INET6: 6249 if (*salenp < sizeof (sin6_t)) 6250 return (EINVAL); 6251 6252 *salenp = sizeof (sin6_t); 6253 *sin6 = sin6_null; 6254 *sin6 = icmp->icmp_v6dst; 6255 break; 6256 } 6257 return (0); 6258 } 6259 6260 /* ARGSUSED */ 6261 int 6262 rawip_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6263 socklen_t *salenp, cred_t *cr) 6264 { 6265 conn_t *connp = (conn_t *)proto_handle; 6266 icmp_t *icmp = connp->conn_icmp; 6267 int error; 6268 6269 /* All Solaris components should pass a cred for this operation. 
*/ 6270 ASSERT(cr != NULL); 6271 6272 ASSERT(icmp != NULL); 6273 6274 rw_enter(&icmp->icmp_rwlock, RW_READER); 6275 6276 error = rawip_do_getpeername(icmp, sa, salenp); 6277 6278 rw_exit(&icmp->icmp_rwlock); 6279 6280 return (error); 6281 } 6282 6283 /* ARGSUSED */ 6284 int 6285 rawip_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6286 socklen_t *salenp, cred_t *cr) 6287 { 6288 conn_t *connp = (conn_t *)proto_handle; 6289 icmp_t *icmp = connp->conn_icmp; 6290 int error; 6291 6292 /* All Solaris components should pass a cred for this operation. */ 6293 ASSERT(cr != NULL); 6294 6295 ASSERT(icmp != NULL); 6296 rw_enter(&icmp->icmp_rwlock, RW_READER); 6297 6298 error = rawip_do_getsockname(icmp, sa, salenp); 6299 6300 rw_exit(&icmp->icmp_rwlock); 6301 6302 return (error); 6303 } 6304 6305 int 6306 rawip_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6307 const void *optvalp, socklen_t optlen, cred_t *cr) 6308 { 6309 conn_t *connp = (conn_t *)proto_handle; 6310 icmp_t *icmp = connp->conn_icmp; 6311 int error; 6312 6313 /* All Solaris components should pass a cred for this operation. 
*/ 6314 ASSERT(cr != NULL); 6315 6316 error = proto_opt_check(level, option_name, optlen, NULL, 6317 icmp_opt_obj.odb_opt_des_arr, 6318 icmp_opt_obj.odb_opt_arr_cnt, 6319 icmp_opt_obj.odb_topmost_tpiprovider, 6320 B_TRUE, B_FALSE, cr); 6321 6322 if (error != 0) { 6323 /* 6324 * option not recognized 6325 */ 6326 if (error < 0) { 6327 error = proto_tlitosyserr(-error); 6328 } 6329 return (error); 6330 } 6331 6332 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6333 error = icmp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, 6334 option_name, optlen, (uchar_t *)optvalp, (uint_t *)&optlen, 6335 (uchar_t *)optvalp, NULL, cr); 6336 rw_exit(&icmp->icmp_rwlock); 6337 6338 if (error < 0) { 6339 /* 6340 * Pass on to ip 6341 */ 6342 error = ip_set_options(connp, level, option_name, optvalp, 6343 optlen, cr); 6344 } 6345 6346 ASSERT(error >= 0); 6347 6348 return (error); 6349 } 6350 6351 int 6352 rawip_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6353 void *optvalp, socklen_t *optlen, cred_t *cr) 6354 { 6355 int error; 6356 conn_t *connp = (conn_t *)proto_handle; 6357 icmp_t *icmp = connp->conn_icmp; 6358 t_uscalar_t max_optbuf_len; 6359 void *optvalp_buf; 6360 int len; 6361 6362 /* All Solaris components should pass a cred for this operation. 
*/ 6363 ASSERT(cr != NULL); 6364 6365 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6366 icmp_opt_obj.odb_opt_des_arr, 6367 icmp_opt_obj.odb_opt_arr_cnt, 6368 icmp_opt_obj.odb_topmost_tpiprovider, 6369 B_FALSE, B_TRUE, cr); 6370 6371 if (error != 0) { 6372 if (error < 0) { 6373 error = proto_tlitosyserr(-error); 6374 } 6375 return (error); 6376 } 6377 6378 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6379 rw_enter(&icmp->icmp_rwlock, RW_READER); 6380 len = icmp_opt_get(connp, level, option_name, optvalp_buf); 6381 rw_exit(&icmp->icmp_rwlock); 6382 6383 if (len < 0) { 6384 /* 6385 * Pass on to IP 6386 */ 6387 kmem_free(optvalp_buf, max_optbuf_len); 6388 return (ip_get_options(connp, level, option_name, optvalp, 6389 optlen, cr)); 6390 } else { 6391 /* 6392 * update optlen and copy option value 6393 */ 6394 t_uscalar_t size = MIN(len, *optlen); 6395 bcopy(optvalp_buf, optvalp, size); 6396 bcopy(&size, optlen, sizeof (size)); 6397 6398 kmem_free(optvalp_buf, max_optbuf_len); 6399 return (0); 6400 } 6401 } 6402 6403 /* ARGSUSED */ 6404 int 6405 rawip_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 6406 { 6407 conn_t *connp = (conn_t *)proto_handle; 6408 6409 /* All Solaris components should pass a cred for this operation. */ 6410 ASSERT(cr != NULL); 6411 6412 (void) rawip_do_close(connp); 6413 return (0); 6414 } 6415 6416 /* ARGSUSED */ 6417 int 6418 rawip_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6419 { 6420 conn_t *connp = (conn_t *)proto_handle; 6421 6422 /* All Solaris components should pass a cred for this operation. 
*/ 6423 ASSERT(cr != NULL); 6424 6425 /* shut down the send side */ 6426 if (how != SHUT_RD) 6427 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6428 SOCK_OPCTL_SHUT_SEND, 0); 6429 /* shut down the recv side */ 6430 if (how != SHUT_WR) 6431 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6432 SOCK_OPCTL_SHUT_RECV, 0); 6433 return (0); 6434 } 6435 6436 void 6437 rawip_clr_flowctrl(sock_lower_handle_t proto_handle) 6438 { 6439 conn_t *connp = (conn_t *)proto_handle; 6440 icmp_t *icmp = connp->conn_icmp; 6441 6442 mutex_enter(&icmp->icmp_recv_lock); 6443 connp->conn_flow_cntrld = B_FALSE; 6444 mutex_exit(&icmp->icmp_recv_lock); 6445 } 6446 6447 int 6448 rawip_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6449 int mode, int32_t *rvalp, cred_t *cr) 6450 { 6451 conn_t *connp = (conn_t *)proto_handle; 6452 int error; 6453 6454 /* All Solaris components should pass a cred for this operation. */ 6455 ASSERT(cr != NULL); 6456 6457 switch (cmd) { 6458 case ND_SET: 6459 case ND_GET: 6460 case _SIOCSOCKFALLBACK: 6461 case TI_GETPEERNAME: 6462 case TI_GETMYNAME: 6463 #ifdef DEBUG 6464 cmn_err(CE_CONT, "icmp_ioctl cmd 0x%x on non streams" 6465 " socket", cmd); 6466 #endif 6467 error = EINVAL; 6468 break; 6469 default: 6470 /* 6471 * Pass on to IP using helper stream 6472 */ 6473 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6474 cmd, arg, mode, cr, rvalp); 6475 break; 6476 } 6477 return (error); 6478 } 6479 6480 /* ARGSUSED */ 6481 int 6482 rawip_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 6483 cred_t *cr) 6484 { 6485 conn_t *connp = (conn_t *)proto_handle; 6486 icmp_t *icmp = connp->conn_icmp; 6487 icmp_stack_t *is = icmp->icmp_is; 6488 int error = 0; 6489 boolean_t bypass_dgram_errind = B_FALSE; 6490 6491 ASSERT(DB_TYPE(mp) == M_DATA); 6492 6493 /* All Solaris components should pass a cred for this operation. 
*/ 6494 ASSERT(cr != NULL); 6495 6496 /* If labeled then sockfs should have already set db_credp */ 6497 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 6498 6499 /* do an implicit bind if necessary */ 6500 if (icmp->icmp_state == TS_UNBND) { 6501 error = rawip_implicit_bind(connp); 6502 /* 6503 * We could be racing with an actual bind, in which case 6504 * we would see EPROTO. We cross our fingers and try 6505 * to connect. 6506 */ 6507 if (!(error == 0 || error == EPROTO)) { 6508 freemsg(mp); 6509 return (error); 6510 } 6511 } 6512 6513 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6514 6515 if (msg->msg_name != NULL && icmp->icmp_state == TS_DATA_XFER) { 6516 error = EISCONN; 6517 goto done_lock; 6518 } 6519 6520 switch (icmp->icmp_family) { 6521 case AF_INET6: { 6522 sin6_t *sin6; 6523 ip6_pkt_t ipp_s; /* For ancillary data options */ 6524 ip6_pkt_t *ipp = &ipp_s; 6525 6526 sin6 = (sin6_t *)msg->msg_name; 6527 if (sin6 != NULL) { 6528 error = proto_verify_ip_addr(icmp->icmp_family, 6529 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6530 if (error != 0) { 6531 bypass_dgram_errind = B_TRUE; 6532 goto done_lock; 6533 } 6534 if (icmp->icmp_delayed_error != 0) { 6535 sin6_t *sin1 = (sin6_t *)msg->msg_name; 6536 sin6_t *sin2 = (sin6_t *) 6537 &icmp->icmp_delayed_addr; 6538 6539 error = icmp->icmp_delayed_error; 6540 icmp->icmp_delayed_error = 0; 6541 6542 /* Compare IP address and port */ 6543 6544 if (sin1->sin6_port == sin2->sin6_port && 6545 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 6546 &sin2->sin6_addr)) { 6547 goto done_lock; 6548 } 6549 } 6550 } else { 6551 /* 6552 * Use connected address 6553 */ 6554 if (icmp->icmp_state != TS_DATA_XFER) { 6555 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6556 error = EDESTADDRREQ; 6557 bypass_dgram_errind = B_TRUE; 6558 goto done_lock; 6559 } 6560 sin6 = &icmp->icmp_v6dst; 6561 } 6562 6563 /* No support for mapped addresses on raw sockets */ 6564 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6565 
BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6566 error = EADDRNOTAVAIL; 6567 goto done_lock; 6568 } 6569 6570 ipp->ipp_fields = 0; 6571 ipp->ipp_sticky_ignored = 0; 6572 6573 /* 6574 * If options passed in, feed it for verification and handling 6575 */ 6576 if (msg->msg_controllen != 0) { 6577 error = process_auxiliary_options(connp, 6578 msg->msg_control, msg->msg_controllen, 6579 ipp, &icmp_opt_obj, icmp_opt_set, cr); 6580 if (error != 0) { 6581 goto done_lock; 6582 } 6583 } 6584 6585 rw_exit(&icmp->icmp_rwlock); 6586 6587 /* 6588 * Destination is a native IPv6 address. 6589 * Send out an IPv6 format packet. 6590 */ 6591 6592 error = raw_ip_send_data_v6(connp->conn_wq, connp, mp, sin6, 6593 ipp); 6594 } 6595 break; 6596 case AF_INET: { 6597 sin_t *sin; 6598 ip4_pkt_t pktinfo; 6599 ip4_pkt_t *pktinfop = &pktinfo; 6600 ipaddr_t v4dst; 6601 6602 sin = (sin_t *)msg->msg_name; 6603 if (sin != NULL) { 6604 error = proto_verify_ip_addr(icmp->icmp_family, 6605 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6606 if (error != 0) { 6607 bypass_dgram_errind = B_TRUE; 6608 goto done_lock; 6609 } 6610 v4dst = sin->sin_addr.s_addr; 6611 if (icmp->icmp_delayed_error != 0) { 6612 sin_t *sin1 = (sin_t *)msg->msg_name; 6613 sin_t *sin2 = (sin_t *)&icmp->icmp_delayed_addr; 6614 6615 error = icmp->icmp_delayed_error; 6616 icmp->icmp_delayed_error = 0; 6617 6618 /* Compare IP address and port */ 6619 if (sin1->sin_port == sin2->sin_port && 6620 sin1->sin_addr.s_addr == 6621 sin2->sin_addr.s_addr) { 6622 goto done_lock; 6623 } 6624 6625 } 6626 } else { 6627 /* 6628 * Use connected address 6629 */ 6630 if (icmp->icmp_state != TS_DATA_XFER) { 6631 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6632 error = EDESTADDRREQ; 6633 bypass_dgram_errind = B_TRUE; 6634 goto done_lock; 6635 } 6636 v4dst = V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr); 6637 } 6638 6639 6640 pktinfop->ip4_ill_index = 0; 6641 pktinfop->ip4_addr = INADDR_ANY; 6642 6643 /* 6644 * If options passed in, feed it for 
verification and handling 6645 */ 6646 if (msg->msg_controllen != 0) { 6647 error = process_auxiliary_options(connp, 6648 msg->msg_control, msg->msg_controllen, 6649 pktinfop, &icmp_opt_obj, icmp_opt_set, cr); 6650 if (error != 0) { 6651 goto done_lock; 6652 } 6653 } 6654 rw_exit(&icmp->icmp_rwlock); 6655 6656 error = raw_ip_send_data_v4(connp->conn_wq, connp, mp, 6657 v4dst, pktinfop); 6658 break; 6659 } 6660 6661 default: 6662 ASSERT(0); 6663 } 6664 6665 goto done; 6666 6667 done_lock: 6668 rw_exit(&icmp->icmp_rwlock); 6669 if (error != 0) { 6670 ASSERT(mp != NULL); 6671 freemsg(mp); 6672 } 6673 done: 6674 if (bypass_dgram_errind) 6675 return (error); 6676 return (icmp->icmp_dgram_errind ? error : 0); 6677 } 6678 6679 sock_downcalls_t sock_rawip_downcalls = { 6680 rawip_activate, 6681 rawip_accept, 6682 rawip_bind, 6683 rawip_listen, 6684 rawip_connect, 6685 rawip_getpeername, 6686 rawip_getsockname, 6687 rawip_getsockopt, 6688 rawip_setsockopt, 6689 rawip_send, 6690 NULL, 6691 NULL, 6692 NULL, 6693 rawip_shutdown, 6694 rawip_clr_flowctrl, 6695 rawip_ioctl, 6696 rawip_close 6697 }; 6698