1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/stropts.h> 30 #include <sys/strlog.h> 31 #include <sys/strsun.h> 32 #define _SUN_TPI_VERSION 2 33 #include <sys/tihdr.h> 34 #include <sys/timod.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/debug.h> 40 #include <sys/kmem.h> 41 #include <sys/policy.h> 42 #include <sys/priv.h> 43 #include <sys/zone.h> 44 #include <sys/time.h> 45 46 #include <sys/sockio.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/isa_defs.h> 50 #include <sys/suntpi.h> 51 #include <sys/xti_inet.h> 52 #include <sys/netstack.h> 53 54 #include <net/route.h> 55 #include <net/if.h> 56 57 #include <netinet/in.h> 58 #include <netinet/ip6.h> 59 #include <netinet/icmp6.h> 60 #include <inet/common.h> 61 #include <inet/ip.h> 62 #include <inet/ip6.h> 63 #include <inet/proto_set.h> 64 #include <inet/nd.h> 65 #include <inet/optcom.h> 66 #include <inet/snmpcom.h> 67 #include <inet/kstatcom.h> 68 #include <inet/rawip_impl.h> 69 70 #include <netinet/ip_mroute.h> 71 #include <inet/tcp.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 #include <inet/ipclassifier.h> 75 76 #include <sys/tsol/label.h> 77 #include <sys/tsol/tnet.h> 78 79 #include <inet/ip_ire.h> 80 #include <inet/ip_if.h> 81 82 #include <inet/ip_impl.h> 83 #include <sys/disp.h> 84 85 /* 86 * Synchronization notes: 87 * 88 * RAWIP is MT and uses the usual kernel synchronization primitives. There is 89 * one lock, which is icmp_rwlock. We also use conn_lock when updating things 90 * which affect the IP classifier lookup. 91 * The lock order is icmp_rwlock -> conn_lock. 92 * 93 * The icmp_rwlock: 94 * This protects most of the other fields in the icmp_t. The exact list of 95 * fields which are protected by each of the above locks is documented in 96 * the icmp_t structure definition. 97 * 98 * Plumbing notes: 99 * ICMP is always a device driver. 
For compatibility with mibopen() code 100 * it is possible to I_PUSH "icmp", but that results in pushing a passthrough 101 * dummy module. 102 */ 103 104 static void icmp_addr_req(queue_t *q, mblk_t *mp); 105 static void icmp_tpi_bind(queue_t *q, mblk_t *mp); 106 static int icmp_bind_proto(conn_t *connp); 107 static int icmp_build_hdrs(icmp_t *icmp); 108 static void icmp_capability_req(queue_t *q, mblk_t *mp); 109 static int icmp_close(queue_t *q, int flags); 110 static void icmp_tpi_connect(queue_t *q, mblk_t *mp); 111 static void icmp_tpi_disconnect(queue_t *q, mblk_t *mp); 112 static void icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 113 int sys_error); 114 static void icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 115 t_scalar_t t_error, int sys_error); 116 static void icmp_icmp_error(conn_t *connp, mblk_t *mp); 117 static void icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp); 118 static void icmp_info_req(queue_t *q, mblk_t *mp); 119 static void icmp_input(void *, mblk_t *, void *); 120 static conn_t *icmp_open(int family, cred_t *credp, int *err, int flags); 121 static int icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 122 cred_t *credp); 123 static int icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 124 cred_t *credp); 125 static int icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, 126 int *errorp, void *thisdg_attrs); 127 static boolean_t icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 128 int icmp_opt_set(conn_t *connp, uint_t optset_context, 129 int level, int name, uint_t inlen, 130 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 131 void *thisdg_attrs, cred_t *cr); 132 int icmp_opt_get(conn_t *connp, int level, int name, 133 uchar_t *ptr); 134 static int icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 135 static boolean_t icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt); 136 static int icmp_param_set(queue_t *q, mblk_t *mp, char *value, 137 caddr_t cp, cred_t 
*cr); 138 static int icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 139 uchar_t *ptr, int len); 140 static void icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 141 static void icmp_tpi_unbind(queue_t *q, mblk_t *mp); 142 static int icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst); 143 static void icmp_wput(queue_t *q, mblk_t *mp); 144 static void icmp_wput_fallback(queue_t *q, mblk_t *mp); 145 static int raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp, 146 sin6_t *sin6, ip6_pkt_t *ipp); 147 static int raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp, 148 ipaddr_t v4dst, ip4_pkt_t *pktinfop); 149 static void icmp_wput_other(queue_t *q, mblk_t *mp); 150 static void icmp_wput_iocdata(queue_t *q, mblk_t *mp); 151 static void icmp_wput_restricted(queue_t *q, mblk_t *mp); 152 static void icmp_ulp_recv(conn_t *, mblk_t *); 153 154 static void *rawip_stack_init(netstackid_t stackid, netstack_t *ns); 155 static void rawip_stack_fini(netstackid_t stackid, void *arg); 156 157 static void *rawip_kstat_init(netstackid_t stackid); 158 static void rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp); 159 static int rawip_kstat_update(kstat_t *kp, int rw); 160 static void rawip_stack_shutdown(netstackid_t stackid, void *arg); 161 static int rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa, 162 uint_t *salenp); 163 static int rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa, 164 uint_t *salenp); 165 166 int rawip_getsockname(sock_lower_handle_t, struct sockaddr *, 167 socklen_t *, cred_t *); 168 int rawip_getpeername(sock_lower_handle_t, struct sockaddr *, 169 socklen_t *, cred_t *); 170 171 static struct module_info icmp_mod_info = { 172 5707, "icmp", 1, INFPSZ, 512, 128 173 }; 174 175 /* 176 * Entry points for ICMP as a device. 177 * We have separate open functions for the /dev/icmp and /dev/icmp6 devices. 
178 */ 179 static struct qinit icmprinitv4 = { 180 NULL, NULL, icmp_openv4, icmp_close, NULL, &icmp_mod_info 181 }; 182 183 static struct qinit icmprinitv6 = { 184 NULL, NULL, icmp_openv6, icmp_close, NULL, &icmp_mod_info 185 }; 186 187 static struct qinit icmpwinit = { 188 (pfi_t)icmp_wput, NULL, NULL, NULL, NULL, &icmp_mod_info 189 }; 190 191 /* ICMP entry point during fallback */ 192 static struct qinit icmp_fallback_sock_winit = { 193 (pfi_t)icmp_wput_fallback, NULL, NULL, NULL, NULL, &icmp_mod_info 194 }; 195 196 /* For AF_INET aka /dev/icmp */ 197 struct streamtab icmpinfov4 = { 198 &icmprinitv4, &icmpwinit 199 }; 200 201 /* For AF_INET6 aka /dev/icmp6 */ 202 struct streamtab icmpinfov6 = { 203 &icmprinitv6, &icmpwinit 204 }; 205 206 static sin_t sin_null; /* Zero address for quick clears */ 207 static sin6_t sin6_null; /* Zero address for quick clears */ 208 209 /* Default structure copied into T_INFO_ACK messages */ 210 static struct T_info_ack icmp_g_t_info_ack = { 211 T_INFO_ACK, 212 IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */ 213 T_INVALID, /* ETSDU_size. icmp does not support expedited data. */ 214 T_INVALID, /* CDATA_size. icmp does not support connect data. */ 215 T_INVALID, /* DDATA_size. icmp does not support disconnect data. */ 216 0, /* ADDR_size - filled in later. */ 217 0, /* OPT_size - not initialized here */ 218 IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */ 219 T_CLTS, /* SERV_type. icmp supports connection-less. */ 220 TS_UNBND, /* CURRENT_state. This is set from icmp_state. */ 221 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 222 }; 223 224 /* 225 * Table of ND variables supported by icmp. These are loaded into is_nd 226 * when the stack instance is created. 227 * All of these are alterable, within the min/max values given, at run time. 
228 */ 229 static icmpparam_t icmp_param_arr[] = { 230 /* min max value name */ 231 { 0, 128, 32, "icmp_wroff_extra" }, 232 { 1, 255, 255, "icmp_ipv4_ttl" }, 233 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "icmp_ipv6_hoplimit"}, 234 { 0, 1, 1, "icmp_bsd_compat" }, 235 { 4096, 65536, 8192, "icmp_xmit_hiwat"}, 236 { 0, 65536, 1024, "icmp_xmit_lowat"}, 237 { 4096, 65536, 8192, "icmp_recv_hiwat"}, 238 { 65536, 1024*1024*1024, 256*1024, "icmp_max_buf"}, 239 }; 240 #define is_wroff_extra is_param_arr[0].icmp_param_value 241 #define is_ipv4_ttl is_param_arr[1].icmp_param_value 242 #define is_ipv6_hoplimit is_param_arr[2].icmp_param_value 243 #define is_bsd_compat is_param_arr[3].icmp_param_value 244 #define is_xmit_hiwat is_param_arr[4].icmp_param_value 245 #define is_xmit_lowat is_param_arr[5].icmp_param_value 246 #define is_recv_hiwat is_param_arr[6].icmp_param_value 247 #define is_max_buf is_param_arr[7].icmp_param_value 248 249 static int rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len); 250 static int rawip_do_connect(conn_t *connp, const struct sockaddr *sa, 251 socklen_t len, cred_t *cr); 252 static void rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error); 253 254 /* 255 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 256 * passed to icmp_wput. 257 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the ICMP 258 * protocol type placed in the message following the address. A T_BIND_ACK 259 * message is returned by ip_bind_v4/v6. 260 */ 261 static void 262 icmp_tpi_bind(queue_t *q, mblk_t *mp) 263 { 264 int error; 265 struct sockaddr *sa; 266 struct T_bind_req *tbr; 267 socklen_t len; 268 sin_t *sin; 269 sin6_t *sin6; 270 icmp_t *icmp; 271 conn_t *connp = Q_TO_CONN(q); 272 mblk_t *mp1; 273 cred_t *cr; 274 275 /* 276 * All Solaris components should pass a db_credp 277 * for this TPI message, hence we ASSERT. 
278 * But in case there is some other M_PROTO that looks 279 * like a TPI message sent by some other kernel 280 * component, we check and return an error. 281 */ 282 cr = msg_getcred(mp, NULL); 283 ASSERT(cr != NULL); 284 if (cr == NULL) { 285 icmp_err_ack(q, mp, TSYSERR, EINVAL); 286 return; 287 } 288 289 icmp = connp->conn_icmp; 290 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 291 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 292 "icmp_bind: bad req, len %u", 293 (uint_t)(mp->b_wptr - mp->b_rptr)); 294 icmp_err_ack(q, mp, TPROTO, 0); 295 return; 296 } 297 298 if (icmp->icmp_state != TS_UNBND) { 299 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 300 "icmp_bind: bad state, %d", icmp->icmp_state); 301 icmp_err_ack(q, mp, TOUTSTATE, 0); 302 return; 303 } 304 305 /* 306 * Reallocate the message to make sure we have enough room for an 307 * address and the protocol type. 308 */ 309 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 310 if (!mp1) { 311 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 312 return; 313 } 314 mp = mp1; 315 316 /* Reset the message type in preparation for shipping it back. 
*/ 317 DB_TYPE(mp) = M_PCPROTO; 318 tbr = (struct T_bind_req *)mp->b_rptr; 319 len = tbr->ADDR_length; 320 switch (len) { 321 case 0: /* request for a generic port */ 322 tbr->ADDR_offset = sizeof (struct T_bind_req); 323 if (icmp->icmp_family == AF_INET) { 324 tbr->ADDR_length = sizeof (sin_t); 325 sin = (sin_t *)&tbr[1]; 326 *sin = sin_null; 327 sin->sin_family = AF_INET; 328 mp->b_wptr = (uchar_t *)&sin[1]; 329 sa = (struct sockaddr *)sin; 330 len = sizeof (sin_t); 331 } else { 332 ASSERT(icmp->icmp_family == AF_INET6); 333 tbr->ADDR_length = sizeof (sin6_t); 334 sin6 = (sin6_t *)&tbr[1]; 335 *sin6 = sin6_null; 336 sin6->sin6_family = AF_INET6; 337 mp->b_wptr = (uchar_t *)&sin6[1]; 338 sa = (struct sockaddr *)sin6; 339 len = sizeof (sin6_t); 340 } 341 break; 342 343 case sizeof (sin_t): /* Complete IPv4 address */ 344 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 345 sizeof (sin_t)); 346 break; 347 348 case sizeof (sin6_t): /* Complete IPv6 address */ 349 sa = (struct sockaddr *)mi_offset_param(mp, 350 tbr->ADDR_offset, sizeof (sin6_t)); 351 break; 352 353 default: 354 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 355 "icmp_bind: bad ADDR_length %d", tbr->ADDR_length); 356 icmp_err_ack(q, mp, TBADADDR, 0); 357 return; 358 } 359 360 error = rawip_do_bind(connp, sa, len); 361 done: 362 ASSERT(mp->b_cont == NULL); 363 if (error != 0) { 364 if (error > 0) { 365 icmp_err_ack(q, mp, TSYSERR, error); 366 } else { 367 icmp_err_ack(q, mp, -error, 0); 368 } 369 } else { 370 tbr->PRIM_type = T_BIND_ACK; 371 qreply(q, mp); 372 } 373 } 374 375 static int 376 rawip_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len) 377 { 378 sin_t *sin; 379 sin6_t *sin6; 380 icmp_t *icmp; 381 int error = 0; 382 mblk_t *ire_mp; 383 384 385 icmp = connp->conn_icmp; 386 387 if (sa == NULL || !OK_32PTR((char *)sa)) { 388 return (EINVAL); 389 } 390 391 /* 392 * The state must be TS_UNBND. 
TPI mandates that users must send 393 * TPI primitives only 1 at a time and wait for the response before 394 * sending the next primitive. 395 */ 396 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 397 if (icmp->icmp_state != TS_UNBND || icmp->icmp_pending_op != -1) { 398 error = -TOUTSTATE; 399 goto done; 400 } 401 402 ASSERT(len != 0); 403 switch (len) { 404 case sizeof (sin_t): /* Complete IPv4 address */ 405 sin = (sin_t *)sa; 406 if (sin->sin_family != AF_INET || 407 icmp->icmp_family != AF_INET) { 408 /* TSYSERR, EAFNOSUPPORT */ 409 error = EAFNOSUPPORT; 410 goto done; 411 } 412 break; 413 case sizeof (sin6_t): /* Complete IPv6 address */ 414 sin6 = (sin6_t *)sa; 415 if (sin6->sin6_family != AF_INET6 || 416 icmp->icmp_family != AF_INET6) { 417 /* TSYSERR, EAFNOSUPPORT */ 418 error = EAFNOSUPPORT; 419 goto done; 420 } 421 /* No support for mapped addresses on raw sockets */ 422 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 423 /* TSYSERR, EADDRNOTAVAIL */ 424 error = EADDRNOTAVAIL; 425 goto done; 426 } 427 break; 428 429 default: 430 /* TBADADDR */ 431 error = EADDRNOTAVAIL; 432 goto done; 433 } 434 435 icmp->icmp_pending_op = T_BIND_REQ; 436 icmp->icmp_state = TS_IDLE; 437 438 /* 439 * Copy the source address into our icmp structure. This address 440 * may still be zero; if so, ip will fill in the correct address 441 * each time an outbound packet is passed to it. 442 * If we are binding to a broadcast or multicast address then 443 * rawip_post_ip_bind_connect will clear the source address. 
444 */ 445 446 if (icmp->icmp_family == AF_INET) { 447 ASSERT(sin != NULL); 448 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 449 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, 450 &icmp->icmp_v6src); 451 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 452 icmp->icmp_ip_snd_options_len; 453 icmp->icmp_bound_v6src = icmp->icmp_v6src; 454 } else { 455 int error; 456 457 ASSERT(sin6 != NULL); 458 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 459 icmp->icmp_v6src = sin6->sin6_addr; 460 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 461 icmp->icmp_bound_v6src = icmp->icmp_v6src; 462 463 /* Rebuild the header template */ 464 error = icmp_build_hdrs(icmp); 465 if (error != 0) { 466 icmp->icmp_pending_op = -1; 467 /* 468 * TSYSERR 469 */ 470 goto done; 471 } 472 } 473 474 ire_mp = NULL; 475 if (!(V6_OR_V4_INADDR_ANY(icmp->icmp_v6src))) { 476 /* 477 * request an IRE if src not 0 (INADDR_ANY) 478 */ 479 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 480 if (ire_mp == NULL) { 481 icmp->icmp_pending_op = -1; 482 error = ENOMEM; 483 goto done; 484 } 485 DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE; 486 } 487 done: 488 rw_exit(&icmp->icmp_rwlock); 489 if (error != 0) 490 return (error); 491 492 if (icmp->icmp_family == AF_INET6) { 493 error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto, 494 &sin6->sin6_addr, sin6->sin6_port, B_TRUE); 495 } else { 496 error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto, 497 sin->sin_addr.s_addr, sin->sin_port, B_TRUE); 498 } 499 rawip_post_ip_bind_connect(icmp, ire_mp, error); 500 return (error); 501 } 502 503 static void 504 rawip_post_ip_bind_connect(icmp_t *icmp, mblk_t *ire_mp, int error) 505 { 506 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 507 if (icmp->icmp_state == TS_UNBND) { 508 /* 509 * not yet bound - bind sent by icmp_bind_proto. 
510 */ 511 rw_exit(&icmp->icmp_rwlock); 512 return; 513 } 514 ASSERT(icmp->icmp_pending_op != -1); 515 icmp->icmp_pending_op = -1; 516 517 if (error != 0) { 518 if (icmp->icmp_state == TS_DATA_XFER) { 519 /* Connect failed */ 520 /* Revert back to the bound source */ 521 icmp->icmp_v6src = icmp->icmp_bound_v6src; 522 icmp->icmp_state = TS_IDLE; 523 if (icmp->icmp_family == AF_INET6) 524 (void) icmp_build_hdrs(icmp); 525 } else { 526 V6_SET_ZERO(icmp->icmp_v6src); 527 V6_SET_ZERO(icmp->icmp_bound_v6src); 528 icmp->icmp_state = TS_UNBND; 529 if (icmp->icmp_family == AF_INET6) 530 (void) icmp_build_hdrs(icmp); 531 } 532 } else { 533 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 534 ire_t *ire; 535 536 ire = (ire_t *)ire_mp->b_rptr; 537 /* 538 * If a broadcast/multicast address was bound set 539 * the source address to 0. 540 * This ensures no datagrams with broadcast address 541 * as source address are emitted (which would violate 542 * RFC1122 - Hosts requirements) 543 * Note: we get IRE_BROADCAST for IPv6 544 * to "mark" a multicast local address. 545 */ 546 547 548 if (ire->ire_type == IRE_BROADCAST && 549 icmp->icmp_state != TS_DATA_XFER) { 550 /* 551 * This was just a local bind to a 552 * MC/broadcast addr 553 */ 554 V6_SET_ZERO(icmp->icmp_v6src); 555 if (icmp->icmp_family == AF_INET6) 556 (void) icmp_build_hdrs(icmp); 557 } 558 } 559 560 } 561 rw_exit(&icmp->icmp_rwlock); 562 if (ire_mp != NULL) 563 freeb(ire_mp); 564 } 565 566 /* 567 * Send message to IP to just bind to the protocol. 
568 */ 569 static int 570 icmp_bind_proto(conn_t *connp) 571 { 572 icmp_t *icmp; 573 int error; 574 575 icmp = connp->conn_icmp; 576 577 if (icmp->icmp_family == AF_INET6) 578 error = ip_proto_bind_laddr_v6(connp, NULL, icmp->icmp_proto, 579 &sin6_null.sin6_addr, 0, B_TRUE); 580 else 581 error = ip_proto_bind_laddr_v4(connp, NULL, icmp->icmp_proto, 582 sin_null.sin_addr.s_addr, 0, B_TRUE); 583 584 rawip_post_ip_bind_connect(icmp, NULL, error); 585 return (error); 586 } 587 588 static void 589 icmp_tpi_connect(queue_t *q, mblk_t *mp) 590 { 591 conn_t *connp = Q_TO_CONN(q); 592 struct T_conn_req *tcr; 593 icmp_t *icmp; 594 struct sockaddr *sa; 595 socklen_t len; 596 int error; 597 cred_t *cr; 598 599 /* 600 * All Solaris components should pass a db_credp 601 * for this TPI message, hence we ASSERT. 602 * But in case there is some other M_PROTO that looks 603 * like a TPI message sent by some other kernel 604 * component, we check and return an error. 605 */ 606 cr = msg_getcred(mp, NULL); 607 ASSERT(cr != NULL); 608 if (cr == NULL) { 609 icmp_err_ack(q, mp, TSYSERR, EINVAL); 610 return; 611 } 612 613 icmp = connp->conn_icmp; 614 tcr = (struct T_conn_req *)mp->b_rptr; 615 /* Sanity checks */ 616 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 617 icmp_err_ack(q, mp, TPROTO, 0); 618 return; 619 } 620 621 if (tcr->OPT_length != 0) { 622 icmp_err_ack(q, mp, TBADOPT, 0); 623 return; 624 } 625 626 len = tcr->DEST_length; 627 628 switch (len) { 629 default: 630 icmp_err_ack(q, mp, TBADADDR, 0); 631 return; 632 case sizeof (sin_t): 633 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 634 sizeof (sin_t)); 635 break; 636 case sizeof (sin6_t): 637 sa = (struct sockaddr *)mi_offset_param(mp, 638 tcr->DEST_offset, sizeof (sin6_t)); 639 break; 640 } 641 642 error = proto_verify_ip_addr(icmp->icmp_family, sa, len); 643 if (error != 0) { 644 icmp_err_ack(q, mp, TSYSERR, error); 645 return; 646 } 647 648 error = rawip_do_connect(connp, sa, len, cr); 649 if 
(error != 0) { 650 if (error < 0) { 651 icmp_err_ack(q, mp, -error, 0); 652 } else { 653 icmp_err_ack(q, mp, 0, error); 654 } 655 } else { 656 mblk_t *mp1; 657 658 /* 659 * We have to send a connection confirmation to 660 * keep TLI happy. 661 */ 662 if (icmp->icmp_family == AF_INET) { 663 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 664 sizeof (sin_t), NULL, 0); 665 } else { 666 ASSERT(icmp->icmp_family == AF_INET6); 667 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 668 sizeof (sin6_t), NULL, 0); 669 } 670 if (mp1 == NULL) { 671 rw_exit(&icmp->icmp_rwlock); 672 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 673 return; 674 } 675 676 /* 677 * Send ok_ack for T_CONN_REQ 678 */ 679 mp = mi_tpi_ok_ack_alloc(mp); 680 if (mp == NULL) { 681 /* Unable to reuse the T_CONN_REQ for the ack. */ 682 freemsg(mp1); 683 icmp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 684 return; 685 } 686 putnext(connp->conn_rq, mp); 687 putnext(connp->conn_rq, mp1); 688 } 689 } 690 691 static int 692 rawip_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 693 cred_t *cr) 694 { 695 icmp_t *icmp; 696 sin_t *sin; 697 sin6_t *sin6; 698 mblk_t *ire_mp; 699 int error; 700 ipaddr_t v4dst; 701 in6_addr_t v6dst; 702 703 icmp = connp->conn_icmp; 704 705 if (sa == NULL || !OK_32PTR((char *)sa)) { 706 return (EINVAL); 707 } 708 709 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 710 if (ire_mp == NULL) 711 return (ENOMEM); 712 DB_TYPE(ire_mp) = IRE_DB_REQ_TYPE; 713 714 715 ASSERT(sa != NULL && len != 0); 716 717 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 718 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 719 rw_exit(&icmp->icmp_rwlock); 720 freeb(ire_mp); 721 return (-TOUTSTATE); 722 } 723 724 switch (len) { 725 case sizeof (sin_t): 726 sin = (sin_t *)sa; 727 728 ASSERT(icmp->icmp_family == AF_INET); 729 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 730 731 v4dst = sin->sin_addr.s_addr; 732 /* 733 * Interpret a zero destination to mean loopback. 
734 * Update the T_CONN_REQ (sin/sin6) since it is used to 735 * generate the T_CONN_CON. 736 */ 737 if (v4dst == INADDR_ANY) { 738 v4dst = htonl(INADDR_LOOPBACK); 739 } 740 741 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 742 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 743 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 744 icmp->icmp_ip_snd_options_len; 745 icmp->icmp_v6dst.sin6_addr = v6dst; 746 icmp->icmp_v6dst.sin6_family = AF_INET6; 747 icmp->icmp_v6dst.sin6_flowinfo = 0; 748 icmp->icmp_v6dst.sin6_port = 0; 749 750 /* 751 * If the destination address is multicast and 752 * an outgoing multicast interface has been set, 753 * use the address of that interface as our 754 * source address if no source address has been set. 755 */ 756 if (V4_PART_OF_V6(icmp->icmp_v6src) == INADDR_ANY && 757 CLASSD(v4dst) && 758 icmp->icmp_multicast_if_addr != INADDR_ANY) { 759 IN6_IPADDR_TO_V4MAPPED(icmp->icmp_multicast_if_addr, 760 &icmp->icmp_v6src); 761 } 762 break; 763 case sizeof (sin6_t): 764 sin6 = (sin6_t *)sa; 765 766 /* No support for mapped addresses on raw sockets */ 767 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 768 rw_exit(&icmp->icmp_rwlock); 769 freeb(ire_mp); 770 return (EADDRNOTAVAIL); 771 } 772 773 ASSERT(icmp->icmp_ipversion == IPV6_VERSION); 774 ASSERT(icmp->icmp_family == AF_INET6); 775 776 icmp->icmp_max_hdr_len = icmp->icmp_sticky_hdrs_len; 777 778 icmp->icmp_v6dst = *sin6; 779 icmp->icmp_v6dst.sin6_port = 0; 780 781 /* 782 * Interpret a zero destination to mean loopback. 783 * Update the T_CONN_REQ (sin/sin6) since it is used to 784 * generate the T_CONN_CON. 785 */ 786 if (IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6dst.sin6_addr)) { 787 icmp->icmp_v6dst.sin6_addr = ipv6_loopback; 788 } 789 /* 790 * If the destination address is multicast and 791 * an outgoing multicast interface has been set, 792 * then the ip bind logic will pick the correct source 793 * address (i.e. matching the outgoing multicast interface). 
794 */ 795 break; 796 } 797 798 icmp->icmp_pending_op = T_CONN_REQ; 799 800 if (icmp->icmp_state == TS_DATA_XFER) { 801 /* Already connected - clear out state */ 802 icmp->icmp_v6src = icmp->icmp_bound_v6src; 803 icmp->icmp_state = TS_IDLE; 804 } 805 806 icmp->icmp_state = TS_DATA_XFER; 807 rw_exit(&icmp->icmp_rwlock); 808 809 if (icmp->icmp_family == AF_INET6) { 810 error = ip_proto_bind_connected_v6(connp, &ire_mp, 811 icmp->icmp_proto, &icmp->icmp_v6src, 0, 812 &icmp->icmp_v6dst.sin6_addr, 813 NULL, sin6->sin6_port, B_TRUE, B_TRUE, cr); 814 } else { 815 error = ip_proto_bind_connected_v4(connp, &ire_mp, 816 icmp->icmp_proto, &V4_PART_OF_V6(icmp->icmp_v6src), 0, 817 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr), sin->sin_port, 818 B_TRUE, B_TRUE, cr); 819 } 820 rawip_post_ip_bind_connect(icmp, ire_mp, error); 821 return (error); 822 } 823 824 static void 825 icmp_close_free(conn_t *connp) 826 { 827 icmp_t *icmp = connp->conn_icmp; 828 829 /* If there are any options associated with the stream, free them. */ 830 if (icmp->icmp_ip_snd_options != NULL) { 831 mi_free((char *)icmp->icmp_ip_snd_options); 832 icmp->icmp_ip_snd_options = NULL; 833 icmp->icmp_ip_snd_options_len = 0; 834 } 835 836 if (icmp->icmp_filter != NULL) { 837 kmem_free(icmp->icmp_filter, sizeof (icmp6_filter_t)); 838 icmp->icmp_filter = NULL; 839 } 840 841 /* Free memory associated with sticky options */ 842 if (icmp->icmp_sticky_hdrs_len != 0) { 843 kmem_free(icmp->icmp_sticky_hdrs, 844 icmp->icmp_sticky_hdrs_len); 845 icmp->icmp_sticky_hdrs = NULL; 846 icmp->icmp_sticky_hdrs_len = 0; 847 } 848 ip6_pkt_free(&icmp->icmp_sticky_ipp); 849 850 /* 851 * Clear any fields which the kmem_cache constructor clears. 852 * Only icmp_connp needs to be preserved. 853 * TBD: We should make this more efficient to avoid clearing 854 * everything. 
855 */ 856 ASSERT(icmp->icmp_connp == connp); 857 bzero(icmp, sizeof (icmp_t)); 858 icmp->icmp_connp = connp; 859 } 860 861 static int 862 rawip_do_close(conn_t *connp) 863 { 864 ASSERT(connp != NULL && IPCL_IS_RAWIP(connp)); 865 866 ip_quiesce_conn(connp); 867 868 if (!IPCL_IS_NONSTR(connp)) { 869 qprocsoff(connp->conn_rq); 870 } 871 872 ASSERT(connp->conn_icmp->icmp_fallback_queue_head == NULL && 873 connp->conn_icmp->icmp_fallback_queue_tail == NULL); 874 icmp_close_free(connp); 875 876 /* 877 * Now we are truly single threaded on this stream, and can 878 * delete the things hanging off the connp, and finally the connp. 879 * We removed this connp from the fanout list, it cannot be 880 * accessed thru the fanouts, and we already waited for the 881 * conn_ref to drop to 0. We are already in close, so 882 * there cannot be any other thread from the top. qprocsoff 883 * has completed, and service has completed or won't run in 884 * future. 885 */ 886 ASSERT(connp->conn_ref == 1); 887 888 if (!IPCL_IS_NONSTR(connp)) { 889 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 890 } else { 891 ip_free_helper_stream(connp); 892 } 893 894 connp->conn_ref--; 895 ipcl_conn_destroy(connp); 896 897 return (0); 898 } 899 900 static int 901 icmp_close(queue_t *q, int flags) 902 { 903 conn_t *connp; 904 905 if (flags & SO_FALLBACK) { 906 /* 907 * stream is being closed while in fallback 908 * simply free the resources that were allocated 909 */ 910 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 911 qprocsoff(q); 912 goto done; 913 } 914 915 connp = Q_TO_CONN(q); 916 (void) rawip_do_close(connp); 917 done: 918 q->q_ptr = WR(q)->q_ptr = NULL; 919 return (0); 920 } 921 922 /* 923 * This routine handles each T_DISCON_REQ message passed to icmp 924 * as an indicating that ICMP is no longer connected. This results 925 * in sending a T_BIND_REQ to IP to restore the binding to just 926 * the local address. 927 * 928 * The disconnect completes in rawip_post_ip_bind_connect. 
929 */ 930 static int 931 icmp_do_disconnect(conn_t *connp) 932 { 933 icmp_t *icmp; 934 mblk_t *ire_mp; 935 int error; 936 937 icmp = connp->conn_icmp; 938 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 939 if (icmp->icmp_state != TS_DATA_XFER || icmp->icmp_pending_op != -1) { 940 rw_exit(&icmp->icmp_rwlock); 941 return (-TOUTSTATE); 942 } 943 icmp->icmp_pending_op = T_DISCON_REQ; 944 icmp->icmp_v6src = icmp->icmp_bound_v6src; 945 icmp->icmp_state = TS_IDLE; 946 947 948 if (icmp->icmp_family == AF_INET6) { 949 /* Rebuild the header template */ 950 error = icmp_build_hdrs(icmp); 951 if (error != 0) { 952 icmp->icmp_pending_op = -1; 953 rw_exit(&icmp->icmp_rwlock); 954 return (error); 955 } 956 } 957 958 rw_exit(&icmp->icmp_rwlock); 959 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 960 if (ire_mp == NULL) { 961 return (ENOMEM); 962 } 963 964 if (icmp->icmp_family == AF_INET6) { 965 error = ip_proto_bind_laddr_v6(connp, &ire_mp, icmp->icmp_proto, 966 &icmp->icmp_bound_v6src, 0, B_TRUE); 967 } else { 968 969 error = ip_proto_bind_laddr_v4(connp, &ire_mp, icmp->icmp_proto, 970 V4_PART_OF_V6(icmp->icmp_bound_v6src), 0, B_TRUE); 971 } 972 973 rawip_post_ip_bind_connect(icmp, ire_mp, error); 974 975 return (error); 976 } 977 978 static void 979 icmp_tpi_disconnect(queue_t *q, mblk_t *mp) 980 { 981 conn_t *connp = Q_TO_CONN(q); 982 int error; 983 984 /* 985 * Allocate the largest primitive we need to send back 986 * T_error_ack is > than T_ok_ack 987 */ 988 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 989 if (mp == NULL) { 990 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 991 icmp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 992 return; 993 } 994 995 error = icmp_do_disconnect(connp); 996 997 if (error != 0) { 998 if (error > 0) { 999 icmp_err_ack(q, mp, 0, error); 1000 } else { 1001 icmp_err_ack(q, mp, -error, 0); 1002 } 1003 } else { 1004 mp = mi_tpi_ok_ack_alloc(mp); 1005 ASSERT(mp != NULL); 1006 qreply(q, mp); 1007 } 1008 1009 } 1010 1011 static int 1012 icmp_disconnect(conn_t *connp) 1013 { 1014 int error; 1015 icmp_t *icmp = connp->conn_icmp; 1016 1017 icmp->icmp_dgram_errind = B_FALSE; 1018 1019 error = icmp_do_disconnect(connp); 1020 1021 if (error < 0) 1022 error = proto_tlitosyserr(-error); 1023 return (error); 1024 } 1025 1026 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 1027 static void 1028 icmp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1029 { 1030 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1031 qreply(q, mp); 1032 } 1033 1034 /* Shorthand to generate and send TPI error acks to our client */ 1035 static void 1036 icmp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 1037 t_scalar_t t_error, int sys_error) 1038 { 1039 struct T_error_ack *teackp; 1040 1041 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1042 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1043 teackp = (struct T_error_ack *)mp->b_rptr; 1044 teackp->ERROR_prim = primitive; 1045 teackp->TLI_error = t_error; 1046 teackp->UNIX_error = sys_error; 1047 qreply(q, mp); 1048 } 1049 } 1050 1051 /* 1052 * icmp_icmp_error is called by icmp_input to process ICMP 1053 * messages passed up by IP. 1054 * Generates the appropriate permanent (non-transient) errors. 1055 * Assumes that IP has pulled up everything up to and including 1056 * the ICMP header. 
1057 */ 1058 static void 1059 icmp_icmp_error(conn_t *connp, mblk_t *mp) 1060 { 1061 icmph_t *icmph; 1062 ipha_t *ipha; 1063 int iph_hdr_length; 1064 sin_t sin; 1065 mblk_t *mp1; 1066 int error = 0; 1067 icmp_t *icmp = connp->conn_icmp; 1068 1069 ipha = (ipha_t *)mp->b_rptr; 1070 1071 ASSERT(OK_32PTR(mp->b_rptr)); 1072 1073 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1074 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1075 icmp_icmp_error_ipv6(connp, mp); 1076 return; 1077 } 1078 1079 /* 1080 * icmp does not support v4 mapped addresses 1081 * so we can never be here for a V6 socket 1082 * i.e. icmp_family == AF_INET6 1083 */ 1084 ASSERT((IPH_HDR_VERSION(ipha) == IPV4_VERSION) && 1085 (icmp->icmp_family == AF_INET)); 1086 1087 ASSERT(icmp->icmp_family == AF_INET); 1088 1089 /* Skip past the outer IP and ICMP headers */ 1090 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1091 icmph = (icmph_t *)(&mp->b_rptr[iph_hdr_length]); 1092 ipha = (ipha_t *)&icmph[1]; 1093 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1094 1095 switch (icmph->icmph_type) { 1096 case ICMP_DEST_UNREACHABLE: 1097 switch (icmph->icmph_code) { 1098 case ICMP_FRAGMENTATION_NEEDED: 1099 /* 1100 * IP has already adjusted the path MTU. 1101 */ 1102 break; 1103 case ICMP_PORT_UNREACHABLE: 1104 case ICMP_PROTOCOL_UNREACHABLE: 1105 error = ECONNREFUSED; 1106 break; 1107 default: 1108 /* Transient errors */ 1109 break; 1110 } 1111 break; 1112 default: 1113 /* Transient errors */ 1114 break; 1115 } 1116 if (error == 0) { 1117 freemsg(mp); 1118 return; 1119 } 1120 1121 /* 1122 * Deliver T_UDERROR_IND when the application has asked for it. 1123 * The socket layer enables this automatically when connected. 
1124 */ 1125 if (!icmp->icmp_dgram_errind) { 1126 freemsg(mp); 1127 return; 1128 } 1129 1130 sin = sin_null; 1131 sin.sin_family = AF_INET; 1132 sin.sin_addr.s_addr = ipha->ipha_dst; 1133 1134 if (IPCL_IS_NONSTR(connp)) { 1135 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1136 if (icmp->icmp_state == TS_DATA_XFER) { 1137 if (sin.sin_addr.s_addr == 1138 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr)) { 1139 rw_exit(&icmp->icmp_rwlock); 1140 (*connp->conn_upcalls->su_set_error) 1141 (connp->conn_upper_handle, error); 1142 goto done; 1143 } 1144 } else { 1145 icmp->icmp_delayed_error = error; 1146 *((sin_t *)&icmp->icmp_delayed_addr) = sin; 1147 } 1148 rw_exit(&icmp->icmp_rwlock); 1149 } else { 1150 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 1151 0, error); 1152 if (mp1 != NULL) 1153 putnext(connp->conn_rq, mp1); 1154 } 1155 done: 1156 ASSERT(!RW_ISWRITER(&icmp->icmp_rwlock)); 1157 freemsg(mp); 1158 } 1159 1160 /* 1161 * icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMPv6 1162 * for IPv6 packets. 1163 * Send permanent (non-transient) errors upstream. 1164 * Assumes that IP has pulled up all the extension headers as well 1165 * as the ICMPv6 header. 
1166 */ 1167 static void 1168 icmp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1169 { 1170 icmp6_t *icmp6; 1171 ip6_t *ip6h, *outer_ip6h; 1172 uint16_t iph_hdr_length; 1173 uint8_t *nexthdrp; 1174 sin6_t sin6; 1175 mblk_t *mp1; 1176 int error = 0; 1177 icmp_t *icmp = connp->conn_icmp; 1178 1179 outer_ip6h = (ip6_t *)mp->b_rptr; 1180 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1181 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1182 else 1183 iph_hdr_length = IPV6_HDR_LEN; 1184 1185 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1186 ip6h = (ip6_t *)&icmp6[1]; 1187 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1188 freemsg(mp); 1189 return; 1190 } 1191 1192 switch (icmp6->icmp6_type) { 1193 case ICMP6_DST_UNREACH: 1194 switch (icmp6->icmp6_code) { 1195 case ICMP6_DST_UNREACH_NOPORT: 1196 error = ECONNREFUSED; 1197 break; 1198 case ICMP6_DST_UNREACH_ADMIN: 1199 case ICMP6_DST_UNREACH_NOROUTE: 1200 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1201 case ICMP6_DST_UNREACH_ADDR: 1202 /* Transient errors */ 1203 break; 1204 default: 1205 break; 1206 } 1207 break; 1208 case ICMP6_PACKET_TOO_BIG: { 1209 struct T_unitdata_ind *tudi; 1210 struct T_opthdr *toh; 1211 size_t udi_size; 1212 mblk_t *newmp; 1213 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1214 sizeof (struct ip6_mtuinfo); 1215 sin6_t *sin6; 1216 struct ip6_mtuinfo *mtuinfo; 1217 1218 /* 1219 * If the application has requested to receive path mtu 1220 * information, send up an empty message containing an 1221 * IPV6_PATHMTU ancillary data item. 1222 */ 1223 if (!icmp->icmp_ipv6_recvpathmtu) 1224 break; 1225 1226 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1227 opt_length; 1228 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1229 BUMP_MIB(&icmp->icmp_is->is_rawip_mib, rawipInErrors); 1230 break; 1231 } 1232 1233 /* 1234 * newmp->b_cont is left to NULL on purpose. This is an 1235 * empty message containing only ancillary data. 
1236 */ 1237 newmp->b_datap->db_type = M_PROTO; 1238 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1239 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1240 tudi->PRIM_type = T_UNITDATA_IND; 1241 tudi->SRC_length = sizeof (sin6_t); 1242 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1243 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1244 tudi->OPT_length = opt_length; 1245 1246 sin6 = (sin6_t *)&tudi[1]; 1247 bzero(sin6, sizeof (sin6_t)); 1248 sin6->sin6_family = AF_INET6; 1249 sin6->sin6_addr = icmp->icmp_v6dst.sin6_addr; 1250 1251 toh = (struct T_opthdr *)&sin6[1]; 1252 toh->level = IPPROTO_IPV6; 1253 toh->name = IPV6_PATHMTU; 1254 toh->len = opt_length; 1255 toh->status = 0; 1256 1257 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1258 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1259 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1260 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1261 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1262 /* 1263 * We've consumed everything we need from the original 1264 * message. Free it, then send our empty message. 1265 */ 1266 freemsg(mp); 1267 icmp_ulp_recv(connp, newmp); 1268 1269 return; 1270 } 1271 case ICMP6_TIME_EXCEEDED: 1272 /* Transient errors */ 1273 break; 1274 case ICMP6_PARAM_PROB: 1275 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1276 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1277 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1278 (uchar_t *)nexthdrp) { 1279 error = ECONNREFUSED; 1280 break; 1281 } 1282 break; 1283 } 1284 if (error == 0) { 1285 freemsg(mp); 1286 return; 1287 } 1288 1289 /* 1290 * Deliver T_UDERROR_IND when the application has asked for it. 1291 * The socket layer enables this automatically when connected. 
1292 */ 1293 if (!icmp->icmp_dgram_errind) { 1294 freemsg(mp); 1295 return; 1296 } 1297 1298 sin6 = sin6_null; 1299 sin6.sin6_family = AF_INET6; 1300 sin6.sin6_addr = ip6h->ip6_dst; 1301 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1302 1303 if (IPCL_IS_NONSTR(connp)) { 1304 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1305 if (icmp->icmp_state == TS_DATA_XFER) { 1306 if (IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1307 &icmp->icmp_v6dst.sin6_addr)) { 1308 rw_exit(&icmp->icmp_rwlock); 1309 (*connp->conn_upcalls->su_set_error) 1310 (connp->conn_upper_handle, error); 1311 goto done; 1312 } 1313 } else { 1314 icmp->icmp_delayed_error = error; 1315 *((sin6_t *)&icmp->icmp_delayed_addr) = sin6; 1316 } 1317 rw_exit(&icmp->icmp_rwlock); 1318 } else { 1319 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1320 NULL, 0, error); 1321 if (mp1 != NULL) 1322 putnext(connp->conn_rq, mp1); 1323 } 1324 done: 1325 ASSERT(!RW_ISWRITER(&icmp->icmp_rwlock)); 1326 freemsg(mp); 1327 } 1328 1329 /* 1330 * This routine responds to T_ADDR_REQ messages. It is called by icmp_wput. 1331 * The local address is filled in if endpoint is bound. The remote address 1332 * is filled in if remote address has been precified ("connected endpoint") 1333 * (The concept of connected CLTS sockets is alien to published TPI 1334 * but we support it anyway). 
1335 */ 1336 static void 1337 icmp_addr_req(queue_t *q, mblk_t *mp) 1338 { 1339 icmp_t *icmp = Q_TO_ICMP(q); 1340 mblk_t *ackmp; 1341 struct T_addr_ack *taa; 1342 1343 /* Make it large enough for worst case */ 1344 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1345 2 * sizeof (sin6_t), 1); 1346 if (ackmp == NULL) { 1347 icmp_err_ack(q, mp, TSYSERR, ENOMEM); 1348 return; 1349 } 1350 taa = (struct T_addr_ack *)ackmp->b_rptr; 1351 1352 bzero(taa, sizeof (struct T_addr_ack)); 1353 ackmp->b_wptr = (uchar_t *)&taa[1]; 1354 1355 taa->PRIM_type = T_ADDR_ACK; 1356 ackmp->b_datap->db_type = M_PCPROTO; 1357 rw_enter(&icmp->icmp_rwlock, RW_READER); 1358 /* 1359 * Note: Following code assumes 32 bit alignment of basic 1360 * data structures like sin_t and struct T_addr_ack. 1361 */ 1362 if (icmp->icmp_state != TS_UNBND) { 1363 /* 1364 * Fill in local address 1365 */ 1366 taa->LOCADDR_offset = sizeof (*taa); 1367 if (icmp->icmp_family == AF_INET) { 1368 sin_t *sin; 1369 1370 taa->LOCADDR_length = sizeof (sin_t); 1371 sin = (sin_t *)&taa[1]; 1372 /* Fill zeroes and then intialize non-zero fields */ 1373 *sin = sin_null; 1374 sin->sin_family = AF_INET; 1375 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 1376 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1377 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, 1378 sin->sin_addr.s_addr); 1379 } else { 1380 /* 1381 * INADDR_ANY 1382 * icmp_v6src is not set, we might be bound to 1383 * broadcast/multicast. 
Use icmp_bound_v6src as 1384 * local address instead (that could 1385 * also still be INADDR_ANY) 1386 */ 1387 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_bound_v6src, 1388 sin->sin_addr.s_addr); 1389 } 1390 ackmp->b_wptr = (uchar_t *)&sin[1]; 1391 } else { 1392 sin6_t *sin6; 1393 1394 ASSERT(icmp->icmp_family == AF_INET6); 1395 taa->LOCADDR_length = sizeof (sin6_t); 1396 sin6 = (sin6_t *)&taa[1]; 1397 /* Fill zeroes and then intialize non-zero fields */ 1398 *sin6 = sin6_null; 1399 sin6->sin6_family = AF_INET6; 1400 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 1401 sin6->sin6_addr = icmp->icmp_v6src; 1402 } else { 1403 /* 1404 * UNSPECIFIED 1405 * icmp_v6src is not set, we might be bound to 1406 * broadcast/multicast. Use icmp_bound_v6src as 1407 * local address instead (that could 1408 * also still be UNSPECIFIED) 1409 */ 1410 sin6->sin6_addr = icmp->icmp_bound_v6src; 1411 } 1412 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1413 } 1414 } 1415 rw_exit(&icmp->icmp_rwlock); 1416 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1417 qreply(q, ackmp); 1418 } 1419 1420 static void 1421 icmp_copy_info(struct T_info_ack *tap, icmp_t *icmp) 1422 { 1423 *tap = icmp_g_t_info_ack; 1424 1425 if (icmp->icmp_family == AF_INET6) 1426 tap->ADDR_size = sizeof (sin6_t); 1427 else 1428 tap->ADDR_size = sizeof (sin_t); 1429 tap->CURRENT_state = icmp->icmp_state; 1430 tap->OPT_size = icmp_max_optsize; 1431 } 1432 1433 static void 1434 icmp_do_capability_ack(icmp_t *icmp, struct T_capability_ack *tcap, 1435 t_uscalar_t cap_bits1) 1436 { 1437 tcap->CAP_bits1 = 0; 1438 1439 if (cap_bits1 & TC1_INFO) { 1440 icmp_copy_info(&tcap->INFO_ack, icmp); 1441 tcap->CAP_bits1 |= TC1_INFO; 1442 } 1443 } 1444 1445 /* 1446 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1447 * icmp_wput. Much of the T_CAPABILITY_ACK information is copied from 1448 * icmp_g_t_info_ack. The current state of the stream is copied from 1449 * icmp_state. 
1450 */ 1451 static void 1452 icmp_capability_req(queue_t *q, mblk_t *mp) 1453 { 1454 icmp_t *icmp = Q_TO_ICMP(q); 1455 t_uscalar_t cap_bits1; 1456 struct T_capability_ack *tcap; 1457 1458 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1459 1460 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1461 mp->b_datap->db_type, T_CAPABILITY_ACK); 1462 if (!mp) 1463 return; 1464 1465 tcap = (struct T_capability_ack *)mp->b_rptr; 1466 1467 icmp_do_capability_ack(icmp, tcap, cap_bits1); 1468 1469 qreply(q, mp); 1470 } 1471 1472 /* 1473 * This routine responds to T_INFO_REQ messages. It is called by icmp_wput. 1474 * Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack. 1475 * The current state of the stream is copied from icmp_state. 1476 */ 1477 static void 1478 icmp_info_req(queue_t *q, mblk_t *mp) 1479 { 1480 icmp_t *icmp = Q_TO_ICMP(q); 1481 1482 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1483 T_INFO_ACK); 1484 if (!mp) 1485 return; 1486 icmp_copy_info((struct T_info_ack *)mp->b_rptr, icmp); 1487 qreply(q, mp); 1488 } 1489 1490 /* For /dev/icmp aka AF_INET open */ 1491 static int 1492 icmp_tpi_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1493 int family) 1494 { 1495 conn_t *connp; 1496 dev_t conn_dev; 1497 icmp_stack_t *is; 1498 int error; 1499 1500 conn_dev = NULL; 1501 1502 /* If the stream is already open, return immediately. */ 1503 if (q->q_ptr != NULL) 1504 return (0); 1505 1506 if (sflag == MODOPEN) 1507 return (EINVAL); 1508 1509 /* 1510 * Since ICMP is not used so heavily, allocating from the small 1511 * arena should be sufficient. 
1512 */ 1513 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 1514 return (EBUSY); 1515 } 1516 1517 if (flag & SO_FALLBACK) { 1518 /* 1519 * Non streams socket needs a stream to fallback to 1520 */ 1521 RD(q)->q_ptr = (void *)conn_dev; 1522 WR(q)->q_qinfo = &icmp_fallback_sock_winit; 1523 WR(q)->q_ptr = (void *)ip_minor_arena_sa; 1524 qprocson(q); 1525 return (0); 1526 } 1527 1528 connp = icmp_open(family, credp, &error, KM_SLEEP); 1529 if (connp == NULL) { 1530 ASSERT(error != NULL); 1531 inet_minor_free(ip_minor_arena_sa, connp->conn_dev); 1532 return (error); 1533 } 1534 1535 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1536 connp->conn_dev = conn_dev; 1537 connp->conn_minor_arena = ip_minor_arena_sa; 1538 1539 is = connp->conn_icmp->icmp_is; 1540 1541 /* 1542 * Initialize the icmp_t structure for this stream. 1543 */ 1544 q->q_ptr = connp; 1545 WR(q)->q_ptr = connp; 1546 connp->conn_rq = q; 1547 connp->conn_wq = WR(q); 1548 1549 if (connp->conn_icmp->icmp_family == AF_INET6) { 1550 /* Build initial header template for transmit */ 1551 rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER); 1552 if ((error = icmp_build_hdrs(connp->conn_icmp)) != 0) { 1553 rw_exit(&connp->conn_icmp->icmp_rwlock); 1554 inet_minor_free(ip_minor_arena_sa, connp->conn_dev); 1555 ipcl_conn_destroy(connp); 1556 return (error); 1557 } 1558 rw_exit(&connp->conn_icmp->icmp_rwlock); 1559 } 1560 1561 1562 q->q_hiwat = is->is_recv_hiwat; 1563 WR(q)->q_hiwat = is->is_xmit_hiwat; 1564 WR(q)->q_lowat = is->is_xmit_lowat; 1565 1566 qprocson(q); 1567 1568 /* Set the Stream head write offset. 
*/ 1569 (void) proto_set_tx_wroff(q, connp, 1570 connp->conn_icmp->icmp_max_hdr_len + is->is_wroff_extra); 1571 (void) proto_set_rx_hiwat(connp->conn_rq, connp, q->q_hiwat); 1572 1573 mutex_enter(&connp->conn_lock); 1574 connp->conn_state_flags &= ~CONN_INCIPIENT; 1575 mutex_exit(&connp->conn_lock); 1576 1577 return (0); 1578 } 1579 1580 /* For /dev/icmp4 aka AF_INET open */ 1581 static int 1582 icmp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1583 { 1584 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET)); 1585 } 1586 1587 /* For /dev/icmp6 aka AF_INET6 open */ 1588 static int 1589 icmp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1590 { 1591 return (icmp_tpi_open(q, devp, flag, sflag, credp, AF_INET6)); 1592 } 1593 1594 /* 1595 * This is the open routine for icmp. It allocates a icmp_t structure for 1596 * the stream and, on the first open of the module, creates an ND table. 1597 */ 1598 /* ARGSUSED */ 1599 static conn_t * 1600 icmp_open(int family, cred_t *credp, int *err, int flags) 1601 { 1602 icmp_t *icmp; 1603 conn_t *connp; 1604 zoneid_t zoneid; 1605 netstack_t *ns; 1606 icmp_stack_t *is; 1607 boolean_t isv6 = B_FALSE; 1608 1609 *err = secpolicy_net_icmpaccess(credp); 1610 if (*err != 0) 1611 return (NULL); 1612 1613 if (family == AF_INET6) 1614 isv6 = B_TRUE; 1615 ns = netstack_find_by_cred(credp); 1616 ASSERT(ns != NULL); 1617 is = ns->netstack_icmp; 1618 ASSERT(is != NULL); 1619 1620 /* 1621 * For exclusive stacks we set the zoneid to zero 1622 * to make ICMP operate as if in the global zone. 1623 */ 1624 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 1625 zoneid = GLOBAL_ZONEID; 1626 else 1627 zoneid = crgetzoneid(credp); 1628 1629 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 1630 1631 connp = ipcl_conn_create(IPCL_RAWIPCONN, flags, ns); 1632 icmp = connp->conn_icmp; 1633 icmp->icmp_v6dst = sin6_null; 1634 1635 /* 1636 * ipcl_conn_create did a netstack_hold. 
Undo the hold that was 1637 * done by netstack_find_by_cred() 1638 */ 1639 netstack_rele(ns); 1640 1641 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 1642 ASSERT(connp->conn_ulp == IPPROTO_ICMP); 1643 ASSERT(connp->conn_icmp == icmp); 1644 ASSERT(icmp->icmp_connp == connp); 1645 1646 /* Set the initial state of the stream and the privilege status. */ 1647 icmp->icmp_state = TS_UNBND; 1648 if (isv6) { 1649 icmp->icmp_ipversion = IPV6_VERSION; 1650 icmp->icmp_family = AF_INET6; 1651 connp->conn_ulp = IPPROTO_ICMPV6; 1652 /* May be changed by a SO_PROTOTYPE socket option. */ 1653 icmp->icmp_proto = IPPROTO_ICMPV6; 1654 icmp->icmp_checksum_off = 2; /* Offset for icmp6_cksum */ 1655 icmp->icmp_max_hdr_len = IPV6_HDR_LEN; 1656 icmp->icmp_ttl = (uint8_t)is->is_ipv6_hoplimit; 1657 connp->conn_af_isv6 = B_TRUE; 1658 connp->conn_flags |= IPCL_ISV6; 1659 } else { 1660 icmp->icmp_ipversion = IPV4_VERSION; 1661 icmp->icmp_family = AF_INET; 1662 /* May be changed by a SO_PROTOTYPE socket option. */ 1663 icmp->icmp_proto = IPPROTO_ICMP; 1664 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH; 1665 icmp->icmp_ttl = (uint8_t)is->is_ipv4_ttl; 1666 connp->conn_af_isv6 = B_FALSE; 1667 connp->conn_flags &= ~IPCL_ISV6; 1668 } 1669 icmp->icmp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1670 icmp->icmp_pending_op = -1; 1671 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 1672 connp->conn_zoneid = zoneid; 1673 1674 /* 1675 * If the caller has the process-wide flag set, then default to MAC 1676 * exempt mode. This allows read-down to unlabeled hosts. 
1677 */ 1678 if (getpflags(NET_MAC_AWARE, credp) != 0) 1679 connp->conn_mac_exempt = B_TRUE; 1680 1681 connp->conn_ulp_labeled = is_system_labeled(); 1682 1683 icmp->icmp_is = is; 1684 1685 connp->conn_recv = icmp_input; 1686 crhold(credp); 1687 connp->conn_cred = credp; 1688 1689 rw_exit(&icmp->icmp_rwlock); 1690 1691 connp->conn_flow_cntrld = B_FALSE; 1692 return (connp); 1693 } 1694 1695 /* 1696 * Which ICMP options OK to set through T_UNITDATA_REQ... 1697 */ 1698 /* ARGSUSED */ 1699 static boolean_t 1700 icmp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1701 { 1702 return (B_TRUE); 1703 } 1704 1705 /* 1706 * This routine gets default values of certain options whose default 1707 * values are maintained by protcol specific code 1708 */ 1709 /* ARGSUSED */ 1710 int 1711 icmp_opt_default(queue_t *q, int level, int name, uchar_t *ptr) 1712 { 1713 icmp_t *icmp = Q_TO_ICMP(q); 1714 icmp_stack_t *is = icmp->icmp_is; 1715 int *i1 = (int *)ptr; 1716 1717 switch (level) { 1718 case IPPROTO_IP: 1719 switch (name) { 1720 case IP_MULTICAST_TTL: 1721 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1722 return (sizeof (uchar_t)); 1723 case IP_MULTICAST_LOOP: 1724 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1725 return (sizeof (uchar_t)); 1726 } 1727 break; 1728 case IPPROTO_IPV6: 1729 switch (name) { 1730 case IPV6_MULTICAST_HOPS: 1731 *i1 = IP_DEFAULT_MULTICAST_TTL; 1732 return (sizeof (int)); 1733 case IPV6_MULTICAST_LOOP: 1734 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1735 return (sizeof (int)); 1736 case IPV6_UNICAST_HOPS: 1737 *i1 = is->is_ipv6_hoplimit; 1738 return (sizeof (int)); 1739 } 1740 break; 1741 case IPPROTO_ICMPV6: 1742 switch (name) { 1743 case ICMP6_FILTER: 1744 /* Make it look like "pass all" */ 1745 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 1746 return (sizeof (icmp6_filter_t)); 1747 } 1748 break; 1749 } 1750 return (-1); 1751 } 1752 1753 /* 1754 * This routine retrieves the current status of socket options. 1755 * It returns the size of the option retrieved. 
1756 */ 1757 int 1758 icmp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 1759 { 1760 icmp_t *icmp = connp->conn_icmp; 1761 icmp_stack_t *is = icmp->icmp_is; 1762 int *i1 = (int *)ptr; 1763 ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp; 1764 int ret = 0; 1765 1766 ASSERT(RW_READ_HELD(&icmp->icmp_rwlock)); 1767 switch (level) { 1768 case SOL_SOCKET: 1769 switch (name) { 1770 case SO_DEBUG: 1771 *i1 = icmp->icmp_debug; 1772 break; 1773 case SO_TYPE: 1774 *i1 = SOCK_RAW; 1775 break; 1776 case SO_PROTOTYPE: 1777 *i1 = icmp->icmp_proto; 1778 break; 1779 case SO_REUSEADDR: 1780 *i1 = icmp->icmp_reuseaddr; 1781 break; 1782 1783 /* 1784 * The following three items are available here, 1785 * but are only meaningful to IP. 1786 */ 1787 case SO_DONTROUTE: 1788 *i1 = icmp->icmp_dontroute; 1789 break; 1790 case SO_USELOOPBACK: 1791 *i1 = icmp->icmp_useloopback; 1792 break; 1793 case SO_BROADCAST: 1794 *i1 = icmp->icmp_broadcast; 1795 break; 1796 1797 case SO_SNDBUF: 1798 ASSERT(icmp->icmp_xmit_hiwat <= INT_MAX); 1799 *i1 = icmp->icmp_xmit_hiwat; 1800 break; 1801 case SO_RCVBUF: 1802 ASSERT(icmp->icmp_recv_hiwat <= INT_MAX); 1803 *i1 = icmp->icmp_recv_hiwat; 1804 break; 1805 case SO_DGRAM_ERRIND: 1806 *i1 = icmp->icmp_dgram_errind; 1807 break; 1808 case SO_TIMESTAMP: 1809 *i1 = icmp->icmp_timestamp; 1810 break; 1811 case SO_MAC_EXEMPT: 1812 *i1 = connp->conn_mac_exempt; 1813 break; 1814 case SO_DOMAIN: 1815 *i1 = icmp->icmp_family; 1816 break; 1817 1818 /* 1819 * Following four not meaningful for icmp 1820 * Action is same as "default" to which we fallthrough 1821 * so we keep them in comments. 1822 * case SO_LINGER: 1823 * case SO_KEEPALIVE: 1824 * case SO_OOBINLINE: 1825 * case SO_ALLZONES: 1826 */ 1827 default: 1828 ret = -1; 1829 goto done; 1830 } 1831 break; 1832 case IPPROTO_IP: 1833 /* 1834 * Only allow IPv4 option processing on IPv4 sockets. 
1835 */ 1836 if (icmp->icmp_family != AF_INET) { 1837 ret = -1; 1838 goto done; 1839 } 1840 1841 switch (name) { 1842 case IP_OPTIONS: 1843 case T_IP_OPTIONS: 1844 /* Options are passed up with each packet */ 1845 ret = 0; 1846 goto done; 1847 case IP_HDRINCL: 1848 *i1 = (int)icmp->icmp_hdrincl; 1849 break; 1850 case IP_TOS: 1851 case T_IP_TOS: 1852 *i1 = (int)icmp->icmp_type_of_service; 1853 break; 1854 case IP_TTL: 1855 *i1 = (int)icmp->icmp_ttl; 1856 break; 1857 case IP_MULTICAST_IF: 1858 /* 0 address if not set */ 1859 *(ipaddr_t *)ptr = icmp->icmp_multicast_if_addr; 1860 ret = sizeof (ipaddr_t); 1861 goto done; 1862 case IP_MULTICAST_TTL: 1863 *(uchar_t *)ptr = icmp->icmp_multicast_ttl; 1864 ret = sizeof (uchar_t); 1865 goto done; 1866 case IP_MULTICAST_LOOP: 1867 *ptr = connp->conn_multicast_loop; 1868 ret = sizeof (uint8_t); 1869 goto done; 1870 case IP_BOUND_IF: 1871 /* Zero if not set */ 1872 *i1 = icmp->icmp_bound_if; 1873 break; /* goto sizeof (int) option return */ 1874 case IP_UNSPEC_SRC: 1875 *ptr = icmp->icmp_unspec_source; 1876 break; /* goto sizeof (int) option return */ 1877 case IP_RECVIF: 1878 *ptr = icmp->icmp_recvif; 1879 break; /* goto sizeof (int) option return */ 1880 case IP_BROADCAST_TTL: 1881 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 1882 return (sizeof (uchar_t)); 1883 case IP_RECVPKTINFO: 1884 /* 1885 * This also handles IP_PKTINFO. 1886 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1887 * Differentiation is based on the size of the argument 1888 * passed in. 1889 * This option is handled in IP which will return an 1890 * error for IP_PKTINFO as it's not supported as a 1891 * sticky option. 1892 */ 1893 ret = -EINVAL; 1894 goto done; 1895 /* 1896 * Cannot "get" the value of following options 1897 * at this level. Action is same as "default" to 1898 * which we fallthrough so we keep them in comments. 
1899 * 1900 * case IP_ADD_MEMBERSHIP: 1901 * case IP_DROP_MEMBERSHIP: 1902 * case IP_BLOCK_SOURCE: 1903 * case IP_UNBLOCK_SOURCE: 1904 * case IP_ADD_SOURCE_MEMBERSHIP: 1905 * case IP_DROP_SOURCE_MEMBERSHIP: 1906 * case MCAST_JOIN_GROUP: 1907 * case MCAST_LEAVE_GROUP: 1908 * case MCAST_BLOCK_SOURCE: 1909 * case MCAST_UNBLOCK_SOURCE: 1910 * case MCAST_JOIN_SOURCE_GROUP: 1911 * case MCAST_LEAVE_SOURCE_GROUP: 1912 * case MRT_INIT: 1913 * case MRT_DONE: 1914 * case MRT_ADD_VIF: 1915 * case MRT_DEL_VIF: 1916 * case MRT_ADD_MFC: 1917 * case MRT_DEL_MFC: 1918 * case MRT_VERSION: 1919 * case MRT_ASSERT: 1920 * case IP_SEC_OPT: 1921 * case IP_NEXTHOP: 1922 */ 1923 default: 1924 ret = -1; 1925 goto done; 1926 } 1927 break; 1928 case IPPROTO_IPV6: 1929 /* 1930 * Only allow IPv6 option processing on native IPv6 sockets. 1931 */ 1932 if (icmp->icmp_family != AF_INET6) { 1933 ret = -1; 1934 goto done; 1935 } 1936 switch (name) { 1937 case IPV6_UNICAST_HOPS: 1938 *i1 = (unsigned int)icmp->icmp_ttl; 1939 break; 1940 case IPV6_MULTICAST_IF: 1941 /* 0 index if not set */ 1942 *i1 = icmp->icmp_multicast_if_index; 1943 break; 1944 case IPV6_MULTICAST_HOPS: 1945 *i1 = icmp->icmp_multicast_ttl; 1946 break; 1947 case IPV6_MULTICAST_LOOP: 1948 *i1 = connp->conn_multicast_loop; 1949 break; 1950 case IPV6_BOUND_IF: 1951 /* Zero if not set */ 1952 *i1 = icmp->icmp_bound_if; 1953 break; 1954 case IPV6_UNSPEC_SRC: 1955 *i1 = icmp->icmp_unspec_source; 1956 break; 1957 case IPV6_CHECKSUM: 1958 /* 1959 * Return offset or -1 if no checksum offset. 
1960 * Does not apply to IPPROTO_ICMPV6 1961 */ 1962 if (icmp->icmp_proto == IPPROTO_ICMPV6) { 1963 ret = -1; 1964 goto done; 1965 } 1966 1967 if (icmp->icmp_raw_checksum) { 1968 *i1 = icmp->icmp_checksum_off; 1969 } else { 1970 *i1 = -1; 1971 } 1972 break; 1973 case IPV6_JOIN_GROUP: 1974 case IPV6_LEAVE_GROUP: 1975 case MCAST_JOIN_GROUP: 1976 case MCAST_LEAVE_GROUP: 1977 case MCAST_BLOCK_SOURCE: 1978 case MCAST_UNBLOCK_SOURCE: 1979 case MCAST_JOIN_SOURCE_GROUP: 1980 case MCAST_LEAVE_SOURCE_GROUP: 1981 /* cannot "get" the value for these */ 1982 ret = -1; 1983 goto done; 1984 case IPV6_RECVPKTINFO: 1985 *i1 = icmp->icmp_ip_recvpktinfo; 1986 break; 1987 case IPV6_RECVTCLASS: 1988 *i1 = icmp->icmp_ipv6_recvtclass; 1989 break; 1990 case IPV6_RECVPATHMTU: 1991 *i1 = icmp->icmp_ipv6_recvpathmtu; 1992 break; 1993 case IPV6_V6ONLY: 1994 *i1 = 1; 1995 break; 1996 case IPV6_RECVHOPLIMIT: 1997 *i1 = icmp->icmp_ipv6_recvhoplimit; 1998 break; 1999 case IPV6_RECVHOPOPTS: 2000 *i1 = icmp->icmp_ipv6_recvhopopts; 2001 break; 2002 case IPV6_RECVDSTOPTS: 2003 *i1 = icmp->icmp_ipv6_recvdstopts; 2004 break; 2005 case _OLD_IPV6_RECVDSTOPTS: 2006 *i1 = icmp->icmp_old_ipv6_recvdstopts; 2007 break; 2008 case IPV6_RECVRTHDRDSTOPTS: 2009 *i1 = icmp->icmp_ipv6_recvrtdstopts; 2010 break; 2011 case IPV6_RECVRTHDR: 2012 *i1 = icmp->icmp_ipv6_recvrthdr; 2013 break; 2014 case IPV6_PKTINFO: { 2015 /* XXX assumes that caller has room for max size! 
*/ 2016 struct in6_pktinfo *pkti; 2017 2018 pkti = (struct in6_pktinfo *)ptr; 2019 if (ipp->ipp_fields & IPPF_IFINDEX) 2020 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2021 else 2022 pkti->ipi6_ifindex = 0; 2023 if (ipp->ipp_fields & IPPF_ADDR) 2024 pkti->ipi6_addr = ipp->ipp_addr; 2025 else 2026 pkti->ipi6_addr = ipv6_all_zeros; 2027 ret = sizeof (struct in6_pktinfo); 2028 goto done; 2029 } 2030 case IPV6_NEXTHOP: { 2031 sin6_t *sin6 = (sin6_t *)ptr; 2032 2033 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2034 return (0); 2035 *sin6 = sin6_null; 2036 sin6->sin6_family = AF_INET6; 2037 sin6->sin6_addr = ipp->ipp_nexthop; 2038 ret = (sizeof (sin6_t)); 2039 goto done; 2040 } 2041 case IPV6_HOPOPTS: 2042 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2043 return (0); 2044 if (ipp->ipp_hopoptslen <= icmp->icmp_label_len_v6) 2045 return (0); 2046 bcopy((char *)ipp->ipp_hopopts + 2047 icmp->icmp_label_len_v6, ptr, 2048 ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 2049 if (icmp->icmp_label_len_v6 > 0) { 2050 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2051 ptr[1] = (ipp->ipp_hopoptslen - 2052 icmp->icmp_label_len_v6 + 7) / 8 - 1; 2053 } 2054 ret = (ipp->ipp_hopoptslen - icmp->icmp_label_len_v6); 2055 goto done; 2056 case IPV6_RTHDRDSTOPTS: 2057 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2058 return (0); 2059 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2060 ret = ipp->ipp_rtdstoptslen; 2061 goto done; 2062 case IPV6_RTHDR: 2063 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2064 return (0); 2065 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2066 ret = ipp->ipp_rthdrlen; 2067 goto done; 2068 case IPV6_DSTOPTS: 2069 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) { 2070 ret = 0; 2071 goto done; 2072 } 2073 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2074 ret = ipp->ipp_dstoptslen; 2075 goto done; 2076 case IPV6_PATHMTU: 2077 if (!(ipp->ipp_fields & IPPF_PATHMTU)) { 2078 ret = 0; 2079 } else { 2080 ret = ip_fill_mtuinfo( 2081 &icmp->icmp_v6dst.sin6_addr, 0, 2082 (struct ip6_mtuinfo *)ptr, 2083 
is->is_netstack); 2084 } 2085 goto done; 2086 case IPV6_TCLASS: 2087 if (ipp->ipp_fields & IPPF_TCLASS) 2088 *i1 = ipp->ipp_tclass; 2089 else 2090 *i1 = IPV6_FLOW_TCLASS( 2091 IPV6_DEFAULT_VERS_AND_FLOW); 2092 break; 2093 default: 2094 ret = -1; 2095 goto done; 2096 } 2097 break; 2098 case IPPROTO_ICMPV6: 2099 /* 2100 * Only allow IPv6 option processing on native IPv6 sockets. 2101 */ 2102 if (icmp->icmp_family != AF_INET6) { 2103 ret = -1; 2104 } 2105 2106 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 2107 ret = -1; 2108 } 2109 2110 switch (name) { 2111 case ICMP6_FILTER: 2112 if (icmp->icmp_filter == NULL) { 2113 /* Make it look like "pass all" */ 2114 ICMP6_FILTER_SETPASSALL((icmp6_filter_t *)ptr); 2115 } else { 2116 (void) bcopy(icmp->icmp_filter, ptr, 2117 sizeof (icmp6_filter_t)); 2118 } 2119 ret = sizeof (icmp6_filter_t); 2120 goto done; 2121 default: 2122 ret = -1; 2123 goto done; 2124 } 2125 default: 2126 ret = -1; 2127 goto done; 2128 } 2129 ret = sizeof (int); 2130 done: 2131 return (ret); 2132 } 2133 2134 /* 2135 * This routine retrieves the current status of socket options. 2136 * It returns the size of the option retrieved. 2137 */ 2138 int 2139 icmp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr) 2140 { 2141 conn_t *connp = Q_TO_CONN(q); 2142 icmp_t *icmp = connp->conn_icmp; 2143 int err; 2144 2145 rw_enter(&icmp->icmp_rwlock, RW_READER); 2146 err = icmp_opt_get(connp, level, name, ptr); 2147 rw_exit(&icmp->icmp_rwlock); 2148 return (err); 2149 } 2150 2151 int 2152 icmp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2153 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2154 void *thisdg_attrs, boolean_t checkonly) 2155 { 2156 2157 int *i1 = (int *)invalp; 2158 boolean_t onoff = (*i1 == 0) ? 
0 : 1; 2159 icmp_t *icmp = connp->conn_icmp; 2160 icmp_stack_t *is = icmp->icmp_is; 2161 int error; 2162 2163 ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock)); 2164 /* 2165 * For fixed length options, no sanity check 2166 * of passed in length is done. It is assumed *_optcom_req() 2167 * routines do the right thing. 2168 */ 2169 switch (level) { 2170 case SOL_SOCKET: 2171 switch (name) { 2172 case SO_DEBUG: 2173 if (!checkonly) 2174 icmp->icmp_debug = onoff; 2175 break; 2176 case SO_PROTOTYPE: 2177 if ((*i1 & 0xFF) != IPPROTO_ICMP && 2178 (*i1 & 0xFF) != IPPROTO_ICMPV6 && 2179 secpolicy_net_rawaccess(cr) != 0) { 2180 *outlenp = 0; 2181 return (EACCES); 2182 } 2183 /* Can't use IPPROTO_RAW with IPv6 */ 2184 if ((*i1 & 0xFF) == IPPROTO_RAW && 2185 icmp->icmp_family == AF_INET6) { 2186 *outlenp = 0; 2187 return (EPROTONOSUPPORT); 2188 } 2189 if (checkonly) { 2190 /* T_CHECK case */ 2191 *(int *)outvalp = (*i1 & 0xFF); 2192 break; 2193 } 2194 icmp->icmp_proto = *i1 & 0xFF; 2195 if ((icmp->icmp_proto == IPPROTO_RAW || 2196 icmp->icmp_proto == IPPROTO_IGMP) && 2197 icmp->icmp_family == AF_INET) 2198 icmp->icmp_hdrincl = 1; 2199 else 2200 icmp->icmp_hdrincl = 0; 2201 2202 if (icmp->icmp_family == AF_INET6 && 2203 icmp->icmp_proto == IPPROTO_ICMPV6) { 2204 /* Set offset for icmp6_cksum */ 2205 icmp->icmp_raw_checksum = 0; 2206 icmp->icmp_checksum_off = 2; 2207 } 2208 if (icmp->icmp_proto == IPPROTO_UDP || 2209 icmp->icmp_proto == IPPROTO_TCP || 2210 icmp->icmp_proto == IPPROTO_SCTP) { 2211 icmp->icmp_no_tp_cksum = 1; 2212 icmp->icmp_sticky_ipp.ipp_fields |= 2213 IPPF_NO_CKSUM; 2214 } else { 2215 icmp->icmp_no_tp_cksum = 0; 2216 icmp->icmp_sticky_ipp.ipp_fields &= 2217 ~IPPF_NO_CKSUM; 2218 } 2219 2220 if (icmp->icmp_filter != NULL && 2221 icmp->icmp_proto != IPPROTO_ICMPV6) { 2222 kmem_free(icmp->icmp_filter, 2223 sizeof (icmp6_filter_t)); 2224 icmp->icmp_filter = NULL; 2225 } 2226 2227 /* Rebuild the header template */ 2228 error = icmp_build_hdrs(icmp); 2229 if (error != 0) { 
2230 *outlenp = 0; 2231 return (error); 2232 } 2233 2234 /* 2235 * For SCTP, we don't use icmp_bind_proto() for 2236 * raw socket binding. Note that we do not need 2237 * to set *outlenp. 2238 * FIXME: how does SCTP work? 2239 */ 2240 if (icmp->icmp_proto == IPPROTO_SCTP) 2241 return (0); 2242 2243 *outlenp = sizeof (int); 2244 *(int *)outvalp = *i1 & 0xFF; 2245 2246 /* Drop lock across the bind operation */ 2247 rw_exit(&icmp->icmp_rwlock); 2248 (void) icmp_bind_proto(connp); 2249 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2250 return (0); 2251 case SO_REUSEADDR: 2252 if (!checkonly) { 2253 icmp->icmp_reuseaddr = onoff; 2254 PASS_OPT_TO_IP(connp); 2255 } 2256 break; 2257 2258 /* 2259 * The following three items are available here, 2260 * but are only meaningful to IP. 2261 */ 2262 case SO_DONTROUTE: 2263 if (!checkonly) { 2264 icmp->icmp_dontroute = onoff; 2265 PASS_OPT_TO_IP(connp); 2266 } 2267 break; 2268 case SO_USELOOPBACK: 2269 if (!checkonly) { 2270 icmp->icmp_useloopback = onoff; 2271 PASS_OPT_TO_IP(connp); 2272 } 2273 break; 2274 case SO_BROADCAST: 2275 if (!checkonly) { 2276 icmp->icmp_broadcast = onoff; 2277 PASS_OPT_TO_IP(connp); 2278 } 2279 break; 2280 2281 case SO_SNDBUF: 2282 if (*i1 > is->is_max_buf) { 2283 *outlenp = 0; 2284 return (ENOBUFS); 2285 } 2286 if (!checkonly) { 2287 if (!IPCL_IS_NONSTR(connp)) { 2288 connp->conn_wq->q_hiwat = *i1; 2289 } 2290 icmp->icmp_xmit_hiwat = *i1; 2291 } 2292 break; 2293 case SO_RCVBUF: 2294 if (*i1 > is->is_max_buf) { 2295 *outlenp = 0; 2296 return (ENOBUFS); 2297 } 2298 if (!checkonly) { 2299 icmp->icmp_recv_hiwat = *i1; 2300 rw_exit(&icmp->icmp_rwlock); 2301 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2302 *i1); 2303 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2304 } 2305 break; 2306 case SO_DGRAM_ERRIND: 2307 if (!checkonly) 2308 icmp->icmp_dgram_errind = onoff; 2309 break; 2310 case SO_ALLZONES: 2311 /* 2312 * "soft" error (negative) 2313 * option not handled at this level 2314 * Note: Do not modify *outlenp 
2315 */ 2316 return (-EINVAL); 2317 case SO_TIMESTAMP: 2318 if (!checkonly) { 2319 icmp->icmp_timestamp = onoff; 2320 } 2321 break; 2322 case SO_MAC_EXEMPT: 2323 /* 2324 * "soft" error (negative) 2325 * option not handled at this level 2326 * Note: Do not modify *outlenp 2327 */ 2328 return (-EINVAL); 2329 case SO_RCVTIMEO: 2330 case SO_SNDTIMEO: 2331 /* 2332 * Pass these two options in order for third part 2333 * protocol usage. Here just return directly. 2334 */ 2335 return (0); 2336 /* 2337 * Following three not meaningful for icmp 2338 * Action is same as "default" so we keep them 2339 * in comments. 2340 * case SO_LINGER: 2341 * case SO_KEEPALIVE: 2342 * case SO_OOBINLINE: 2343 */ 2344 default: 2345 *outlenp = 0; 2346 return (EINVAL); 2347 } 2348 break; 2349 case IPPROTO_IP: 2350 /* 2351 * Only allow IPv4 option processing on IPv4 sockets. 2352 */ 2353 if (icmp->icmp_family != AF_INET) { 2354 *outlenp = 0; 2355 return (ENOPROTOOPT); 2356 } 2357 switch (name) { 2358 case IP_OPTIONS: 2359 case T_IP_OPTIONS: 2360 /* Save options for use by IP. */ 2361 if ((inlen & 0x3) || 2362 inlen + icmp->icmp_label_len > IP_MAX_OPT_LENGTH) { 2363 *outlenp = 0; 2364 return (EINVAL); 2365 } 2366 if (checkonly) 2367 break; 2368 2369 if (!tsol_option_set(&icmp->icmp_ip_snd_options, 2370 &icmp->icmp_ip_snd_options_len, 2371 icmp->icmp_label_len, invalp, inlen)) { 2372 *outlenp = 0; 2373 return (ENOMEM); 2374 } 2375 2376 icmp->icmp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2377 icmp->icmp_ip_snd_options_len; 2378 rw_exit(&icmp->icmp_rwlock); 2379 (void) proto_set_tx_wroff(connp->conn_rq == NULL ? 
NULL: 2380 RD(connp->conn_rq), connp, 2381 icmp->icmp_max_hdr_len + is->is_wroff_extra); 2382 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 2383 break; 2384 case IP_HDRINCL: 2385 if (!checkonly) 2386 icmp->icmp_hdrincl = onoff; 2387 break; 2388 case IP_TOS: 2389 case T_IP_TOS: 2390 if (!checkonly) { 2391 icmp->icmp_type_of_service = (uint8_t)*i1; 2392 } 2393 break; 2394 case IP_TTL: 2395 if (!checkonly) { 2396 icmp->icmp_ttl = (uint8_t)*i1; 2397 } 2398 break; 2399 case IP_MULTICAST_IF: 2400 /* 2401 * TODO should check OPTMGMT reply and undo this if 2402 * there is an error. 2403 */ 2404 if (!checkonly) { 2405 icmp->icmp_multicast_if_addr = *i1; 2406 PASS_OPT_TO_IP(connp); 2407 } 2408 break; 2409 case IP_MULTICAST_TTL: 2410 if (!checkonly) 2411 icmp->icmp_multicast_ttl = *invalp; 2412 break; 2413 case IP_MULTICAST_LOOP: 2414 if (!checkonly) { 2415 connp->conn_multicast_loop = 2416 (*invalp == 0) ? 0 : 1; 2417 PASS_OPT_TO_IP(connp); 2418 } 2419 break; 2420 case IP_BOUND_IF: 2421 if (!checkonly) { 2422 icmp->icmp_bound_if = *i1; 2423 PASS_OPT_TO_IP(connp); 2424 } 2425 break; 2426 case IP_UNSPEC_SRC: 2427 if (!checkonly) { 2428 icmp->icmp_unspec_source = onoff; 2429 PASS_OPT_TO_IP(connp); 2430 } 2431 break; 2432 case IP_BROADCAST_TTL: 2433 if (!checkonly) 2434 connp->conn_broadcast_ttl = *invalp; 2435 break; 2436 case IP_RECVIF: 2437 if (!checkonly) { 2438 icmp->icmp_recvif = onoff; 2439 } 2440 /* 2441 * pass to ip 2442 */ 2443 return (-EINVAL); 2444 case IP_PKTINFO: { 2445 /* 2446 * This also handles IP_RECVPKTINFO. 2447 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2448 * Differentiation is based on the size of the argument 2449 * passed in. 2450 */ 2451 struct in_pktinfo *pktinfop; 2452 ip4_pkt_t *attr_pktinfop; 2453 2454 if (checkonly) 2455 break; 2456 2457 if (inlen == sizeof (int)) { 2458 /* 2459 * This is IP_RECVPKTINFO option. 2460 * Keep a local copy of wether this option is 2461 * set or not and pass it down to IP for 2462 * processing. 
2463 */ 2464 icmp->icmp_ip_recvpktinfo = onoff; 2465 return (-EINVAL); 2466 } 2467 2468 2469 if (inlen != sizeof (struct in_pktinfo)) { 2470 return (EINVAL); 2471 } 2472 2473 if ((attr_pktinfop = (ip4_pkt_t *)thisdg_attrs) 2474 == NULL) { 2475 /* 2476 * sticky option is not supported 2477 */ 2478 return (EINVAL); 2479 } 2480 2481 pktinfop = (struct in_pktinfo *)invalp; 2482 2483 /* 2484 * Atleast one of the values should be specified 2485 */ 2486 if (pktinfop->ipi_ifindex == 0 && 2487 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2488 return (EINVAL); 2489 } 2490 2491 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2492 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2493 } 2494 break; 2495 case IP_ADD_MEMBERSHIP: 2496 case IP_DROP_MEMBERSHIP: 2497 case IP_BLOCK_SOURCE: 2498 case IP_UNBLOCK_SOURCE: 2499 case IP_ADD_SOURCE_MEMBERSHIP: 2500 case IP_DROP_SOURCE_MEMBERSHIP: 2501 case MCAST_JOIN_GROUP: 2502 case MCAST_LEAVE_GROUP: 2503 case MCAST_BLOCK_SOURCE: 2504 case MCAST_UNBLOCK_SOURCE: 2505 case MCAST_JOIN_SOURCE_GROUP: 2506 case MCAST_LEAVE_SOURCE_GROUP: 2507 case MRT_INIT: 2508 case MRT_DONE: 2509 case MRT_ADD_VIF: 2510 case MRT_DEL_VIF: 2511 case MRT_ADD_MFC: 2512 case MRT_DEL_MFC: 2513 case MRT_VERSION: 2514 case MRT_ASSERT: 2515 case IP_SEC_OPT: 2516 case IP_NEXTHOP: 2517 /* 2518 * "soft" error (negative) 2519 * option not handled at this level 2520 * Note: Do not modify *outlenp 2521 */ 2522 return (-EINVAL); 2523 default: 2524 *outlenp = 0; 2525 return (EINVAL); 2526 } 2527 break; 2528 case IPPROTO_IPV6: { 2529 ip6_pkt_t *ipp; 2530 boolean_t sticky; 2531 2532 if (icmp->icmp_family != AF_INET6) { 2533 *outlenp = 0; 2534 return (ENOPROTOOPT); 2535 } 2536 /* 2537 * Deal with both sticky options and ancillary data 2538 */ 2539 if (thisdg_attrs == NULL) { 2540 /* sticky options, or none */ 2541 ipp = &icmp->icmp_sticky_ipp; 2542 sticky = B_TRUE; 2543 } else { 2544 /* ancillary data */ 2545 ipp = (ip6_pkt_t *)thisdg_attrs; 2546 sticky = B_FALSE; 
2547 } 2548 2549 switch (name) { 2550 case IPV6_MULTICAST_IF: 2551 if (!checkonly) { 2552 icmp->icmp_multicast_if_index = *i1; 2553 PASS_OPT_TO_IP(connp); 2554 } 2555 break; 2556 case IPV6_UNICAST_HOPS: 2557 /* -1 means use default */ 2558 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2559 *outlenp = 0; 2560 return (EINVAL); 2561 } 2562 if (!checkonly) { 2563 if (*i1 == -1) { 2564 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2565 is->is_ipv6_hoplimit; 2566 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2567 /* Pass modified value to IP. */ 2568 *i1 = ipp->ipp_hoplimit; 2569 } else { 2570 icmp->icmp_ttl = ipp->ipp_unicast_hops = 2571 (uint8_t)*i1; 2572 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2573 } 2574 /* Rebuild the header template */ 2575 error = icmp_build_hdrs(icmp); 2576 if (error != 0) { 2577 *outlenp = 0; 2578 return (error); 2579 } 2580 } 2581 break; 2582 case IPV6_MULTICAST_HOPS: 2583 /* -1 means use default */ 2584 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2585 *outlenp = 0; 2586 return (EINVAL); 2587 } 2588 if (!checkonly) { 2589 if (*i1 == -1) { 2590 icmp->icmp_multicast_ttl = 2591 ipp->ipp_multicast_hops = 2592 IP_DEFAULT_MULTICAST_TTL; 2593 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2594 /* Pass modified value to IP. */ 2595 *i1 = icmp->icmp_multicast_ttl; 2596 } else { 2597 icmp->icmp_multicast_ttl = 2598 ipp->ipp_multicast_hops = 2599 (uint8_t)*i1; 2600 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2601 } 2602 } 2603 break; 2604 case IPV6_MULTICAST_LOOP: 2605 if (*i1 != 0 && *i1 != 1) { 2606 *outlenp = 0; 2607 return (EINVAL); 2608 } 2609 if (!checkonly) { 2610 connp->conn_multicast_loop = *i1; 2611 PASS_OPT_TO_IP(connp); 2612 } 2613 break; 2614 case IPV6_CHECKSUM: 2615 /* 2616 * Integer offset into the user data of where the 2617 * checksum is located. 2618 * Offset of -1 disables option. 2619 * Does not apply to IPPROTO_ICMPV6. 
2620 */ 2621 if (icmp->icmp_proto == IPPROTO_ICMPV6 || !sticky) { 2622 *outlenp = 0; 2623 return (EINVAL); 2624 } 2625 if ((*i1 != -1) && ((*i1 < 0) || (*i1 & 0x1) != 0)) { 2626 /* Negative or not 16 bit aligned offset */ 2627 *outlenp = 0; 2628 return (EINVAL); 2629 } 2630 if (checkonly) 2631 break; 2632 2633 if (*i1 == -1) { 2634 icmp->icmp_raw_checksum = 0; 2635 ipp->ipp_fields &= ~IPPF_RAW_CKSUM; 2636 } else { 2637 icmp->icmp_raw_checksum = 1; 2638 icmp->icmp_checksum_off = *i1; 2639 ipp->ipp_fields |= IPPF_RAW_CKSUM; 2640 } 2641 /* Rebuild the header template */ 2642 error = icmp_build_hdrs(icmp); 2643 if (error != 0) { 2644 *outlenp = 0; 2645 return (error); 2646 } 2647 break; 2648 case IPV6_JOIN_GROUP: 2649 case IPV6_LEAVE_GROUP: 2650 case MCAST_JOIN_GROUP: 2651 case MCAST_LEAVE_GROUP: 2652 case MCAST_BLOCK_SOURCE: 2653 case MCAST_UNBLOCK_SOURCE: 2654 case MCAST_JOIN_SOURCE_GROUP: 2655 case MCAST_LEAVE_SOURCE_GROUP: 2656 /* 2657 * "soft" error (negative) 2658 * option not handled at this level 2659 * Note: Do not modify *outlenp 2660 */ 2661 return (-EINVAL); 2662 case IPV6_BOUND_IF: 2663 if (!checkonly) { 2664 icmp->icmp_bound_if = *i1; 2665 PASS_OPT_TO_IP(connp); 2666 } 2667 break; 2668 case IPV6_UNSPEC_SRC: 2669 if (!checkonly) { 2670 icmp->icmp_unspec_source = onoff; 2671 PASS_OPT_TO_IP(connp); 2672 } 2673 break; 2674 case IPV6_RECVTCLASS: 2675 if (!checkonly) { 2676 icmp->icmp_ipv6_recvtclass = onoff; 2677 PASS_OPT_TO_IP(connp); 2678 } 2679 break; 2680 /* 2681 * Set boolean switches for ancillary data delivery 2682 */ 2683 case IPV6_RECVPKTINFO: 2684 if (!checkonly) { 2685 icmp->icmp_ip_recvpktinfo = onoff; 2686 PASS_OPT_TO_IP(connp); 2687 } 2688 break; 2689 case IPV6_RECVPATHMTU: 2690 if (!checkonly) { 2691 icmp->icmp_ipv6_recvpathmtu = onoff; 2692 PASS_OPT_TO_IP(connp); 2693 } 2694 break; 2695 case IPV6_RECVHOPLIMIT: 2696 if (!checkonly) { 2697 icmp->icmp_ipv6_recvhoplimit = onoff; 2698 PASS_OPT_TO_IP(connp); 2699 } 2700 break; 2701 case 
IPV6_RECVHOPOPTS: 2702 if (!checkonly) { 2703 icmp->icmp_ipv6_recvhopopts = onoff; 2704 PASS_OPT_TO_IP(connp); 2705 } 2706 break; 2707 case IPV6_RECVDSTOPTS: 2708 if (!checkonly) { 2709 icmp->icmp_ipv6_recvdstopts = onoff; 2710 PASS_OPT_TO_IP(connp); 2711 } 2712 break; 2713 case _OLD_IPV6_RECVDSTOPTS: 2714 if (!checkonly) 2715 icmp->icmp_old_ipv6_recvdstopts = onoff; 2716 break; 2717 case IPV6_RECVRTHDRDSTOPTS: 2718 if (!checkonly) { 2719 icmp->icmp_ipv6_recvrtdstopts = onoff; 2720 PASS_OPT_TO_IP(connp); 2721 } 2722 break; 2723 case IPV6_RECVRTHDR: 2724 if (!checkonly) { 2725 icmp->icmp_ipv6_recvrthdr = onoff; 2726 PASS_OPT_TO_IP(connp); 2727 } 2728 break; 2729 /* 2730 * Set sticky options or ancillary data. 2731 * If sticky options, (re)build any extension headers 2732 * that might be needed as a result. 2733 */ 2734 case IPV6_PKTINFO: 2735 /* 2736 * The source address and ifindex are verified 2737 * in ip_opt_set(). For ancillary data the 2738 * source address is checked in ip_wput_v6. 2739 */ 2740 if (inlen != 0 && inlen != 2741 sizeof (struct in6_pktinfo)) { 2742 return (EINVAL); 2743 } 2744 if (checkonly) 2745 break; 2746 2747 if (inlen == 0) { 2748 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2749 ipp->ipp_sticky_ignored |= 2750 (IPPF_IFINDEX|IPPF_ADDR); 2751 } else { 2752 struct in6_pktinfo *pkti; 2753 2754 pkti = (struct in6_pktinfo *)invalp; 2755 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2756 ipp->ipp_addr = pkti->ipi6_addr; 2757 if (ipp->ipp_ifindex != 0) 2758 ipp->ipp_fields |= IPPF_IFINDEX; 2759 else 2760 ipp->ipp_fields &= ~IPPF_IFINDEX; 2761 if (!IN6_IS_ADDR_UNSPECIFIED( 2762 &ipp->ipp_addr)) 2763 ipp->ipp_fields |= IPPF_ADDR; 2764 else 2765 ipp->ipp_fields &= ~IPPF_ADDR; 2766 } 2767 if (sticky) { 2768 error = icmp_build_hdrs(icmp); 2769 if (error != 0) 2770 return (error); 2771 PASS_OPT_TO_IP(connp); 2772 } 2773 break; 2774 case IPV6_HOPLIMIT: 2775 /* This option can only be used as ancillary data. 
*/ 2776 if (sticky) 2777 return (EINVAL); 2778 if (inlen != 0 && inlen != sizeof (int)) 2779 return (EINVAL); 2780 if (checkonly) 2781 break; 2782 2783 if (inlen == 0) { 2784 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2785 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2786 } else { 2787 if (*i1 > 255 || *i1 < -1) 2788 return (EINVAL); 2789 if (*i1 == -1) 2790 ipp->ipp_hoplimit = 2791 is->is_ipv6_hoplimit; 2792 else 2793 ipp->ipp_hoplimit = *i1; 2794 ipp->ipp_fields |= IPPF_HOPLIMIT; 2795 } 2796 break; 2797 case IPV6_TCLASS: 2798 /* 2799 * IPV6_RECVTCLASS accepts -1 as use kernel default 2800 * and [0, 255] as the actualy traffic class. 2801 */ 2802 if (inlen != 0 && inlen != sizeof (int)) { 2803 return (EINVAL); 2804 } 2805 if (checkonly) 2806 break; 2807 2808 if (inlen == 0) { 2809 ipp->ipp_fields &= ~IPPF_TCLASS; 2810 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2811 } else { 2812 if (*i1 >= 256 || *i1 < -1) 2813 return (EINVAL); 2814 if (*i1 == -1) { 2815 ipp->ipp_tclass = 2816 IPV6_FLOW_TCLASS( 2817 IPV6_DEFAULT_VERS_AND_FLOW); 2818 } else { 2819 ipp->ipp_tclass = *i1; 2820 } 2821 ipp->ipp_fields |= IPPF_TCLASS; 2822 } 2823 if (sticky) { 2824 error = icmp_build_hdrs(icmp); 2825 if (error != 0) 2826 return (error); 2827 } 2828 break; 2829 case IPV6_NEXTHOP: 2830 /* 2831 * IP will verify that the nexthop is reachable 2832 * and fail for sticky options. 
2833 */ 2834 if (inlen != 0 && inlen != sizeof (sin6_t)) { 2835 return (EINVAL); 2836 } 2837 if (checkonly) 2838 break; 2839 2840 if (inlen == 0) { 2841 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2842 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2843 } else { 2844 sin6_t *sin6 = (sin6_t *)invalp; 2845 2846 if (sin6->sin6_family != AF_INET6) { 2847 return (EAFNOSUPPORT); 2848 } 2849 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 2850 return (EADDRNOTAVAIL); 2851 } 2852 ipp->ipp_nexthop = sin6->sin6_addr; 2853 if (!IN6_IS_ADDR_UNSPECIFIED( 2854 &ipp->ipp_nexthop)) 2855 ipp->ipp_fields |= IPPF_NEXTHOP; 2856 else 2857 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2858 } 2859 if (sticky) { 2860 error = icmp_build_hdrs(icmp); 2861 if (error != 0) 2862 return (error); 2863 PASS_OPT_TO_IP(connp); 2864 } 2865 break; 2866 case IPV6_HOPOPTS: { 2867 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2868 /* 2869 * Sanity checks - minimum size, size a multiple of 2870 * eight bytes, and matching size passed in. 2871 */ 2872 if (inlen != 0 && 2873 inlen != (8 * (hopts->ip6h_len + 1))) { 2874 return (EINVAL); 2875 } 2876 2877 if (checkonly) 2878 break; 2879 error = optcom_pkt_set(invalp, inlen, sticky, 2880 (uchar_t **)&ipp->ipp_hopopts, 2881 &ipp->ipp_hopoptslen, 2882 sticky ? icmp->icmp_label_len_v6 : 0); 2883 if (error != 0) 2884 return (error); 2885 if (ipp->ipp_hopoptslen == 0) { 2886 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2887 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2888 } else { 2889 ipp->ipp_fields |= IPPF_HOPOPTS; 2890 } 2891 if (sticky) { 2892 error = icmp_build_hdrs(icmp); 2893 if (error != 0) 2894 return (error); 2895 } 2896 break; 2897 } 2898 case IPV6_RTHDRDSTOPTS: { 2899 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2900 2901 /* 2902 * Sanity checks - minimum size, size a multiple of 2903 * eight bytes, and matching size passed in. 
2904 */ 2905 if (inlen != 0 && 2906 inlen != (8 * (dopts->ip6d_len + 1))) 2907 return (EINVAL); 2908 2909 if (checkonly) 2910 break; 2911 2912 if (inlen == 0) { 2913 if (sticky && 2914 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2915 kmem_free(ipp->ipp_rtdstopts, 2916 ipp->ipp_rtdstoptslen); 2917 ipp->ipp_rtdstopts = NULL; 2918 ipp->ipp_rtdstoptslen = 0; 2919 } 2920 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2921 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2922 } else { 2923 error = optcom_pkt_set(invalp, inlen, sticky, 2924 (uchar_t **)&ipp->ipp_rtdstopts, 2925 &ipp->ipp_rtdstoptslen, 0); 2926 if (error != 0) 2927 return (error); 2928 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2929 } 2930 if (sticky) { 2931 error = icmp_build_hdrs(icmp); 2932 if (error != 0) 2933 return (error); 2934 } 2935 break; 2936 } 2937 case IPV6_DSTOPTS: { 2938 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2939 2940 /* 2941 * Sanity checks - minimum size, size a multiple of 2942 * eight bytes, and matching size passed in. 2943 */ 2944 if (inlen != 0 && 2945 inlen != (8 * (dopts->ip6d_len + 1))) 2946 return (EINVAL); 2947 2948 if (checkonly) 2949 break; 2950 2951 if (inlen == 0) { 2952 if (sticky && 2953 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2954 kmem_free(ipp->ipp_dstopts, 2955 ipp->ipp_dstoptslen); 2956 ipp->ipp_dstopts = NULL; 2957 ipp->ipp_dstoptslen = 0; 2958 } 2959 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2960 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2961 } else { 2962 error = optcom_pkt_set(invalp, inlen, sticky, 2963 (uchar_t **)&ipp->ipp_dstopts, 2964 &ipp->ipp_dstoptslen, 0); 2965 if (error != 0) 2966 return (error); 2967 ipp->ipp_fields |= IPPF_DSTOPTS; 2968 } 2969 if (sticky) { 2970 error = icmp_build_hdrs(icmp); 2971 if (error != 0) 2972 return (error); 2973 } 2974 break; 2975 } 2976 case IPV6_RTHDR: { 2977 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2978 2979 /* 2980 * Sanity checks - minimum size, size a multiple of 2981 * eight bytes, and matching size passed in. 
2982 */ 2983 if (inlen != 0 && 2984 inlen != (8 * (rt->ip6r_len + 1))) 2985 return (EINVAL); 2986 2987 if (checkonly) 2988 break; 2989 2990 if (inlen == 0) { 2991 if (sticky && 2992 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2993 kmem_free(ipp->ipp_rthdr, 2994 ipp->ipp_rthdrlen); 2995 ipp->ipp_rthdr = NULL; 2996 ipp->ipp_rthdrlen = 0; 2997 } 2998 ipp->ipp_fields &= ~IPPF_RTHDR; 2999 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3000 } else { 3001 error = optcom_pkt_set(invalp, inlen, sticky, 3002 (uchar_t **)&ipp->ipp_rthdr, 3003 &ipp->ipp_rthdrlen, 0); 3004 if (error != 0) 3005 return (error); 3006 ipp->ipp_fields |= IPPF_RTHDR; 3007 } 3008 if (sticky) { 3009 error = icmp_build_hdrs(icmp); 3010 if (error != 0) 3011 return (error); 3012 } 3013 break; 3014 } 3015 3016 case IPV6_DONTFRAG: 3017 if (checkonly) 3018 break; 3019 3020 if (onoff) { 3021 ipp->ipp_fields |= IPPF_DONTFRAG; 3022 } else { 3023 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3024 } 3025 break; 3026 3027 case IPV6_USE_MIN_MTU: 3028 if (inlen != sizeof (int)) 3029 return (EINVAL); 3030 3031 if (*i1 < -1 || *i1 > 1) 3032 return (EINVAL); 3033 3034 if (checkonly) 3035 break; 3036 3037 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3038 ipp->ipp_use_min_mtu = *i1; 3039 break; 3040 3041 /* 3042 * This option can't be set. Its only returned via 3043 * getsockopt() or ancillary data. 3044 */ 3045 case IPV6_PATHMTU: 3046 return (EINVAL); 3047 3048 case IPV6_SEC_OPT: 3049 case IPV6_SRC_PREFERENCES: 3050 case IPV6_V6ONLY: 3051 /* Handled at IP level */ 3052 return (-EINVAL); 3053 default: 3054 *outlenp = 0; 3055 return (EINVAL); 3056 } 3057 break; 3058 } /* end IPPROTO_IPV6 */ 3059 3060 case IPPROTO_ICMPV6: 3061 /* 3062 * Only allow IPv6 option processing on IPv6 sockets. 
3063 */ 3064 if (icmp->icmp_family != AF_INET6) { 3065 *outlenp = 0; 3066 return (ENOPROTOOPT); 3067 } 3068 if (icmp->icmp_proto != IPPROTO_ICMPV6) { 3069 *outlenp = 0; 3070 return (ENOPROTOOPT); 3071 } 3072 switch (name) { 3073 case ICMP6_FILTER: 3074 if (!checkonly) { 3075 if ((inlen != 0) && 3076 (inlen != sizeof (icmp6_filter_t))) 3077 return (EINVAL); 3078 3079 if (inlen == 0) { 3080 if (icmp->icmp_filter != NULL) { 3081 kmem_free(icmp->icmp_filter, 3082 sizeof (icmp6_filter_t)); 3083 icmp->icmp_filter = NULL; 3084 } 3085 } else { 3086 if (icmp->icmp_filter == NULL) { 3087 icmp->icmp_filter = kmem_alloc( 3088 sizeof (icmp6_filter_t), 3089 KM_NOSLEEP); 3090 if (icmp->icmp_filter == NULL) { 3091 *outlenp = 0; 3092 return (ENOBUFS); 3093 } 3094 } 3095 (void) bcopy(invalp, icmp->icmp_filter, 3096 inlen); 3097 } 3098 } 3099 break; 3100 3101 default: 3102 *outlenp = 0; 3103 return (EINVAL); 3104 } 3105 break; 3106 default: 3107 *outlenp = 0; 3108 return (EINVAL); 3109 } 3110 /* 3111 * Common case of OK return with outval same as inval. 3112 */ 3113 if (invalp != outvalp) { 3114 /* don't trust bcopy for identical src/dst */ 3115 (void) bcopy(invalp, outvalp, inlen); 3116 } 3117 *outlenp = inlen; 3118 return (0); 3119 } 3120 3121 /* This routine sets socket options. */ 3122 /* ARGSUSED */ 3123 int 3124 icmp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3125 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3126 void *thisdg_attrs, cred_t *cr) 3127 { 3128 boolean_t checkonly; 3129 int error; 3130 3131 error = 0; 3132 switch (optset_context) { 3133 case SETFN_OPTCOM_CHECKONLY: 3134 checkonly = B_TRUE; 3135 /* 3136 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3137 * inlen != 0 implies value supplied and 3138 * we have to "pretend" to set it. 3139 * inlen == 0 implies that there is no 3140 * value part in T_CHECK request and just validation 3141 * done elsewhere should be enough, we just return here. 
3142 */ 3143 if (inlen == 0) { 3144 *outlenp = 0; 3145 error = 0; 3146 goto done; 3147 } 3148 break; 3149 case SETFN_OPTCOM_NEGOTIATE: 3150 checkonly = B_FALSE; 3151 break; 3152 case SETFN_UD_NEGOTIATE: 3153 case SETFN_CONN_NEGOTIATE: 3154 checkonly = B_FALSE; 3155 /* 3156 * Negotiating local and "association-related" options 3157 * through T_UNITDATA_REQ. 3158 * 3159 * Following routine can filter out ones we do not 3160 * want to be "set" this way. 3161 */ 3162 if (!icmp_opt_allow_udr_set(level, name)) { 3163 *outlenp = 0; 3164 error = EINVAL; 3165 goto done; 3166 } 3167 break; 3168 default: 3169 /* 3170 * We should never get here 3171 */ 3172 *outlenp = 0; 3173 error = EINVAL; 3174 goto done; 3175 } 3176 3177 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3178 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3179 error = icmp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3180 outvalp, cr, thisdg_attrs, checkonly); 3181 3182 done: 3183 return (error); 3184 } 3185 3186 /* This routine sets socket options. */ 3187 /* ARGSUSED */ 3188 int 3189 icmp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3190 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3191 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3192 { 3193 conn_t *connp = Q_TO_CONN(q); 3194 icmp_t *icmp; 3195 int error; 3196 3197 icmp = connp->conn_icmp; 3198 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 3199 error = icmp_opt_set(connp, optset_context, level, name, inlen, invalp, 3200 outlenp, outvalp, thisdg_attrs, cr); 3201 rw_exit(&icmp->icmp_rwlock); 3202 return (error); 3203 } 3204 3205 /* 3206 * Update icmp_sticky_hdrs based on icmp_sticky_ipp, icmp_v6src, icmp_ttl, 3207 * icmp_proto, icmp_raw_checksum and icmp_no_tp_cksum. 3208 * The headers include ip6i_t (if needed), ip6_t, and any sticky extension 3209 * headers. 3210 * Returns failure if can't allocate memory. 
 *
 * The caller must hold icmp_rwlock as writer (asserted below).  Note that
 * the lock is dropped and re-acquired around proto_set_tx_wroff() when the
 * new headers are longer than icmp_max_hdr_len.
 *
 * Returns 0 on success, or ENOMEM if a new header buffer is needed and the
 * KM_NOSLEEP allocation fails; in that case the existing buffer is left
 * untouched.
 */
static int
icmp_build_hdrs(icmp_t *icmp)
{
	icmp_stack_t *is = icmp->icmp_is;
	uchar_t	*hdrs;
	uint_t	hdrs_len;
	ip6_t	*ip6h;
	ip6i_t	*ip6i;
	ip6_pkt_t *ipp = &icmp->icmp_sticky_ipp;

	ASSERT(RW_WRITE_HELD(&icmp->icmp_rwlock));
	/* Total length needed for the headers described by the sticky ipp. */
	hdrs_len = ip_total_hdrs_len_v6(ipp);
	ASSERT(hdrs_len != 0);
	if (hdrs_len != icmp->icmp_sticky_hdrs_len) {
		/* Need to reallocate */
		/*
		 * Allocate the replacement before freeing the old buffer so
		 * that an allocation failure leaves the old headers intact.
		 */
		if (hdrs_len != 0) {
			hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
			if (hdrs == NULL)
				return (ENOMEM);
		} else {
			/*
			 * NOTE(review): the ASSERT above says this cannot
			 * happen; in a build where ASSERT is compiled out a
			 * NULL buffer would be passed on and dereferenced
			 * below - confirm hdrs_len can never be 0.
			 */
			hdrs = NULL;
		}
		if (icmp->icmp_sticky_hdrs_len != 0) {
			kmem_free(icmp->icmp_sticky_hdrs,
			    icmp->icmp_sticky_hdrs_len);
		}
		icmp->icmp_sticky_hdrs = hdrs;
		icmp->icmp_sticky_hdrs_len = hdrs_len;
	}
	/* Fill the (possibly new) buffer from the sticky options. */
	ip_build_hdrs_v6(icmp->icmp_sticky_hdrs,
	    icmp->icmp_sticky_hdrs_len, ipp, icmp->icmp_proto);

	/* Set header fields not in ipp */
	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
		/* An ip6i_t leads the buffer; the ip6_t follows it. */
		ip6i = (ip6i_t *)icmp->icmp_sticky_hdrs;
		ip6h = (ip6_t *)&ip6i[1];

		if (ipp->ipp_fields & IPPF_RAW_CKSUM) {
			ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM;
			ip6i->ip6i_checksum_off = icmp->icmp_checksum_off;
		}
		if (ipp->ipp_fields & IPPF_NO_CKSUM) {
			ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM;
		}
	} else {
		ip6h = (ip6_t *)icmp->icmp_sticky_hdrs;
	}

	/* No sticky source address set: use the endpoint's icmp_v6src. */
	if (!(ipp->ipp_fields & IPPF_ADDR))
		ip6h->ip6_src = icmp->icmp_v6src;

	/* Try to get everything in a single mblk */
	if (hdrs_len > icmp->icmp_max_hdr_len) {
		icmp->icmp_max_hdr_len = hdrs_len;
		/* proto_set_tx_wroff() is called without icmp_rwlock held. */
		rw_exit(&icmp->icmp_rwlock);
		(void) proto_set_tx_wroff(icmp->icmp_connp->conn_rq,
		    icmp->icmp_connp,
		    icmp->icmp_max_hdr_len + is->is_wroff_extra);
		rw_enter(&icmp->icmp_rwlock, RW_WRITER);
	}
	return (0);
}

/*
 * This routine retrieves the value of an ND variable in a icmpparam_t
 * structure.
It is called through nd_getset when a user reads the 3278 * variable. 3279 */ 3280 /* ARGSUSED */ 3281 static int 3282 icmp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3283 { 3284 icmpparam_t *icmppa = (icmpparam_t *)cp; 3285 3286 (void) mi_mpprintf(mp, "%d", icmppa->icmp_param_value); 3287 return (0); 3288 } 3289 3290 /* 3291 * Walk through the param array specified registering each element with the 3292 * named dispatch (ND) handler. 3293 */ 3294 static boolean_t 3295 icmp_param_register(IDP *ndp, icmpparam_t *icmppa, int cnt) 3296 { 3297 for (; cnt-- > 0; icmppa++) { 3298 if (icmppa->icmp_param_name && icmppa->icmp_param_name[0]) { 3299 if (!nd_load(ndp, icmppa->icmp_param_name, 3300 icmp_param_get, icmp_param_set, 3301 (caddr_t)icmppa)) { 3302 nd_free(ndp); 3303 return (B_FALSE); 3304 } 3305 } 3306 } 3307 return (B_TRUE); 3308 } 3309 3310 /* This routine sets an ND variable in a icmpparam_t structure. */ 3311 /* ARGSUSED */ 3312 static int 3313 icmp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3314 { 3315 long new_value; 3316 icmpparam_t *icmppa = (icmpparam_t *)cp; 3317 3318 /* 3319 * Fail the request if the new value does not lie within the 3320 * required bounds. 
3321 */ 3322 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3323 new_value < icmppa->icmp_param_min || 3324 new_value > icmppa->icmp_param_max) { 3325 return (EINVAL); 3326 } 3327 /* Set the new value */ 3328 icmppa->icmp_param_value = new_value; 3329 return (0); 3330 } 3331 3332 static mblk_t * 3333 icmp_queue_fallback(icmp_t *icmp, mblk_t *mp) 3334 { 3335 ASSERT(MUTEX_HELD(&icmp->icmp_recv_lock)); 3336 if (IPCL_IS_NONSTR(icmp->icmp_connp)) { 3337 /* 3338 * fallback has started but messages have not been moved yet 3339 */ 3340 if (icmp->icmp_fallback_queue_head == NULL) { 3341 ASSERT(icmp->icmp_fallback_queue_tail == NULL); 3342 icmp->icmp_fallback_queue_head = mp; 3343 icmp->icmp_fallback_queue_tail = mp; 3344 } else { 3345 ASSERT(icmp->icmp_fallback_queue_tail != NULL); 3346 icmp->icmp_fallback_queue_tail->b_next = mp; 3347 icmp->icmp_fallback_queue_tail = mp; 3348 } 3349 return (NULL); 3350 } else { 3351 /* 3352 * Fallback completed, let the caller putnext() the mblk. 3353 */ 3354 return (mp); 3355 } 3356 } 3357 3358 /* 3359 * Deliver data to ULP. In case we have a socket, and it's falling back to 3360 * TPI, then we'll queue the mp for later processing. 
3361 */ 3362 static void 3363 icmp_ulp_recv(conn_t *connp, mblk_t *mp) 3364 { 3365 3366 if (IPCL_IS_NONSTR(connp)) { 3367 icmp_t *icmp = connp->conn_icmp; 3368 int error; 3369 3370 if ((*connp->conn_upcalls->su_recv) 3371 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 3372 NULL) < 0) { 3373 mutex_enter(&icmp->icmp_recv_lock); 3374 if (error == ENOSPC) { 3375 /* 3376 * let's confirm while holding the lock 3377 */ 3378 if ((*connp->conn_upcalls->su_recv) 3379 (connp->conn_upper_handle, NULL, 0, 0, 3380 &error, NULL) < 0) { 3381 ASSERT(error == ENOSPC); 3382 if (error == ENOSPC) { 3383 connp->conn_flow_cntrld = 3384 B_TRUE; 3385 } 3386 } 3387 mutex_exit(&icmp->icmp_recv_lock); 3388 } else { 3389 ASSERT(error == EOPNOTSUPP); 3390 mp = icmp_queue_fallback(icmp, mp); 3391 mutex_exit(&icmp->icmp_recv_lock); 3392 if (mp != NULL) 3393 putnext(connp->conn_rq, mp); 3394 } 3395 } 3396 ASSERT(MUTEX_NOT_HELD(&icmp->icmp_recv_lock)); 3397 } else { 3398 putnext(connp->conn_rq, mp); 3399 } 3400 } 3401 3402 /*ARGSUSED2*/ 3403 static void 3404 icmp_input(void *arg1, mblk_t *mp, void *arg2) 3405 { 3406 conn_t *connp = (conn_t *)arg1; 3407 struct T_unitdata_ind *tudi; 3408 uchar_t *rptr; 3409 icmp_t *icmp; 3410 icmp_stack_t *is; 3411 sin_t *sin; 3412 sin6_t *sin6; 3413 ip6_t *ip6h; 3414 ip6i_t *ip6i; 3415 mblk_t *mp1; 3416 int hdr_len; 3417 ipha_t *ipha; 3418 int udi_size; /* Size of T_unitdata_ind */ 3419 uint_t ipvers; 3420 ip6_pkt_t ipp; 3421 uint8_t nexthdr; 3422 ip_pktinfo_t *pinfo = NULL; 3423 mblk_t *options_mp = NULL; 3424 uint_t icmp_opt = 0; 3425 boolean_t icmp_ipv6_recvhoplimit = B_FALSE; 3426 uint_t hopstrip; 3427 3428 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 3429 3430 icmp = connp->conn_icmp; 3431 is = icmp->icmp_is; 3432 rptr = mp->b_rptr; 3433 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3434 ASSERT(OK_32PTR(rptr)); 3435 3436 /* 3437 * IP should have prepended the options data in an M_CTL 3438 * Check M_CTL "type" to make sure are not here bcos of 
3439 * a valid ICMP message 3440 */ 3441 if (DB_TYPE(mp) == M_CTL) { 3442 /* 3443 * FIXME: does IP still do this? 3444 * IP sends up the IPSEC_IN message for handling IPSEC 3445 * policy at the TCP level. We don't need it here. 3446 */ 3447 if (*(uint32_t *)(mp->b_rptr) == IPSEC_IN) { 3448 mp1 = mp->b_cont; 3449 freeb(mp); 3450 mp = mp1; 3451 rptr = mp->b_rptr; 3452 } else if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3453 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3454 IN_PKTINFO) { 3455 /* 3456 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3457 * has been prepended to the packet by IP. We need to 3458 * extract the mblk and adjust the rptr 3459 */ 3460 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3461 options_mp = mp; 3462 mp = mp->b_cont; 3463 rptr = mp->b_rptr; 3464 } else { 3465 /* 3466 * ICMP messages. 3467 */ 3468 icmp_icmp_error(connp, mp); 3469 return; 3470 } 3471 } 3472 3473 /* 3474 * Discard message if it is misaligned or smaller than the IP header. 3475 */ 3476 if (!OK_32PTR(rptr) || (mp->b_wptr - rptr) < sizeof (ipha_t)) { 3477 freemsg(mp); 3478 if (options_mp != NULL) 3479 freeb(options_mp); 3480 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3481 return; 3482 } 3483 ipvers = IPH_HDR_VERSION((ipha_t *)rptr); 3484 3485 /* Handle M_DATA messages containing IP packets messages */ 3486 if (ipvers == IPV4_VERSION) { 3487 /* 3488 * Special case where IP attaches 3489 * the IRE needs to be handled so that we don't send up 3490 * IRE to the user land. 
3491 */ 3492 ipha = (ipha_t *)rptr; 3493 hdr_len = IPH_HDR_LENGTH(ipha); 3494 3495 if (ipha->ipha_protocol == IPPROTO_TCP) { 3496 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3497 3498 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == 3499 TH_SYN) && mp->b_cont != NULL) { 3500 mp1 = mp->b_cont; 3501 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3502 freeb(mp1); 3503 mp->b_cont = NULL; 3504 } 3505 } 3506 } 3507 if (is->is_bsd_compat) { 3508 ushort_t len; 3509 len = ntohs(ipha->ipha_length); 3510 3511 if (mp->b_datap->db_ref > 1) { 3512 /* 3513 * Allocate a new IP header so that we can 3514 * modify ipha_length. 3515 */ 3516 mblk_t *mp1; 3517 3518 mp1 = allocb(hdr_len, BPRI_MED); 3519 if (!mp1) { 3520 freemsg(mp); 3521 if (options_mp != NULL) 3522 freeb(options_mp); 3523 BUMP_MIB(&is->is_rawip_mib, 3524 rawipInErrors); 3525 return; 3526 } 3527 bcopy(rptr, mp1->b_rptr, hdr_len); 3528 mp->b_rptr = rptr + hdr_len; 3529 rptr = mp1->b_rptr; 3530 ipha = (ipha_t *)rptr; 3531 mp1->b_cont = mp; 3532 mp1->b_wptr = rptr + hdr_len; 3533 mp = mp1; 3534 } 3535 len -= hdr_len; 3536 ipha->ipha_length = htons(len); 3537 } 3538 } 3539 3540 /* 3541 * This is the inbound data path. Packets are passed upstream as 3542 * T_UNITDATA_IND messages with full IP headers still attached. 3543 */ 3544 if (icmp->icmp_family == AF_INET) { 3545 ASSERT(ipvers == IPV4_VERSION); 3546 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3547 if (icmp->icmp_recvif && (pinfo != NULL) && 3548 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3549 udi_size += sizeof (struct T_opthdr) + 3550 sizeof (uint_t); 3551 } 3552 3553 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3554 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3555 udi_size += sizeof (struct T_opthdr) + 3556 sizeof (struct in_pktinfo); 3557 } 3558 3559 /* 3560 * If SO_TIMESTAMP is set allocate the appropriate sized 3561 * buffer. 
Since gethrestime() expects a pointer aligned 3562 * argument, we allocate space necessary for extra 3563 * alignment (even though it might not be used). 3564 */ 3565 if (icmp->icmp_timestamp) { 3566 udi_size += sizeof (struct T_opthdr) + 3567 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3568 } 3569 mp1 = allocb(udi_size, BPRI_MED); 3570 if (mp1 == NULL) { 3571 freemsg(mp); 3572 if (options_mp != NULL) 3573 freeb(options_mp); 3574 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3575 return; 3576 } 3577 mp1->b_cont = mp; 3578 mp = mp1; 3579 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3580 mp->b_datap->db_type = M_PROTO; 3581 mp->b_wptr = (uchar_t *)tudi + udi_size; 3582 tudi->PRIM_type = T_UNITDATA_IND; 3583 tudi->SRC_length = sizeof (sin_t); 3584 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3585 sin = (sin_t *)&tudi[1]; 3586 *sin = sin_null; 3587 sin->sin_family = AF_INET; 3588 sin->sin_addr.s_addr = ipha->ipha_src; 3589 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3590 sizeof (sin_t); 3591 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3592 tudi->OPT_length = udi_size; 3593 3594 /* 3595 * Add options if IP_RECVIF is set 3596 */ 3597 if (udi_size != 0) { 3598 char *dstopt; 3599 3600 dstopt = (char *)&sin[1]; 3601 if (icmp->icmp_recvif && (pinfo != NULL) && 3602 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3603 3604 struct T_opthdr *toh; 3605 uint_t *dstptr; 3606 3607 toh = (struct T_opthdr *)dstopt; 3608 toh->level = IPPROTO_IP; 3609 toh->name = IP_RECVIF; 3610 toh->len = sizeof (struct T_opthdr) + 3611 sizeof (uint_t); 3612 toh->status = 0; 3613 dstopt += sizeof (struct T_opthdr); 3614 dstptr = (uint_t *)dstopt; 3615 *dstptr = pinfo->ip_pkt_ifindex; 3616 dstopt += sizeof (uint_t); 3617 udi_size -= toh->len; 3618 } 3619 if (icmp->icmp_timestamp) { 3620 struct T_opthdr *toh; 3621 3622 toh = (struct T_opthdr *)dstopt; 3623 toh->level = SOL_SOCKET; 3624 toh->name = SCM_TIMESTAMP; 3625 toh->len = sizeof (struct T_opthdr) + 3626 sizeof (timestruc_t) + 
_POINTER_ALIGNMENT; 3627 toh->status = 0; 3628 dstopt += sizeof (struct T_opthdr); 3629 /* Align for gethrestime() */ 3630 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 3631 sizeof (intptr_t)); 3632 gethrestime((timestruc_t *)dstopt); 3633 dstopt = (char *)toh + toh->len; 3634 udi_size -= toh->len; 3635 } 3636 if (icmp->icmp_ip_recvpktinfo && (pinfo != NULL) && 3637 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3638 struct T_opthdr *toh; 3639 struct in_pktinfo *pktinfop; 3640 3641 toh = (struct T_opthdr *)dstopt; 3642 toh->level = IPPROTO_IP; 3643 toh->name = IP_PKTINFO; 3644 toh->len = sizeof (struct T_opthdr) + 3645 sizeof (in_pktinfo_t); 3646 toh->status = 0; 3647 dstopt += sizeof (struct T_opthdr); 3648 pktinfop = (struct in_pktinfo *)dstopt; 3649 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3650 pktinfop->ipi_spec_dst = 3651 pinfo->ip_pkt_match_addr; 3652 3653 pktinfop->ipi_addr.s_addr = ipha->ipha_dst; 3654 3655 dstopt += sizeof (struct in_pktinfo); 3656 udi_size -= toh->len; 3657 } 3658 3659 /* Consumed all of allocated space */ 3660 ASSERT(udi_size == 0); 3661 } 3662 3663 if (options_mp != NULL) 3664 freeb(options_mp); 3665 3666 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 3667 goto deliver; 3668 } 3669 3670 /* 3671 * We don't need options_mp in the IPv6 path. 3672 */ 3673 if (options_mp != NULL) { 3674 freeb(options_mp); 3675 options_mp = NULL; 3676 } 3677 3678 /* 3679 * Discard message if it is smaller than the IPv6 header 3680 * or if the header is malformed. 3681 */ 3682 if ((mp->b_wptr - rptr) < sizeof (ip6_t) || 3683 IPH_HDR_VERSION((ipha_t *)rptr) != IPV6_VERSION || 3684 icmp->icmp_family != AF_INET6) { 3685 freemsg(mp); 3686 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3687 return; 3688 } 3689 3690 /* Initialize */ 3691 ipp.ipp_fields = 0; 3692 hopstrip = 0; 3693 3694 ip6h = (ip6_t *)rptr; 3695 /* 3696 * Call on ip_find_hdr_v6 which gets the total hdr len 3697 * as well as individual lenghts of ext hdrs (and ptrs to 3698 * them). 
3699 */ 3700 if (ip6h->ip6_nxt != icmp->icmp_proto) { 3701 /* Look for ifindex information */ 3702 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3703 ip6i = (ip6i_t *)ip6h; 3704 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3705 ASSERT(ip6i->ip6i_ifindex != 0); 3706 ipp.ipp_fields |= IPPF_IFINDEX; 3707 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3708 } 3709 rptr = (uchar_t *)&ip6i[1]; 3710 mp->b_rptr = rptr; 3711 if (rptr == mp->b_wptr) { 3712 mp1 = mp->b_cont; 3713 freeb(mp); 3714 mp = mp1; 3715 rptr = mp->b_rptr; 3716 } 3717 ASSERT(mp->b_wptr - rptr >= IPV6_HDR_LEN); 3718 ip6h = (ip6_t *)rptr; 3719 } 3720 hdr_len = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdr); 3721 3722 /* 3723 * We need to lie a bit to the user because users inside 3724 * labeled compartments should not see their own labels. We 3725 * assume that in all other respects IP has checked the label, 3726 * and that the label is always first among the options. (If 3727 * it's not first, then this code won't see it, and the option 3728 * will be passed along to the user.) 3729 * 3730 * If we had multilevel ICMP sockets, then the following code 3731 * should be skipped for them to allow the user to see the 3732 * label. 3733 * 3734 * Alignment restrictions in the definition of IP options 3735 * (namely, the requirement that the 4-octet DOI goes on a 3736 * 4-octet boundary) mean that we know exactly where the option 3737 * should start, but we're lenient for other hosts. 3738 * 3739 * Note that there are no multilevel ICMP or raw IP sockets 3740 * yet, thus nobody ever sees the IP6OPT_LS option. 
3741 */ 3742 if ((ipp.ipp_fields & IPPF_HOPOPTS) && 3743 ipp.ipp_hopoptslen > 5 && is_system_labeled()) { 3744 const uchar_t *ucp = 3745 (const uchar_t *)ipp.ipp_hopopts + 2; 3746 int remlen = ipp.ipp_hopoptslen - 2; 3747 3748 while (remlen > 0) { 3749 if (*ucp == IP6OPT_PAD1) { 3750 remlen--; 3751 ucp++; 3752 } else if (*ucp == IP6OPT_PADN) { 3753 remlen -= ucp[1] + 2; 3754 ucp += ucp[1] + 2; 3755 } else if (*ucp == ip6opt_ls) { 3756 hopstrip = (ucp - 3757 (const uchar_t *)ipp.ipp_hopopts) + 3758 ucp[1] + 2; 3759 hopstrip = (hopstrip + 7) & ~7; 3760 break; 3761 } else { 3762 /* label option must be first */ 3763 break; 3764 } 3765 } 3766 } 3767 } else { 3768 hdr_len = IPV6_HDR_LEN; 3769 ip6i = NULL; 3770 nexthdr = ip6h->ip6_nxt; 3771 } 3772 /* 3773 * One special case where IP attaches the IRE needs to 3774 * be handled so that we don't send up IRE to the user land. 3775 */ 3776 if (nexthdr == IPPROTO_TCP) { 3777 tcph_t *tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 3778 3779 if (((tcph->th_flags[0] & (TH_SYN|TH_ACK)) == TH_SYN) && 3780 mp->b_cont != NULL) { 3781 mp1 = mp->b_cont; 3782 if (mp1->b_datap->db_type == IRE_DB_TYPE) { 3783 freeb(mp1); 3784 mp->b_cont = NULL; 3785 } 3786 } 3787 } 3788 /* 3789 * Check a filter for ICMPv6 types if needed. 3790 * Verify raw checksums if needed. 
3791 */ 3792 if (icmp->icmp_filter != NULL || icmp->icmp_raw_checksum) { 3793 if (icmp->icmp_filter != NULL) { 3794 int type; 3795 3796 /* Assumes that IP has done the pullupmsg */ 3797 type = mp->b_rptr[hdr_len]; 3798 3799 ASSERT(mp->b_rptr + hdr_len <= mp->b_wptr); 3800 if (ICMP6_FILTER_WILLBLOCK(type, icmp->icmp_filter)) { 3801 freemsg(mp); 3802 return; 3803 } 3804 } else { 3805 /* Checksum */ 3806 uint16_t *up; 3807 uint32_t sum; 3808 int remlen; 3809 3810 up = (uint16_t *)&ip6h->ip6_src; 3811 3812 remlen = msgdsize(mp) - hdr_len; 3813 sum = htons(icmp->icmp_proto + remlen) 3814 + up[0] + up[1] + up[2] + up[3] 3815 + up[4] + up[5] + up[6] + up[7] 3816 + up[8] + up[9] + up[10] + up[11] 3817 + up[12] + up[13] + up[14] + up[15]; 3818 sum = (sum & 0xffff) + (sum >> 16); 3819 sum = IP_CSUM(mp, hdr_len, sum); 3820 if (sum != 0) { 3821 /* IPv6 RAW checksum failed */ 3822 ip0dbg(("icmp_rput: RAW checksum " 3823 "failed %x\n", sum)); 3824 freemsg(mp); 3825 BUMP_MIB(&is->is_rawip_mib, 3826 rawipInCksumErrs); 3827 return; 3828 } 3829 } 3830 } 3831 /* Skip all the IPv6 headers per API */ 3832 mp->b_rptr += hdr_len; 3833 3834 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3835 3836 /* 3837 * We use local variables icmp_opt and icmp_ipv6_recvhoplimit to 3838 * maintain state information, instead of relying on icmp_t 3839 * structure, since there arent any locks protecting these members 3840 * and there is a window where there might be a race between a 3841 * thread setting options on the write side and a thread reading 3842 * these options on the read size. 
3843 */ 3844 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3845 IPPF_RTHDR|IPPF_IFINDEX)) { 3846 if (icmp->icmp_ipv6_recvhopopts && 3847 (ipp.ipp_fields & IPPF_HOPOPTS) && 3848 ipp.ipp_hopoptslen > hopstrip) { 3849 udi_size += sizeof (struct T_opthdr) + 3850 ipp.ipp_hopoptslen - hopstrip; 3851 icmp_opt |= IPPF_HOPOPTS; 3852 } 3853 if ((icmp->icmp_ipv6_recvdstopts || 3854 icmp->icmp_old_ipv6_recvdstopts) && 3855 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3856 udi_size += sizeof (struct T_opthdr) + 3857 ipp.ipp_dstoptslen; 3858 icmp_opt |= IPPF_DSTOPTS; 3859 } 3860 if (((icmp->icmp_ipv6_recvdstopts && 3861 icmp->icmp_ipv6_recvrthdr && 3862 (ipp.ipp_fields & IPPF_RTHDR)) || 3863 icmp->icmp_ipv6_recvrtdstopts) && 3864 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3865 udi_size += sizeof (struct T_opthdr) + 3866 ipp.ipp_rtdstoptslen; 3867 icmp_opt |= IPPF_RTDSTOPTS; 3868 } 3869 if (icmp->icmp_ipv6_recvrthdr && 3870 (ipp.ipp_fields & IPPF_RTHDR)) { 3871 udi_size += sizeof (struct T_opthdr) + 3872 ipp.ipp_rthdrlen; 3873 icmp_opt |= IPPF_RTHDR; 3874 } 3875 if (icmp->icmp_ip_recvpktinfo && 3876 (ipp.ipp_fields & IPPF_IFINDEX)) { 3877 udi_size += sizeof (struct T_opthdr) + 3878 sizeof (struct in6_pktinfo); 3879 icmp_opt |= IPPF_IFINDEX; 3880 } 3881 } 3882 if (icmp->icmp_ipv6_recvhoplimit) { 3883 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3884 icmp_ipv6_recvhoplimit = B_TRUE; 3885 } 3886 3887 if (icmp->icmp_ipv6_recvtclass) 3888 udi_size += sizeof (struct T_opthdr) + sizeof (int); 3889 3890 /* 3891 * If SO_TIMESTAMP is set allocate the appropriate sized 3892 * buffer. Since gethrestime() expects a pointer aligned 3893 * argument, we allocate space necessary for extra 3894 * alignment (even though it might not be used). 
3895 */ 3896 if (icmp->icmp_timestamp) { 3897 udi_size += sizeof (struct T_opthdr) + 3898 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3899 } 3900 3901 mp1 = allocb(udi_size, BPRI_MED); 3902 if (mp1 == NULL) { 3903 freemsg(mp); 3904 BUMP_MIB(&is->is_rawip_mib, rawipInErrors); 3905 return; 3906 } 3907 mp1->b_cont = mp; 3908 mp = mp1; 3909 mp->b_datap->db_type = M_PROTO; 3910 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3911 mp->b_wptr = (uchar_t *)tudi + udi_size; 3912 tudi->PRIM_type = T_UNITDATA_IND; 3913 tudi->SRC_length = sizeof (sin6_t); 3914 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3915 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3916 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 3917 tudi->OPT_length = udi_size; 3918 sin6 = (sin6_t *)&tudi[1]; 3919 sin6->sin6_port = 0; 3920 sin6->sin6_family = AF_INET6; 3921 3922 sin6->sin6_addr = ip6h->ip6_src; 3923 /* No sin6_flowinfo per API */ 3924 sin6->sin6_flowinfo = 0; 3925 /* For link-scope source pass up scope id */ 3926 if ((ipp.ipp_fields & IPPF_IFINDEX) && 3927 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 3928 sin6->sin6_scope_id = ipp.ipp_ifindex; 3929 else 3930 sin6->sin6_scope_id = 0; 3931 3932 sin6->__sin6_src_id = ip_srcid_find_addr(&ip6h->ip6_dst, 3933 icmp->icmp_zoneid, is->is_netstack); 3934 3935 if (udi_size != 0) { 3936 uchar_t *dstopt; 3937 3938 dstopt = (uchar_t *)&sin6[1]; 3939 if (icmp_opt & IPPF_IFINDEX) { 3940 struct T_opthdr *toh; 3941 struct in6_pktinfo *pkti; 3942 3943 toh = (struct T_opthdr *)dstopt; 3944 toh->level = IPPROTO_IPV6; 3945 toh->name = IPV6_PKTINFO; 3946 toh->len = sizeof (struct T_opthdr) + 3947 sizeof (*pkti); 3948 toh->status = 0; 3949 dstopt += sizeof (struct T_opthdr); 3950 pkti = (struct in6_pktinfo *)dstopt; 3951 pkti->ipi6_addr = ip6h->ip6_dst; 3952 pkti->ipi6_ifindex = ipp.ipp_ifindex; 3953 dstopt += sizeof (*pkti); 3954 udi_size -= toh->len; 3955 } 3956 if (icmp_ipv6_recvhoplimit) { 3957 struct T_opthdr *toh; 3958 3959 toh = 
(struct T_opthdr *)dstopt; 3960 toh->level = IPPROTO_IPV6; 3961 toh->name = IPV6_HOPLIMIT; 3962 toh->len = sizeof (struct T_opthdr) + 3963 sizeof (uint_t); 3964 toh->status = 0; 3965 dstopt += sizeof (struct T_opthdr); 3966 *(uint_t *)dstopt = ip6h->ip6_hops; 3967 dstopt += sizeof (uint_t); 3968 udi_size -= toh->len; 3969 } 3970 if (icmp->icmp_ipv6_recvtclass) { 3971 struct T_opthdr *toh; 3972 3973 toh = (struct T_opthdr *)dstopt; 3974 toh->level = IPPROTO_IPV6; 3975 toh->name = IPV6_TCLASS; 3976 toh->len = sizeof (struct T_opthdr) + 3977 sizeof (uint_t); 3978 toh->status = 0; 3979 dstopt += sizeof (struct T_opthdr); 3980 *(uint_t *)dstopt = IPV6_FLOW_TCLASS(ip6h->ip6_flow); 3981 dstopt += sizeof (uint_t); 3982 udi_size -= toh->len; 3983 } 3984 if (icmp->icmp_timestamp) { 3985 struct T_opthdr *toh; 3986 3987 toh = (struct T_opthdr *)dstopt; 3988 toh->level = SOL_SOCKET; 3989 toh->name = SCM_TIMESTAMP; 3990 toh->len = sizeof (struct T_opthdr) + 3991 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3992 toh->status = 0; 3993 dstopt += sizeof (struct T_opthdr); 3994 /* Align for gethrestime() */ 3995 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 3996 sizeof (intptr_t)); 3997 gethrestime((timestruc_t *)dstopt); 3998 dstopt = (uchar_t *)toh + toh->len; 3999 udi_size -= toh->len; 4000 } 4001 4002 if (icmp_opt & IPPF_HOPOPTS) { 4003 struct T_opthdr *toh; 4004 4005 toh = (struct T_opthdr *)dstopt; 4006 toh->level = IPPROTO_IPV6; 4007 toh->name = IPV6_HOPOPTS; 4008 toh->len = sizeof (struct T_opthdr) + 4009 ipp.ipp_hopoptslen - hopstrip; 4010 toh->status = 0; 4011 dstopt += sizeof (struct T_opthdr); 4012 bcopy((char *)ipp.ipp_hopopts + hopstrip, dstopt, 4013 ipp.ipp_hopoptslen - hopstrip); 4014 if (hopstrip > 0) { 4015 /* copy next header value and fake length */ 4016 dstopt[0] = ((uchar_t *)ipp.ipp_hopopts)[0]; 4017 dstopt[1] = ((uchar_t *)ipp.ipp_hopopts)[1] - 4018 hopstrip / 8; 4019 } 4020 dstopt += ipp.ipp_hopoptslen - hopstrip; 4021 udi_size -= toh->len; 4022 } 4023 if 
(icmp_opt & IPPF_RTDSTOPTS) { 4024 struct T_opthdr *toh; 4025 4026 toh = (struct T_opthdr *)dstopt; 4027 toh->level = IPPROTO_IPV6; 4028 toh->name = IPV6_DSTOPTS; 4029 toh->len = sizeof (struct T_opthdr) + 4030 ipp.ipp_rtdstoptslen; 4031 toh->status = 0; 4032 dstopt += sizeof (struct T_opthdr); 4033 bcopy(ipp.ipp_rtdstopts, dstopt, 4034 ipp.ipp_rtdstoptslen); 4035 dstopt += ipp.ipp_rtdstoptslen; 4036 udi_size -= toh->len; 4037 } 4038 if (icmp_opt & IPPF_RTHDR) { 4039 struct T_opthdr *toh; 4040 4041 toh = (struct T_opthdr *)dstopt; 4042 toh->level = IPPROTO_IPV6; 4043 toh->name = IPV6_RTHDR; 4044 toh->len = sizeof (struct T_opthdr) + 4045 ipp.ipp_rthdrlen; 4046 toh->status = 0; 4047 dstopt += sizeof (struct T_opthdr); 4048 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4049 dstopt += ipp.ipp_rthdrlen; 4050 udi_size -= toh->len; 4051 } 4052 if (icmp_opt & IPPF_DSTOPTS) { 4053 struct T_opthdr *toh; 4054 4055 toh = (struct T_opthdr *)dstopt; 4056 toh->level = IPPROTO_IPV6; 4057 toh->name = IPV6_DSTOPTS; 4058 toh->len = sizeof (struct T_opthdr) + 4059 ipp.ipp_dstoptslen; 4060 toh->status = 0; 4061 dstopt += sizeof (struct T_opthdr); 4062 bcopy(ipp.ipp_dstopts, dstopt, 4063 ipp.ipp_dstoptslen); 4064 dstopt += ipp.ipp_dstoptslen; 4065 udi_size -= toh->len; 4066 } 4067 /* Consumed all of allocated space */ 4068 ASSERT(udi_size == 0); 4069 } 4070 BUMP_MIB(&is->is_rawip_mib, rawipInDatagrams); 4071 4072 deliver: 4073 icmp_ulp_recv(connp, mp); 4074 4075 } 4076 4077 /* 4078 * return SNMP stuff in buffer in mpdata 4079 */ 4080 mblk_t * 4081 icmp_snmp_get(queue_t *q, mblk_t *mpctl) 4082 { 4083 mblk_t *mpdata; 4084 struct opthdr *optp; 4085 conn_t *connp = Q_TO_CONN(q); 4086 icmp_stack_t *is = connp->conn_netstack->netstack_icmp; 4087 mblk_t *mp2ctl; 4088 4089 /* 4090 * make a copy of the original message 4091 */ 4092 mp2ctl = copymsg(mpctl); 4093 4094 if (mpctl == NULL || 4095 (mpdata = mpctl->b_cont) == NULL) { 4096 freemsg(mpctl); 4097 freemsg(mp2ctl); 4098 return (0); 4099 } 
4100 4101 /* fixed length structure for IPv4 and IPv6 counters */ 4102 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4103 optp->level = EXPER_RAWIP; 4104 optp->name = 0; 4105 (void) snmp_append_data(mpdata, (char *)&is->is_rawip_mib, 4106 sizeof (is->is_rawip_mib)); 4107 optp->len = msgdsize(mpdata); 4108 qreply(q, mpctl); 4109 4110 return (mp2ctl); 4111 } 4112 4113 /* 4114 * Return 0 if invalid set request, 1 otherwise, including non-rawip requests. 4115 * TODO: If this ever actually tries to set anything, it needs to be 4116 * to do the appropriate locking. 4117 */ 4118 /* ARGSUSED */ 4119 int 4120 icmp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4121 uchar_t *ptr, int len) 4122 { 4123 switch (level) { 4124 case EXPER_RAWIP: 4125 return (0); 4126 default: 4127 return (1); 4128 } 4129 } 4130 4131 /* 4132 * This routine creates a T_UDERROR_IND message and passes it upstream. 4133 * The address and options are copied from the T_UNITDATA_REQ message 4134 * passed in mp. This message is freed. 4135 */ 4136 static void 4137 icmp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 4138 { 4139 mblk_t *mp1; 4140 uchar_t *rptr = mp->b_rptr; 4141 struct T_unitdata_req *tudr = (struct T_unitdata_req *)rptr; 4142 4143 mp1 = mi_tpi_uderror_ind((char *)&rptr[tudr->DEST_offset], 4144 tudr->DEST_length, (char *)&rptr[tudr->OPT_offset], 4145 tudr->OPT_length, err); 4146 if (mp1) 4147 qreply(q, mp1); 4148 freemsg(mp); 4149 } 4150 4151 4152 static int 4153 rawip_do_unbind(conn_t *connp) 4154 { 4155 icmp_t *icmp = connp->conn_icmp; 4156 4157 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4158 /* If a bind has not been done, we can't unbind. 
*/ 4159 if (icmp->icmp_state == TS_UNBND || icmp->icmp_pending_op != -1) { 4160 rw_exit(&icmp->icmp_rwlock); 4161 return (-TOUTSTATE); 4162 } 4163 icmp->icmp_pending_op = T_UNBIND_REQ; 4164 rw_exit(&icmp->icmp_rwlock); 4165 4166 /* 4167 * Call ip to unbind 4168 */ 4169 4170 ip_unbind(connp); 4171 4172 /* 4173 * Once we're unbound from IP, the pending operation may be cleared 4174 * here. 4175 */ 4176 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 4177 V6_SET_ZERO(icmp->icmp_v6src); 4178 V6_SET_ZERO(icmp->icmp_bound_v6src); 4179 icmp->icmp_pending_op = -1; 4180 icmp->icmp_state = TS_UNBND; 4181 if (icmp->icmp_family == AF_INET6) 4182 (void) icmp_build_hdrs(icmp); 4183 rw_exit(&icmp->icmp_rwlock); 4184 return (0); 4185 } 4186 4187 /* 4188 * This routine is called by icmp_wput to handle T_UNBIND_REQ messages. 4189 * After some error checking, the message is passed downstream to ip. 4190 */ 4191 static void 4192 icmp_tpi_unbind(queue_t *q, mblk_t *mp) 4193 { 4194 conn_t *connp = Q_TO_CONN(q); 4195 int error; 4196 4197 ASSERT(mp->b_cont == NULL); 4198 error = rawip_do_unbind(connp); 4199 if (error) { 4200 if (error < 0) { 4201 icmp_err_ack(q, mp, -error, 0); 4202 } else { 4203 icmp_err_ack(q, mp, 0, error); 4204 } 4205 return; 4206 } 4207 4208 /* 4209 * Convert mp into a T_OK_ACK 4210 */ 4211 4212 mp = mi_tpi_ok_ack_alloc(mp); 4213 4214 /* 4215 * should not happen in practice... T_OK_ACK is smaller than the 4216 * original message. 4217 */ 4218 ASSERT(mp != NULL); 4219 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4220 qreply(q, mp); 4221 } 4222 4223 4224 /* 4225 * Process IPv4 packets that already include an IP header. 4226 * Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and 4227 * IPPROTO_IGMP). 
 */
/*
 * Message ownership: a return value of 0 means mp has been consumed, either
 * handed to ip_output_options() or freed after a pullupmsg() failure.  A
 * non-zero return (EMSGSIZE or ENOMEM) means mp has NOT been freed and the
 * caller remains responsible for it.
 *
 * pktinfop may be NULL; when it is not, a non-wildcard ip4_addr replaces the
 * source address in the header and a non-zero ip4_ill_index is passed on to
 * IP in the option info.
 */
static int
icmp_wput_hdrincl(queue_t *q, conn_t *connp, mblk_t *mp, icmp_t *icmp,
    ip4_pkt_t *pktinfop)
{
	icmp_stack_t *is = icmp->icmp_is;
	ipha_t	*ipha;
	int	ip_hdr_length;
	int	tp_hdr_len;
	mblk_t	*mp1;
	uint_t	pkt_len;
	ip_opt_info_t optinfo;

	optinfo.ip_opt_flags = 0;
	optinfo.ip_opt_ill_index = 0;
	ipha = (ipha_t *)mp->b_rptr;
	/* The header length we advertise includes our saved send options. */
	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len;
	if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) {
		if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) {
			ASSERT(icmp != NULL);
			BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
			freemsg(mp);
			return (0);
		}
		/* pullupmsg() may have moved the data; reload the pointer. */
		ipha = (ipha_t *)mp->b_rptr;
	}
	ipha->ipha_version_and_hdr_length =
	    (IP_VERSION<<4) | (ip_hdr_length>>2);

	/*
	 * For the socket of SOCK_RAW type, the checksum is provided in the
	 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to
	 * tell IP that the application has sent a complete IP header and not
	 * to compute the transport checksum nor change the DF flag.
	 */
	ipha->ipha_ident = IP_HDR_INCLUDED;
	ipha->ipha_hdr_checksum = 0;
	/* Keep only the DF bit of what the application supplied. */
	ipha->ipha_fragment_offset_and_flags &= htons(IPH_DF);
	/* Insert options if any */
	if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) {
		/*
		 * Put the IP header plus any transport header that is
		 * checksummed by ip_wput into the first mblk. (ip_wput assumes
		 * that at least the checksum field is in the first mblk.)
		 */
		switch (ipha->ipha_protocol) {
		case IPPROTO_UDP:
			tp_hdr_len = 8;
			break;
		case IPPROTO_TCP:
			tp_hdr_len = 20;
			break;
		default:
			tp_hdr_len = 0;
			break;
		}
		/*
		 * The code below assumes that IP_SIMPLE_HDR_LENGTH plus
		 * tp_hdr_len bytes will be in a single mblk.
		 */
		if ((mp->b_wptr - mp->b_rptr) < (IP_SIMPLE_HDR_LENGTH +
		    tp_hdr_len)) {
			if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH +
			    tp_hdr_len)) {
				BUMP_MIB(&is->is_rawip_mib,
				    rawipOutErrors);
				freemsg(mp);
				return (0);
			}
			ipha = (ipha_t *)mp->b_rptr;
		}

		/*
		 * If the length is larger than the max allowed IP packet,
		 * then return an error and abort the processing.  mp is
		 * left for the caller to dispose of.
		 */
		pkt_len = ntohs(ipha->ipha_length)
		    + icmp->icmp_ip_snd_options_len;
		if (pkt_len > IP_MAXPACKET) {
			return (EMSGSIZE);
		}
		/* New first mblk: IP header + options + transport header. */
		if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra +
		    tp_hdr_len, BPRI_LO))) {
			return (ENOMEM);
		}
		mp1->b_rptr += is->is_wroff_extra;
		mp1->b_wptr = mp1->b_rptr + ip_hdr_length;

		ipha->ipha_length = htons((uint16_t)pkt_len);
		bcopy(ipha, mp1->b_rptr, IP_SIMPLE_HDR_LENGTH);

		/* Copy transport header if any */
		bcopy(&ipha[1], mp1->b_wptr, tp_hdr_len);
		mp1->b_wptr += tp_hdr_len;

		/* Add options */
		ipha = (ipha_t *)mp1->b_rptr;
		bcopy(icmp->icmp_ip_snd_options, &ipha[1],
		    icmp->icmp_ip_snd_options_len);

		/* Drop IP header and transport header from original */
		(void) adjmsg(mp, IP_SIMPLE_HDR_LENGTH + tp_hdr_len);

		mp1->b_cont = mp;
		mp = mp1;
		/*
		 * Massage source route putting first source
		 * route in ipha_dst.
		 */
		(void) ip_massage_options(ipha, is->is_netstack);
	}

	if (pktinfop != NULL) {
		/*
		 * Overwrite the source address provided in the header
		 */
		if (pktinfop->ip4_addr != INADDR_ANY) {
			ipha->ipha_src = pktinfop->ip4_addr;
			optinfo.ip_opt_flags = IP_VERIFY_SRC;
		}

		if (pktinfop->ip4_ill_index != 0) {
			optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
		}
	}

	ip_output_options(connp, mp, q, IP_WPUT, &optinfo);
	return (0);
}

/*
 * Recompute the label option for IPv4 destination dst, using the
 * credentials attached to mp, and store it in the endpoint's saved send
 * options (icmp_ip_snd_options).
 *
 * Returns 0 on success, in which case dst is remembered in icmp_v6lastdst
 * as a v4-mapped address.  Returns EINVAL if mp carries no credentials, or
 * the error from tsol_compute_label()/tsol_update_options(); in the latter
 * case rawipOutErrors is bumped and a DTrace probe fires.  mp is never
 * freed here.
 */
static int
icmp_update_label(icmp_t *icmp, mblk_t *mp, ipaddr_t dst)
{
	int err;
	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
	icmp_stack_t		*is = icmp->icmp_is;
	conn_t	*connp = icmp->icmp_connp;
	cred_t	*cr;

	/*
	 * All Solaris components should pass a db_credp
	 * for this message, hence we ASSERT.
	 * On production kernels we return an error to be robust against
	 * random streams modules sitting on top of us.
	 */
	cr = msg_getcred(mp, NULL);
	ASSERT(cr != NULL);
	if (cr == NULL)
		return (EINVAL);

	err = tsol_compute_label(cr, dst,
	    opt_storage, connp->conn_mac_exempt,
	    is->is_netstack->netstack_ip);
	if (err == 0) {
		err = tsol_update_options(&icmp->icmp_ip_snd_options,
		    &icmp->icmp_ip_snd_options_len, &icmp->icmp_label_len,
		    opt_storage);
	}
	if (err != 0) {
		BUMP_MIB(&is->is_rawip_mib, rawipOutErrors);
		DTRACE_PROBE4(
		    tx__ip__log__drop__updatelabel__icmp,
		    char *, "icmp(1) failed to update options(2) on mp(3)",
		    icmp_t *, icmp, char *, opt_storage, mblk_t *, mp);
		return (err);
	}
	/* Remember the destination the saved label was computed for. */
	IN6_IPADDR_TO_V4MAPPED(dst, &icmp->icmp_v6lastdst);
	return (0);
}

/*
 * This routine handles all messages passed downstream.  It either
 * consumes the message or passes it downstream; it never queues a
 * message.
4402 */ 4403 static void 4404 icmp_wput(queue_t *q, mblk_t *mp) 4405 { 4406 uchar_t *rptr = mp->b_rptr; 4407 ipha_t *ipha; 4408 mblk_t *mp1; 4409 #define tudr ((struct T_unitdata_req *)rptr) 4410 size_t ip_len; 4411 conn_t *connp = Q_TO_CONN(q); 4412 icmp_t *icmp = connp->conn_icmp; 4413 icmp_stack_t *is = icmp->icmp_is; 4414 sin6_t *sin6; 4415 sin_t *sin; 4416 ipaddr_t v4dst; 4417 ip4_pkt_t pktinfo; 4418 ip4_pkt_t *pktinfop = &pktinfo; 4419 ip6_pkt_t ipp_s; /* For ancillary data options */ 4420 ip6_pkt_t *ipp = &ipp_s; 4421 int error; 4422 4423 ipp->ipp_fields = 0; 4424 ipp->ipp_sticky_ignored = 0; 4425 4426 switch (mp->b_datap->db_type) { 4427 case M_DATA: 4428 if (icmp->icmp_hdrincl) { 4429 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 4430 ipha = (ipha_t *)mp->b_rptr; 4431 if (mp->b_wptr - mp->b_rptr < IP_SIMPLE_HDR_LENGTH) { 4432 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 4433 BUMP_MIB(&is->is_rawip_mib, 4434 rawipOutErrors); 4435 freemsg(mp); 4436 return; 4437 } 4438 ipha = (ipha_t *)mp->b_rptr; 4439 } 4440 /* 4441 * If this connection was used for v6 (inconceivable!) 4442 * or if we have a new destination, then it's time to 4443 * figure a new label. 
4444 */ 4445 if (is_system_labeled() && 4446 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4447 V4_PART_OF_V6(icmp->icmp_v6lastdst) != 4448 ipha->ipha_dst)) { 4449 error = icmp_update_label(icmp, mp, 4450 ipha->ipha_dst); 4451 if (error != 0) { 4452 icmp_ud_err(q, mp, error); 4453 return; 4454 } 4455 } 4456 error = icmp_wput_hdrincl(q, connp, mp, icmp, NULL); 4457 if (error != 0) 4458 icmp_ud_err(q, mp, error); 4459 return; 4460 } 4461 freemsg(mp); 4462 return; 4463 case M_PROTO: 4464 case M_PCPROTO: 4465 ip_len = mp->b_wptr - rptr; 4466 if (ip_len >= sizeof (struct T_unitdata_req)) { 4467 /* Expedite valid T_UNITDATA_REQ to below the switch */ 4468 if (((union T_primitives *)rptr)->type 4469 == T_UNITDATA_REQ) 4470 break; 4471 } 4472 /* FALLTHRU */ 4473 default: 4474 icmp_wput_other(q, mp); 4475 return; 4476 } 4477 4478 /* Handle T_UNITDATA_REQ messages here. */ 4479 4480 mp1 = mp->b_cont; 4481 if (mp1 == NULL) { 4482 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4483 icmp_ud_err(q, mp, EPROTO); 4484 return; 4485 } 4486 4487 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > mp->b_wptr) { 4488 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4489 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4490 return; 4491 } 4492 4493 switch (icmp->icmp_family) { 4494 case AF_INET6: 4495 sin6 = (sin6_t *)&rptr[tudr->DEST_offset]; 4496 if (!OK_32PTR((char *)sin6) || 4497 tudr->DEST_length != sizeof (sin6_t) || 4498 sin6->sin6_family != AF_INET6) { 4499 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4500 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4501 return; 4502 } 4503 4504 /* No support for mapped addresses on raw sockets */ 4505 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 4506 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4507 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4508 return; 4509 } 4510 4511 /* 4512 * Destination is a native IPv6 address. 4513 * Send out an IPv6 format packet. 
4514 */ 4515 if (tudr->OPT_length != 0) { 4516 int error; 4517 4518 error = 0; 4519 if (icmp_unitdata_opt_process(q, mp, &error, 4520 (void *)ipp) < 0) { 4521 /* failure */ 4522 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4523 icmp_ud_err(q, mp, error); 4524 return; 4525 } 4526 ASSERT(error == 0); 4527 } 4528 4529 error = raw_ip_send_data_v6(q, connp, mp1, sin6, ipp); 4530 goto done; 4531 4532 case AF_INET: 4533 sin = (sin_t *)&rptr[tudr->DEST_offset]; 4534 if (!OK_32PTR((char *)sin) || 4535 tudr->DEST_length != sizeof (sin_t) || 4536 sin->sin_family != AF_INET) { 4537 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4538 icmp_ud_err(q, mp, EADDRNOTAVAIL); 4539 return; 4540 } 4541 /* Extract and ipaddr */ 4542 v4dst = sin->sin_addr.s_addr; 4543 break; 4544 4545 default: 4546 ASSERT(0); 4547 } 4548 4549 pktinfop->ip4_ill_index = 0; 4550 pktinfop->ip4_addr = INADDR_ANY; 4551 4552 /* 4553 * If options passed in, feed it for verification and handling 4554 */ 4555 if (tudr->OPT_length != 0) { 4556 int error; 4557 4558 error = 0; 4559 if (icmp_unitdata_opt_process(q, mp, &error, 4560 (void *)pktinfop) < 0) { 4561 /* failure */ 4562 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4563 icmp_ud_err(q, mp, error); 4564 return; 4565 } 4566 ASSERT(error == 0); 4567 /* 4568 * Note: Success in processing options. 
4569 * mp option buffer represented by 4570 * OPT_length/offset now potentially modified 4571 * and contain option setting results 4572 */ 4573 } 4574 4575 error = raw_ip_send_data_v4(q, connp, mp1, v4dst, pktinfop); 4576 done: 4577 if (error != 0) { 4578 icmp_ud_err(q, mp, error); 4579 return; 4580 } else { 4581 mp->b_cont = NULL; 4582 freeb(mp); 4583 } 4584 } 4585 4586 4587 /* ARGSUSED */ 4588 static void 4589 icmp_wput_fallback(queue_t *q, mblk_t *mp) 4590 { 4591 #ifdef DEBUG 4592 cmn_err(CE_CONT, "icmp_wput_fallback: Message during fallback \n"); 4593 #endif 4594 freemsg(mp); 4595 } 4596 4597 static int 4598 raw_ip_send_data_v4(queue_t *q, conn_t *connp, mblk_t *mp, ipaddr_t v4dst, 4599 ip4_pkt_t *pktinfop) 4600 { 4601 ipha_t *ipha; 4602 size_t ip_len; 4603 icmp_t *icmp = connp->conn_icmp; 4604 icmp_stack_t *is = icmp->icmp_is; 4605 int ip_hdr_length; 4606 ip_opt_info_t optinfo; 4607 4608 optinfo.ip_opt_flags = 0; 4609 optinfo.ip_opt_ill_index = 0; 4610 4611 if (icmp->icmp_state == TS_UNBND) { 4612 /* If a port has not been bound to the stream, fail. 
*/ 4613 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4614 return (EPROTO); 4615 } 4616 4617 if (v4dst == INADDR_ANY) 4618 v4dst = htonl(INADDR_LOOPBACK); 4619 4620 /* Check if our saved options are valid; update if not */ 4621 if (is_system_labeled() && 4622 (!IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6lastdst) || 4623 V4_PART_OF_V6(icmp->icmp_v6lastdst) != v4dst)) { 4624 int error = icmp_update_label(icmp, mp, v4dst); 4625 4626 if (error != 0) 4627 return (error); 4628 } 4629 4630 /* Protocol 255 contains full IP headers */ 4631 if (icmp->icmp_hdrincl) 4632 return (icmp_wput_hdrincl(q, connp, mp, icmp, pktinfop)); 4633 4634 /* Add an IP header */ 4635 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + icmp->icmp_ip_snd_options_len; 4636 ipha = (ipha_t *)&mp->b_rptr[-ip_hdr_length]; 4637 if ((uchar_t *)ipha < mp->b_datap->db_base || 4638 mp->b_datap->db_ref != 1 || 4639 !OK_32PTR(ipha)) { 4640 mblk_t *mp1; 4641 if (!(mp1 = allocb(ip_hdr_length + is->is_wroff_extra, 4642 BPRI_LO))) { 4643 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4644 return (ENOMEM); 4645 } 4646 mp1->b_cont = mp; 4647 ipha = (ipha_t *)mp1->b_datap->db_lim; 4648 mp1->b_wptr = (uchar_t *)ipha; 4649 ipha = (ipha_t *)((uchar_t *)ipha - ip_hdr_length); 4650 mp = mp1; 4651 } 4652 #ifdef _BIG_ENDIAN 4653 /* Set version, header length, and tos */ 4654 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4655 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4656 icmp->icmp_type_of_service); 4657 /* Set ttl and protocol */ 4658 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_ttl << 8) | icmp->icmp_proto; 4659 #else 4660 /* Set version, header length, and tos */ 4661 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4662 ((icmp->icmp_type_of_service << 8) | 4663 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4664 /* Set ttl and protocol */ 4665 *(uint16_t *)&ipha->ipha_ttl = (icmp->icmp_proto << 8) | icmp->icmp_ttl; 4666 #endif 4667 if (pktinfop->ip4_addr != INADDR_ANY) { 4668 ipha->ipha_src = pktinfop->ip4_addr; 4669 
optinfo.ip_opt_flags = IP_VERIFY_SRC; 4670 } else { 4671 4672 /* 4673 * Copy our address into the packet. If this is zero, 4674 * ip will fill in the real source address. 4675 */ 4676 IN6_V4MAPPED_TO_IPADDR(&icmp->icmp_v6src, ipha->ipha_src); 4677 } 4678 4679 ipha->ipha_fragment_offset_and_flags = 0; 4680 4681 if (pktinfop->ip4_ill_index != 0) { 4682 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 4683 } 4684 4685 4686 /* 4687 * For the socket of SOCK_RAW type, the checksum is provided in the 4688 * pre-built packet. We set the ipha_ident field to IP_HDR_INCLUDED to 4689 * tell IP that the application has sent a complete IP header and not 4690 * to compute the transport checksum nor change the DF flag. 4691 */ 4692 ipha->ipha_ident = IP_HDR_INCLUDED; 4693 4694 /* Finish common formatting of the packet. */ 4695 mp->b_rptr = (uchar_t *)ipha; 4696 4697 ip_len = mp->b_wptr - (uchar_t *)ipha; 4698 if (mp->b_cont != NULL) 4699 ip_len += msgdsize(mp->b_cont); 4700 4701 /* 4702 * Set the length into the IP header. 4703 * If the length is greater than the maximum allowed by IP, 4704 * then free the message and return. Do not try and send it 4705 * as this can cause problems in layers below. 4706 */ 4707 if (ip_len > IP_MAXPACKET) { 4708 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4709 return (EMSGSIZE); 4710 } 4711 ipha->ipha_length = htons((uint16_t)ip_len); 4712 /* 4713 * Copy in the destination address request 4714 */ 4715 ipha->ipha_dst = v4dst; 4716 4717 /* 4718 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 4719 */ 4720 if (CLASSD(v4dst)) 4721 ipha->ipha_ttl = icmp->icmp_multicast_ttl; 4722 4723 /* Copy in options if any */ 4724 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 4725 bcopy(icmp->icmp_ip_snd_options, 4726 &ipha[1], icmp->icmp_ip_snd_options_len); 4727 /* 4728 * Massage source route putting first source route in ipha_dst. 4729 * Ignore the destination in the T_unitdata_req. 
4730 */ 4731 (void) ip_massage_options(ipha, is->is_netstack); 4732 } 4733 4734 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 4735 ip_output_options(connp, mp, q, IP_WPUT, &optinfo); 4736 return (0); 4737 } 4738 4739 static int 4740 icmp_update_label_v6(icmp_t *icmp, mblk_t *mp, in6_addr_t *dst) 4741 { 4742 int err; 4743 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 4744 icmp_stack_t *is = icmp->icmp_is; 4745 conn_t *connp = icmp->icmp_connp; 4746 cred_t *cr; 4747 4748 /* 4749 * All Solaris components should pass a db_credp 4750 * for this message, hence we ASSERT. 4751 * On production kernels we return an error to be robust against 4752 * random streams modules sitting on top of us. 4753 */ 4754 cr = msg_getcred(mp, NULL); 4755 ASSERT(cr != NULL); 4756 if (cr == NULL) 4757 return (EINVAL); 4758 4759 err = tsol_compute_label_v6(cr, dst, 4760 opt_storage, connp->conn_mac_exempt, 4761 is->is_netstack->netstack_ip); 4762 if (err == 0) { 4763 err = tsol_update_sticky(&icmp->icmp_sticky_ipp, 4764 &icmp->icmp_label_len_v6, opt_storage); 4765 } 4766 if (err != 0) { 4767 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4768 DTRACE_PROBE4( 4769 tx__ip__log__drop__updatelabel__icmp6, 4770 char *, "icmp(1) failed to update options(2) on mp(3)", 4771 icmp_t *, icmp, char *, opt_storage, mblk_t *, mp); 4772 return (err); 4773 } 4774 4775 icmp->icmp_v6lastdst = *dst; 4776 return (0); 4777 } 4778 4779 /* 4780 * raw_ip_send_data_v6(): 4781 * Assumes that icmp_wput did some sanity checking on the destination 4782 * address, but that the label may not yet be correct. 
4783 */ 4784 static int 4785 raw_ip_send_data_v6(queue_t *q, conn_t *connp, mblk_t *mp, sin6_t *sin6, 4786 ip6_pkt_t *ipp) 4787 { 4788 ip6_t *ip6h; 4789 ip6i_t *ip6i; /* mp->b_rptr even if no ip6i_t */ 4790 int ip_hdr_len = IPV6_HDR_LEN; 4791 size_t ip_len; 4792 icmp_t *icmp = connp->conn_icmp; 4793 icmp_stack_t *is = icmp->icmp_is; 4794 ip6_pkt_t *tipp; 4795 uint32_t csum = 0; 4796 uint_t ignore = 0; 4797 uint_t option_exists = 0, is_sticky = 0; 4798 uint8_t *cp; 4799 uint8_t *nxthdr_ptr; 4800 in6_addr_t ip6_dst; 4801 4802 /* 4803 * If the local address is a mapped address return 4804 * an error. 4805 * It would be possible to send an IPv6 packet but the 4806 * response would never make it back to the application 4807 * since it is bound to a mapped address. 4808 */ 4809 if (IN6_IS_ADDR_V4MAPPED(&icmp->icmp_v6src)) { 4810 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 4811 return (EADDRNOTAVAIL); 4812 } 4813 4814 ignore = ipp->ipp_sticky_ignored; 4815 if (sin6->sin6_scope_id != 0 && 4816 IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 4817 /* 4818 * IPPF_SCOPE_ID is special. It's neither a sticky 4819 * option nor ancillary data. It needs to be 4820 * explicitly set in options_exists. 4821 */ 4822 option_exists |= IPPF_SCOPE_ID; 4823 } 4824 4825 /* 4826 * Compute the destination address 4827 */ 4828 ip6_dst = sin6->sin6_addr; 4829 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 4830 ip6_dst = ipv6_loopback; 4831 4832 /* 4833 * If we're not going to the same destination as last time, then 4834 * recompute the label required. This is done in a separate routine to 4835 * avoid blowing up our stack here. 4836 */ 4837 if (is_system_labeled() && 4838 !IN6_ARE_ADDR_EQUAL(&icmp->icmp_v6lastdst, &ip6_dst)) { 4839 int error = 0; 4840 4841 error = icmp_update_label_v6(icmp, mp, &ip6_dst); 4842 if (error != 0) 4843 return (error); 4844 } 4845 4846 /* 4847 * If there's a security label here, then we ignore any options the 4848 * user may try to set. 
We keep the peer's label as a hidden sticky 4849 * option. 4850 */ 4851 if (icmp->icmp_label_len_v6 > 0) { 4852 ignore &= ~IPPF_HOPOPTS; 4853 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4854 } 4855 4856 if ((icmp->icmp_sticky_ipp.ipp_fields == 0) && 4857 (ipp->ipp_fields == 0)) { 4858 /* No sticky options nor ancillary data. */ 4859 goto no_options; 4860 } 4861 4862 /* 4863 * Go through the options figuring out where each is going to 4864 * come from and build two masks. The first mask indicates if 4865 * the option exists at all. The second mask indicates if the 4866 * option is sticky or ancillary. 4867 */ 4868 if (!(ignore & IPPF_HOPOPTS)) { 4869 if (ipp->ipp_fields & IPPF_HOPOPTS) { 4870 option_exists |= IPPF_HOPOPTS; 4871 ip_hdr_len += ipp->ipp_hopoptslen; 4872 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 4873 option_exists |= IPPF_HOPOPTS; 4874 is_sticky |= IPPF_HOPOPTS; 4875 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_hopoptslen; 4876 } 4877 } 4878 4879 if (!(ignore & IPPF_RTHDR)) { 4880 if (ipp->ipp_fields & IPPF_RTHDR) { 4881 option_exists |= IPPF_RTHDR; 4882 ip_hdr_len += ipp->ipp_rthdrlen; 4883 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 4884 option_exists |= IPPF_RTHDR; 4885 is_sticky |= IPPF_RTHDR; 4886 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_rthdrlen; 4887 } 4888 } 4889 4890 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 4891 /* 4892 * Need to have a router header to use these. 
4893 */ 4894 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 4895 option_exists |= IPPF_RTDSTOPTS; 4896 ip_hdr_len += ipp->ipp_rtdstoptslen; 4897 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 4898 option_exists |= IPPF_RTDSTOPTS; 4899 is_sticky |= IPPF_RTDSTOPTS; 4900 ip_hdr_len += 4901 icmp->icmp_sticky_ipp.ipp_rtdstoptslen; 4902 } 4903 } 4904 4905 if (!(ignore & IPPF_DSTOPTS)) { 4906 if (ipp->ipp_fields & IPPF_DSTOPTS) { 4907 option_exists |= IPPF_DSTOPTS; 4908 ip_hdr_len += ipp->ipp_dstoptslen; 4909 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 4910 option_exists |= IPPF_DSTOPTS; 4911 is_sticky |= IPPF_DSTOPTS; 4912 ip_hdr_len += icmp->icmp_sticky_ipp.ipp_dstoptslen; 4913 } 4914 } 4915 4916 if (!(ignore & IPPF_IFINDEX)) { 4917 if (ipp->ipp_fields & IPPF_IFINDEX) { 4918 option_exists |= IPPF_IFINDEX; 4919 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 4920 option_exists |= IPPF_IFINDEX; 4921 is_sticky |= IPPF_IFINDEX; 4922 } 4923 } 4924 4925 if (!(ignore & IPPF_ADDR)) { 4926 if (ipp->ipp_fields & IPPF_ADDR) { 4927 option_exists |= IPPF_ADDR; 4928 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_ADDR) { 4929 option_exists |= IPPF_ADDR; 4930 is_sticky |= IPPF_ADDR; 4931 } 4932 } 4933 4934 if (!(ignore & IPPF_DONTFRAG)) { 4935 if (ipp->ipp_fields & IPPF_DONTFRAG) { 4936 option_exists |= IPPF_DONTFRAG; 4937 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 4938 option_exists |= IPPF_DONTFRAG; 4939 is_sticky |= IPPF_DONTFRAG; 4940 } 4941 } 4942 4943 if (!(ignore & IPPF_USE_MIN_MTU)) { 4944 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 4945 option_exists |= IPPF_USE_MIN_MTU; 4946 } else if (icmp->icmp_sticky_ipp.ipp_fields & 4947 IPPF_USE_MIN_MTU) { 4948 option_exists |= IPPF_USE_MIN_MTU; 4949 is_sticky |= IPPF_USE_MIN_MTU; 4950 } 4951 } 4952 4953 if (!(ignore & IPPF_NEXTHOP)) { 4954 if (ipp->ipp_fields & IPPF_NEXTHOP) { 4955 option_exists |= IPPF_NEXTHOP; 4956 } else if (icmp->icmp_sticky_ipp.ipp_fields & 
IPPF_NEXTHOP) { 4957 option_exists |= IPPF_NEXTHOP; 4958 is_sticky |= IPPF_NEXTHOP; 4959 } 4960 } 4961 4962 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 4963 option_exists |= IPPF_HOPLIMIT; 4964 /* IPV6_HOPLIMIT can never be sticky */ 4965 ASSERT(!(icmp->icmp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 4966 4967 if (!(ignore & IPPF_UNICAST_HOPS) && 4968 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 4969 option_exists |= IPPF_UNICAST_HOPS; 4970 is_sticky |= IPPF_UNICAST_HOPS; 4971 } 4972 4973 if (!(ignore & IPPF_MULTICAST_HOPS) && 4974 (icmp->icmp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 4975 option_exists |= IPPF_MULTICAST_HOPS; 4976 is_sticky |= IPPF_MULTICAST_HOPS; 4977 } 4978 4979 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_NO_CKSUM) { 4980 /* This is a sticky socket option only */ 4981 option_exists |= IPPF_NO_CKSUM; 4982 is_sticky |= IPPF_NO_CKSUM; 4983 } 4984 4985 if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_RAW_CKSUM) { 4986 /* This is a sticky socket option only */ 4987 option_exists |= IPPF_RAW_CKSUM; 4988 is_sticky |= IPPF_RAW_CKSUM; 4989 } 4990 4991 if (!(ignore & IPPF_TCLASS)) { 4992 if (ipp->ipp_fields & IPPF_TCLASS) { 4993 option_exists |= IPPF_TCLASS; 4994 } else if (icmp->icmp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 4995 option_exists |= IPPF_TCLASS; 4996 is_sticky |= IPPF_TCLASS; 4997 } 4998 } 4999 5000 no_options: 5001 5002 /* 5003 * If any options carried in the ip6i_t were specified, we 5004 * need to account for the ip6i_t in the data we'll be sending 5005 * down. 
5006 */ 5007 if (option_exists & IPPF_HAS_IP6I) 5008 ip_hdr_len += sizeof (ip6i_t); 5009 5010 /* check/fix buffer config, setup pointers into it */ 5011 ip6h = (ip6_t *)&mp->b_rptr[-ip_hdr_len]; 5012 if ((mp->b_datap->db_ref != 1) || 5013 ((unsigned char *)ip6h < mp->b_datap->db_base) || 5014 !OK_32PTR(ip6h)) { 5015 mblk_t *mp1; 5016 5017 /* Try to get everything in a single mblk next time */ 5018 if (ip_hdr_len > icmp->icmp_max_hdr_len) { 5019 icmp->icmp_max_hdr_len = ip_hdr_len; 5020 5021 (void) proto_set_tx_wroff(q == NULL ? NULL:RD(q), connp, 5022 icmp->icmp_max_hdr_len + is->is_wroff_extra); 5023 } 5024 mp1 = allocb(ip_hdr_len + is->is_wroff_extra, BPRI_LO); 5025 if (!mp1) { 5026 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5027 return (ENOMEM); 5028 } 5029 mp1->b_cont = mp; 5030 mp1->b_wptr = mp1->b_datap->db_lim; 5031 ip6h = (ip6_t *)(mp1->b_wptr - ip_hdr_len); 5032 mp = mp1; 5033 } 5034 mp->b_rptr = (unsigned char *)ip6h; 5035 ip6i = (ip6i_t *)ip6h; 5036 5037 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&icmp->icmp_sticky_ipp : ipp) 5038 if (option_exists & IPPF_HAS_IP6I) { 5039 ip6h = (ip6_t *)&ip6i[1]; 5040 ip6i->ip6i_flags = 0; 5041 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5042 5043 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 5044 if (option_exists & IPPF_SCOPE_ID) { 5045 ip6i->ip6i_flags |= IP6I_IFINDEX; 5046 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 5047 } else if (option_exists & IPPF_IFINDEX) { 5048 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 5049 ASSERT(tipp->ipp_ifindex != 0); 5050 ip6i->ip6i_flags |= IP6I_IFINDEX; 5051 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 5052 } 5053 5054 if (option_exists & IPPF_RAW_CKSUM) { 5055 ip6i->ip6i_flags |= IP6I_RAW_CHECKSUM; 5056 ip6i->ip6i_checksum_off = icmp->icmp_checksum_off; 5057 } 5058 5059 if (option_exists & IPPF_NO_CKSUM) { 5060 ip6i->ip6i_flags |= IP6I_NO_ULP_CKSUM; 5061 } 5062 5063 if (option_exists & IPPF_ADDR) { 5064 /* 5065 * Enable per-packet source address verification if 5066 * IPV6_PKTINFO specified the source address. 5067 * ip6_src is set in the transport's _wput function. 5068 */ 5069 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 5070 } 5071 5072 if (option_exists & IPPF_DONTFRAG) { 5073 ip6i->ip6i_flags |= IP6I_DONTFRAG; 5074 } 5075 5076 if (option_exists & IPPF_USE_MIN_MTU) { 5077 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 5078 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 5079 } 5080 5081 if (option_exists & IPPF_NEXTHOP) { 5082 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 5083 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 5084 ip6i->ip6i_flags |= IP6I_NEXTHOP; 5085 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 5086 } 5087 5088 /* 5089 * tell IP this is an ip6i_t private header 5090 */ 5091 ip6i->ip6i_nxt = IPPROTO_RAW; 5092 } 5093 5094 /* Initialize IPv6 header */ 5095 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 5096 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 5097 5098 /* Set the hoplimit of the outgoing packet. 
*/ 5099 if (option_exists & IPPF_HOPLIMIT) { 5100 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 5101 ip6h->ip6_hops = ipp->ipp_hoplimit; 5102 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5103 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 5104 ip6h->ip6_hops = icmp->icmp_multicast_ttl; 5105 if (option_exists & IPPF_MULTICAST_HOPS) 5106 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5107 } else { 5108 ip6h->ip6_hops = icmp->icmp_ttl; 5109 if (option_exists & IPPF_UNICAST_HOPS) 5110 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 5111 } 5112 5113 if (option_exists & IPPF_ADDR) { 5114 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 5115 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 5116 ip6h->ip6_src = tipp->ipp_addr; 5117 } else { 5118 /* 5119 * The source address was not set using IPV6_PKTINFO. 5120 * First look at the bound source. 5121 * If unspecified fallback to __sin6_src_id. 5122 */ 5123 ip6h->ip6_src = icmp->icmp_v6src; 5124 if (sin6->__sin6_src_id != 0 && 5125 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 5126 ip_srcid_find_id(sin6->__sin6_src_id, 5127 &ip6h->ip6_src, icmp->icmp_zoneid, 5128 is->is_netstack); 5129 } 5130 } 5131 5132 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 5133 cp = (uint8_t *)&ip6h[1]; 5134 5135 /* 5136 * Here's where we have to start stringing together 5137 * any extension headers in the right order: 5138 * Hop-by-hop, destination, routing, and final destination opts. 
5139 */ 5140 if (option_exists & IPPF_HOPOPTS) { 5141 /* Hop-by-hop options */ 5142 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 5143 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 5144 5145 *nxthdr_ptr = IPPROTO_HOPOPTS; 5146 nxthdr_ptr = &hbh->ip6h_nxt; 5147 5148 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 5149 cp += tipp->ipp_hopoptslen; 5150 } 5151 /* 5152 * En-route destination options 5153 * Only do them if there's a routing header as well 5154 */ 5155 if (option_exists & IPPF_RTDSTOPTS) { 5156 ip6_dest_t *dst = (ip6_dest_t *)cp; 5157 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 5158 5159 *nxthdr_ptr = IPPROTO_DSTOPTS; 5160 nxthdr_ptr = &dst->ip6d_nxt; 5161 5162 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 5163 cp += tipp->ipp_rtdstoptslen; 5164 } 5165 /* 5166 * Routing header next 5167 */ 5168 if (option_exists & IPPF_RTHDR) { 5169 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 5170 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 5171 5172 *nxthdr_ptr = IPPROTO_ROUTING; 5173 nxthdr_ptr = &rt->ip6r_nxt; 5174 5175 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 5176 cp += tipp->ipp_rthdrlen; 5177 } 5178 /* 5179 * Do ultimate destination options 5180 */ 5181 if (option_exists & IPPF_DSTOPTS) { 5182 ip6_dest_t *dest = (ip6_dest_t *)cp; 5183 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 5184 5185 *nxthdr_ptr = IPPROTO_DSTOPTS; 5186 nxthdr_ptr = &dest->ip6d_nxt; 5187 5188 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 5189 cp += tipp->ipp_dstoptslen; 5190 } 5191 5192 /* 5193 * Now set the last header pointer to the proto passed in 5194 */ 5195 ASSERT((int)(cp - (uint8_t *)ip6i) == ip_hdr_len); 5196 *nxthdr_ptr = icmp->icmp_proto; 5197 5198 /* 5199 * Copy in the destination address 5200 */ 5201 ip6h->ip6_dst = ip6_dst; 5202 5203 ip6h->ip6_vcf = 5204 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 5205 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 5206 5207 if (option_exists & IPPF_TCLASS) { 5208 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 5209 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 5210 tipp->ipp_tclass); 5211 } 5212 if (option_exists & IPPF_RTHDR) { 5213 ip6_rthdr_t *rth; 5214 5215 /* 5216 * Perform any processing needed for source routing. 5217 * We know that all extension headers will be in the same mblk 5218 * as the IPv6 header. 5219 */ 5220 rth = ip_find_rthdr_v6(ip6h, mp->b_wptr); 5221 if (rth != NULL && rth->ip6r_segleft != 0) { 5222 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 5223 /* 5224 * Drop packet - only support Type 0 routing. 5225 * Notify the application as well. 5226 */ 5227 BUMP_MIB(&is->is_rawip_mib, 5228 rawipOutErrors); 5229 return (EPROTO); 5230 } 5231 /* 5232 * rth->ip6r_len is twice the number of 5233 * addresses in the header 5234 */ 5235 if (rth->ip6r_len & 0x1) { 5236 BUMP_MIB(&is->is_rawip_mib, 5237 rawipOutErrors); 5238 return (EPROTO); 5239 } 5240 /* 5241 * Shuffle the routing header and ip6_dst 5242 * addresses, and get the checksum difference 5243 * between the first hop (in ip6_dst) and 5244 * the destination (in the last routing hdr entry). 5245 */ 5246 csum = ip_massage_options_v6(ip6h, rth, 5247 is->is_netstack); 5248 /* 5249 * Verify that the first hop isn't a mapped address. 5250 * Routers along the path need to do this verification 5251 * for subsequent hops. 5252 */ 5253 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 5254 BUMP_MIB(&is->is_rawip_mib, 5255 rawipOutErrors); 5256 return (EADDRNOTAVAIL); 5257 } 5258 } 5259 } 5260 5261 ip_len = mp->b_wptr - (uchar_t *)ip6h - IPV6_HDR_LEN; 5262 if (mp->b_cont != NULL) 5263 ip_len += msgdsize(mp->b_cont); 5264 5265 /* 5266 * Set the length into the IP header. 5267 * If the length is greater than the maximum allowed by IP, 5268 * then free the message and return. Do not try and send it 5269 * as this can cause problems in layers below. 
5270 */ 5271 if (ip_len > IP_MAXPACKET) { 5272 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 5273 return (EMSGSIZE); 5274 } 5275 if (icmp->icmp_proto == IPPROTO_ICMPV6 || icmp->icmp_raw_checksum) { 5276 uint_t cksum_off; /* From ip6i == mp->b_rptr */ 5277 uint16_t *cksum_ptr; 5278 uint_t ext_hdrs_len; 5279 5280 /* ICMPv6 must have an offset matching icmp6_cksum offset */ 5281 ASSERT(icmp->icmp_proto != IPPROTO_ICMPV6 || 5282 icmp->icmp_checksum_off == 2); 5283 5284 /* 5285 * We make it easy for IP to include our pseudo header 5286 * by putting our length in uh_checksum, modified (if 5287 * we have a routing header) by the checksum difference 5288 * between the ultimate destination and first hop addresses. 5289 * Note: ICMPv6 must always checksum the packet. 5290 */ 5291 cksum_off = ip_hdr_len + icmp->icmp_checksum_off; 5292 if (cksum_off + sizeof (uint16_t) > mp->b_wptr - mp->b_rptr) { 5293 if (!pullupmsg(mp, cksum_off + sizeof (uint16_t))) { 5294 BUMP_MIB(&is->is_rawip_mib, 5295 rawipOutErrors); 5296 freemsg(mp); 5297 return (0); 5298 } 5299 ip6i = (ip6i_t *)mp->b_rptr; 5300 if (ip6i->ip6i_nxt == IPPROTO_RAW) 5301 ip6h = (ip6_t *)&ip6i[1]; 5302 else 5303 ip6h = (ip6_t *)ip6i; 5304 } 5305 /* Add payload length to checksum */ 5306 ext_hdrs_len = ip_hdr_len - IPV6_HDR_LEN - 5307 (int)((uchar_t *)ip6h - (uchar_t *)ip6i); 5308 csum += htons(ip_len - ext_hdrs_len); 5309 5310 cksum_ptr = (uint16_t *)((uchar_t *)ip6i + cksum_off); 5311 csum = (csum & 0xFFFF) + (csum >> 16); 5312 *cksum_ptr = (uint16_t)csum; 5313 } 5314 5315 #ifdef _LITTLE_ENDIAN 5316 ip_len = htons(ip_len); 5317 #endif 5318 ip6h->ip6_plen = (uint16_t)ip_len; 5319 5320 /* We're done. 
Pass the packet to IP */ 5321 BUMP_MIB(&is->is_rawip_mib, rawipOutDatagrams); 5322 ip_output_v6(icmp->icmp_connp, mp, q, IP_WPUT); 5323 return (0); 5324 } 5325 5326 static void 5327 icmp_wput_other(queue_t *q, mblk_t *mp) 5328 { 5329 uchar_t *rptr = mp->b_rptr; 5330 struct iocblk *iocp; 5331 #define tudr ((struct T_unitdata_req *)rptr) 5332 conn_t *connp = Q_TO_CONN(q); 5333 icmp_t *icmp = connp->conn_icmp; 5334 icmp_stack_t *is = icmp->icmp_is; 5335 cred_t *cr; 5336 5337 switch (mp->b_datap->db_type) { 5338 case M_PROTO: 5339 case M_PCPROTO: 5340 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5341 /* 5342 * If the message does not contain a PRIM_type, 5343 * throw it away. 5344 */ 5345 freemsg(mp); 5346 return; 5347 } 5348 switch (((union T_primitives *)rptr)->type) { 5349 case T_ADDR_REQ: 5350 icmp_addr_req(q, mp); 5351 return; 5352 case O_T_BIND_REQ: 5353 case T_BIND_REQ: 5354 icmp_tpi_bind(q, mp); 5355 return; 5356 case T_CONN_REQ: 5357 icmp_tpi_connect(q, mp); 5358 return; 5359 case T_CAPABILITY_REQ: 5360 icmp_capability_req(q, mp); 5361 return; 5362 case T_INFO_REQ: 5363 icmp_info_req(q, mp); 5364 return; 5365 case T_UNITDATA_REQ: 5366 /* 5367 * If a T_UNITDATA_REQ gets here, the address must 5368 * be bad. Valid T_UNITDATA_REQs are found above 5369 * and break to below this switch. 5370 */ 5371 icmp_ud_err(q, mp, EADDRNOTAVAIL); 5372 return; 5373 case T_UNBIND_REQ: 5374 icmp_tpi_unbind(q, mp); 5375 return; 5376 5377 case T_SVR4_OPTMGMT_REQ: 5378 /* 5379 * All Solaris components should pass a db_credp 5380 * for this TPI message, hence we ASSERT. 5381 * But in case there is some other M_PROTO that looks 5382 * like a TPI message sent by some other kernel 5383 * component, we check and return an error. 
5384 */ 5385 cr = msg_getcred(mp, NULL); 5386 ASSERT(cr != NULL); 5387 if (cr == NULL) { 5388 icmp_err_ack(q, mp, TSYSERR, EINVAL); 5389 return; 5390 } 5391 5392 if (!snmpcom_req(q, mp, icmp_snmp_set, ip_snmp_get, 5393 cr)) { 5394 /* Only IP can return anything meaningful */ 5395 (void) svr4_optcom_req(q, mp, cr, 5396 &icmp_opt_obj, B_TRUE); 5397 } 5398 return; 5399 5400 case T_OPTMGMT_REQ: 5401 /* 5402 * All Solaris components should pass a db_credp 5403 * for this TPI message, hence we ASSERT. 5404 * But in case there is some other M_PROTO that looks 5405 * like a TPI message sent by some other kernel 5406 * component, we check and return an error. 5407 */ 5408 cr = msg_getcred(mp, NULL); 5409 ASSERT(cr != NULL); 5410 if (cr == NULL) { 5411 icmp_err_ack(q, mp, TSYSERR, EINVAL); 5412 return; 5413 } 5414 /* Only IP can return anything meaningful */ 5415 (void) tpi_optcom_req(q, mp, cr, &icmp_opt_obj, B_TRUE); 5416 return; 5417 5418 case T_DISCON_REQ: 5419 icmp_tpi_disconnect(q, mp); 5420 return; 5421 5422 /* The following TPI message is not supported by icmp. */ 5423 case O_T_CONN_RES: 5424 case T_CONN_RES: 5425 icmp_err_ack(q, mp, TNOTSUPPORT, 0); 5426 return; 5427 5428 /* The following 3 TPI requests are illegal for icmp. */ 5429 case T_DATA_REQ: 5430 case T_EXDATA_REQ: 5431 case T_ORDREL_REQ: 5432 freemsg(mp); 5433 (void) putctl1(RD(q), M_ERROR, EPROTO); 5434 return; 5435 default: 5436 break; 5437 } 5438 break; 5439 case M_IOCTL: 5440 iocp = (struct iocblk *)mp->b_rptr; 5441 switch (iocp->ioc_cmd) { 5442 case TI_GETPEERNAME: 5443 if (icmp->icmp_state != TS_DATA_XFER) { 5444 /* 5445 * If a default destination address has not 5446 * been associated with the stream, then we 5447 * don't know the peer's name. 
5448 */ 5449 iocp->ioc_error = ENOTCONN; 5450 err_ret:; 5451 iocp->ioc_count = 0; 5452 mp->b_datap->db_type = M_IOCACK; 5453 qreply(q, mp); 5454 return; 5455 } 5456 /* FALLTHRU */ 5457 case TI_GETMYNAME: 5458 /* 5459 * For TI_GETPEERNAME and TI_GETMYNAME, we first 5460 * need to copyin the user's strbuf structure. 5461 * Processing will continue in the M_IOCDATA case 5462 * below. 5463 */ 5464 mi_copyin(q, mp, NULL, 5465 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 5466 return; 5467 case ND_SET: 5468 /* nd_getset performs the necessary error checking */ 5469 case ND_GET: 5470 if (nd_getset(q, is->is_nd, mp)) { 5471 qreply(q, mp); 5472 return; 5473 } 5474 break; 5475 case _SIOCSOCKFALLBACK: 5476 /* 5477 * socket is falling back to be a 5478 * streams socket. Nothing to do 5479 */ 5480 iocp->ioc_count = 0; 5481 iocp->ioc_rval = 0; 5482 qreply(q, mp); 5483 return; 5484 default: 5485 break; 5486 } 5487 break; 5488 case M_IOCDATA: 5489 icmp_wput_iocdata(q, mp); 5490 return; 5491 default: 5492 break; 5493 } 5494 ip_wput(q, mp); 5495 } 5496 5497 /* 5498 * icmp_wput_iocdata is called by icmp_wput_slow to handle all M_IOCDATA 5499 * messages. 5500 */ 5501 static void 5502 icmp_wput_iocdata(queue_t *q, mblk_t *mp) 5503 { 5504 mblk_t *mp1; 5505 STRUCT_HANDLE(strbuf, sb); 5506 icmp_t *icmp; 5507 uint_t addrlen; 5508 uint_t error; 5509 5510 /* Make sure it is one of ours. */ 5511 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5512 case TI_GETMYNAME: 5513 case TI_GETPEERNAME: 5514 break; 5515 default: 5516 icmp = Q_TO_ICMP(q); 5517 ip_output(icmp->icmp_connp, mp, q, IP_WPUT); 5518 return; 5519 } 5520 switch (mi_copy_state(q, mp, &mp1)) { 5521 case -1: 5522 return; 5523 case MI_COPY_CASE(MI_COPY_IN, 1): 5524 break; 5525 case MI_COPY_CASE(MI_COPY_OUT, 1): 5526 /* 5527 * The address has been copied out, so now 5528 * copyout the strbuf. 5529 */ 5530 mi_copyout(q, mp); 5531 return; 5532 case MI_COPY_CASE(MI_COPY_OUT, 2): 5533 /* 5534 * The address and strbuf have been copied out. 
5535 * We're done, so just acknowledge the original 5536 * M_IOCTL. 5537 */ 5538 mi_copy_done(q, mp, 0); 5539 return; 5540 default: 5541 /* 5542 * Something strange has happened, so acknowledge 5543 * the original M_IOCTL with an EPROTO error. 5544 */ 5545 mi_copy_done(q, mp, EPROTO); 5546 return; 5547 } 5548 /* 5549 * Now we have the strbuf structure for TI_GETMYNAME 5550 * and TI_GETPEERNAME. Next we copyout the requested 5551 * address and then we'll copyout the strbuf. 5552 */ 5553 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 5554 (void *)mp1->b_rptr); 5555 icmp = Q_TO_ICMP(q); 5556 if (icmp->icmp_family == AF_INET) 5557 addrlen = sizeof (sin_t); 5558 else 5559 addrlen = sizeof (sin6_t); 5560 5561 if (STRUCT_FGET(sb, maxlen) < addrlen) { 5562 mi_copy_done(q, mp, EINVAL); 5563 return; 5564 } 5565 5566 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 5567 5568 if (mp1 == NULL) 5569 return; 5570 5571 rw_enter(&icmp->icmp_rwlock, RW_READER); 5572 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 5573 case TI_GETMYNAME: 5574 error = rawip_do_getsockname(icmp, (void *)mp1->b_rptr, 5575 &addrlen); 5576 break; 5577 case TI_GETPEERNAME: 5578 error = rawip_do_getpeername(icmp, (void *)mp1->b_rptr, 5579 &addrlen); 5580 break; 5581 } 5582 rw_exit(&icmp->icmp_rwlock); 5583 5584 if (error != 0) { 5585 mi_copy_done(q, mp, error); 5586 } else { 5587 mp1->b_wptr += addrlen; 5588 STRUCT_FSET(sb, len, addrlen); 5589 5590 /* Copy out the address */ 5591 mi_copyout(q, mp); 5592 } 5593 } 5594 5595 static int 5596 icmp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 5597 void *thisdg_attrs) 5598 { 5599 struct T_unitdata_req *udreqp; 5600 int is_absreq_failure; 5601 cred_t *cr; 5602 5603 udreqp = (struct T_unitdata_req *)mp->b_rptr; 5604 *errorp = 0; 5605 5606 /* 5607 * All Solaris components should pass a db_credp 5608 * for this TPI message, hence we ASSERT. 
5609 * But in case there is some other M_PROTO that looks 5610 * like a TPI message sent by some other kernel 5611 * component, we check and return an error. 5612 */ 5613 cr = msg_getcred(mp, NULL); 5614 ASSERT(cr != NULL); 5615 if (cr == NULL) 5616 return (-1); 5617 5618 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 5619 udreqp->OPT_offset, cr, &icmp_opt_obj, 5620 thisdg_attrs, &is_absreq_failure); 5621 5622 if (*errorp != 0) { 5623 /* 5624 * Note: No special action needed in this 5625 * module for "is_absreq_failure" 5626 */ 5627 return (-1); /* failure */ 5628 } 5629 ASSERT(is_absreq_failure == 0); 5630 return (0); /* success */ 5631 } 5632 5633 void 5634 icmp_ddi_g_init(void) 5635 { 5636 icmp_max_optsize = optcom_max_optsize(icmp_opt_obj.odb_opt_des_arr, 5637 icmp_opt_obj.odb_opt_arr_cnt); 5638 5639 /* 5640 * We want to be informed each time a stack is created or 5641 * destroyed in the kernel, so we can maintain the 5642 * set of icmp_stack_t's. 5643 */ 5644 netstack_register(NS_ICMP, rawip_stack_init, NULL, rawip_stack_fini); 5645 } 5646 5647 void 5648 icmp_ddi_g_destroy(void) 5649 { 5650 netstack_unregister(NS_ICMP); 5651 } 5652 5653 #define INET_NAME "ip" 5654 5655 /* 5656 * Initialize the ICMP stack instance. 
5657 */ 5658 static void * 5659 rawip_stack_init(netstackid_t stackid, netstack_t *ns) 5660 { 5661 icmp_stack_t *is; 5662 icmpparam_t *pa; 5663 int error = 0; 5664 major_t major; 5665 5666 is = (icmp_stack_t *)kmem_zalloc(sizeof (*is), KM_SLEEP); 5667 is->is_netstack = ns; 5668 5669 pa = (icmpparam_t *)kmem_alloc(sizeof (icmp_param_arr), KM_SLEEP); 5670 is->is_param_arr = pa; 5671 bcopy(icmp_param_arr, is->is_param_arr, sizeof (icmp_param_arr)); 5672 5673 (void) icmp_param_register(&is->is_nd, 5674 is->is_param_arr, A_CNT(icmp_param_arr)); 5675 is->is_ksp = rawip_kstat_init(stackid); 5676 5677 major = mod_name_to_major(INET_NAME); 5678 error = ldi_ident_from_major(major, &is->is_ldi_ident); 5679 ASSERT(error == 0); 5680 return (is); 5681 } 5682 5683 /* 5684 * Free the ICMP stack instance. 5685 */ 5686 static void 5687 rawip_stack_fini(netstackid_t stackid, void *arg) 5688 { 5689 icmp_stack_t *is = (icmp_stack_t *)arg; 5690 5691 nd_free(&is->is_nd); 5692 kmem_free(is->is_param_arr, sizeof (icmp_param_arr)); 5693 is->is_param_arr = NULL; 5694 5695 rawip_kstat_fini(stackid, is->is_ksp); 5696 is->is_ksp = NULL; 5697 ldi_ident_release(is->is_ldi_ident); 5698 kmem_free(is, sizeof (*is)); 5699 } 5700 5701 static void * 5702 rawip_kstat_init(netstackid_t stackid) { 5703 kstat_t *ksp; 5704 5705 rawip_named_kstat_t template = { 5706 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 5707 { "inCksumErrs", KSTAT_DATA_UINT32, 0 }, 5708 { "inErrors", KSTAT_DATA_UINT32, 0 }, 5709 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 5710 { "outErrors", KSTAT_DATA_UINT32, 0 }, 5711 }; 5712 5713 ksp = kstat_create_netstack("icmp", 0, "rawip", "mib2", 5714 KSTAT_TYPE_NAMED, 5715 NUM_OF_FIELDS(rawip_named_kstat_t), 5716 0, stackid); 5717 if (ksp == NULL || ksp->ks_data == NULL) 5718 return (NULL); 5719 5720 bcopy(&template, ksp->ks_data, sizeof (template)); 5721 ksp->ks_update = rawip_kstat_update; 5722 ksp->ks_private = (void *)(uintptr_t)stackid; 5723 5724 kstat_install(ksp); 5725 return (ksp); 5726 } 
5727 5728 static void 5729 rawip_kstat_fini(netstackid_t stackid, kstat_t *ksp) 5730 { 5731 if (ksp != NULL) { 5732 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 5733 kstat_delete_netstack(ksp, stackid); 5734 } 5735 } 5736 5737 static int 5738 rawip_kstat_update(kstat_t *ksp, int rw) 5739 { 5740 rawip_named_kstat_t *rawipkp; 5741 netstackid_t stackid = (netstackid_t)(uintptr_t)ksp->ks_private; 5742 netstack_t *ns; 5743 icmp_stack_t *is; 5744 5745 if ((ksp == NULL) || (ksp->ks_data == NULL)) 5746 return (EIO); 5747 5748 if (rw == KSTAT_WRITE) 5749 return (EACCES); 5750 5751 rawipkp = (rawip_named_kstat_t *)ksp->ks_data; 5752 5753 ns = netstack_find_by_stackid(stackid); 5754 if (ns == NULL) 5755 return (-1); 5756 is = ns->netstack_icmp; 5757 if (is == NULL) { 5758 netstack_rele(ns); 5759 return (-1); 5760 } 5761 rawipkp->inDatagrams.value.ui32 = is->is_rawip_mib.rawipInDatagrams; 5762 rawipkp->inCksumErrs.value.ui32 = is->is_rawip_mib.rawipInCksumErrs; 5763 rawipkp->inErrors.value.ui32 = is->is_rawip_mib.rawipInErrors; 5764 rawipkp->outDatagrams.value.ui32 = is->is_rawip_mib.rawipOutDatagrams; 5765 rawipkp->outErrors.value.ui32 = is->is_rawip_mib.rawipOutErrors; 5766 netstack_rele(ns); 5767 return (0); 5768 } 5769 5770 /* ARGSUSED */ 5771 int 5772 rawip_accept(sock_lower_handle_t lproto_handle, 5773 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 5774 cred_t *cr) 5775 { 5776 return (EOPNOTSUPP); 5777 } 5778 5779 /* ARGSUSED */ 5780 int 5781 rawip_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 5782 socklen_t len, cred_t *cr) 5783 { 5784 conn_t *connp = (conn_t *)proto_handle; 5785 int error; 5786 5787 /* All Solaris components should pass a cred for this operation. 
*/ 5788 ASSERT(cr != NULL); 5789 5790 /* Binding to a NULL address really means unbind */ 5791 if (sa == NULL) 5792 error = rawip_do_unbind(connp); 5793 else 5794 error = rawip_do_bind(connp, sa, len); 5795 5796 if (error < 0) { 5797 if (error == -TOUTSTATE) 5798 error = EINVAL; 5799 else 5800 error = proto_tlitosyserr(-error); 5801 } 5802 return (error); 5803 } 5804 5805 static int 5806 rawip_implicit_bind(conn_t *connp) 5807 { 5808 sin6_t sin6addr; 5809 sin_t *sin; 5810 sin6_t *sin6; 5811 socklen_t len; 5812 int error; 5813 5814 if (connp->conn_icmp->icmp_family == AF_INET) { 5815 len = sizeof (struct sockaddr_in); 5816 sin = (sin_t *)&sin6addr; 5817 *sin = sin_null; 5818 sin->sin_family = AF_INET; 5819 sin->sin_addr.s_addr = INADDR_ANY; 5820 } else { 5821 ASSERT(connp->conn_icmp->icmp_family == AF_INET6); 5822 len = sizeof (sin6_t); 5823 sin6 = (sin6_t *)&sin6addr; 5824 *sin6 = sin6_null; 5825 sin6->sin6_family = AF_INET6; 5826 V6_SET_ZERO(sin6->sin6_addr); 5827 } 5828 5829 error = rawip_do_bind(connp, (struct sockaddr *)&sin6addr, len); 5830 5831 return ((error < 0) ? proto_tlitosyserr(-error) : error); 5832 } 5833 5834 static int 5835 rawip_unbind(conn_t *connp) 5836 { 5837 int error; 5838 5839 error = rawip_do_unbind(connp); 5840 if (error < 0) { 5841 error = proto_tlitosyserr(-error); 5842 } 5843 return (error); 5844 } 5845 5846 /* ARGSUSED */ 5847 int 5848 rawip_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 5849 { 5850 return (EOPNOTSUPP); 5851 } 5852 5853 /* ARGSUSED */ 5854 int 5855 rawip_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 5856 socklen_t len, sock_connid_t *id, cred_t *cr) 5857 { 5858 conn_t *connp = (conn_t *)proto_handle; 5859 icmp_t *icmp = connp->conn_icmp; 5860 int error; 5861 boolean_t did_bind = B_FALSE; 5862 5863 /* All Solaris components should pass a cred for this operation. 
*/ 5864 ASSERT(cr != NULL); 5865 5866 if (sa == NULL) { 5867 /* 5868 * Disconnect 5869 * Make sure we are connected 5870 */ 5871 if (icmp->icmp_state != TS_DATA_XFER) 5872 return (EINVAL); 5873 5874 error = icmp_disconnect(connp); 5875 return (error); 5876 } 5877 5878 error = proto_verify_ip_addr(icmp->icmp_family, sa, len); 5879 if (error != 0) 5880 return (error); 5881 5882 /* do an implicit bind if necessary */ 5883 if (icmp->icmp_state == TS_UNBND) { 5884 error = rawip_implicit_bind(connp); 5885 /* 5886 * We could be racing with an actual bind, in which case 5887 * we would see EPROTO. We cross our fingers and try 5888 * to connect. 5889 */ 5890 if (!(error == 0 || error == EPROTO)) 5891 return (error); 5892 did_bind = B_TRUE; 5893 } 5894 5895 /* 5896 * set SO_DGRAM_ERRIND 5897 */ 5898 icmp->icmp_dgram_errind = B_TRUE; 5899 5900 error = rawip_do_connect(connp, sa, len, cr); 5901 5902 if (error != 0 && did_bind) { 5903 int unbind_err; 5904 5905 unbind_err = rawip_unbind(connp); 5906 ASSERT(unbind_err == 0); 5907 } 5908 5909 if (error == 0) { 5910 *id = 0; 5911 (*connp->conn_upcalls->su_connected) 5912 (connp->conn_upper_handle, 0, NULL, -1); 5913 } else if (error < 0) { 5914 error = proto_tlitosyserr(-error); 5915 } 5916 return (error); 5917 } 5918 5919 /* ARGSUSED */ 5920 int 5921 rawip_fallback(sock_lower_handle_t proto_handle, queue_t *q, 5922 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb) 5923 { 5924 conn_t *connp = (conn_t *)proto_handle; 5925 icmp_t *icmp; 5926 struct T_capability_ack tca; 5927 struct sockaddr_in6 laddr, faddr; 5928 socklen_t laddrlen, faddrlen; 5929 short opts; 5930 struct stroptions *stropt; 5931 mblk_t *stropt_mp; 5932 int error; 5933 5934 icmp = connp->conn_icmp; 5935 5936 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 5937 5938 /* 5939 * setup the fallback stream that was allocated 5940 */ 5941 connp->conn_dev = (dev_t)RD(q)->q_ptr; 5942 connp->conn_minor_arena = WR(q)->q_ptr; 5943 5944 RD(q)->q_ptr 
= WR(q)->q_ptr = connp; 5945 5946 WR(q)->q_qinfo = &icmpwinit; 5947 5948 connp->conn_rq = RD(q); 5949 connp->conn_wq = WR(q); 5950 5951 /* Notify stream head about options before sending up data */ 5952 stropt_mp->b_datap->db_type = M_SETOPTS; 5953 stropt_mp->b_wptr += sizeof (*stropt); 5954 stropt = (struct stroptions *)stropt_mp->b_rptr; 5955 stropt->so_flags = SO_WROFF | SO_HIWAT; 5956 stropt->so_wroff = 5957 (ushort_t)(icmp->icmp_max_hdr_len + icmp->icmp_is->is_wroff_extra); 5958 stropt->so_hiwat = icmp->icmp_recv_hiwat; 5959 putnext(RD(q), stropt_mp); 5960 5961 /* 5962 * free helper stream 5963 */ 5964 ip_free_helper_stream(connp); 5965 5966 /* 5967 * Collect the information needed to sync with the sonode 5968 */ 5969 icmp_do_capability_ack(icmp, &tca, TC1_INFO); 5970 5971 laddrlen = faddrlen = sizeof (sin6_t); 5972 (void) rawip_getsockname((sock_lower_handle_t)connp, 5973 (struct sockaddr *)&laddr, &laddrlen, CRED()); 5974 error = rawip_getpeername((sock_lower_handle_t)connp, 5975 (struct sockaddr *)&faddr, &faddrlen, CRED()); 5976 if (error != 0) 5977 faddrlen = 0; 5978 opts = 0; 5979 if (icmp->icmp_dgram_errind) 5980 opts |= SO_DGRAM_ERRIND; 5981 if (icmp->icmp_dontroute) 5982 opts |= SO_DONTROUTE; 5983 5984 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 5985 (struct sockaddr *)&laddr, laddrlen, 5986 (struct sockaddr *)&faddr, faddrlen, opts); 5987 5988 /* 5989 * Attempts to send data up during fallback will result in it being 5990 * queued in udp_t. Now we push up any queued packets. 
5991 */ 5992 mutex_enter(&icmp->icmp_recv_lock); 5993 while (icmp->icmp_fallback_queue_head != NULL) { 5994 mblk_t *mp; 5995 5996 mp = icmp->icmp_fallback_queue_head; 5997 icmp->icmp_fallback_queue_head = mp->b_next; 5998 mp->b_next = NULL; 5999 mutex_exit(&icmp->icmp_recv_lock); 6000 putnext(RD(q), mp); 6001 mutex_enter(&icmp->icmp_recv_lock); 6002 } 6003 icmp->icmp_fallback_queue_tail = icmp->icmp_fallback_queue_head; 6004 6005 /* 6006 * No longer a streams less socket 6007 */ 6008 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6009 connp->conn_flags &= ~IPCL_NONSTR; 6010 rw_exit(&icmp->icmp_rwlock); 6011 6012 mutex_exit(&icmp->icmp_recv_lock); 6013 6014 ASSERT(icmp->icmp_fallback_queue_head == NULL && 6015 icmp->icmp_fallback_queue_tail == NULL); 6016 6017 ASSERT(connp->conn_ref >= 1); 6018 6019 return (0); 6020 } 6021 6022 /* ARGSUSED */ 6023 sock_lower_handle_t 6024 rawip_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 6025 uint_t *smodep, int *errorp, int flags, cred_t *credp) 6026 { 6027 conn_t *connp; 6028 6029 if (type != SOCK_RAW || (family != AF_INET && family != AF_INET6)) { 6030 *errorp = EPROTONOSUPPORT; 6031 return (NULL); 6032 } 6033 6034 connp = icmp_open(family, credp, errorp, flags); 6035 if (connp != NULL) { 6036 icmp_stack_t *is; 6037 6038 is = connp->conn_icmp->icmp_is; 6039 connp->conn_flags |= IPCL_NONSTR; 6040 6041 if (connp->conn_icmp->icmp_family == AF_INET6) { 6042 /* Build initial header template for transmit */ 6043 rw_enter(&connp->conn_icmp->icmp_rwlock, RW_WRITER); 6044 if ((*errorp = 6045 icmp_build_hdrs(connp->conn_icmp)) != 0) { 6046 rw_exit(&connp->conn_icmp->icmp_rwlock); 6047 ipcl_conn_destroy(connp); 6048 return (NULL); 6049 } 6050 rw_exit(&connp->conn_icmp->icmp_rwlock); 6051 } 6052 6053 connp->conn_icmp->icmp_recv_hiwat = is->is_recv_hiwat; 6054 connp->conn_icmp->icmp_xmit_hiwat = is->is_xmit_hiwat; 6055 6056 if ((*errorp = ip_create_helper_stream(connp, 6057 is->is_ldi_ident)) != 0) { 6058 
cmn_err(CE_CONT, "create of IP helper stream failed\n"); 6059 (void) rawip_do_close(connp); 6060 return (NULL); 6061 } 6062 6063 mutex_enter(&connp->conn_lock); 6064 connp->conn_state_flags &= ~CONN_INCIPIENT; 6065 mutex_exit(&connp->conn_lock); 6066 *sock_downcalls = &sock_rawip_downcalls; 6067 *smodep = SM_ATOMIC; 6068 } else { 6069 ASSERT(*errorp != 0); 6070 } 6071 6072 return ((sock_lower_handle_t)connp); 6073 } 6074 6075 /* ARGSUSED */ 6076 void 6077 rawip_activate(sock_lower_handle_t proto_handle, 6078 sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, int flags, 6079 cred_t *cr) 6080 { 6081 conn_t *connp = (conn_t *)proto_handle; 6082 icmp_stack_t *is = connp->conn_icmp->icmp_is; 6083 struct sock_proto_props sopp; 6084 6085 /* All Solaris components should pass a cred for this operation. */ 6086 ASSERT(cr != NULL); 6087 6088 connp->conn_upcalls = sock_upcalls; 6089 connp->conn_upper_handle = sock_handle; 6090 6091 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 6092 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 6093 sopp.sopp_wroff = connp->conn_icmp->icmp_max_hdr_len + 6094 is->is_wroff_extra; 6095 sopp.sopp_rxhiwat = is->is_recv_hiwat; 6096 sopp.sopp_rxlowat = icmp_mod_info.mi_lowat; 6097 sopp.sopp_maxblk = INFPSZ; 6098 sopp.sopp_maxpsz = IP_MAXPACKET; 6099 sopp.sopp_minpsz = (icmp_mod_info.mi_minpsz == 1) ? 
0 : 6100 icmp_mod_info.mi_minpsz; 6101 6102 (*connp->conn_upcalls->su_set_proto_props) 6103 (connp->conn_upper_handle, &sopp); 6104 } 6105 6106 static int 6107 rawip_do_getsockname(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp) 6108 { 6109 sin_t *sin = (sin_t *)sa; 6110 sin6_t *sin6 = (sin6_t *)sa; 6111 6112 ASSERT(icmp != NULL); 6113 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 6114 6115 switch (icmp->icmp_family) { 6116 case AF_INET: 6117 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 6118 if (*salenp < sizeof (sin_t)) 6119 return (EINVAL); 6120 6121 *salenp = sizeof (sin_t); 6122 *sin = sin_null; 6123 sin->sin_family = AF_INET; 6124 if (icmp->icmp_state == TS_UNBND) { 6125 break; 6126 } 6127 6128 if (!IN6_IS_ADDR_V4MAPPED_ANY(&icmp->icmp_v6src) && 6129 !IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 6130 sin->sin_addr.s_addr = V4_PART_OF_V6(icmp->icmp_v6src); 6131 } else { 6132 /* 6133 * INADDR_ANY 6134 * icmp_v6src is not set, we might be bound to 6135 * broadcast/multicast. Use icmp_bound_v6src as 6136 * local address instead (that could 6137 * also still be INADDR_ANY) 6138 */ 6139 sin->sin_addr.s_addr = 6140 V4_PART_OF_V6(icmp->icmp_bound_v6src); 6141 } 6142 break; 6143 case AF_INET6: 6144 6145 if (*salenp < sizeof (sin6_t)) 6146 return (EINVAL); 6147 6148 *salenp = sizeof (sin6_t); 6149 *sin6 = sin6_null; 6150 sin6->sin6_family = AF_INET6; 6151 if (icmp->icmp_state == TS_UNBND) { 6152 break; 6153 } 6154 if (!IN6_IS_ADDR_UNSPECIFIED(&icmp->icmp_v6src)) { 6155 sin6->sin6_addr = icmp->icmp_v6src; 6156 } else { 6157 /* 6158 * UNSPECIFIED 6159 * icmp_v6src is not set, we might be bound to 6160 * broadcast/multicast. 
Use icmp_bound_v6src as 6161 * local address instead (that could 6162 * also still be UNSPECIFIED) 6163 */ 6164 6165 sin6->sin6_addr = icmp->icmp_bound_v6src; 6166 } 6167 break; 6168 } 6169 return (0); 6170 } 6171 6172 static int 6173 rawip_do_getpeername(icmp_t *icmp, struct sockaddr *sa, uint_t *salenp) 6174 { 6175 sin_t *sin = (sin_t *)sa; 6176 sin6_t *sin6 = (sin6_t *)sa; 6177 6178 ASSERT(icmp != NULL); 6179 ASSERT(RW_LOCK_HELD(&icmp->icmp_rwlock)); 6180 6181 if (icmp->icmp_state != TS_DATA_XFER) 6182 return (ENOTCONN); 6183 6184 sa->sa_family = icmp->icmp_family; 6185 switch (icmp->icmp_family) { 6186 case AF_INET: 6187 ASSERT(icmp->icmp_ipversion == IPV4_VERSION); 6188 6189 if (*salenp < sizeof (sin_t)) 6190 return (EINVAL); 6191 6192 *salenp = sizeof (sin_t); 6193 *sin = sin_null; 6194 sin->sin_family = AF_INET; 6195 sin->sin_addr.s_addr = 6196 V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr); 6197 break; 6198 case AF_INET6: 6199 if (*salenp < sizeof (sin6_t)) 6200 return (EINVAL); 6201 6202 *salenp = sizeof (sin6_t); 6203 *sin6 = sin6_null; 6204 *sin6 = icmp->icmp_v6dst; 6205 break; 6206 } 6207 return (0); 6208 } 6209 6210 /* ARGSUSED */ 6211 int 6212 rawip_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6213 socklen_t *salenp, cred_t *cr) 6214 { 6215 conn_t *connp = (conn_t *)proto_handle; 6216 icmp_t *icmp = connp->conn_icmp; 6217 int error; 6218 6219 /* All Solaris components should pass a cred for this operation. 
*/ 6220 ASSERT(cr != NULL); 6221 6222 ASSERT(icmp != NULL); 6223 6224 rw_enter(&icmp->icmp_rwlock, RW_READER); 6225 6226 error = rawip_do_getpeername(icmp, sa, salenp); 6227 6228 rw_exit(&icmp->icmp_rwlock); 6229 6230 return (error); 6231 } 6232 6233 /* ARGSUSED */ 6234 int 6235 rawip_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6236 socklen_t *salenp, cred_t *cr) 6237 { 6238 conn_t *connp = (conn_t *)proto_handle; 6239 icmp_t *icmp = connp->conn_icmp; 6240 int error; 6241 6242 /* All Solaris components should pass a cred for this operation. */ 6243 ASSERT(cr != NULL); 6244 6245 ASSERT(icmp != NULL); 6246 rw_enter(&icmp->icmp_rwlock, RW_READER); 6247 6248 error = rawip_do_getsockname(icmp, sa, salenp); 6249 6250 rw_exit(&icmp->icmp_rwlock); 6251 6252 return (error); 6253 } 6254 6255 int 6256 rawip_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6257 const void *optvalp, socklen_t optlen, cred_t *cr) 6258 { 6259 conn_t *connp = (conn_t *)proto_handle; 6260 icmp_t *icmp = connp->conn_icmp; 6261 int error; 6262 6263 /* All Solaris components should pass a cred for this operation. 
*/ 6264 ASSERT(cr != NULL); 6265 6266 error = proto_opt_check(level, option_name, optlen, NULL, 6267 icmp_opt_obj.odb_opt_des_arr, 6268 icmp_opt_obj.odb_opt_arr_cnt, 6269 icmp_opt_obj.odb_topmost_tpiprovider, 6270 B_TRUE, B_FALSE, cr); 6271 6272 if (error != 0) { 6273 /* 6274 * option not recognized 6275 */ 6276 if (error < 0) { 6277 error = proto_tlitosyserr(-error); 6278 } 6279 return (error); 6280 } 6281 6282 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6283 error = icmp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, 6284 option_name, optlen, (uchar_t *)optvalp, (uint_t *)&optlen, 6285 (uchar_t *)optvalp, NULL, cr); 6286 rw_exit(&icmp->icmp_rwlock); 6287 6288 if (error < 0) { 6289 /* 6290 * Pass on to ip 6291 */ 6292 error = ip_set_options(connp, level, option_name, optvalp, 6293 optlen, cr); 6294 } 6295 6296 ASSERT(error >= 0); 6297 6298 return (error); 6299 } 6300 6301 int 6302 rawip_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6303 void *optvalp, socklen_t *optlen, cred_t *cr) 6304 { 6305 int error; 6306 conn_t *connp = (conn_t *)proto_handle; 6307 icmp_t *icmp = connp->conn_icmp; 6308 t_uscalar_t max_optbuf_len; 6309 void *optvalp_buf; 6310 int len; 6311 6312 /* All Solaris components should pass a cred for this operation. 
*/ 6313 ASSERT(cr != NULL); 6314 6315 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6316 icmp_opt_obj.odb_opt_des_arr, 6317 icmp_opt_obj.odb_opt_arr_cnt, 6318 icmp_opt_obj.odb_topmost_tpiprovider, 6319 B_FALSE, B_TRUE, cr); 6320 6321 if (error != 0) { 6322 if (error < 0) { 6323 error = proto_tlitosyserr(-error); 6324 } 6325 return (error); 6326 } 6327 6328 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6329 rw_enter(&icmp->icmp_rwlock, RW_READER); 6330 len = icmp_opt_get(connp, level, option_name, optvalp_buf); 6331 rw_exit(&icmp->icmp_rwlock); 6332 6333 if (len < 0) { 6334 /* 6335 * Pass on to IP 6336 */ 6337 kmem_free(optvalp_buf, max_optbuf_len); 6338 return (ip_get_options(connp, level, option_name, optvalp, 6339 optlen, cr)); 6340 } else { 6341 /* 6342 * update optlen and copy option value 6343 */ 6344 t_uscalar_t size = MIN(len, *optlen); 6345 bcopy(optvalp_buf, optvalp, size); 6346 bcopy(&size, optlen, sizeof (size)); 6347 6348 kmem_free(optvalp_buf, max_optbuf_len); 6349 return (0); 6350 } 6351 } 6352 6353 /* ARGSUSED */ 6354 int 6355 rawip_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 6356 { 6357 conn_t *connp = (conn_t *)proto_handle; 6358 6359 /* All Solaris components should pass a cred for this operation. */ 6360 ASSERT(cr != NULL); 6361 6362 (void) rawip_do_close(connp); 6363 return (0); 6364 } 6365 6366 /* ARGSUSED */ 6367 int 6368 rawip_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6369 { 6370 conn_t *connp = (conn_t *)proto_handle; 6371 6372 /* All Solaris components should pass a cred for this operation. 
*/ 6373 ASSERT(cr != NULL); 6374 6375 /* shut down the send side */ 6376 if (how != SHUT_RD) 6377 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6378 SOCK_OPCTL_SHUT_SEND, 0); 6379 /* shut down the recv side */ 6380 if (how != SHUT_WR) 6381 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6382 SOCK_OPCTL_SHUT_RECV, 0); 6383 return (0); 6384 } 6385 6386 void 6387 rawip_clr_flowctrl(sock_lower_handle_t proto_handle) 6388 { 6389 conn_t *connp = (conn_t *)proto_handle; 6390 icmp_t *icmp = connp->conn_icmp; 6391 6392 mutex_enter(&icmp->icmp_recv_lock); 6393 connp->conn_flow_cntrld = B_FALSE; 6394 mutex_exit(&icmp->icmp_recv_lock); 6395 } 6396 6397 int 6398 rawip_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6399 int mode, int32_t *rvalp, cred_t *cr) 6400 { 6401 conn_t *connp = (conn_t *)proto_handle; 6402 int error; 6403 6404 /* All Solaris components should pass a cred for this operation. */ 6405 ASSERT(cr != NULL); 6406 6407 switch (cmd) { 6408 case ND_SET: 6409 case ND_GET: 6410 case _SIOCSOCKFALLBACK: 6411 case TI_GETPEERNAME: 6412 case TI_GETMYNAME: 6413 #ifdef DEBUG 6414 cmn_err(CE_CONT, "icmp_ioctl cmd 0x%x on non streams" 6415 " socket", cmd); 6416 #endif 6417 error = EINVAL; 6418 break; 6419 default: 6420 /* 6421 * Pass on to IP using helper stream 6422 */ 6423 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6424 cmd, arg, mode, cr, rvalp); 6425 break; 6426 } 6427 return (error); 6428 } 6429 6430 /* ARGSUSED */ 6431 int 6432 rawip_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 6433 cred_t *cr) 6434 { 6435 conn_t *connp = (conn_t *)proto_handle; 6436 icmp_t *icmp = connp->conn_icmp; 6437 icmp_stack_t *is = icmp->icmp_is; 6438 int error = 0; 6439 boolean_t bypass_dgram_errind = B_FALSE; 6440 6441 ASSERT(DB_TYPE(mp) == M_DATA); 6442 6443 /* All Solaris components should pass a cred for this operation. 
*/ 6444 ASSERT(cr != NULL); 6445 6446 /* If labeled then sockfs should have already set db_credp */ 6447 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 6448 6449 /* do an implicit bind if necessary */ 6450 if (icmp->icmp_state == TS_UNBND) { 6451 error = rawip_implicit_bind(connp); 6452 /* 6453 * We could be racing with an actual bind, in which case 6454 * we would see EPROTO. We cross our fingers and try 6455 * to connect. 6456 */ 6457 if (!(error == 0 || error == EPROTO)) { 6458 freemsg(mp); 6459 return (error); 6460 } 6461 } 6462 6463 rw_enter(&icmp->icmp_rwlock, RW_WRITER); 6464 6465 if (msg->msg_name != NULL && icmp->icmp_state == TS_DATA_XFER) { 6466 error = EISCONN; 6467 goto done_lock; 6468 } 6469 6470 switch (icmp->icmp_family) { 6471 case AF_INET6: { 6472 sin6_t *sin6; 6473 ip6_pkt_t ipp_s; /* For ancillary data options */ 6474 ip6_pkt_t *ipp = &ipp_s; 6475 6476 sin6 = (sin6_t *)msg->msg_name; 6477 if (sin6 != NULL) { 6478 error = proto_verify_ip_addr(icmp->icmp_family, 6479 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6480 if (error != 0) { 6481 bypass_dgram_errind = B_TRUE; 6482 goto done_lock; 6483 } 6484 if (icmp->icmp_delayed_error != 0) { 6485 sin6_t *sin1 = (sin6_t *)msg->msg_name; 6486 sin6_t *sin2 = (sin6_t *) 6487 &icmp->icmp_delayed_addr; 6488 6489 error = icmp->icmp_delayed_error; 6490 icmp->icmp_delayed_error = 0; 6491 6492 /* Compare IP address and port */ 6493 6494 if (sin1->sin6_port == sin2->sin6_port && 6495 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 6496 &sin2->sin6_addr)) { 6497 goto done_lock; 6498 } 6499 } 6500 } else { 6501 /* 6502 * Use connected address 6503 */ 6504 if (icmp->icmp_state != TS_DATA_XFER) { 6505 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6506 error = EDESTADDRREQ; 6507 bypass_dgram_errind = B_TRUE; 6508 goto done_lock; 6509 } 6510 sin6 = &icmp->icmp_v6dst; 6511 } 6512 6513 /* No support for mapped addresses on raw sockets */ 6514 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6515 
BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6516 error = EADDRNOTAVAIL; 6517 goto done_lock; 6518 } 6519 6520 ipp->ipp_fields = 0; 6521 ipp->ipp_sticky_ignored = 0; 6522 6523 /* 6524 * If options passed in, feed it for verification and handling 6525 */ 6526 if (msg->msg_controllen != 0) { 6527 error = process_auxiliary_options(connp, 6528 msg->msg_control, msg->msg_controllen, 6529 ipp, &icmp_opt_obj, icmp_opt_set, cr); 6530 if (error != 0) { 6531 goto done_lock; 6532 } 6533 } 6534 6535 rw_exit(&icmp->icmp_rwlock); 6536 6537 /* 6538 * Destination is a native IPv6 address. 6539 * Send out an IPv6 format packet. 6540 */ 6541 6542 error = raw_ip_send_data_v6(connp->conn_wq, connp, mp, sin6, 6543 ipp); 6544 } 6545 break; 6546 case AF_INET: { 6547 sin_t *sin; 6548 ip4_pkt_t pktinfo; 6549 ip4_pkt_t *pktinfop = &pktinfo; 6550 ipaddr_t v4dst; 6551 6552 sin = (sin_t *)msg->msg_name; 6553 if (sin != NULL) { 6554 error = proto_verify_ip_addr(icmp->icmp_family, 6555 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6556 if (error != 0) { 6557 bypass_dgram_errind = B_TRUE; 6558 goto done_lock; 6559 } 6560 v4dst = sin->sin_addr.s_addr; 6561 if (icmp->icmp_delayed_error != 0) { 6562 sin_t *sin1 = (sin_t *)msg->msg_name; 6563 sin_t *sin2 = (sin_t *)&icmp->icmp_delayed_addr; 6564 6565 error = icmp->icmp_delayed_error; 6566 icmp->icmp_delayed_error = 0; 6567 6568 /* Compare IP address and port */ 6569 if (sin1->sin_port == sin2->sin_port && 6570 sin1->sin_addr.s_addr == 6571 sin2->sin_addr.s_addr) { 6572 goto done_lock; 6573 } 6574 6575 } 6576 } else { 6577 /* 6578 * Use connected address 6579 */ 6580 if (icmp->icmp_state != TS_DATA_XFER) { 6581 BUMP_MIB(&is->is_rawip_mib, rawipOutErrors); 6582 error = EDESTADDRREQ; 6583 bypass_dgram_errind = B_TRUE; 6584 goto done_lock; 6585 } 6586 v4dst = V4_PART_OF_V6(icmp->icmp_v6dst.sin6_addr); 6587 } 6588 6589 6590 pktinfop->ip4_ill_index = 0; 6591 pktinfop->ip4_addr = INADDR_ANY; 6592 6593 /* 6594 * If options passed in, feed it for 
verification and handling 6595 */ 6596 if (msg->msg_controllen != 0) { 6597 error = process_auxiliary_options(connp, 6598 msg->msg_control, msg->msg_controllen, 6599 pktinfop, &icmp_opt_obj, icmp_opt_set, cr); 6600 if (error != 0) { 6601 goto done_lock; 6602 } 6603 } 6604 rw_exit(&icmp->icmp_rwlock); 6605 6606 error = raw_ip_send_data_v4(connp->conn_wq, connp, mp, 6607 v4dst, pktinfop); 6608 break; 6609 } 6610 6611 default: 6612 ASSERT(0); 6613 } 6614 6615 goto done; 6616 6617 done_lock: 6618 rw_exit(&icmp->icmp_rwlock); 6619 if (error != 0) { 6620 ASSERT(mp != NULL); 6621 freemsg(mp); 6622 } 6623 done: 6624 if (bypass_dgram_errind) 6625 return (error); 6626 return (icmp->icmp_dgram_errind ? error : 0); 6627 } 6628 6629 sock_downcalls_t sock_rawip_downcalls = { 6630 rawip_activate, 6631 rawip_accept, 6632 rawip_bind, 6633 rawip_listen, 6634 rawip_connect, 6635 rawip_getpeername, 6636 rawip_getsockname, 6637 rawip_getsockopt, 6638 rawip_setsockopt, 6639 rawip_send, 6640 NULL, 6641 NULL, 6642 NULL, 6643 rawip_shutdown, 6644 rawip_clr_flowctrl, 6645 rawip_ioctl, 6646 rawip_close 6647 }; 6648