/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/* Copyright (c) 1990 Mentat Inc. */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/pattr.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#include <sys/time.h>
#define	_SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/timod.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsubr.h>
#include <sys/suntpi.h>
#include <sys/xti_inet.h>
#include <sys/kmem.h>
#include <sys/policy.h>
#include <sys/ucred.h>
#include <sys/zone.h>

#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/vtrace.h>
#include <sys/sdt.h>
#include <sys/debug.h>
#include <sys/isa_defs.h>
#include <sys/random.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>
#include <net/if.h>
#include <net/route.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <inet/ip6.h>
#include <inet/ip_ire.h>
#include <inet/ip_if.h>
#include <inet/ip_multi.h>
#include <inet/ip_ndp.h>
#include <inet/proto_set.h>
#include <inet/mib2.h>
#include <inet/nd.h>
#include <inet/optcom.h>
#include <inet/snmpcom.h>
#include <inet/kstatcom.h>
#include <inet/udp_impl.h>
#include <inet/ipclassifier.h>
#include <inet/ipsec_impl.h>
#include <inet/ipp_common.h>
#include <sys/squeue_impl.h>
#include <inet/ipnet.h>
#include <sys/ethernet.h>

/*
 * The ipsec_info.h header file is here since it has the definition for the
 * M_CTL message types used by IP to convey information to the ULP. The
 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence.
 */
#include <net/pfkeyv2.h>
#include <inet/ipsec_info.h>

#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>
#include <rpc/pmap_prot.h>

/*
 * Synchronization notes:
 *
 * UDP is MT and uses the usual kernel synchronization primitives. There are 2
 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock.
 * We also use conn_lock when updating things that affect the IP classifier
 * lookup.
 * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock.
 *
 * The fanout lock uf_lock:
 * When a UDP endpoint is bound to a local port, it is inserted into
 * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
 * The size of the array is controlled by the udp_bind_fanout_size variable.
 * This variable can be changed in /etc/system if the default value is
 * not large enough.  Each bind hash bucket is protected by a per bucket
 * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
 * structure and a few other fields in the udp_t. A UDP endpoint is removed
 * from the bind hash list only when it is being unbound or being closed.
 * The per bucket lock also protects a UDP endpoint's state changes.
 *
 * The udp_rwlock:
 * This protects most of the other fields in the udp_t. The exact list of
 * fields which are protected by each of the above locks is documented in
 * the udp_t structure definition.
 *
 * Plumbing notes:
 * UDP is always a device driver. For compatibility with mibopen() code
 * it is possible to I_PUSH "udp", but that results in pushing a passthrough
 * dummy module.
 *
 * The above implies that we don't support any intermediate module to
 * reside in between /dev/ip and udp -- in fact, we never supported such
 * a scenario in the past as the inter-layer communication semantics have
 * always been private.
 */
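
/*
 * As an illustration of the lock order documented above, a thread that
 * needs both the endpoint lock and a fanout bucket lock takes udp_rwlock
 * first:
 *
 *	rw_enter(&udp->udp_rwlock, RW_WRITER);
 *	mutex_enter(&udpf->uf_lock);
 *	...	update udp_state and the bind hash fields	...
 *	mutex_exit(&udpf->uf_lock);
 *	rw_exit(&udp->udp_rwlock);
 *
 * udp_do_disconnect() below follows this pattern.
 */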

/* For /etc/system control */
uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;

#define	NDD_TOO_QUICK_MSG \
	"ndd get info rate too high for non-privileged users, try again " \
	"later.\n"
#define	NDD_OUT_OF_BUF_MSG	"<< Out of buffer >>\n"

/* Option processing attrs */
typedef struct udpattrs_s {
	union {
		ip6_pkt_t	*udpattr_ipp6;	/* For V6 */
		ip4_pkt_t	*udpattr_ipp4;	/* For V4 */
	} udpattr_ippu;
#define	udpattr_ipp6	udpattr_ippu.udpattr_ipp6
#define	udpattr_ipp4	udpattr_ippu.udpattr_ipp4
	mblk_t		*udpattr_mb;
	boolean_t	udpattr_credset;
} udpattrs_t;

static void	udp_addr_req(queue_t *q, mblk_t *mp);
static void	udp_tpi_bind(queue_t *q, mblk_t *mp);
static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
static int	udp_build_hdrs(udp_t *udp);
static void	udp_capability_req(queue_t *q, mblk_t *mp);
static int	udp_tpi_close(queue_t *q, int flags);
static void	udp_tpi_connect(queue_t *q, mblk_t *mp);
static void	udp_tpi_disconnect(queue_t *q, mblk_t *mp);
static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
		    t_scalar_t tlierr, int unixerr);
static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static void	udp_icmp_error(conn_t *, mblk_t *);
static void	udp_icmp_error_ipv6(conn_t *, mblk_t *);
static void	udp_info_req(queue_t *q, mblk_t *mp);
static void	udp_input(void *, mblk_t *, void *);
static mblk_t	*udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim,
		    t_scalar_t addr_length);
static void	udp_lrput(queue_t *, mblk_t *);
static void	udp_lwput(queue_t *, mblk_t *);
static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp, boolean_t isv6);
static int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	udp_unitdata_opt_process(queue_t *q, mblk_t *mp,
		    int *errorp, udpattrs_t *udpattrs);
static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
static int	udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt);
static int	udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
		    cred_t *cr);
static void	udp_report_item(mblk_t *mp, udp_t *udp);
static int	udp_rinfop(queue_t *q, infod_t *dp);
static int	udp_rrw(queue_t *q, struiod_t *dp);
static int	udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static void	udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp,
		    ipha_t *ipha);
static void	udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
		    t_scalar_t destlen, t_scalar_t err);
static void	udp_tpi_unbind(queue_t *q, mblk_t *mp);
static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
    boolean_t random);
static mblk_t	*udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t,
		    int *, boolean_t, struct nmsghdr *, cred_t *, pid_t);
static mblk_t	*udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
		    int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid);
static void	udp_wput_other(queue_t *q, mblk_t *mp);
static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
static void	udp_wput_fallback(queue_t *q, mblk_t *mp);
static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);

static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
static void	udp_stack_fini(netstackid_t stackid, void *arg);

static void	*udp_kstat_init(netstackid_t stackid);
static void	udp_kstat_fini(netstackid_t stackid, kstat_t *ksp);
static void	*udp_kstat2_init(netstackid_t, udp_stat_t *);
static void	udp_kstat2_fini(netstackid_t, kstat_t *);
static int	udp_kstat_update(kstat_t *kp, int rw);

static void	udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp,
		    uint_t pkt_len);
static void	udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing);
static void	udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t);

static int	udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *,
		    cred_t *, pid_t);

/* Common routine for TPI and socket module */
static conn_t	*udp_do_open(cred_t *, boolean_t, int);
static void	udp_do_close(conn_t *);
static int	udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
    boolean_t);
static int	udp_do_unbind(conn_t *);
static int	udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *);
static int	udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *);

int		udp_getsockname(sock_lower_handle_t,
		    struct sockaddr *, socklen_t *, cred_t *);
int		udp_getpeername(sock_lower_handle_t,
		    struct sockaddr *, socklen_t *, cred_t *);
static int	udp_do_connect(conn_t *, const struct sockaddr *, socklen_t);
static int	udp_post_ip_bind_connect(udp_t *, mblk_t *, int);

#define	UDP_RECV_HIWATER	(56 * 1024)
#define	UDP_RECV_LOWATER	128
#define	UDP_XMIT_HIWATER	(56 * 1024)
#define	UDP_XMIT_LOWATER	1024

/*
 * The following is defined in tcp.c
 */
extern int	(*cl_inet_connect2)(netstackid_t stack_id,
		    uint8_t protocol, boolean_t is_outgoing,
		    sa_family_t addr_family,
		    uint8_t *laddrp, in_port_t lport,
		    uint8_t *faddrp, in_port_t fport, void *args);

/*
 * Checks if the given destination addr/port is allowed out.
 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
 * Called for each connect() and for sendto()/sendmsg() to a different
 * destination.
 * For connect(), called in udp_connect().
 * For sendto()/sendmsg(), called in udp_output_v{4,6}().
 *
 * This macro assumes that the cl_inet_connect2 hook is not NULL.
 * Please check this before calling this macro.
 *
 * void
 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing,
 *     in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
 */
#define	CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) {	\
	(err) = 0;							\
	/*								\
	 * Running in cluster mode - check and register active		\
	 * "connection" information					\
	 */								\
	if ((udp)->udp_ipversion == IPV4_VERSION)			\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, is_outgoing, AF_INET,			\
		    (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]),	\
		    (udp)->udp_port,					\
		    (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]),		\
		    (in_port_t)(fport), NULL);				\
	else								\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, is_outgoing, AF_INET6,			\
		    (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port,	\
		    (uint8_t *)(faddrp), (in_port_t)(fport), NULL);	\
}
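
/*
 * Illustrative use of the macro above (the hook must be verified first,
 * as the comment notes; v6dst and dstport are placeholders for the
 * destination address and port in the caller):
 *
 *	if (cl_inet_connect2 != NULL) {
 *		CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport,
 *		    error);
 *		if (error != 0)
 *			return (error);
 *	}
 */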

static struct module_info udp_mod_info =  {
	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};

/*
 * Entry points for UDP as a device.
 * We have separate open functions for the /dev/udp and /dev/udp6 devices.
 */
static struct qinit udp_rinitv4 = {
	NULL, NULL, udp_openv4, udp_tpi_close, NULL,
	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

static struct qinit udp_rinitv6 = {
	NULL, NULL, udp_openv6, udp_tpi_close, NULL,
	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

static struct qinit udp_winit = {
	(pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL,
	&udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE
};

/* UDP entry point during fallback */
struct qinit udp_fallback_sock_winit = {
	(pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
};

/*
 * UDP needs to handle I_LINK and I_PLINK since ifconfig
 * likes to use it as a place to hang the various streams.
 */
static struct qinit udp_lrinit = {
	(pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL,
	&udp_mod_info
};

static struct qinit udp_lwinit = {
	(pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL,
	&udp_mod_info
};

/* For AF_INET aka /dev/udp */
struct streamtab udpinfov4 = {
	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
};

/* For AF_INET6 aka /dev/udp6 */
struct streamtab udpinfov6 = {
	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
};

static	sin_t	sin_null;	/* Zero address for quick clears */
static	sin6_t	sin6_null;	/* Zero address for quick clears */

#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSU_size. udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type. udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state. This is set from udp_state. */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

static struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSU_size. udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type. udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state. This is set from udp_state. */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp.  These are loaded into us_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
	/*min	max		value		name */
	{ 0L,	256,		32,		"udp_wroff_extra" },
	{ 1L,	255,		255,		"udp_ipv4_ttl" },
	{ 0,	IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
	{ 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
	{ 0,	1,		1,		"udp_do_checksum" },
	{ 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
	{ 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
	{ UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER,	"udp_xmit_hiwat"},
	{ 0,	(1<<30),	UDP_XMIT_LOWATER,	"udp_xmit_lowat"},
	{ UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER,	"udp_recv_hiwat"},
	{ 65536, (1<<30),	2*1024*1024,	"udp_max_buf"},
	{ 100,	60000,		1000,		"udp_ndd_get_info_interval"},
};
/* END CSTYLED */

/* Setable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL
 */

void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;
void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;

typedef union T_primitives *t_primp_t;

/*
 * Return the next anonymous port in the privileged port range for
 * bind checking.
 *
 * Trusted Extension (TX) notes: TX allows administrator to mark or
 * reserve ports as Multilevel ports (MLP). MLP has special function
 * on TX systems. Once a port is made MLP, it's not available as an
 * ordinary port. This creates "holes" in the port name space. It
 * may be necessary to skip the "holes" to find a suitable anon port.
 */
static in_port_t
udp_get_next_priv_port(udp_t *udp)
{
	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
	in_port_t nextport;
	boolean_t restart = B_FALSE;
	udp_stack_t *us = udp->udp_us;

retry:
	if (next_priv_port < us->us_min_anonpriv_port ||
	    next_priv_port >= IPPORT_RESERVED) {
		next_priv_port = IPPORT_RESERVED - 1;
		if (restart)
			return (0);
		restart = B_TRUE;
	}

	if (is_system_labeled() &&
	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
		next_priv_port = nextport;
		goto retry;
	}

	return (next_priv_port--);
}

/* UDP bind hash report triggered via the Named Dispatch mechanism. */
/* ARGSUSED */
static int
udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	udp_fanout_t	*udpf;
	int		i;
	zoneid_t	zoneid;
	conn_t		*connp;
	udp_t		*udp;
	udp_stack_t	*us;

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	us = udp->udp_us;

	/* Refer to comments in udp_status_report(). */
	if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) {
		if (ddi_get_lbolt() - us->us_last_ndd_get_info_time <
		    drv_usectohz(us->us_ndd_get_info_interval * 1000)) {
			(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
			return (0);
		}
	}
	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
		/* The following may work even if we cannot get a large buf. */
		(void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG);
		return (0);
	}

	(void) mi_mpprintf(mp,
	    "UDP " MI_COL_HDRPAD_STR
	/*   12345678[89ABCDEF] */
	    " zone lport src addr dest addr port state");
	/*   1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */

	zoneid = connp->conn_zoneid;

	for (i = 0; i < us->us_bind_fanout_size; i++) {
		udpf = &us->us_bind_fanout[i];
		mutex_enter(&udpf->uf_lock);

		/* Print the hash index. */
		udp = udpf->uf_udp;
		if (zoneid != GLOBAL_ZONEID) {
			/* skip to first entry in this zone; might be none */
			while (udp != NULL &&
			    udp->udp_connp->conn_zoneid != zoneid)
				udp = udp->udp_bind_hash;
		}
		if (udp != NULL) {
			uint_t print_len, buf_len;

			buf_len = mp->b_cont->b_datap->db_lim -
			    mp->b_cont->b_wptr;
			print_len = snprintf((char *)mp->b_cont->b_wptr,
			    buf_len, "%d\n", i);
			if (print_len < buf_len) {
				mp->b_cont->b_wptr += print_len;
			} else {
				mp->b_cont->b_wptr += buf_len;
			}
			for (; udp != NULL; udp = udp->udp_bind_hash) {
				if (zoneid == GLOBAL_ZONEID ||
				    zoneid == udp->udp_connp->conn_zoneid)
					udp_report_item(mp->b_cont, udp);
			}
		}
		mutex_exit(&udpf->uf_lock);
	}
	us->us_last_ndd_get_info_time = ddi_get_lbolt();
	return (0);
}

/*
 * Hash list removal routine for udp_t structures.
 */
static void
udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
{
	udp_t	*udpnext;
	kmutex_t *lockp;
	udp_stack_t *us = udp->udp_us;

	if (udp->udp_ptpbhn == NULL)
		return;

	/*
	 * Extract the lock pointer in case there are concurrent
	 * hash_remove's for this instance.
	 */
	ASSERT(udp->udp_port != 0);
	if (!caller_holds_lock) {
		lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
		    us->us_bind_fanout_size)].uf_lock;
		ASSERT(lockp != NULL);
		mutex_enter(lockp);
	}
	if (udp->udp_ptpbhn != NULL) {
		udpnext = udp->udp_bind_hash;
		if (udpnext != NULL) {
			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
			udp->udp_bind_hash = NULL;
		}
		*udp->udp_ptpbhn = udpnext;
		udp->udp_ptpbhn = NULL;
	}
	if (!caller_holds_lock) {
		mutex_exit(lockp);
	}
}

static void
udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
{
	udp_t	**udpp;
	udp_t	*udpnext;

	ASSERT(MUTEX_HELD(&uf->uf_lock));
	ASSERT(udp->udp_ptpbhn == NULL);
	udpp = &uf->uf_udp;
	udpnext = udpp[0];
	if (udpnext != NULL) {
		/*
		 * If the new udp is bound to the INADDR_ANY address
		 * and the first one in the list is not bound to
		 * INADDR_ANY we skip all entries until we find the
		 * first one bound to INADDR_ANY.
		 * This makes sure that applications binding to a
		 * specific address get preference over those binding to
		 * INADDR_ANY.
		 */
		if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) &&
		    !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) {
			while ((udpnext = udpp[0]) != NULL &&
			    !V6_OR_V4_INADDR_ANY(
			    udpnext->udp_bound_v6src)) {
				udpp = &(udpnext->udp_bind_hash);
			}
			if (udpnext != NULL)
				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
		} else {
			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
		}
	}
	udp->udp_bind_hash = udpnext;
	udp->udp_ptpbhn = udpp;
	udpp[0] = udp;
}
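
/*
 * To illustrate the insertion rule above: after binding A to 10.0.0.1:53,
 * then B to INADDR_ANY:53, then C to 10.0.0.2:53, the bucket reads
 *
 *	uf_udp -> C -> A -> B -> NULL
 *
 * i.e. endpoints bound to a specific address always precede those bound
 * to INADDR_ANY (the addresses and port here are only an example).
 */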

/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to udp_wput.
 * It associates a port number and local address with the stream.
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
 * protocol type (IPPROTO_UDP) placed in the message following the address.
 * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
 * (Called as writer.)
 *
 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
 * without setting SO_REUSEADDR. This is needed so that they
 * can be viewed as two independent transport protocols.
 * However, anonymous ports are allocated from the same range to avoid
 * duplicating the us->us_next_port_to_try.
 */
static void
udp_tpi_bind(queue_t *q, mblk_t *mp)
{
	sin_t	*sin;
	sin6_t	*sin6;
	mblk_t	*mp1;
	struct T_bind_req *tbr;
	conn_t	*connp;
	udp_t	*udp;
	int	error;
	struct sockaddr	*sa;

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad req, len %u",
		    (uint_t)(mp->b_wptr - mp->b_rptr));
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}
	if (udp->udp_state != TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	/*
	 * Reallocate the message to make sure we have enough room for an
	 * address and the protocol type.
	 */
	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
	if (!mp1) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	mp = mp1;

	/* Reset the message type in preparation for shipping it back. */
	DB_TYPE(mp) = M_PCPROTO;

	tbr = (struct T_bind_req *)mp->b_rptr;
	switch (tbr->ADDR_length) {
	case 0:			/* Request for a generic port */
		tbr->ADDR_offset = sizeof (struct T_bind_req);
		if (udp->udp_family == AF_INET) {
			tbr->ADDR_length = sizeof (sin_t);
			sin = (sin_t *)&tbr[1];
			*sin = sin_null;
			sin->sin_family = AF_INET;
			mp->b_wptr = (uchar_t *)&sin[1];
			sa = (struct sockaddr *)sin;
		} else {
			ASSERT(udp->udp_family == AF_INET6);
			tbr->ADDR_length = sizeof (sin6_t);
			sin6 = (sin6_t *)&tbr[1];
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			mp->b_wptr = (uchar_t *)&sin6[1];
			sa = (struct sockaddr *)sin6;
		}
		break;

	case sizeof (sin_t):	/* Complete IPv4 address */
		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin_t));
		if (sa == NULL || !OK_32PTR((char *)sa)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sa->sa_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		break;

	case sizeof (sin6_t):	/* complete IPv6 address */
		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin6_t));
		if (sa == NULL || !OK_32PTR((char *)sa)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sa->sa_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		break;

	default:		/* Invalid request */
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad ADDR_length %u", tbr->ADDR_length);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);
	error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
	    tbr->PRIM_type != O_T_BIND_REQ);

	if (error != 0) {
		if (error > 0) {
			udp_err_ack(q, mp, TSYSERR, error);
		} else {
			udp_err_ack(q, mp, -error, 0);
		}
	} else {
		tbr->PRIM_type = T_BIND_ACK;
		qreply(q, mp);
	}
}

/*
 * This routine handles each T_CONN_REQ message passed to udp.  It
 * associates a default destination address with the stream.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *	T_BIND_REQ	- specifying local and remote address/port
 *	IRE_DB_REQ_TYPE	- to get an IRE back containing ire_type and src
 *	T_OK_ACK	- for the T_CONN_REQ
 *	T_CONN_CON	- to keep the TPI user happy
 *
 * The connect completes in udp_do_connect.
 * When a T_BIND_ACK is received information is extracted from the IRE
 * and the two appended messages are sent to the TPI user.
 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
 * convert it to an error ack for the appropriate primitive.
 */
static void
udp_tpi_connect(queue_t *q, mblk_t *mp)
{
	mblk_t	*mp1;
	udp_t	*udp;
	conn_t	*connp = Q_TO_CONN(q);
	int	error;
	socklen_t	len;
	struct sockaddr	*sa;
	struct T_conn_req	*tcr;

	udp = connp->conn_udp;
	tcr = (struct T_conn_req *)mp->b_rptr;

	/* A bit of sanity checking */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}

	if (tcr->OPT_length != 0) {
		udp_err_ack(q, mp, TBADOPT, 0);
		return;
	}

	/*
	 * Determine the packet type based on the type of address passed in;
	 * the request should contain an IPv4 or IPv6 address.
	 * Make sure that the address family matches the family of
	 * the address passed down.
	 */
	len = tcr->DEST_length;
	switch (tcr->DEST_length) {
	default:
		udp_err_ack(q, mp, TBADADDR, 0);
		return;

	case sizeof (sin_t):
		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin_t));
		break;

	case sizeof (sin6_t):
		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin6_t));
		break;
	}

	error = proto_verify_ip_addr(udp->udp_family, sa, len);
	if (error != 0) {
		udp_err_ack(q, mp, TSYSERR, error);
		return;
	}

	/*
	 * We have to send a connection confirmation to
	 * keep TLI happy.
	 */
	if (udp->udp_family == AF_INET) {
		mp1 = mi_tpi_conn_con(NULL, (char *)sa,
		    sizeof (sin_t), NULL, 0);
	} else {
		mp1 = mi_tpi_conn_con(NULL, (char *)sa,
		    sizeof (sin6_t), NULL, 0);
	}
	if (mp1 == NULL) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	/*
	 * ok_ack for T_CONN_REQ
	 */
	mp = mi_tpi_ok_ack_alloc(mp);
	if (mp == NULL) {
		/* Unable to reuse the T_CONN_REQ for the ack. */
		freemsg(mp1);
		udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
		return;
	}

	error = udp_do_connect(connp, sa, len);
	if (error != 0) {
		freeb(mp1);
		if (error < 0)
			udp_err_ack(q, mp, -error, 0);
		else
			udp_err_ack(q, mp, TSYSERR, error);
	} else {
		putnext(connp->conn_rq, mp);
		putnext(connp->conn_rq, mp1);
	}
}
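
/*
 * Sketch of the exchange seen by a TPI user for the routine above (the
 * success path simply sends both prepared messages upstream):
 *
 *	T_CONN_REQ (sin/sin6 destination)	-> udp_tpi_connect()
 *						<- T_OK_ACK
 *						<- T_CONN_CON
 *
 * On failure only a T_ERROR_ACK is generated, carrying either the TPI
 * error (negative return) or TSYSERR plus the errno (positive return).
 */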

static int
udp_tpi_close(queue_t *q, int flags)
{
	conn_t	*connp;

	if (flags & SO_FALLBACK) {
		/*
		 * stream is being closed while in fallback
		 * simply free the resources that were allocated
		 */
		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
		qprocsoff(q);
		goto done;
	}

	connp = Q_TO_CONN(q);
	udp_do_close(connp);
done:
	q->q_ptr = WR(q)->q_ptr = NULL;
	return (0);
}

/*
 * Called in the close path to quiesce the conn
 */
void
udp_quiesce_conn(conn_t *connp)
{
	udp_t	*udp = connp->conn_udp;

	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
		/*
		 * Running in cluster mode - register unbind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_unbind)(
			    connp->conn_netstack->netstack_stackid,
			    IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))),
			    (in_port_t)udp->udp_port, NULL);
		} else {
			(*cl_inet_unbind)(
			    connp->conn_netstack->netstack_stackid,
			    IPPROTO_UDP, AF_INET6,
			    (uint8_t *)(&(udp->udp_v6src)),
			    (in_port_t)udp->udp_port, NULL);
		}
	}

	udp_bind_hash_remove(udp, B_FALSE);
}

void
udp_close_free(conn_t *connp)
{
	udp_t *udp = connp->conn_udp;

	/* If there are any options associated with the stream, free them. */
	if (udp->udp_ip_snd_options != NULL) {
		mi_free((char *)udp->udp_ip_snd_options);
		udp->udp_ip_snd_options = NULL;
		udp->udp_ip_snd_options_len = 0;
	}

	if (udp->udp_ip_rcv_options != NULL) {
		mi_free((char *)udp->udp_ip_rcv_options);
		udp->udp_ip_rcv_options = NULL;
		udp->udp_ip_rcv_options_len = 0;
	}

	/* Free memory associated with sticky options */
	if (udp->udp_sticky_hdrs_len != 0) {
		kmem_free(udp->udp_sticky_hdrs,
		    udp->udp_sticky_hdrs_len);
		udp->udp_sticky_hdrs = NULL;
		udp->udp_sticky_hdrs_len = 0;
	}

	ip6_pkt_free(&udp->udp_sticky_ipp);

	/*
	 * Clear any fields which the kmem_cache constructor clears.
	 * Only udp_connp needs to be preserved.
	 * TBD: We should make this more efficient to avoid clearing
	 * everything.
	 */
	ASSERT(udp->udp_connp == connp);
	bzero(udp, sizeof (udp_t));
	udp->udp_connp = connp;
}

static int
udp_do_disconnect(conn_t *connp)
{
	udp_t	*udp;
	mblk_t	*ire_mp;
	udp_fanout_t *udpf;
	udp_stack_t *us;
	int	error;

	udp = connp->conn_udp;
	us = udp->udp_us;
	rw_enter(&udp->udp_rwlock, RW_WRITER);
	if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) {
		rw_exit(&udp->udp_rwlock);
		return (-TOUTSTATE);
	}
	udp->udp_pending_op = T_DISCON_REQ;
	udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
	    us->us_bind_fanout_size)];
	mutex_enter(&udpf->uf_lock);
	udp->udp_v6src = udp->udp_bound_v6src;
	udp->udp_state = TS_IDLE;
	mutex_exit(&udpf->uf_lock);

	if (udp->udp_family == AF_INET6) {
		/* Rebuild the header template */
		error = udp_build_hdrs(udp);
		if (error != 0) {
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			return (error);
		}
	}

	ire_mp = allocb(sizeof (ire_t), BPRI_HI);
	if (ire_mp == NULL) {
		mutex_enter(&udpf->uf_lock);
		udp->udp_pending_op = -1;
		mutex_exit(&udpf->uf_lock);
		rw_exit(&udp->udp_rwlock);
		return (ENOMEM);
	}

	rw_exit(&udp->udp_rwlock);

	if (udp->udp_family == AF_INET6) {
		error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP,
		    &udp->udp_bound_v6src, udp->udp_port, B_TRUE);
	} else {
		error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP,
		    V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port,
		    B_TRUE);
	}

	return (udp_post_ip_bind_connect(udp, ire_mp, error));
}
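
/*
 * Note on the convention used by udp_do_disconnect() and the other
 * udp_do_* helpers shared by the TPI and socket paths: a positive return
 * value is a UNIX errno, a negative one is a TPI error code.  A caller
 * typically unpacks it along these lines:
 *
 *	error = udp_do_disconnect(connp);
 *	if (error < 0)
 *		udp_err_ack(q, mp, -error, 0);
 *	else if (error > 0)
 *		udp_err_ack(q, mp, TSYSERR, error);
 *
 * The socket path instead maps negative values with proto_tlitosyserr(),
 * as udp_disconnect() below does.
 */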

static void
udp_tpi_disconnect(queue_t *q, mblk_t *mp)
{
	conn_t *connp = Q_TO_CONN(q);
	int	error;

	/*
	 * Allocate the largest primitive we need to send back
	 * T_error_ack is > than T_ok_ack
	 */
	mp = reallocb(mp, sizeof (struct T_error_ack), 1);
	if (mp == NULL) {
		/* Unable to reuse the T_DISCON_REQ for the ack. */
		udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
		return;
	}

	error = udp_do_disconnect(connp);

	if (error != 0) {
		if (error < 0) {
			udp_err_ack(q, mp, -error, 0);
		} else {
			udp_err_ack(q, mp, TSYSERR, error);
		}
	} else {
		mp = mi_tpi_ok_ack_alloc(mp);
		ASSERT(mp != NULL);
		qreply(q, mp);
	}
}

int
udp_disconnect(conn_t *connp)
{
	int	error;
	udp_t	*udp = connp->conn_udp;

	udp->udp_dgram_errind = B_FALSE;

	error = udp_do_disconnect(connp);

	if (error < 0)
		error = proto_tlitosyserr(-error);

	return (error);
}

/* This routine creates a T_ERROR_ACK message and passes it upstream. */
static void
udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
{
	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
		qreply(q, mp);
}

/* Shorthand to generate and send TPI error acks to our client */
static void
udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error,
    int sys_error)
{
	struct T_error_ack	*teackp;

	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
		teackp = (struct T_error_ack *)mp->b_rptr;
		teackp->ERROR_prim = primitive;
		teackp->TLI_error = t_error;
		teackp->UNIX_error = sys_error;
		qreply(q, mp);
	}
}

/*ARGSUSED*/
static int
udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	int	i;
	udp_t	*udp = Q_TO_UDP(q);
	udp_stack_t *us = udp->udp_us;

	for (i = 0; i < us->us_num_epriv_ports; i++) {
		if (us->us_epriv_ports[i] != 0)
			(void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]);
	}
	return (0);
}

/* ARGSUSED */
static int
udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
    cred_t *cr)
{
	long	new_value;
	int	i;
	udp_t	*udp = Q_TO_UDP(q);
	udp_stack_t *us = udp->udp_us;

	/*
	 * Fail the request if the new value does not lie within the
	 * port number limits.
	 */
	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
	    new_value <= 0 || new_value >= 65536) {
		return (EINVAL);
	}

	/* Check if the value is already in the list */
	for (i = 0; i < us->us_num_epriv_ports; i++) {
		if (new_value == us->us_epriv_ports[i]) {
			return (EEXIST);
		}
	}
	/* Find an empty slot */
	for (i = 0; i < us->us_num_epriv_ports; i++) {
		if (us->us_epriv_ports[i] == 0)
			break;
	}
	if (i == us->us_num_epriv_ports) {
		return (EOVERFLOW);
	}

	/* Set the new value */
	us->us_epriv_ports[i] = (in_port_t)new_value;
	return (0);
}

/* ARGSUSED */
static int
udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
    cred_t *cr)
{
	long	new_value;
	int	i;
	udp_t	*udp = Q_TO_UDP(q);
	udp_stack_t *us = udp->udp_us;

	/*
	 * Fail the request if the new value does not lie within the
	 * port number limits.
	 */
	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
	    new_value <= 0 || new_value >= 65536) {
		return (EINVAL);
	}

	/* Check that the value is already in the list */
	for (i = 0; i < us->us_num_epriv_ports; i++) {
		if (us->us_epriv_ports[i] == new_value)
			break;
	}
	if (i == us->us_num_epriv_ports) {
		return (ESRCH);
	}

	/* Clear the value */
	us->us_epriv_ports[i] = 0;
	return (0);
}

/* At minimum we need 4 bytes of UDP header */
#define	ICMP_MIN_UDP_HDR	4

/*
 * udp_icmp_error is called by udp_input to process ICMP messages passed
 * up by IP.  Generates the appropriate T_UDERROR_IND for permanent
 * (non-transient) errors.
 * Assumes that IP has pulled up everything up to and including the ICMP
 * header.
 */
static void
udp_icmp_error(conn_t *connp, mblk_t *mp)
{
	icmph_t *icmph;
	ipha_t	*ipha;
	int	iph_hdr_length;
	udpha_t	*udpha;
	sin_t	sin;
	sin6_t	sin6;
	mblk_t	*mp1;
	int	error = 0;
	udp_t	*udp = connp->conn_udp;

	mp1 = NULL;
	ipha = (ipha_t *)mp->b_rptr;

	ASSERT(OK_32PTR(mp->b_rptr));

	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
		udp_icmp_error_ipv6(connp, mp);
		return;
	}
	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);

	/* Skip past the outer IP and ICMP headers */
	iph_hdr_length = IPH_HDR_LENGTH(ipha);
	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
	ipha = (ipha_t *)&icmph[1];

	/* Skip past the inner IP and find the ULP header */
	iph_hdr_length = IPH_HDR_LENGTH(ipha);
	udpha = (udpha_t *)((char *)ipha + iph_hdr_length);

	switch (icmph->icmph_type) {
	case ICMP_DEST_UNREACHABLE:
		switch (icmph->icmph_code) {
		case ICMP_FRAGMENTATION_NEEDED:
			/*
			 * IP has already adjusted the path MTU.
			 */
			break;
		case ICMP_PORT_UNREACHABLE:
		case ICMP_PROTOCOL_UNREACHABLE:
			error = ECONNREFUSED;
			break;
		default:
			/* Transient errors */
			break;
		}
		break;
	default:
		/* Transient errors */
		break;
	}
	if (error == 0) {
		freemsg(mp);
		return;
	}

	/*
	 * Deliver T_UDERROR_IND when the application has asked for it.
	 * The socket layer enables this automatically when connected.
	 */
	if (!udp->udp_dgram_errind) {
		freemsg(mp);
		return;
	}

	switch (udp->udp_family) {
	case AF_INET:
		sin = sin_null;
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = ipha->ipha_dst;
		sin.sin_port = udpha->uha_dst_port;
		if (IPCL_IS_NONSTR(connp)) {
			rw_enter(&udp->udp_rwlock, RW_WRITER);
			if (udp->udp_state == TS_DATA_XFER) {
				if (sin.sin_port == udp->udp_dstport &&
				    sin.sin_addr.s_addr ==
				    V4_PART_OF_V6(udp->udp_v6dst)) {

					rw_exit(&udp->udp_rwlock);
					(*connp->conn_upcalls->su_set_error)
					    (connp->conn_upper_handle, error);
					goto done;
				}
			} else {
				udp->udp_delayed_error = error;
				*((sin_t *)&udp->udp_delayed_addr) = sin;
			}
			rw_exit(&udp->udp_rwlock);
		} else {
			mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
			    NULL, 0, error);
		}
		break;
	case AF_INET6:
		sin6 = sin6_null;
		sin6.sin6_family = AF_INET6;
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
		sin6.sin6_port = udpha->uha_dst_port;
		if (IPCL_IS_NONSTR(connp)) {
			rw_enter(&udp->udp_rwlock, RW_WRITER);
			if (udp->udp_state == TS_DATA_XFER) {
				if (sin6.sin6_port == udp->udp_dstport &&
				    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
				    &udp->udp_v6dst)) {
					rw_exit(&udp->udp_rwlock);
					(*connp->conn_upcalls->su_set_error)
					    (connp->conn_upper_handle, error);
					goto done;
				}
			} else {
				udp->udp_delayed_error = error;
				*((sin6_t *)&udp->udp_delayed_addr) = sin6;
			}
			rw_exit(&udp->udp_rwlock);
		} else {

			mp1 = mi_tpi_uderror_ind((char *)&sin6,
			    sizeof (sin6_t), NULL, 0, error);
		}
		break;
	}
	if (mp1 != NULL)
		putnext(connp->conn_rq, mp1);
done:
	freemsg(mp);
}

/*
 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
 * Generates the appropriate T_UDERROR_IND for permanent (non-transient)
 * errors.
 * Assumes that IP has pulled up all the extension headers as well as the
 * ICMPv6 header.
 */
static void
udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp)
{
	icmp6_t		*icmp6;
	ip6_t		*ip6h, *outer_ip6h;
	uint16_t	iph_hdr_length;
	uint8_t		*nexthdrp;
	udpha_t		*udpha;
	sin6_t		sin6;
	mblk_t		*mp1;
	int		error = 0;
	udp_t		*udp = connp->conn_udp;
	udp_stack_t	*us = udp->udp_us;

	outer_ip6h = (ip6_t *)mp->b_rptr;
	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
	else
		iph_hdr_length = IPV6_HDR_LEN;
	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
	ip6h = (ip6_t *)&icmp6[1];
	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
		freemsg(mp);
		return;
	}
	udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);

	switch (icmp6->icmp6_type) {
	case ICMP6_DST_UNREACH:
		switch (icmp6->icmp6_code) {
		case ICMP6_DST_UNREACH_NOPORT:
			error = ECONNREFUSED;
			break;
		case ICMP6_DST_UNREACH_ADMIN:
		case ICMP6_DST_UNREACH_NOROUTE:
		case ICMP6_DST_UNREACH_BEYONDSCOPE:
		case ICMP6_DST_UNREACH_ADDR:
			/* Transient errors */
			break;
		default:
			break;
		}
		break;
	case ICMP6_PACKET_TOO_BIG: {
		struct T_unitdata_ind	*tudi;
		struct T_opthdr		*toh;
		size_t			udi_size;
		mblk_t			*newmp;
		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
		    sizeof (struct ip6_mtuinfo);
		sin6_t			*sin6;
		struct ip6_mtuinfo	*mtuinfo;

		/*
		 * If the application has requested to receive path mtu
		 * information, send up an empty message containing an
		 * IPV6_PATHMTU ancillary data item.
		 */
		if (!udp->udp_ipv6_recvpathmtu)
			break;

		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
		    opt_length;
		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			break;
		}

		/*
		 * newmp->b_cont is left to NULL on purpose.  This is an
		 * empty message containing only ancillary data.
		 */
		newmp->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
		newmp->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
		tudi->OPT_length = opt_length;

		sin6 = (sin6_t *)&tudi[1];
		bzero(sin6, sizeof (sin6_t));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_addr = udp->udp_v6dst;

		toh = (struct T_opthdr *)&sin6[1];
		toh->level = IPPROTO_IPV6;
		toh->name = IPV6_PATHMTU;
		toh->len = opt_length;
		toh->status = 0;

		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
		/*
		 * We've consumed everything we need from the original
		 * message.  Free it, then send our empty message.
		 */
		freemsg(mp);
		if (!IPCL_IS_NONSTR(connp)) {
			putnext(connp->conn_rq, newmp);
		} else {
			(*connp->conn_upcalls->su_recv)
			    (connp->conn_upper_handle, newmp, 0, 0, &error,
			    NULL);
		}
		return;
	}
	case ICMP6_TIME_EXCEEDED:
		/* Transient errors */
		break;
	case ICMP6_PARAM_PROB:
		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
		    (uchar_t *)nexthdrp) {
			error = ECONNREFUSED;
			break;
		}
		break;
	}
	if (error == 0) {
		freemsg(mp);
		return;
	}

	/*
	 * Deliver T_UDERROR_IND when the application has asked for it.
	 * The socket layer enables this automatically when connected.
	 */
	if (!udp->udp_dgram_errind) {
		freemsg(mp);
		return;
	}

	sin6 = sin6_null;
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = ip6h->ip6_dst;
	sin6.sin6_port = udpha->uha_dst_port;
	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;

	if (IPCL_IS_NONSTR(connp)) {
		rw_enter(&udp->udp_rwlock, RW_WRITER);
		if (udp->udp_state == TS_DATA_XFER) {
			if (sin6.sin6_port == udp->udp_dstport &&
			    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
			    &udp->udp_v6dst)) {
				rw_exit(&udp->udp_rwlock);
				(*connp->conn_upcalls->su_set_error)
				    (connp->conn_upper_handle, error);
				goto done;
			}
		} else {
			udp->udp_delayed_error = error;
			*((sin6_t *)&udp->udp_delayed_addr) = sin6;
		}
		rw_exit(&udp->udp_rwlock);
	} else {
		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
		    NULL, 0, error);
		if (mp1 != NULL)
			putnext(connp->conn_rq, mp1);
	}

done:
	freemsg(mp);
}
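
/*
 * Summary sketch of the two delivery paths used above once a permanent
 * error (e.g. ECONNREFUSED) has been determined and the application has
 * asked for it:
 *
 *	non-STREAMS socket:
 *		(*connp->conn_upcalls->su_set_error)
 *		    (connp->conn_upper_handle, error);
 *	TPI stream:
 *		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
 *		    NULL, 0, error);
 *		putnext(connp->conn_rq, mp1);
 *
 * so a later send on a connected endpoint fails with that errno, or the
 * TLI user receives a T_UDERROR_IND.
 */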

/*
 * This routine responds to T_ADDR_REQ messages.  It is called by udp_wput.
 * The local address is filled in if endpoint is bound. The remote address
 * is filled in if remote address has been specified ("connected endpoint")
 * (The concept of connected CLTS sockets is alien to published TPI
 *  but we support it anyway).
 */
static void
udp_addr_req(queue_t *q, mblk_t *mp)
{
	sin_t	*sin;
	sin6_t	*sin6;
	mblk_t	*ackmp;
	struct T_addr_ack *taa;
	udp_t	*udp = Q_TO_UDP(q);

	/* Make it large enough for worst case */
	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
	    2 * sizeof (sin6_t), 1);
	if (ackmp == NULL) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}
	taa = (struct T_addr_ack *)ackmp->b_rptr;

	bzero(taa, sizeof (struct T_addr_ack));
	ackmp->b_wptr = (uchar_t *)&taa[1];

	taa->PRIM_type = T_ADDR_ACK;
	ackmp->b_datap->db_type = M_PCPROTO;
	rw_enter(&udp->udp_rwlock, RW_READER);
	/*
	 * Note: Following code assumes 32 bit alignment of basic
	 * data structures like sin_t and struct T_addr_ack.
	 */
	if (udp->udp_state != TS_UNBND) {
		/*
		 * Fill in local address first
		 */
		taa->LOCADDR_offset = sizeof (*taa);
		if (udp->udp_family == AF_INET) {
			taa->LOCADDR_length = sizeof (sin_t);
			sin = (sin_t *)&taa[1];
			/* Fill zeroes and then initialize non-zero fields */
			*sin = sin_null;
			sin->sin_family = AF_INET;
			if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
			    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src,
				    sin->sin_addr.s_addr);
			} else {
				/*
				 * INADDR_ANY
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be INADDR_ANY)
				 */
				IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src,
				    sin->sin_addr.s_addr);
			}
			sin->sin_port = udp->udp_port;
			ackmp->b_wptr = (uchar_t *)&sin[1];
			if (udp->udp_state == TS_DATA_XFER) {
				/*
				 * connected, fill remote address too
				 */
				taa->REMADDR_length = sizeof (sin_t);
				/* assumed 32-bit alignment */
				taa->REMADDR_offset = taa->LOCADDR_offset +
				    taa->LOCADDR_length;

				sin = (sin_t *)(ackmp->b_rptr +
				    taa->REMADDR_offset);
				/* initialize */
				*sin = sin_null;
				sin->sin_family = AF_INET;
				sin->sin_addr.s_addr =
				    V4_PART_OF_V6(udp->udp_v6dst);
				sin->sin_port = udp->udp_dstport;
				ackmp->b_wptr = (uchar_t *)&sin[1];
			}
		} else {
			taa->LOCADDR_length = sizeof (sin6_t);
			sin6 = (sin6_t *)&taa[1];
			/* Fill zeroes and then initialize non-zero fields */
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				sin6->sin6_addr = udp->udp_v6src;
			} else {
				/*
				 * UNSPECIFIED
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be UNSPECIFIED)
				 */
				sin6->sin6_addr =
				    udp->udp_bound_v6src;
			}
			sin6->sin6_port = udp->udp_port;
			ackmp->b_wptr = (uchar_t *)&sin6[1];
			if (udp->udp_state == TS_DATA_XFER) {
				/*
				 * connected, fill remote address too
				 */
				taa->REMADDR_length = sizeof (sin6_t);
				/* assumed 32-bit alignment */
				taa->REMADDR_offset = taa->LOCADDR_offset +
				    taa->LOCADDR_length;

				sin6 = (sin6_t *)(ackmp->b_rptr +
				    taa->REMADDR_offset);
				/* initialize */
				*sin6 = sin6_null;
				sin6->sin6_family = AF_INET6;
				sin6->sin6_addr = udp->udp_v6dst;
				sin6->sin6_port = udp->udp_dstport;
				ackmp->b_wptr = (uchar_t *)&sin6[1];
			}
			ackmp->b_wptr = (uchar_t *)&sin6[1];
		}
	}
	rw_exit(&udp->udp_rwlock);
	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
	qreply(q, ackmp);
}

static void
udp_copy_info(struct T_info_ack *tap, udp_t *udp)
{
	if (udp->udp_family == AF_INET) {
		*tap = udp_g_t_info_ack_ipv4;
	} else {
		*tap = udp_g_t_info_ack_ipv6;
	}
	tap->CURRENT_state = udp->udp_state;
	tap->OPT_size = udp_max_optsize;
}

static void
udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap,
    t_uscalar_t cap_bits1)
{
	tcap->CAP_bits1 = 0;

	if (cap_bits1 & TC1_INFO) {
		udp_copy_info(&tcap->INFO_ack, udp);
		tcap->CAP_bits1 |= TC1_INFO;
	}
}

/*
 * This routine responds to T_CAPABILITY_REQ messages.  It is called by
 * udp_wput.  Much of the T_CAPABILITY_ACK information is copied from
 * udp_g_t_info_ack.  The current state of the stream is copied from
 * udp_state.
 */
static void
udp_capability_req(queue_t *q, mblk_t *mp)
{
	t_uscalar_t		cap_bits1;
	struct T_capability_ack	*tcap;
	udp_t	*udp = Q_TO_UDP(q);

	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;

	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
	    mp->b_datap->db_type, T_CAPABILITY_ACK);
	if (!mp)
		return;

	tcap = (struct T_capability_ack *)mp->b_rptr;
	udp_do_capability_ack(udp, tcap, cap_bits1);

	qreply(q, mp);
}

/*
 * This routine responds to T_INFO_REQ messages.  It is called by udp_wput.
 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
 * The current state of the stream is copied from udp_state.
 */
static void
udp_info_req(queue_t *q, mblk_t *mp)
{
	udp_t *udp = Q_TO_UDP(q);

	/* Create a T_INFO_ACK message. */
	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
	    T_INFO_ACK);
	if (!mp)
		return;
	udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
	qreply(q, mp);
}

/*
 * IP recognizes seven kinds of bind requests:
 *
 * - A zero-length address binds only to the protocol number.
 *
 * - A 4-byte address is treated as a request to
 * validate that the address is a valid local IPv4
 * address, appropriate for an application to bind to.
 * IP does the verification, but does not make any note
 * of the address at this time.
 *
 * - A 16-byte address is treated as a request
 * to validate a local IPv6 address, as the 4-byte
 * address case above.
 *
 * - A 16-byte sockaddr_in to validate the local IPv4 address and also
 * use it for the inbound fanout of packets.
 *
 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also
 * use it for the inbound fanout of packets.
 *
 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout
 * information consisting of local and remote addresses
 * and ports.  In this case, the addresses are both
 * validated as appropriate for this operation, and, if
 * so, the information is retained for use in the
 * inbound fanout.
 *
 * - A 36-byte address (ipa6_conn_t) containing complete IPv6
 * fanout information, like the 12-byte case above.
 *
 * IP will also fill in the IRE request mblk with information
 * regarding our peer.  In all cases, we notify IP of our protocol
 * type by appending a single protocol byte to the bind request.
 */
static mblk_t *
udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length)
{
	char	*cp;
	mblk_t	*mp;
	struct T_bind_req *tbr;
	ipa_conn_t	*ac;
	ipa6_conn_t	*ac6;
	sin_t	*sin;
	sin6_t	*sin6;

	ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ);
	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
	mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI);
	if (!mp)
		return (mp);
	mp->b_datap->db_type = M_PROTO;
	tbr = (struct T_bind_req *)mp->b_rptr;
	tbr->PRIM_type = bind_prim;
	tbr->ADDR_offset = sizeof (*tbr);
	tbr->CONIND_number = 0;
	tbr->ADDR_length = addr_length;
	cp = (char *)&tbr[1];
	switch (addr_length) {
	case sizeof (ipa_conn_t):
		ASSERT(udp->udp_family == AF_INET);
		/* Append a request for an IRE */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			freemsg(mp);
			return (NULL);
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;

		/* cp known to be 32 bit aligned */
		ac = (ipa_conn_t *)cp;
		ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src);
		ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst);
		ac->ac_fport = udp->udp_dstport;
		ac->ac_lport = udp->udp_port;
		break;

	case sizeof (ipa6_conn_t):
		ASSERT(udp->udp_family == AF_INET6);
		/* Append a request for an IRE */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			freemsg(mp);
			return (NULL);
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;

		/* cp known to be 32 bit aligned */
		ac6 = (ipa6_conn_t *)cp;
		ac6->ac6_laddr = udp->udp_v6src;
		ac6->ac6_faddr = udp->udp_v6dst;
		ac6->ac6_fport = udp->udp_dstport;
		ac6->ac6_lport = udp->udp_port;
		break;

	case sizeof (sin_t):
		ASSERT(udp->udp_family == AF_INET);
		/* Append a request for an IRE */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			freemsg(mp);
			return (NULL);
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;

		sin = (sin_t *)cp;
		*sin = sin_null;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src);
		sin->sin_port = udp->udp_port;
		break;

	case sizeof (sin6_t):
		ASSERT(udp->udp_family == AF_INET6);
		/* Append a request for an IRE */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			freemsg(mp);
			return (NULL);
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;

		sin6 = (sin6_t *)cp;
		*sin6 = sin6_null;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_addr = udp->udp_bound_v6src;
		sin6->sin6_port = udp->udp_port;
		break;
	}
	/* Add protocol number to end */
	cp[addr_length] = (char)IPPROTO_UDP;
	mp->b_wptr = (uchar_t *)&cp[addr_length + 1];
	return (mp);
}
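
/*
 * Illustrative layout of the message built above for the ipa_conn_t case
 * (not to scale):
 *
 *	mp:		[ struct T_bind_req | ipa_conn_t | IPPROTO_UDP byte ]
 *	mp->b_cont:	[ sizeof (ire_t) bytes, db_type = IRE_DB_REQ_TYPE ]
 *
 * IP fills the appended mblk with IRE information about the peer, as
 * described in the comment preceding udp_ip_bind_mp().
 */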
This is the open routine for udp. It allocates a udp_t structure for 1837 * the stream and, on the first open of the module, creates an ND table. 1838 */ 1839 /*ARGSUSED2*/ 1840 static int 1841 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1842 boolean_t isv6) 1843 { 1844 int error; 1845 udp_t *udp; 1846 conn_t *connp; 1847 dev_t conn_dev; 1848 udp_stack_t *us; 1849 vmem_t *minor_arena; 1850 1851 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 1852 1853 /* If the stream is already open, return immediately. */ 1854 if (q->q_ptr != NULL) 1855 return (0); 1856 1857 if (sflag == MODOPEN) 1858 return (EINVAL); 1859 1860 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1861 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1862 minor_arena = ip_minor_arena_la; 1863 } else { 1864 /* 1865 * Either minor numbers in the large arena were exhausted 1866 * or a non socket application is doing the open. 1867 * Try to allocate from the small arena. 1868 */ 1869 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1870 return (EBUSY); 1871 1872 minor_arena = ip_minor_arena_sa; 1873 } 1874 1875 if (flag & SO_FALLBACK) { 1876 /* 1877 * Non streams socket needs a stream to fallback to 1878 */ 1879 RD(q)->q_ptr = (void *)conn_dev; 1880 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1881 WR(q)->q_ptr = (void *)minor_arena; 1882 qprocson(q); 1883 return (0); 1884 } 1885 1886 connp = udp_do_open(credp, isv6, KM_SLEEP); 1887 if (connp == NULL) { 1888 inet_minor_free(minor_arena, conn_dev); 1889 return (ENOMEM); 1890 } 1891 udp = connp->conn_udp; 1892 us = udp->udp_us; 1893 1894 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1895 connp->conn_dev = conn_dev; 1896 connp->conn_minor_arena = minor_arena; 1897 1898 /* 1899 * Initialize the udp_t structure for this stream. 1900 */ 1901 q->q_ptr = connp; 1902 WR(q)->q_ptr = connp; 1903 connp->conn_rq = q; 1904 connp->conn_wq = WR(q); 1905 1906 rw_enter(&udp->udp_rwlock, RW_WRITER); 1907 ASSERT(connp->conn_ulp == IPPROTO_UDP); 1908 ASSERT(connp->conn_udp == udp); 1909 ASSERT(udp->udp_connp == connp); 1910 1911 if (flag & SO_SOCKSTR) { 1912 connp->conn_flags |= IPCL_SOCKET; 1913 udp->udp_issocket = B_TRUE; 1914 udp->udp_direct_sockfs = B_TRUE; 1915 } 1916 1917 q->q_hiwat = us->us_recv_hiwat; 1918 WR(q)->q_hiwat = us->us_xmit_hiwat; 1919 WR(q)->q_lowat = us->us_xmit_lowat; 1920 1921 qprocson(q); 1922 1923 if (udp->udp_family == AF_INET6) { 1924 /* Build initial header template for transmit */ 1925 if ((error = udp_build_hdrs(udp)) != 0) { 1926 rw_exit(&udp->udp_rwlock); 1927 qprocsoff(q); 1928 inet_minor_free(minor_arena, conn_dev); 1929 ipcl_conn_destroy(connp); 1930 return (error); 1931 } 1932 } 1933 rw_exit(&udp->udp_rwlock); 1934 1935 /* Set the Stream head write offset and high watermark. */ 1936 (void) proto_set_tx_wroff(q, connp, 1937 udp->udp_max_hdr_len + us->us_wroff_extra); 1938 /* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? */ 1939 (void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat)); 1940 1941 mutex_enter(&connp->conn_lock); 1942 connp->conn_state_flags &= ~CONN_INCIPIENT; 1943 mutex_exit(&connp->conn_lock); 1944 return (0); 1945 } 1946 1947 /* 1948 * Which UDP options OK to set through T_UNITDATA_REQ... 
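 * (At present the routine below simply returns B_TRUE for every
 * level/name pair; it is kept as the single hook where per-datagram
 * option setting could be filtered.)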
1949 */ 1950 /* ARGSUSED */ 1951 static boolean_t 1952 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1953 { 1954 return (B_TRUE); 1955 } 1956 1957 /* 1958 * This routine gets default values of certain options whose default 1959 * values are maintained by protcol specific code 1960 */ 1961 /* ARGSUSED */ 1962 int 1963 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1964 { 1965 udp_t *udp = Q_TO_UDP(q); 1966 udp_stack_t *us = udp->udp_us; 1967 int *i1 = (int *)ptr; 1968 1969 switch (level) { 1970 case IPPROTO_IP: 1971 switch (name) { 1972 case IP_MULTICAST_TTL: 1973 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1974 return (sizeof (uchar_t)); 1975 case IP_MULTICAST_LOOP: 1976 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1977 return (sizeof (uchar_t)); 1978 } 1979 break; 1980 case IPPROTO_IPV6: 1981 switch (name) { 1982 case IPV6_MULTICAST_HOPS: 1983 *i1 = IP_DEFAULT_MULTICAST_TTL; 1984 return (sizeof (int)); 1985 case IPV6_MULTICAST_LOOP: 1986 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1987 return (sizeof (int)); 1988 case IPV6_UNICAST_HOPS: 1989 *i1 = us->us_ipv6_hoplimit; 1990 return (sizeof (int)); 1991 } 1992 break; 1993 } 1994 return (-1); 1995 } 1996 1997 /* 1998 * This routine retrieves the current status of socket options. 1999 * It returns the size of the option retrieved. 2000 */ 2001 static int 2002 udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 2003 { 2004 udp_t *udp = connp->conn_udp; 2005 udp_stack_t *us = udp->udp_us; 2006 int *i1 = (int *)ptr; 2007 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 2008 int len; 2009 2010 ASSERT(RW_READ_HELD(&udp->udp_rwlock)); 2011 switch (level) { 2012 case SOL_SOCKET: 2013 switch (name) { 2014 case SO_DEBUG: 2015 *i1 = udp->udp_debug; 2016 break; /* goto sizeof (int) option return */ 2017 case SO_REUSEADDR: 2018 *i1 = udp->udp_reuseaddr; 2019 break; /* goto sizeof (int) option return */ 2020 case SO_TYPE: 2021 *i1 = SOCK_DGRAM; 2022 break; /* goto sizeof (int) option return */ 2023 2024 /* 2025 * The following three items are available here, 2026 * but are only meaningful to IP. 2027 */ 2028 case SO_DONTROUTE: 2029 *i1 = udp->udp_dontroute; 2030 break; /* goto sizeof (int) option return */ 2031 case SO_USELOOPBACK: 2032 *i1 = udp->udp_useloopback; 2033 break; /* goto sizeof (int) option return */ 2034 case SO_BROADCAST: 2035 *i1 = udp->udp_broadcast; 2036 break; /* goto sizeof (int) option return */ 2037 2038 case SO_SNDBUF: 2039 *i1 = udp->udp_xmit_hiwat; 2040 break; /* goto sizeof (int) option return */ 2041 case SO_RCVBUF: 2042 *i1 = udp->udp_rcv_disply_hiwat; 2043 break; /* goto sizeof (int) option return */ 2044 case SO_DGRAM_ERRIND: 2045 *i1 = udp->udp_dgram_errind; 2046 break; /* goto sizeof (int) option return */ 2047 case SO_RECVUCRED: 2048 *i1 = udp->udp_recvucred; 2049 break; /* goto sizeof (int) option return */ 2050 case SO_TIMESTAMP: 2051 *i1 = udp->udp_timestamp; 2052 break; /* goto sizeof (int) option return */ 2053 case SO_ANON_MLP: 2054 *i1 = connp->conn_anon_mlp; 2055 break; /* goto sizeof (int) option return */ 2056 case SO_MAC_EXEMPT: 2057 *i1 = connp->conn_mac_exempt; 2058 break; /* goto sizeof (int) option return */ 2059 case SO_ALLZONES: 2060 *i1 = connp->conn_allzones; 2061 break; /* goto sizeof (int) option return */ 2062 case SO_EXCLBIND: 2063 *i1 = udp->udp_exclbind ? 
SO_EXCLBIND : 0; 2064 break; 2065 case SO_PROTOTYPE: 2066 *i1 = IPPROTO_UDP; 2067 break; 2068 case SO_DOMAIN: 2069 *i1 = udp->udp_family; 2070 break; 2071 default: 2072 return (-1); 2073 } 2074 break; 2075 case IPPROTO_IP: 2076 if (udp->udp_family != AF_INET) 2077 return (-1); 2078 switch (name) { 2079 case IP_OPTIONS: 2080 case T_IP_OPTIONS: 2081 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 2082 if (len > 0) { 2083 bcopy(udp->udp_ip_rcv_options + 2084 udp->udp_label_len, ptr, len); 2085 } 2086 return (len); 2087 case IP_TOS: 2088 case T_IP_TOS: 2089 *i1 = (int)udp->udp_type_of_service; 2090 break; /* goto sizeof (int) option return */ 2091 case IP_TTL: 2092 *i1 = (int)udp->udp_ttl; 2093 break; /* goto sizeof (int) option return */ 2094 case IP_DHCPINIT_IF: 2095 return (-EINVAL); 2096 case IP_NEXTHOP: 2097 case IP_RECVPKTINFO: 2098 /* 2099 * This also handles IP_PKTINFO. 2100 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2101 * Differentiation is based on the size of the argument 2102 * passed in. 2103 * This option is handled in IP which will return an 2104 * error for IP_PKTINFO as it's not supported as a 2105 * sticky option. 2106 */ 2107 return (-EINVAL); 2108 case IP_MULTICAST_IF: 2109 /* 0 address if not set */ 2110 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 2111 return (sizeof (ipaddr_t)); 2112 case IP_MULTICAST_TTL: 2113 *(uchar_t *)ptr = udp->udp_multicast_ttl; 2114 return (sizeof (uchar_t)); 2115 case IP_MULTICAST_LOOP: 2116 *ptr = connp->conn_multicast_loop; 2117 return (sizeof (uint8_t)); 2118 case IP_RECVOPTS: 2119 *i1 = udp->udp_recvopts; 2120 break; /* goto sizeof (int) option return */ 2121 case IP_RECVDSTADDR: 2122 *i1 = udp->udp_recvdstaddr; 2123 break; /* goto sizeof (int) option return */ 2124 case IP_RECVIF: 2125 *i1 = udp->udp_recvif; 2126 break; /* goto sizeof (int) option return */ 2127 case IP_RECVSLLA: 2128 *i1 = udp->udp_recvslla; 2129 break; /* goto sizeof (int) option return */ 2130 case IP_RECVTTL: 2131 *i1 = udp->udp_recvttl; 2132 break; /* goto sizeof (int) option return */ 2133 case IP_ADD_MEMBERSHIP: 2134 case IP_DROP_MEMBERSHIP: 2135 case IP_BLOCK_SOURCE: 2136 case IP_UNBLOCK_SOURCE: 2137 case IP_ADD_SOURCE_MEMBERSHIP: 2138 case IP_DROP_SOURCE_MEMBERSHIP: 2139 case MCAST_JOIN_GROUP: 2140 case MCAST_LEAVE_GROUP: 2141 case MCAST_BLOCK_SOURCE: 2142 case MCAST_UNBLOCK_SOURCE: 2143 case MCAST_JOIN_SOURCE_GROUP: 2144 case MCAST_LEAVE_SOURCE_GROUP: 2145 /* cannot "get" the value for these */ 2146 return (-1); 2147 case IP_BOUND_IF: 2148 /* Zero if not set */ 2149 *i1 = udp->udp_bound_if; 2150 break; /* goto sizeof (int) option return */ 2151 case IP_UNSPEC_SRC: 2152 *i1 = udp->udp_unspec_source; 2153 break; /* goto sizeof (int) option return */ 2154 case IP_BROADCAST_TTL: 2155 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 2156 return (sizeof (uchar_t)); 2157 default: 2158 return (-1); 2159 } 2160 break; 2161 case IPPROTO_IPV6: 2162 if (udp->udp_family != AF_INET6) 2163 return (-1); 2164 switch (name) { 2165 case IPV6_UNICAST_HOPS: 2166 *i1 = (unsigned int)udp->udp_ttl; 2167 break; /* goto sizeof (int) option return */ 2168 case IPV6_MULTICAST_IF: 2169 /* 0 index if not set */ 2170 *i1 = udp->udp_multicast_if_index; 2171 break; /* goto sizeof (int) option return */ 2172 case IPV6_MULTICAST_HOPS: 2173 *i1 = udp->udp_multicast_ttl; 2174 break; /* goto sizeof (int) option return */ 2175 case IPV6_MULTICAST_LOOP: 2176 *i1 = connp->conn_multicast_loop; 2177 break; /* goto sizeof (int) option return */ 2178 case IPV6_JOIN_GROUP: 2179 case 
IPV6_LEAVE_GROUP: 2180 case MCAST_JOIN_GROUP: 2181 case MCAST_LEAVE_GROUP: 2182 case MCAST_BLOCK_SOURCE: 2183 case MCAST_UNBLOCK_SOURCE: 2184 case MCAST_JOIN_SOURCE_GROUP: 2185 case MCAST_LEAVE_SOURCE_GROUP: 2186 /* cannot "get" the value for these */ 2187 return (-1); 2188 case IPV6_BOUND_IF: 2189 /* Zero if not set */ 2190 *i1 = udp->udp_bound_if; 2191 break; /* goto sizeof (int) option return */ 2192 case IPV6_UNSPEC_SRC: 2193 *i1 = udp->udp_unspec_source; 2194 break; /* goto sizeof (int) option return */ 2195 case IPV6_RECVPKTINFO: 2196 *i1 = udp->udp_ip_recvpktinfo; 2197 break; /* goto sizeof (int) option return */ 2198 case IPV6_RECVTCLASS: 2199 *i1 = udp->udp_ipv6_recvtclass; 2200 break; /* goto sizeof (int) option return */ 2201 case IPV6_RECVPATHMTU: 2202 *i1 = udp->udp_ipv6_recvpathmtu; 2203 break; /* goto sizeof (int) option return */ 2204 case IPV6_RECVHOPLIMIT: 2205 *i1 = udp->udp_ipv6_recvhoplimit; 2206 break; /* goto sizeof (int) option return */ 2207 case IPV6_RECVHOPOPTS: 2208 *i1 = udp->udp_ipv6_recvhopopts; 2209 break; /* goto sizeof (int) option return */ 2210 case IPV6_RECVDSTOPTS: 2211 *i1 = udp->udp_ipv6_recvdstopts; 2212 break; /* goto sizeof (int) option return */ 2213 case _OLD_IPV6_RECVDSTOPTS: 2214 *i1 = udp->udp_old_ipv6_recvdstopts; 2215 break; /* goto sizeof (int) option return */ 2216 case IPV6_RECVRTHDRDSTOPTS: 2217 *i1 = udp->udp_ipv6_recvrthdrdstopts; 2218 break; /* goto sizeof (int) option return */ 2219 case IPV6_RECVRTHDR: 2220 *i1 = udp->udp_ipv6_recvrthdr; 2221 break; /* goto sizeof (int) option return */ 2222 case IPV6_PKTINFO: { 2223 /* XXX assumes that caller has room for max size! */ 2224 struct in6_pktinfo *pkti; 2225 2226 pkti = (struct in6_pktinfo *)ptr; 2227 if (ipp->ipp_fields & IPPF_IFINDEX) 2228 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2229 else 2230 pkti->ipi6_ifindex = 0; 2231 if (ipp->ipp_fields & IPPF_ADDR) 2232 pkti->ipi6_addr = ipp->ipp_addr; 2233 else 2234 pkti->ipi6_addr = ipv6_all_zeros; 2235 return (sizeof (struct in6_pktinfo)); 2236 } 2237 case IPV6_TCLASS: 2238 if (ipp->ipp_fields & IPPF_TCLASS) 2239 *i1 = ipp->ipp_tclass; 2240 else 2241 *i1 = IPV6_FLOW_TCLASS( 2242 IPV6_DEFAULT_VERS_AND_FLOW); 2243 break; /* goto sizeof (int) option return */ 2244 case IPV6_NEXTHOP: { 2245 sin6_t *sin6 = (sin6_t *)ptr; 2246 2247 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2248 return (0); 2249 *sin6 = sin6_null; 2250 sin6->sin6_family = AF_INET6; 2251 sin6->sin6_addr = ipp->ipp_nexthop; 2252 return (sizeof (sin6_t)); 2253 } 2254 case IPV6_HOPOPTS: 2255 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2256 return (0); 2257 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 2258 return (0); 2259 /* 2260 * The cipso/label option is added by kernel. 2261 * User is not usually aware of this option. 2262 * We copy out the hbh opt after the label option. 
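 *
 * After that copy, ptr[0] is restored to the original next-header
 * byte and ptr[1] is recomputed as the hop-by-hop header length
 * (in 8-byte units, not counting the first 8 bytes, rounded up)
 * of what remains once the label has been skipped.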
2263 */ 2264 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 2265 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2266 if (udp->udp_label_len_v6 > 0) { 2267 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2268 ptr[1] = (ipp->ipp_hopoptslen - 2269 udp->udp_label_len_v6 + 7) / 8 - 1; 2270 } 2271 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2272 case IPV6_RTHDRDSTOPTS: 2273 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2274 return (0); 2275 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2276 return (ipp->ipp_rtdstoptslen); 2277 case IPV6_RTHDR: 2278 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2279 return (0); 2280 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2281 return (ipp->ipp_rthdrlen); 2282 case IPV6_DSTOPTS: 2283 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 2284 return (0); 2285 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2286 return (ipp->ipp_dstoptslen); 2287 case IPV6_PATHMTU: 2288 return (ip_fill_mtuinfo(&udp->udp_v6dst, 2289 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 2290 us->us_netstack)); 2291 default: 2292 return (-1); 2293 } 2294 break; 2295 case IPPROTO_UDP: 2296 switch (name) { 2297 case UDP_ANONPRIVBIND: 2298 *i1 = udp->udp_anon_priv_bind; 2299 break; 2300 case UDP_EXCLBIND: 2301 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 2302 break; 2303 case UDP_RCVHDR: 2304 *i1 = udp->udp_rcvhdr ? 1 : 0; 2305 break; 2306 case UDP_NAT_T_ENDPOINT: 2307 *i1 = udp->udp_nat_t_endpoint; 2308 break; 2309 default: 2310 return (-1); 2311 } 2312 break; 2313 default: 2314 return (-1); 2315 } 2316 return (sizeof (int)); 2317 } 2318 2319 int 2320 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2321 { 2322 udp_t *udp; 2323 int err; 2324 2325 udp = Q_TO_UDP(q); 2326 2327 rw_enter(&udp->udp_rwlock, RW_READER); 2328 err = udp_opt_get(Q_TO_CONN(q), level, name, ptr); 2329 rw_exit(&udp->udp_rwlock); 2330 return (err); 2331 } 2332 2333 /* 2334 * This routine sets socket options. 2335 */ 2336 /* ARGSUSED */ 2337 static int 2338 udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2339 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2340 void *thisdg_attrs, boolean_t checkonly) 2341 { 2342 udpattrs_t *attrs = thisdg_attrs; 2343 int *i1 = (int *)invalp; 2344 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2345 udp_t *udp = connp->conn_udp; 2346 udp_stack_t *us = udp->udp_us; 2347 int error; 2348 uint_t newlen; 2349 size_t sth_wroff; 2350 2351 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 2352 /* 2353 * For fixed length options, no sanity check 2354 * of passed in length is done. It is assumed *_optcom_req() 2355 * routines do the right thing. 2356 */ 2357 switch (level) { 2358 case SOL_SOCKET: 2359 switch (name) { 2360 case SO_REUSEADDR: 2361 if (!checkonly) { 2362 udp->udp_reuseaddr = onoff; 2363 PASS_OPT_TO_IP(connp); 2364 } 2365 break; 2366 case SO_DEBUG: 2367 if (!checkonly) 2368 udp->udp_debug = onoff; 2369 break; 2370 /* 2371 * The following three items are available here, 2372 * but are only meaningful to IP. 
2373 */ 2374 case SO_DONTROUTE: 2375 if (!checkonly) { 2376 udp->udp_dontroute = onoff; 2377 PASS_OPT_TO_IP(connp); 2378 } 2379 break; 2380 case SO_USELOOPBACK: 2381 if (!checkonly) { 2382 udp->udp_useloopback = onoff; 2383 PASS_OPT_TO_IP(connp); 2384 } 2385 break; 2386 case SO_BROADCAST: 2387 if (!checkonly) { 2388 udp->udp_broadcast = onoff; 2389 PASS_OPT_TO_IP(connp); 2390 } 2391 break; 2392 2393 case SO_SNDBUF: 2394 if (*i1 > us->us_max_buf) { 2395 *outlenp = 0; 2396 return (ENOBUFS); 2397 } 2398 if (!checkonly) { 2399 udp->udp_xmit_hiwat = *i1; 2400 connp->conn_wq->q_hiwat = *i1; 2401 } 2402 break; 2403 case SO_RCVBUF: 2404 if (*i1 > us->us_max_buf) { 2405 *outlenp = 0; 2406 return (ENOBUFS); 2407 } 2408 if (!checkonly) { 2409 int size; 2410 2411 udp->udp_rcv_disply_hiwat = *i1; 2412 size = udp_set_rcv_hiwat(udp, *i1); 2413 rw_exit(&udp->udp_rwlock); 2414 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2415 size); 2416 rw_enter(&udp->udp_rwlock, RW_WRITER); 2417 } 2418 break; 2419 case SO_DGRAM_ERRIND: 2420 if (!checkonly) 2421 udp->udp_dgram_errind = onoff; 2422 break; 2423 case SO_RECVUCRED: 2424 if (!checkonly) 2425 udp->udp_recvucred = onoff; 2426 break; 2427 case SO_ALLZONES: 2428 /* 2429 * "soft" error (negative) 2430 * option not handled at this level 2431 * Do not modify *outlenp. 2432 */ 2433 return (-EINVAL); 2434 case SO_TIMESTAMP: 2435 if (!checkonly) 2436 udp->udp_timestamp = onoff; 2437 break; 2438 case SO_ANON_MLP: 2439 if (!checkonly) { 2440 connp->conn_anon_mlp = onoff; 2441 PASS_OPT_TO_IP(connp); 2442 } 2443 break; 2444 case SO_MAC_EXEMPT: 2445 if (secpolicy_net_mac_aware(cr) != 0 || 2446 udp->udp_state != TS_UNBND) 2447 return (EACCES); 2448 if (!checkonly) { 2449 connp->conn_mac_exempt = onoff; 2450 PASS_OPT_TO_IP(connp); 2451 } 2452 break; 2453 case SCM_UCRED: { 2454 struct ucred_s *ucr; 2455 cred_t *cr, *newcr; 2456 ts_label_t *tsl; 2457 2458 /* 2459 * Only sockets that have proper privileges and are 2460 * bound to MLPs will have any other value here, so 2461 * this implicitly tests for privilege to set label. 2462 */ 2463 if (connp->conn_mlp_type == mlptSingle) 2464 break; 2465 ucr = (struct ucred_s *)invalp; 2466 if (inlen != ucredsize || 2467 ucr->uc_labeloff < sizeof (*ucr) || 2468 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 2469 return (EINVAL); 2470 if (!checkonly) { 2471 mblk_t *mb; 2472 2473 if (attrs == NULL || 2474 (mb = attrs->udpattr_mb) == NULL) 2475 return (EINVAL); 2476 if ((cr = DB_CRED(mb)) == NULL) 2477 cr = udp->udp_connp->conn_cred; 2478 ASSERT(cr != NULL); 2479 if ((tsl = crgetlabel(cr)) == NULL) 2480 return (EINVAL); 2481 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 2482 tsl->tsl_doi, KM_NOSLEEP); 2483 if (newcr == NULL) 2484 return (ENOSR); 2485 mblk_setcred(mb, newcr); 2486 attrs->udpattr_credset = B_TRUE; 2487 crfree(newcr); 2488 } 2489 break; 2490 } 2491 case SO_EXCLBIND: 2492 if (!checkonly) 2493 udp->udp_exclbind = onoff; 2494 break; 2495 case SO_RCVTIMEO: 2496 case SO_SNDTIMEO: 2497 /* 2498 * Pass these two options in order for third part 2499 * protocol usage. Here just return directly. 2500 */ 2501 return (0); 2502 default: 2503 *outlenp = 0; 2504 return (EINVAL); 2505 } 2506 break; 2507 case IPPROTO_IP: 2508 if (udp->udp_family != AF_INET) { 2509 *outlenp = 0; 2510 return (ENOPROTOOPT); 2511 } 2512 switch (name) { 2513 case IP_OPTIONS: 2514 case T_IP_OPTIONS: 2515 /* Save options for use by IP. 
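 * (The stored options keep any CIPSO label that is already present:
 *  tsol_option_set() below merges the label with the caller's options,
 *  and the combined length is capped at IP_MAX_OPT_LENGTH.)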
*/ 2516 newlen = inlen + udp->udp_label_len; 2517 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 2518 *outlenp = 0; 2519 return (EINVAL); 2520 } 2521 if (checkonly) 2522 break; 2523 2524 /* 2525 * Update the stored options taking into account 2526 * any CIPSO option which we should not overwrite. 2527 */ 2528 if (!tsol_option_set(&udp->udp_ip_snd_options, 2529 &udp->udp_ip_snd_options_len, 2530 udp->udp_label_len, invalp, inlen)) { 2531 *outlenp = 0; 2532 return (ENOMEM); 2533 } 2534 2535 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2536 UDPH_SIZE + udp->udp_ip_snd_options_len; 2537 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 2538 rw_exit(&udp->udp_rwlock); 2539 (void) proto_set_tx_wroff(connp->conn_rq, connp, 2540 sth_wroff); 2541 rw_enter(&udp->udp_rwlock, RW_WRITER); 2542 break; 2543 2544 case IP_TTL: 2545 if (!checkonly) { 2546 udp->udp_ttl = (uchar_t)*i1; 2547 } 2548 break; 2549 case IP_TOS: 2550 case T_IP_TOS: 2551 if (!checkonly) { 2552 udp->udp_type_of_service = (uchar_t)*i1; 2553 } 2554 break; 2555 case IP_MULTICAST_IF: { 2556 /* 2557 * TODO should check OPTMGMT reply and undo this if 2558 * there is an error. 2559 */ 2560 struct in_addr *inap = (struct in_addr *)invalp; 2561 if (!checkonly) { 2562 udp->udp_multicast_if_addr = 2563 inap->s_addr; 2564 PASS_OPT_TO_IP(connp); 2565 } 2566 break; 2567 } 2568 case IP_MULTICAST_TTL: 2569 if (!checkonly) 2570 udp->udp_multicast_ttl = *invalp; 2571 break; 2572 case IP_MULTICAST_LOOP: 2573 if (!checkonly) { 2574 connp->conn_multicast_loop = *invalp; 2575 PASS_OPT_TO_IP(connp); 2576 } 2577 break; 2578 case IP_RECVOPTS: 2579 if (!checkonly) 2580 udp->udp_recvopts = onoff; 2581 break; 2582 case IP_RECVDSTADDR: 2583 if (!checkonly) 2584 udp->udp_recvdstaddr = onoff; 2585 break; 2586 case IP_RECVIF: 2587 if (!checkonly) { 2588 udp->udp_recvif = onoff; 2589 PASS_OPT_TO_IP(connp); 2590 } 2591 break; 2592 case IP_RECVSLLA: 2593 if (!checkonly) { 2594 udp->udp_recvslla = onoff; 2595 PASS_OPT_TO_IP(connp); 2596 } 2597 break; 2598 case IP_RECVTTL: 2599 if (!checkonly) 2600 udp->udp_recvttl = onoff; 2601 break; 2602 case IP_PKTINFO: { 2603 /* 2604 * This also handles IP_RECVPKTINFO. 2605 * IP_PKTINFO and IP_RECVPKTINFO have same value. 2606 * Differentiation is based on the size of the 2607 * argument passed in. 2608 */ 2609 struct in_pktinfo *pktinfop; 2610 ip4_pkt_t *attr_pktinfop; 2611 2612 if (checkonly) 2613 break; 2614 2615 if (inlen == sizeof (int)) { 2616 /* 2617 * This is IP_RECVPKTINFO option. 2618 * Keep a local copy of whether this option is 2619 * set or not and pass it down to IP for 2620 * processing. 2621 */ 2622 2623 udp->udp_ip_recvpktinfo = onoff; 2624 return (-EINVAL); 2625 } 2626 2627 if (attrs == NULL || 2628 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 2629 /* 2630 * sticky option or no buffer to return 2631 * the results. 
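 * (IP_PKTINFO itself is only accepted as per-datagram ancillary data
 *  carried with a T_UNITDATA_REQ; it has no sticky form, so a plain
 *  setsockopt-style set fails here.)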
2632 */ 2633 return (EINVAL); 2634 } 2635 2636 if (inlen != sizeof (struct in_pktinfo)) 2637 return (EINVAL); 2638 2639 pktinfop = (struct in_pktinfo *)invalp; 2640 2641 /* 2642 * At least one of the values should be specified 2643 */ 2644 if (pktinfop->ipi_ifindex == 0 && 2645 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2646 return (EINVAL); 2647 } 2648 2649 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2650 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2651 2652 break; 2653 } 2654 case IP_ADD_MEMBERSHIP: 2655 case IP_DROP_MEMBERSHIP: 2656 case IP_BLOCK_SOURCE: 2657 case IP_UNBLOCK_SOURCE: 2658 case IP_ADD_SOURCE_MEMBERSHIP: 2659 case IP_DROP_SOURCE_MEMBERSHIP: 2660 case MCAST_JOIN_GROUP: 2661 case MCAST_LEAVE_GROUP: 2662 case MCAST_BLOCK_SOURCE: 2663 case MCAST_UNBLOCK_SOURCE: 2664 case MCAST_JOIN_SOURCE_GROUP: 2665 case MCAST_LEAVE_SOURCE_GROUP: 2666 case IP_SEC_OPT: 2667 case IP_NEXTHOP: 2668 case IP_DHCPINIT_IF: 2669 /* 2670 * "soft" error (negative) 2671 * option not handled at this level 2672 * Do not modify *outlenp. 2673 */ 2674 return (-EINVAL); 2675 case IP_BOUND_IF: 2676 if (!checkonly) { 2677 udp->udp_bound_if = *i1; 2678 PASS_OPT_TO_IP(connp); 2679 } 2680 break; 2681 case IP_UNSPEC_SRC: 2682 if (!checkonly) { 2683 udp->udp_unspec_source = onoff; 2684 PASS_OPT_TO_IP(connp); 2685 } 2686 break; 2687 case IP_BROADCAST_TTL: 2688 if (!checkonly) 2689 connp->conn_broadcast_ttl = *invalp; 2690 break; 2691 default: 2692 *outlenp = 0; 2693 return (EINVAL); 2694 } 2695 break; 2696 case IPPROTO_IPV6: { 2697 ip6_pkt_t *ipp; 2698 boolean_t sticky; 2699 2700 if (udp->udp_family != AF_INET6) { 2701 *outlenp = 0; 2702 return (ENOPROTOOPT); 2703 } 2704 /* 2705 * Deal with both sticky options and ancillary data 2706 */ 2707 sticky = B_FALSE; 2708 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 2709 NULL) { 2710 /* sticky options, or none */ 2711 ipp = &udp->udp_sticky_ipp; 2712 sticky = B_TRUE; 2713 } 2714 2715 switch (name) { 2716 case IPV6_MULTICAST_IF: 2717 if (!checkonly) { 2718 udp->udp_multicast_if_index = *i1; 2719 PASS_OPT_TO_IP(connp); 2720 } 2721 break; 2722 case IPV6_UNICAST_HOPS: 2723 /* -1 means use default */ 2724 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2725 *outlenp = 0; 2726 return (EINVAL); 2727 } 2728 if (!checkonly) { 2729 if (*i1 == -1) { 2730 udp->udp_ttl = ipp->ipp_unicast_hops = 2731 us->us_ipv6_hoplimit; 2732 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2733 /* Pass modified value to IP. */ 2734 *i1 = udp->udp_ttl; 2735 } else { 2736 udp->udp_ttl = ipp->ipp_unicast_hops = 2737 (uint8_t)*i1; 2738 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2739 } 2740 /* Rebuild the header template */ 2741 error = udp_build_hdrs(udp); 2742 if (error != 0) { 2743 *outlenp = 0; 2744 return (error); 2745 } 2746 } 2747 break; 2748 case IPV6_MULTICAST_HOPS: 2749 /* -1 means use default */ 2750 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2751 *outlenp = 0; 2752 return (EINVAL); 2753 } 2754 if (!checkonly) { 2755 if (*i1 == -1) { 2756 udp->udp_multicast_ttl = 2757 ipp->ipp_multicast_hops = 2758 IP_DEFAULT_MULTICAST_TTL; 2759 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2760 /* Pass modified value to IP. 
*/ 2761 *i1 = udp->udp_multicast_ttl; 2762 } else { 2763 udp->udp_multicast_ttl = 2764 ipp->ipp_multicast_hops = 2765 (uint8_t)*i1; 2766 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2767 } 2768 } 2769 break; 2770 case IPV6_MULTICAST_LOOP: 2771 if (*i1 != 0 && *i1 != 1) { 2772 *outlenp = 0; 2773 return (EINVAL); 2774 } 2775 if (!checkonly) { 2776 connp->conn_multicast_loop = *i1; 2777 PASS_OPT_TO_IP(connp); 2778 } 2779 break; 2780 case IPV6_JOIN_GROUP: 2781 case IPV6_LEAVE_GROUP: 2782 case MCAST_JOIN_GROUP: 2783 case MCAST_LEAVE_GROUP: 2784 case MCAST_BLOCK_SOURCE: 2785 case MCAST_UNBLOCK_SOURCE: 2786 case MCAST_JOIN_SOURCE_GROUP: 2787 case MCAST_LEAVE_SOURCE_GROUP: 2788 /* 2789 * "soft" error (negative) 2790 * option not handled at this level 2791 * Note: Do not modify *outlenp 2792 */ 2793 return (-EINVAL); 2794 case IPV6_BOUND_IF: 2795 if (!checkonly) { 2796 udp->udp_bound_if = *i1; 2797 PASS_OPT_TO_IP(connp); 2798 } 2799 break; 2800 case IPV6_UNSPEC_SRC: 2801 if (!checkonly) { 2802 udp->udp_unspec_source = onoff; 2803 PASS_OPT_TO_IP(connp); 2804 } 2805 break; 2806 /* 2807 * Set boolean switches for ancillary data delivery 2808 */ 2809 case IPV6_RECVPKTINFO: 2810 if (!checkonly) { 2811 udp->udp_ip_recvpktinfo = onoff; 2812 PASS_OPT_TO_IP(connp); 2813 } 2814 break; 2815 case IPV6_RECVTCLASS: 2816 if (!checkonly) { 2817 udp->udp_ipv6_recvtclass = onoff; 2818 PASS_OPT_TO_IP(connp); 2819 } 2820 break; 2821 case IPV6_RECVPATHMTU: 2822 if (!checkonly) { 2823 udp->udp_ipv6_recvpathmtu = onoff; 2824 PASS_OPT_TO_IP(connp); 2825 } 2826 break; 2827 case IPV6_RECVHOPLIMIT: 2828 if (!checkonly) { 2829 udp->udp_ipv6_recvhoplimit = onoff; 2830 PASS_OPT_TO_IP(connp); 2831 } 2832 break; 2833 case IPV6_RECVHOPOPTS: 2834 if (!checkonly) { 2835 udp->udp_ipv6_recvhopopts = onoff; 2836 PASS_OPT_TO_IP(connp); 2837 } 2838 break; 2839 case IPV6_RECVDSTOPTS: 2840 if (!checkonly) { 2841 udp->udp_ipv6_recvdstopts = onoff; 2842 PASS_OPT_TO_IP(connp); 2843 } 2844 break; 2845 case _OLD_IPV6_RECVDSTOPTS: 2846 if (!checkonly) 2847 udp->udp_old_ipv6_recvdstopts = onoff; 2848 break; 2849 case IPV6_RECVRTHDRDSTOPTS: 2850 if (!checkonly) { 2851 udp->udp_ipv6_recvrthdrdstopts = onoff; 2852 PASS_OPT_TO_IP(connp); 2853 } 2854 break; 2855 case IPV6_RECVRTHDR: 2856 if (!checkonly) { 2857 udp->udp_ipv6_recvrthdr = onoff; 2858 PASS_OPT_TO_IP(connp); 2859 } 2860 break; 2861 /* 2862 * Set sticky options or ancillary data. 2863 * If sticky options, (re)build any extension headers 2864 * that might be needed as a result. 2865 */ 2866 case IPV6_PKTINFO: 2867 /* 2868 * The source address and ifindex are verified 2869 * in ip_opt_set(). For ancillary data the 2870 * source address is checked in ip_wput_v6. 
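 *
 * A rough user-level sketch of the ancillary form (assumed
 * application code, not part of this module):
 *
 *	struct in6_pktinfo pi6;
 *
 *	pi6.ipi6_addr = srcaddr;	(or in6addr_any)
 *	pi6.ipi6_ifindex = ifindex;	(or 0)
 *
 * placed in a cmsghdr with cmsg_level IPPROTO_IPV6 and cmsg_type
 * IPV6_PKTINFO and handed to sendmsg(3SOCKET).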
2871 */ 2872 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2873 return (EINVAL); 2874 if (checkonly) 2875 break; 2876 2877 if (inlen == 0) { 2878 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2879 ipp->ipp_sticky_ignored |= 2880 (IPPF_IFINDEX|IPPF_ADDR); 2881 } else { 2882 struct in6_pktinfo *pkti; 2883 2884 pkti = (struct in6_pktinfo *)invalp; 2885 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2886 ipp->ipp_addr = pkti->ipi6_addr; 2887 if (ipp->ipp_ifindex != 0) 2888 ipp->ipp_fields |= IPPF_IFINDEX; 2889 else 2890 ipp->ipp_fields &= ~IPPF_IFINDEX; 2891 if (!IN6_IS_ADDR_UNSPECIFIED( 2892 &ipp->ipp_addr)) 2893 ipp->ipp_fields |= IPPF_ADDR; 2894 else 2895 ipp->ipp_fields &= ~IPPF_ADDR; 2896 } 2897 if (sticky) { 2898 error = udp_build_hdrs(udp); 2899 if (error != 0) 2900 return (error); 2901 PASS_OPT_TO_IP(connp); 2902 } 2903 break; 2904 case IPV6_HOPLIMIT: 2905 if (sticky) 2906 return (EINVAL); 2907 if (inlen != 0 && inlen != sizeof (int)) 2908 return (EINVAL); 2909 if (checkonly) 2910 break; 2911 2912 if (inlen == 0) { 2913 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2914 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2915 } else { 2916 if (*i1 > 255 || *i1 < -1) 2917 return (EINVAL); 2918 if (*i1 == -1) 2919 ipp->ipp_hoplimit = 2920 us->us_ipv6_hoplimit; 2921 else 2922 ipp->ipp_hoplimit = *i1; 2923 ipp->ipp_fields |= IPPF_HOPLIMIT; 2924 } 2925 break; 2926 case IPV6_TCLASS: 2927 if (inlen != 0 && inlen != sizeof (int)) 2928 return (EINVAL); 2929 if (checkonly) 2930 break; 2931 2932 if (inlen == 0) { 2933 ipp->ipp_fields &= ~IPPF_TCLASS; 2934 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2935 } else { 2936 if (*i1 > 255 || *i1 < -1) 2937 return (EINVAL); 2938 if (*i1 == -1) 2939 ipp->ipp_tclass = 0; 2940 else 2941 ipp->ipp_tclass = *i1; 2942 ipp->ipp_fields |= IPPF_TCLASS; 2943 } 2944 if (sticky) { 2945 error = udp_build_hdrs(udp); 2946 if (error != 0) 2947 return (error); 2948 } 2949 break; 2950 case IPV6_NEXTHOP: 2951 /* 2952 * IP will verify that the nexthop is reachable 2953 * and fail for sticky options. 2954 */ 2955 if (inlen != 0 && inlen != sizeof (sin6_t)) 2956 return (EINVAL); 2957 if (checkonly) 2958 break; 2959 2960 if (inlen == 0) { 2961 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2962 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2963 } else { 2964 sin6_t *sin6 = (sin6_t *)invalp; 2965 2966 if (sin6->sin6_family != AF_INET6) { 2967 return (EAFNOSUPPORT); 2968 } 2969 if (IN6_IS_ADDR_V4MAPPED( 2970 &sin6->sin6_addr)) 2971 return (EADDRNOTAVAIL); 2972 ipp->ipp_nexthop = sin6->sin6_addr; 2973 if (!IN6_IS_ADDR_UNSPECIFIED( 2974 &ipp->ipp_nexthop)) 2975 ipp->ipp_fields |= IPPF_NEXTHOP; 2976 else 2977 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2978 } 2979 if (sticky) { 2980 error = udp_build_hdrs(udp); 2981 if (error != 0) 2982 return (error); 2983 PASS_OPT_TO_IP(connp); 2984 } 2985 break; 2986 case IPV6_HOPOPTS: { 2987 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2988 /* 2989 * Sanity checks - minimum size, size a multiple of 2990 * eight bytes, and matching size passed in. 2991 */ 2992 if (inlen != 0 && 2993 inlen != (8 * (hopts->ip6h_len + 1))) 2994 return (EINVAL); 2995 2996 if (checkonly) 2997 break; 2998 2999 error = optcom_pkt_set(invalp, inlen, sticky, 3000 (uchar_t **)&ipp->ipp_hopopts, 3001 &ipp->ipp_hopoptslen, 3002 sticky ? 
udp->udp_label_len_v6 : 0); 3003 if (error != 0) 3004 return (error); 3005 if (ipp->ipp_hopoptslen == 0) { 3006 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3007 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3008 } else { 3009 ipp->ipp_fields |= IPPF_HOPOPTS; 3010 } 3011 if (sticky) { 3012 error = udp_build_hdrs(udp); 3013 if (error != 0) 3014 return (error); 3015 } 3016 break; 3017 } 3018 case IPV6_RTHDRDSTOPTS: { 3019 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3020 3021 /* 3022 * Sanity checks - minimum size, size a multiple of 3023 * eight bytes, and matching size passed in. 3024 */ 3025 if (inlen != 0 && 3026 inlen != (8 * (dopts->ip6d_len + 1))) 3027 return (EINVAL); 3028 3029 if (checkonly) 3030 break; 3031 3032 if (inlen == 0) { 3033 if (sticky && 3034 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 3035 kmem_free(ipp->ipp_rtdstopts, 3036 ipp->ipp_rtdstoptslen); 3037 ipp->ipp_rtdstopts = NULL; 3038 ipp->ipp_rtdstoptslen = 0; 3039 } 3040 3041 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 3042 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 3043 } else { 3044 error = optcom_pkt_set(invalp, inlen, sticky, 3045 (uchar_t **)&ipp->ipp_rtdstopts, 3046 &ipp->ipp_rtdstoptslen, 0); 3047 if (error != 0) 3048 return (error); 3049 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3050 } 3051 if (sticky) { 3052 error = udp_build_hdrs(udp); 3053 if (error != 0) 3054 return (error); 3055 } 3056 break; 3057 } 3058 case IPV6_DSTOPTS: { 3059 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3060 3061 /* 3062 * Sanity checks - minimum size, size a multiple of 3063 * eight bytes, and matching size passed in. 3064 */ 3065 if (inlen != 0 && 3066 inlen != (8 * (dopts->ip6d_len + 1))) 3067 return (EINVAL); 3068 3069 if (checkonly) 3070 break; 3071 3072 if (inlen == 0) { 3073 if (sticky && 3074 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3075 kmem_free(ipp->ipp_dstopts, 3076 ipp->ipp_dstoptslen); 3077 ipp->ipp_dstopts = NULL; 3078 ipp->ipp_dstoptslen = 0; 3079 } 3080 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3081 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3082 } else { 3083 error = optcom_pkt_set(invalp, inlen, sticky, 3084 (uchar_t **)&ipp->ipp_dstopts, 3085 &ipp->ipp_dstoptslen, 0); 3086 if (error != 0) 3087 return (error); 3088 ipp->ipp_fields |= IPPF_DSTOPTS; 3089 } 3090 if (sticky) { 3091 error = udp_build_hdrs(udp); 3092 if (error != 0) 3093 return (error); 3094 } 3095 break; 3096 } 3097 case IPV6_RTHDR: { 3098 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3099 3100 /* 3101 * Sanity checks - minimum size, size a multiple of 3102 * eight bytes, and matching size passed in. 
3103 */ 3104 if (inlen != 0 && 3105 inlen != (8 * (rt->ip6r_len + 1))) 3106 return (EINVAL); 3107 3108 if (checkonly) 3109 break; 3110 3111 if (inlen == 0) { 3112 if (sticky && 3113 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3114 kmem_free(ipp->ipp_rthdr, 3115 ipp->ipp_rthdrlen); 3116 ipp->ipp_rthdr = NULL; 3117 ipp->ipp_rthdrlen = 0; 3118 } 3119 ipp->ipp_fields &= ~IPPF_RTHDR; 3120 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3121 } else { 3122 error = optcom_pkt_set(invalp, inlen, sticky, 3123 (uchar_t **)&ipp->ipp_rthdr, 3124 &ipp->ipp_rthdrlen, 0); 3125 if (error != 0) 3126 return (error); 3127 ipp->ipp_fields |= IPPF_RTHDR; 3128 } 3129 if (sticky) { 3130 error = udp_build_hdrs(udp); 3131 if (error != 0) 3132 return (error); 3133 } 3134 break; 3135 } 3136 3137 case IPV6_DONTFRAG: 3138 if (checkonly) 3139 break; 3140 3141 if (onoff) { 3142 ipp->ipp_fields |= IPPF_DONTFRAG; 3143 } else { 3144 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3145 } 3146 break; 3147 3148 case IPV6_USE_MIN_MTU: 3149 if (inlen != sizeof (int)) 3150 return (EINVAL); 3151 3152 if (*i1 < -1 || *i1 > 1) 3153 return (EINVAL); 3154 3155 if (checkonly) 3156 break; 3157 3158 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3159 ipp->ipp_use_min_mtu = *i1; 3160 break; 3161 3162 case IPV6_SEC_OPT: 3163 case IPV6_SRC_PREFERENCES: 3164 case IPV6_V6ONLY: 3165 /* Handled at the IP level */ 3166 return (-EINVAL); 3167 default: 3168 *outlenp = 0; 3169 return (EINVAL); 3170 } 3171 break; 3172 } /* end IPPROTO_IPV6 */ 3173 case IPPROTO_UDP: 3174 switch (name) { 3175 case UDP_ANONPRIVBIND: 3176 if ((error = secpolicy_net_privaddr(cr, 0, 3177 IPPROTO_UDP)) != 0) { 3178 *outlenp = 0; 3179 return (error); 3180 } 3181 if (!checkonly) { 3182 udp->udp_anon_priv_bind = onoff; 3183 } 3184 break; 3185 case UDP_EXCLBIND: 3186 if (!checkonly) 3187 udp->udp_exclbind = onoff; 3188 break; 3189 case UDP_RCVHDR: 3190 if (!checkonly) 3191 udp->udp_rcvhdr = onoff; 3192 break; 3193 case UDP_NAT_T_ENDPOINT: 3194 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 3195 *outlenp = 0; 3196 return (error); 3197 } 3198 3199 /* 3200 * Use udp_family instead so we can avoid ambiguitites 3201 * with AF_INET6 sockets that may switch from IPv4 3202 * to IPv6. 3203 */ 3204 if (udp->udp_family != AF_INET) { 3205 *outlenp = 0; 3206 return (EAFNOSUPPORT); 3207 } 3208 3209 if (!checkonly) { 3210 int size; 3211 3212 udp->udp_nat_t_endpoint = onoff; 3213 3214 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3215 UDPH_SIZE + udp->udp_ip_snd_options_len; 3216 3217 /* Also, adjust wroff */ 3218 if (onoff) { 3219 udp->udp_max_hdr_len += 3220 sizeof (uint32_t); 3221 } 3222 size = udp->udp_max_hdr_len + 3223 us->us_wroff_extra; 3224 (void) proto_set_tx_wroff(connp->conn_rq, connp, 3225 size); 3226 } 3227 break; 3228 default: 3229 *outlenp = 0; 3230 return (EINVAL); 3231 } 3232 break; 3233 default: 3234 *outlenp = 0; 3235 return (EINVAL); 3236 } 3237 /* 3238 * Common case of OK return with outval same as inval. 
3239 */ 3240 if (invalp != outvalp) { 3241 /* don't trust bcopy for identical src/dst */ 3242 (void) bcopy(invalp, outvalp, inlen); 3243 } 3244 *outlenp = inlen; 3245 return (0); 3246 } 3247 3248 int 3249 udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3250 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3251 void *thisdg_attrs, cred_t *cr) 3252 { 3253 int error; 3254 boolean_t checkonly; 3255 3256 error = 0; 3257 switch (optset_context) { 3258 case SETFN_OPTCOM_CHECKONLY: 3259 checkonly = B_TRUE; 3260 /* 3261 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3262 * inlen != 0 implies value supplied and 3263 * we have to "pretend" to set it. 3264 * inlen == 0 implies that there is no 3265 * value part in T_CHECK request and just validation 3266 * done elsewhere should be enough, we just return here. 3267 */ 3268 if (inlen == 0) { 3269 *outlenp = 0; 3270 goto done; 3271 } 3272 break; 3273 case SETFN_OPTCOM_NEGOTIATE: 3274 checkonly = B_FALSE; 3275 break; 3276 case SETFN_UD_NEGOTIATE: 3277 case SETFN_CONN_NEGOTIATE: 3278 checkonly = B_FALSE; 3279 /* 3280 * Negotiating local and "association-related" options 3281 * through T_UNITDATA_REQ. 3282 * 3283 * Following routine can filter out ones we do not 3284 * want to be "set" this way. 3285 */ 3286 if (!udp_opt_allow_udr_set(level, name)) { 3287 *outlenp = 0; 3288 error = EINVAL; 3289 goto done; 3290 } 3291 break; 3292 default: 3293 /* 3294 * We should never get here 3295 */ 3296 *outlenp = 0; 3297 error = EINVAL; 3298 goto done; 3299 } 3300 3301 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3302 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3303 3304 error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3305 outvalp, cr, thisdg_attrs, checkonly); 3306 done: 3307 return (error); 3308 } 3309 3310 /* ARGSUSED */ 3311 int 3312 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3313 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3314 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3315 { 3316 conn_t *connp = Q_TO_CONN(q); 3317 int error; 3318 udp_t *udp = connp->conn_udp; 3319 3320 rw_enter(&udp->udp_rwlock, RW_WRITER); 3321 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 3322 outlenp, outvalp, thisdg_attrs, cr); 3323 rw_exit(&udp->udp_rwlock); 3324 return (error); 3325 } 3326 3327 /* 3328 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3329 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3330 * headers, and the udp header. 3331 * Returns failure if can't allocate memory. 
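 *
 * The resulting udp_sticky_hdrs buffer is laid out (as built below) as
 *
 *	[ip6i_t, only if IPPF_HAS_IP6I][ip6_t][sticky ext. headers][udpha_t]
 *
 * with udp_sticky_hdrs_len == ip_total_hdrs_len_v6(ipp) + UDPH_SIZE.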
3332 */ 3333 static int 3334 udp_build_hdrs(udp_t *udp) 3335 { 3336 udp_stack_t *us = udp->udp_us; 3337 uchar_t *hdrs; 3338 uint_t hdrs_len; 3339 ip6_t *ip6h; 3340 ip6i_t *ip6i; 3341 udpha_t *udpha; 3342 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3343 size_t sth_wroff; 3344 conn_t *connp = udp->udp_connp; 3345 3346 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 3347 ASSERT(connp != NULL); 3348 3349 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3350 ASSERT(hdrs_len != 0); 3351 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3352 /* Need to reallocate */ 3353 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3354 if (hdrs == NULL) 3355 return (ENOMEM); 3356 3357 if (udp->udp_sticky_hdrs_len != 0) { 3358 kmem_free(udp->udp_sticky_hdrs, 3359 udp->udp_sticky_hdrs_len); 3360 } 3361 udp->udp_sticky_hdrs = hdrs; 3362 udp->udp_sticky_hdrs_len = hdrs_len; 3363 } 3364 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3365 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3366 3367 /* Set header fields not in ipp */ 3368 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3369 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3370 ip6h = (ip6_t *)&ip6i[1]; 3371 } else { 3372 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3373 } 3374 3375 if (!(ipp->ipp_fields & IPPF_ADDR)) 3376 ip6h->ip6_src = udp->udp_v6src; 3377 3378 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3379 udpha->uha_src_port = udp->udp_port; 3380 3381 /* Try to get everything in a single mblk */ 3382 if (hdrs_len > udp->udp_max_hdr_len) { 3383 udp->udp_max_hdr_len = hdrs_len; 3384 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3385 rw_exit(&udp->udp_rwlock); 3386 (void) proto_set_tx_wroff(udp->udp_connp->conn_rq, 3387 udp->udp_connp, sth_wroff); 3388 rw_enter(&udp->udp_rwlock, RW_WRITER); 3389 } 3390 return (0); 3391 } 3392 3393 /* 3394 * This routine retrieves the value of an ND variable in a udpparam_t 3395 * structure. It is called through nd_getset when a user reads the 3396 * variable. 3397 */ 3398 /* ARGSUSED */ 3399 static int 3400 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3401 { 3402 udpparam_t *udppa = (udpparam_t *)cp; 3403 3404 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 3405 return (0); 3406 } 3407 3408 /* 3409 * Walk through the param array specified registering each element with the 3410 * named dispatch (ND) handler. 3411 */ 3412 static boolean_t 3413 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3414 { 3415 for (; cnt-- > 0; udppa++) { 3416 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3417 if (!nd_load(ndp, udppa->udp_param_name, 3418 udp_param_get, udp_param_set, 3419 (caddr_t)udppa)) { 3420 nd_free(ndp); 3421 return (B_FALSE); 3422 } 3423 } 3424 } 3425 if (!nd_load(ndp, "udp_extra_priv_ports", 3426 udp_extra_priv_ports_get, NULL, NULL)) { 3427 nd_free(ndp); 3428 return (B_FALSE); 3429 } 3430 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3431 NULL, udp_extra_priv_ports_add, NULL)) { 3432 nd_free(ndp); 3433 return (B_FALSE); 3434 } 3435 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3436 NULL, udp_extra_priv_ports_del, NULL)) { 3437 nd_free(ndp); 3438 return (B_FALSE); 3439 } 3440 if (!nd_load(ndp, "udp_status", udp_status_report, NULL, 3441 NULL)) { 3442 nd_free(ndp); 3443 return (B_FALSE); 3444 } 3445 if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL, 3446 NULL)) { 3447 nd_free(ndp); 3448 return (B_FALSE); 3449 } 3450 return (B_TRUE); 3451 } 3452 3453 /* This routine sets an ND variable in a udpparam_t structure. 
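 * Values arrive as decimal strings (e.g. from an ndd -set on /dev/udp)
 * and are range-checked against udp_param_min/udp_param_max before
 * being stored; this is an illustrative note, the actual parameter
 * names are those registered by udp_param_register() above.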
*/ 3454 /* ARGSUSED */ 3455 static int 3456 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3457 { 3458 long new_value; 3459 udpparam_t *udppa = (udpparam_t *)cp; 3460 3461 /* 3462 * Fail the request if the new value does not lie within the 3463 * required bounds. 3464 */ 3465 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3466 new_value < udppa->udp_param_min || 3467 new_value > udppa->udp_param_max) { 3468 return (EINVAL); 3469 } 3470 3471 /* Set the new value */ 3472 udppa->udp_param_value = new_value; 3473 return (0); 3474 } 3475 3476 /* 3477 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 3478 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to 3479 * just count the length needed for allocation. If 'dbuf' is non-NULL, 3480 * then it's assumed to be allocated to be large enough. 3481 * 3482 * Returns zero if trimming of the security option causes all options to go 3483 * away. 3484 */ 3485 static size_t 3486 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 3487 { 3488 struct T_opthdr *toh; 3489 size_t hol = ipp->ipp_hopoptslen; 3490 ip6_hbh_t *dstopt = NULL; 3491 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 3492 size_t tlen, olen, plen; 3493 boolean_t deleting; 3494 const struct ip6_opt *sopt, *lastpad; 3495 struct ip6_opt *dopt; 3496 3497 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 3498 toh->level = IPPROTO_IPV6; 3499 toh->name = IPV6_HOPOPTS; 3500 toh->status = 0; 3501 dstopt = (ip6_hbh_t *)(toh + 1); 3502 } 3503 3504 /* 3505 * If labeling is enabled, then skip the label option 3506 * but get other options if there are any. 3507 */ 3508 if (is_system_labeled()) { 3509 dopt = NULL; 3510 if (dstopt != NULL) { 3511 /* will fill in ip6h_len later */ 3512 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 3513 dopt = (struct ip6_opt *)(dstopt + 1); 3514 } 3515 sopt = (const struct ip6_opt *)(srcopt + 1); 3516 hol -= sizeof (*srcopt); 3517 tlen = sizeof (*dstopt); 3518 lastpad = NULL; 3519 deleting = B_FALSE; 3520 /* 3521 * This loop finds the first (lastpad pointer) of any number of 3522 * pads that preceeds the security option, then treats the 3523 * security option as though it were a pad, and then finds the 3524 * next non-pad option (or end of list). 3525 * 3526 * It then treats the entire block as one big pad. To preserve 3527 * alignment of any options that follow, or just the end of the 3528 * list, it computes a minimal new padding size that keeps the 3529 * same alignment for the next option. 3530 * 3531 * If it encounters just a sequence of pads with no security 3532 * option, those are copied as-is rather than collapsed. 3533 * 3534 * Note that to handle the end of list case, the code makes one 3535 * loop with 'hol' set to zero. 
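 *
 * A worked illustration: if the source list holds a 2-byte PadN, a
 * 12-byte label option and then option X, the 14 deleted bytes are
 * replaced by a single 6-byte PadN (14 & 7 == 6), so option X keeps
 * exactly the 8-byte alignment it had before the label was removed.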
3536 */ 3537 for (;;) { 3538 if (hol > 0) { 3539 if (sopt->ip6o_type == IP6OPT_PAD1) { 3540 if (lastpad == NULL) 3541 lastpad = sopt; 3542 sopt = (const struct ip6_opt *) 3543 &sopt->ip6o_len; 3544 hol--; 3545 continue; 3546 } 3547 olen = sopt->ip6o_len + sizeof (*sopt); 3548 if (olen > hol) 3549 olen = hol; 3550 if (sopt->ip6o_type == IP6OPT_PADN || 3551 sopt->ip6o_type == ip6opt_ls) { 3552 if (sopt->ip6o_type == ip6opt_ls) 3553 deleting = B_TRUE; 3554 if (lastpad == NULL) 3555 lastpad = sopt; 3556 sopt = (const struct ip6_opt *) 3557 ((const char *)sopt + olen); 3558 hol -= olen; 3559 continue; 3560 } 3561 } else { 3562 /* if nothing was copied at all, then delete */ 3563 if (tlen == sizeof (*dstopt)) 3564 return (0); 3565 /* last pass; pick up any trailing padding */ 3566 olen = 0; 3567 } 3568 if (deleting) { 3569 /* 3570 * compute aligning effect of deleted material 3571 * to reproduce with pad. 3572 */ 3573 plen = ((const char *)sopt - 3574 (const char *)lastpad) & 7; 3575 tlen += plen; 3576 if (dopt != NULL) { 3577 if (plen == 1) { 3578 dopt->ip6o_type = IP6OPT_PAD1; 3579 } else if (plen > 1) { 3580 plen -= sizeof (*dopt); 3581 dopt->ip6o_type = IP6OPT_PADN; 3582 dopt->ip6o_len = plen; 3583 if (plen > 0) 3584 bzero(dopt + 1, plen); 3585 } 3586 dopt = (struct ip6_opt *) 3587 ((char *)dopt + plen); 3588 } 3589 deleting = B_FALSE; 3590 lastpad = NULL; 3591 } 3592 /* if there's uncopied padding, then copy that now */ 3593 if (lastpad != NULL) { 3594 olen += (const char *)sopt - 3595 (const char *)lastpad; 3596 sopt = lastpad; 3597 lastpad = NULL; 3598 } 3599 if (dopt != NULL && olen > 0) { 3600 bcopy(sopt, dopt, olen); 3601 dopt = (struct ip6_opt *)((char *)dopt + olen); 3602 } 3603 if (hol == 0) 3604 break; 3605 tlen += olen; 3606 sopt = (const struct ip6_opt *) 3607 ((const char *)sopt + olen); 3608 hol -= olen; 3609 } 3610 /* go back and patch up the length value, rounded upward */ 3611 if (dstopt != NULL) 3612 dstopt->ip6h_len = (tlen - 1) >> 3; 3613 } else { 3614 tlen = hol; 3615 if (dstopt != NULL) 3616 bcopy(srcopt, dstopt, hol); 3617 } 3618 3619 tlen += sizeof (*toh); 3620 if (toh != NULL) 3621 toh->len = tlen; 3622 3623 return (tlen); 3624 } 3625 3626 /* 3627 * Update udp_rcv_opt_len from the packet. 3628 * Called when options received, and when no options received but 3629 * udp_ip_recv_opt_len has previously recorded options. 
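 * (The options recorded here are what a later getsockopt() of
 *  IP_OPTIONS returns; see the IP_OPTIONS case in udp_opt_get() above.)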
3630 */ 3631 static void 3632 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) 3633 { 3634 /* Save the options if any */ 3635 if (opt_len > 0) { 3636 if (opt_len > udp->udp_ip_rcv_options_len) { 3637 /* Need to allocate larger buffer */ 3638 if (udp->udp_ip_rcv_options_len != 0) 3639 mi_free((char *)udp->udp_ip_rcv_options); 3640 udp->udp_ip_rcv_options_len = 0; 3641 udp->udp_ip_rcv_options = 3642 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 3643 if (udp->udp_ip_rcv_options != NULL) 3644 udp->udp_ip_rcv_options_len = opt_len; 3645 } 3646 if (udp->udp_ip_rcv_options_len != 0) { 3647 bcopy(opt, udp->udp_ip_rcv_options, opt_len); 3648 /* Adjust length if we are resusing the space */ 3649 udp->udp_ip_rcv_options_len = opt_len; 3650 } 3651 } else if (udp->udp_ip_rcv_options_len != 0) { 3652 /* Clear out previously recorded options */ 3653 mi_free((char *)udp->udp_ip_rcv_options); 3654 udp->udp_ip_rcv_options = NULL; 3655 udp->udp_ip_rcv_options_len = 0; 3656 } 3657 } 3658 3659 static void 3660 udp_queue_fallback(udp_t *udp, mblk_t *mp) 3661 { 3662 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 3663 if (IPCL_IS_NONSTR(udp->udp_connp)) { 3664 /* 3665 * fallback has started but messages have not been moved yet 3666 */ 3667 if (udp->udp_fallback_queue_head == NULL) { 3668 ASSERT(udp->udp_fallback_queue_tail == NULL); 3669 udp->udp_fallback_queue_head = mp; 3670 udp->udp_fallback_queue_tail = mp; 3671 } else { 3672 ASSERT(udp->udp_fallback_queue_tail != NULL); 3673 udp->udp_fallback_queue_tail->b_next = mp; 3674 udp->udp_fallback_queue_tail = mp; 3675 } 3676 mutex_exit(&udp->udp_recv_lock); 3677 } else { 3678 /* 3679 * no more fallbacks possible, ok to drop lock. 3680 */ 3681 mutex_exit(&udp->udp_recv_lock); 3682 putnext(udp->udp_connp->conn_rq, mp); 3683 } 3684 } 3685 3686 /* ARGSUSED2 */ 3687 static void 3688 udp_input(void *arg1, mblk_t *mp, void *arg2) 3689 { 3690 conn_t *connp = (conn_t *)arg1; 3691 struct T_unitdata_ind *tudi; 3692 uchar_t *rptr; /* Pointer to IP header */ 3693 int hdr_length; /* Length of IP+UDP headers */ 3694 int opt_len; 3695 int udi_size; /* Size of T_unitdata_ind */ 3696 int mp_len; 3697 udp_t *udp; 3698 udpha_t *udpha; 3699 int ipversion; 3700 ip6_pkt_t ipp; 3701 ip6_t *ip6h; 3702 ip6i_t *ip6i; 3703 mblk_t *mp1; 3704 mblk_t *options_mp = NULL; 3705 ip_pktinfo_t *pinfo = NULL; 3706 cred_t *cr = NULL; 3707 pid_t cpid; 3708 uint32_t udp_ip_rcv_options_len; 3709 udp_bits_t udp_bits; 3710 cred_t *rcr = connp->conn_cred; 3711 udp_stack_t *us; 3712 3713 ASSERT(connp->conn_flags & IPCL_UDPCONN); 3714 3715 udp = connp->conn_udp; 3716 us = udp->udp_us; 3717 rptr = mp->b_rptr; 3718 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3719 ASSERT(OK_32PTR(rptr)); 3720 3721 /* 3722 * IP should have prepended the options data in an M_CTL 3723 * Check M_CTL "type" to make sure are not here bcos of 3724 * a valid ICMP message 3725 */ 3726 if (DB_TYPE(mp) == M_CTL) { 3727 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3728 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3729 IN_PKTINFO) { 3730 /* 3731 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3732 * has been prepended to the packet by IP. We need to 3733 * extract the mblk and adjust the rptr 3734 */ 3735 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3736 options_mp = mp; 3737 mp = mp->b_cont; 3738 rptr = mp->b_rptr; 3739 UDP_STAT(us, udp_in_pktinfo); 3740 } else { 3741 /* 3742 * ICMP messages. 3743 */ 3744 udp_icmp_error(connp, mp); 3745 return; 3746 } 3747 } 3748 3749 mp_len = msgdsize(mp); 3750 /* 3751 * This is the inbound data path. 
3752 * First, we check to make sure the IP version number is correct, 3753 * and then pull the IP and UDP headers into the first mblk. 3754 */ 3755 3756 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 3757 ipp.ipp_fields = 0; 3758 3759 ipversion = IPH_HDR_VERSION(rptr); 3760 3761 rw_enter(&udp->udp_rwlock, RW_READER); 3762 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 3763 udp_bits = udp->udp_bits; 3764 rw_exit(&udp->udp_rwlock); 3765 3766 switch (ipversion) { 3767 case IPV4_VERSION: 3768 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 3769 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 3770 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 3771 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 3772 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 3773 udp->udp_family == AF_INET) { 3774 /* 3775 * Record/update udp_ip_rcv_options with the lock 3776 * held. Not needed for AF_INET6 sockets 3777 * since they don't support a getsockopt of IP_OPTIONS. 3778 */ 3779 rw_enter(&udp->udp_rwlock, RW_WRITER); 3780 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 3781 opt_len); 3782 rw_exit(&udp->udp_rwlock); 3783 } 3784 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. */ 3785 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 3786 udp->udp_ip_recvpktinfo) { 3787 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 3788 ipp.ipp_fields |= IPPF_IFINDEX; 3789 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 3790 } 3791 } 3792 break; 3793 case IPV6_VERSION: 3794 /* 3795 * IPv6 packets can only be received by applications 3796 * that are prepared to receive IPv6 addresses. 3797 * The IP fanout must ensure this. 3798 */ 3799 ASSERT(udp->udp_family == AF_INET6); 3800 3801 ip6h = (ip6_t *)rptr; 3802 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 3803 3804 if (ip6h->ip6_nxt != IPPROTO_UDP) { 3805 uint8_t nexthdrp; 3806 /* Look for ifindex information */ 3807 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3808 ip6i = (ip6i_t *)ip6h; 3809 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 3810 goto tossit; 3811 3812 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3813 ASSERT(ip6i->ip6i_ifindex != 0); 3814 ipp.ipp_fields |= IPPF_IFINDEX; 3815 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3816 } 3817 rptr = (uchar_t *)&ip6i[1]; 3818 mp->b_rptr = rptr; 3819 if (rptr == mp->b_wptr) { 3820 mp1 = mp->b_cont; 3821 freeb(mp); 3822 mp = mp1; 3823 rptr = mp->b_rptr; 3824 } 3825 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 3826 goto tossit; 3827 ip6h = (ip6_t *)rptr; 3828 mp_len = msgdsize(mp); 3829 } 3830 /* 3831 * Find any potentially interesting extension headers 3832 * as well as the length of the IPv6 + extension 3833 * headers. 3834 */ 3835 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 3836 UDPH_SIZE; 3837 ASSERT(nexthdrp == IPPROTO_UDP); 3838 } else { 3839 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 3840 ip6i = NULL; 3841 } 3842 break; 3843 default: 3844 ASSERT(0); 3845 } 3846 3847 /* 3848 * IP inspected the UDP header thus all of it must be in the mblk. 3849 * UDP length check is performed for IPv6 packets and IPv4 packets 3850 * to check if the size of the packet as specified 3851 * by the header is the same as the physical size of the packet. 3852 * FIXME? Didn't IP already check this? 3853 */ 3854 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 3855 if ((MBLKL(mp) < hdr_length) || 3856 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 3857 goto tossit; 3858 } 3859 3860 3861 /* Walk past the headers unless IP_RECVHDR was set. 
*/ 3862 if (!udp_bits.udpb_rcvhdr) { 3863 mp->b_rptr = rptr + hdr_length; 3864 mp_len -= hdr_length; 3865 } 3866 3867 /* 3868 * This is the inbound data path. Packets are passed upstream as 3869 * T_UNITDATA_IND messages with full IP headers still attached. 3870 */ 3871 if (udp->udp_family == AF_INET) { 3872 sin_t *sin; 3873 3874 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 3875 3876 /* 3877 * Normally only send up the source address. 3878 * If IP_RECVDSTADDR is set we include the destination IP 3879 * address as an option. With IP_RECVOPTS we include all 3880 * the IP options. 3881 */ 3882 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3883 if (udp_bits.udpb_recvdstaddr) { 3884 udi_size += sizeof (struct T_opthdr) + 3885 sizeof (struct in_addr); 3886 UDP_STAT(us, udp_in_recvdstaddr); 3887 } 3888 3889 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 3890 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3891 udi_size += sizeof (struct T_opthdr) + 3892 sizeof (struct in_pktinfo); 3893 UDP_STAT(us, udp_ip_rcvpktinfo); 3894 } 3895 3896 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 3897 udi_size += sizeof (struct T_opthdr) + opt_len; 3898 UDP_STAT(us, udp_in_recvopts); 3899 } 3900 3901 /* 3902 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 3903 * space accordingly 3904 */ 3905 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 3906 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3907 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 3908 UDP_STAT(us, udp_in_recvif); 3909 } 3910 3911 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 3912 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 3913 udi_size += sizeof (struct T_opthdr) + 3914 sizeof (struct sockaddr_dl); 3915 UDP_STAT(us, udp_in_recvslla); 3916 } 3917 3918 if ((udp_bits.udpb_recvucred) && 3919 (cr = DB_CRED(mp)) != NULL) { 3920 udi_size += sizeof (struct T_opthdr) + ucredsize; 3921 cpid = DB_CPID(mp); 3922 UDP_STAT(us, udp_in_recvucred); 3923 } 3924 3925 /* 3926 * If SO_TIMESTAMP is set allocate the appropriate sized 3927 * buffer. Since gethrestime() expects a pointer aligned 3928 * argument, we allocate space necessary for extra 3929 * alignment (even though it might not be used). 3930 */ 3931 if (udp_bits.udpb_timestamp) { 3932 udi_size += sizeof (struct T_opthdr) + 3933 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3934 UDP_STAT(us, udp_in_timestamp); 3935 } 3936 3937 /* 3938 * If IP_RECVTTL is set allocate the appropriate sized buffer 3939 */ 3940 if (udp_bits.udpb_recvttl) { 3941 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 3942 UDP_STAT(us, udp_in_recvttl); 3943 } 3944 3945 /* Allocate a message block for the T_UNITDATA_IND structure. 
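 * The message is laid out (per the offsets set just below) as
 *	[T_unitdata_ind][sin_t source address][T_opthdr options, if any]
 * and udi_size is decremented as each enabled option is filled in,
 * so it must reach zero once all of them have been appended.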
*/ 3946 mp1 = allocb(udi_size, BPRI_MED); 3947 if (mp1 == NULL) { 3948 freemsg(mp); 3949 if (options_mp != NULL) 3950 freeb(options_mp); 3951 BUMP_MIB(&us->us_udp_mib, udpInErrors); 3952 return; 3953 } 3954 mp1->b_cont = mp; 3955 mp = mp1; 3956 mp->b_datap->db_type = M_PROTO; 3957 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3958 mp->b_wptr = (uchar_t *)tudi + udi_size; 3959 tudi->PRIM_type = T_UNITDATA_IND; 3960 tudi->SRC_length = sizeof (sin_t); 3961 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3962 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3963 sizeof (sin_t); 3964 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3965 tudi->OPT_length = udi_size; 3966 sin = (sin_t *)&tudi[1]; 3967 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 3968 sin->sin_port = udpha->uha_src_port; 3969 sin->sin_family = udp->udp_family; 3970 *(uint32_t *)&sin->sin_zero[0] = 0; 3971 *(uint32_t *)&sin->sin_zero[4] = 0; 3972 3973 /* 3974 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 3975 * IP_RECVTTL has been set. 3976 */ 3977 if (udi_size != 0) { 3978 /* 3979 * Copy in destination address before options to avoid 3980 * any padding issues. 3981 */ 3982 char *dstopt; 3983 3984 dstopt = (char *)&sin[1]; 3985 if (udp_bits.udpb_recvdstaddr) { 3986 struct T_opthdr *toh; 3987 ipaddr_t *dstptr; 3988 3989 toh = (struct T_opthdr *)dstopt; 3990 toh->level = IPPROTO_IP; 3991 toh->name = IP_RECVDSTADDR; 3992 toh->len = sizeof (struct T_opthdr) + 3993 sizeof (ipaddr_t); 3994 toh->status = 0; 3995 dstopt += sizeof (struct T_opthdr); 3996 dstptr = (ipaddr_t *)dstopt; 3997 *dstptr = ((ipha_t *)rptr)->ipha_dst; 3998 dstopt += sizeof (ipaddr_t); 3999 udi_size -= toh->len; 4000 } 4001 4002 if (udp_bits.udpb_recvopts && opt_len > 0) { 4003 struct T_opthdr *toh; 4004 4005 toh = (struct T_opthdr *)dstopt; 4006 toh->level = IPPROTO_IP; 4007 toh->name = IP_RECVOPTS; 4008 toh->len = sizeof (struct T_opthdr) + opt_len; 4009 toh->status = 0; 4010 dstopt += sizeof (struct T_opthdr); 4011 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 4012 opt_len); 4013 dstopt += opt_len; 4014 udi_size -= toh->len; 4015 } 4016 4017 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 4018 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4019 struct T_opthdr *toh; 4020 struct in_pktinfo *pktinfop; 4021 4022 toh = (struct T_opthdr *)dstopt; 4023 toh->level = IPPROTO_IP; 4024 toh->name = IP_PKTINFO; 4025 toh->len = sizeof (struct T_opthdr) + 4026 sizeof (*pktinfop); 4027 toh->status = 0; 4028 dstopt += sizeof (struct T_opthdr); 4029 pktinfop = (struct in_pktinfo *)dstopt; 4030 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 4031 pktinfop->ipi_spec_dst = 4032 pinfo->ip_pkt_match_addr; 4033 pktinfop->ipi_addr.s_addr = 4034 ((ipha_t *)rptr)->ipha_dst; 4035 4036 dstopt += sizeof (struct in_pktinfo); 4037 udi_size -= toh->len; 4038 } 4039 4040 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4041 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4042 4043 struct T_opthdr *toh; 4044 struct sockaddr_dl *dstptr; 4045 4046 toh = (struct T_opthdr *)dstopt; 4047 toh->level = IPPROTO_IP; 4048 toh->name = IP_RECVSLLA; 4049 toh->len = sizeof (struct T_opthdr) + 4050 sizeof (struct sockaddr_dl); 4051 toh->status = 0; 4052 dstopt += sizeof (struct T_opthdr); 4053 dstptr = (struct sockaddr_dl *)dstopt; 4054 bcopy(&pinfo->ip_pkt_slla, dstptr, 4055 sizeof (struct sockaddr_dl)); 4056 dstopt += sizeof (struct sockaddr_dl); 4057 udi_size -= toh->len; 4058 } 4059 4060 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4061 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 
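/* IP_RECVIF: pass up the inbound interface index as a uint_t option. */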
4062 4063 struct T_opthdr *toh; 4064 uint_t *dstptr; 4065 4066 toh = (struct T_opthdr *)dstopt; 4067 toh->level = IPPROTO_IP; 4068 toh->name = IP_RECVIF; 4069 toh->len = sizeof (struct T_opthdr) + 4070 sizeof (uint_t); 4071 toh->status = 0; 4072 dstopt += sizeof (struct T_opthdr); 4073 dstptr = (uint_t *)dstopt; 4074 *dstptr = pinfo->ip_pkt_ifindex; 4075 dstopt += sizeof (uint_t); 4076 udi_size -= toh->len; 4077 } 4078 4079 if (cr != NULL) { 4080 struct T_opthdr *toh; 4081 4082 toh = (struct T_opthdr *)dstopt; 4083 toh->level = SOL_SOCKET; 4084 toh->name = SCM_UCRED; 4085 toh->len = sizeof (struct T_opthdr) + ucredsize; 4086 toh->status = 0; 4087 dstopt += sizeof (struct T_opthdr); 4088 (void) cred2ucred(cr, cpid, dstopt, rcr); 4089 dstopt += ucredsize; 4090 udi_size -= toh->len; 4091 } 4092 4093 if (udp_bits.udpb_timestamp) { 4094 struct T_opthdr *toh; 4095 4096 toh = (struct T_opthdr *)dstopt; 4097 toh->level = SOL_SOCKET; 4098 toh->name = SCM_TIMESTAMP; 4099 toh->len = sizeof (struct T_opthdr) + 4100 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4101 toh->status = 0; 4102 dstopt += sizeof (struct T_opthdr); 4103 /* Align for gethrestime() */ 4104 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4105 sizeof (intptr_t)); 4106 gethrestime((timestruc_t *)dstopt); 4107 dstopt = (char *)toh + toh->len; 4108 udi_size -= toh->len; 4109 } 4110 4111 /* 4112 * CAUTION: 4113 * Due to aligment issues 4114 * Processing of IP_RECVTTL option 4115 * should always be the last. Adding 4116 * any option processing after this will 4117 * cause alignment panic. 4118 */ 4119 if (udp_bits.udpb_recvttl) { 4120 struct T_opthdr *toh; 4121 uint8_t *dstptr; 4122 4123 toh = (struct T_opthdr *)dstopt; 4124 toh->level = IPPROTO_IP; 4125 toh->name = IP_RECVTTL; 4126 toh->len = sizeof (struct T_opthdr) + 4127 sizeof (uint8_t); 4128 toh->status = 0; 4129 dstopt += sizeof (struct T_opthdr); 4130 dstptr = (uint8_t *)dstopt; 4131 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4132 dstopt += sizeof (uint8_t); 4133 udi_size -= toh->len; 4134 } 4135 4136 /* Consumed all of allocated space */ 4137 ASSERT(udi_size == 0); 4138 } 4139 } else { 4140 sin6_t *sin6; 4141 4142 /* 4143 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4144 * 4145 * Normally we only send up the address. If receiving of any 4146 * optional receive side information is enabled, we also send 4147 * that up as options. 
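 * The sizing pass below mirrors the IPv4 case: hop-by-hop options,
 * destination options, routing header, IPV6_PKTINFO, SCM_UCRED,
 * SO_TIMESTAMP, IPV6_HOPLIMIT and IPV6_TCLASS each contribute a
 * T_opthdr plus payload to udi_size when enabled.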
4148 */ 4149 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4150 4151 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4152 IPPF_RTHDR|IPPF_IFINDEX)) { 4153 if ((udp_bits.udpb_ipv6_recvhopopts) && 4154 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4155 size_t hlen; 4156 4157 UDP_STAT(us, udp_in_recvhopopts); 4158 hlen = copy_hop_opts(&ipp, NULL); 4159 if (hlen == 0) 4160 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4161 udi_size += hlen; 4162 } 4163 if (((udp_bits.udpb_ipv6_recvdstopts) || 4164 udp_bits.udpb_old_ipv6_recvdstopts) && 4165 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4166 udi_size += sizeof (struct T_opthdr) + 4167 ipp.ipp_dstoptslen; 4168 UDP_STAT(us, udp_in_recvdstopts); 4169 } 4170 if ((((udp_bits.udpb_ipv6_recvdstopts) && 4171 udp_bits.udpb_ipv6_recvrthdr && 4172 (ipp.ipp_fields & IPPF_RTHDR)) || 4173 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 4174 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4175 udi_size += sizeof (struct T_opthdr) + 4176 ipp.ipp_rtdstoptslen; 4177 UDP_STAT(us, udp_in_recvrtdstopts); 4178 } 4179 if ((udp_bits.udpb_ipv6_recvrthdr) && 4180 (ipp.ipp_fields & IPPF_RTHDR)) { 4181 udi_size += sizeof (struct T_opthdr) + 4182 ipp.ipp_rthdrlen; 4183 UDP_STAT(us, udp_in_recvrthdr); 4184 } 4185 if ((udp_bits.udpb_ip_recvpktinfo) && 4186 (ipp.ipp_fields & IPPF_IFINDEX)) { 4187 udi_size += sizeof (struct T_opthdr) + 4188 sizeof (struct in6_pktinfo); 4189 UDP_STAT(us, udp_in_recvpktinfo); 4190 } 4191 4192 } 4193 if ((udp_bits.udpb_recvucred) && 4194 (cr = DB_CRED(mp)) != NULL) { 4195 udi_size += sizeof (struct T_opthdr) + ucredsize; 4196 cpid = DB_CPID(mp); 4197 UDP_STAT(us, udp_in_recvucred); 4198 } 4199 4200 /* 4201 * If SO_TIMESTAMP is set allocate the appropriate sized 4202 * buffer. Since gethrestime() expects a pointer aligned 4203 * argument, we allocate space necessary for extra 4204 * alignment (even though it might not be used). 
4205 */ 4206 if (udp_bits.udpb_timestamp) { 4207 udi_size += sizeof (struct T_opthdr) + 4208 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4209 UDP_STAT(us, udp_in_timestamp); 4210 } 4211 4212 if (udp_bits.udpb_ipv6_recvhoplimit) { 4213 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4214 UDP_STAT(us, udp_in_recvhoplimit); 4215 } 4216 4217 if (udp_bits.udpb_ipv6_recvtclass) { 4218 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4219 UDP_STAT(us, udp_in_recvtclass); 4220 } 4221 4222 mp1 = allocb(udi_size, BPRI_MED); 4223 if (mp1 == NULL) { 4224 freemsg(mp); 4225 if (options_mp != NULL) 4226 freeb(options_mp); 4227 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4228 return; 4229 } 4230 mp1->b_cont = mp; 4231 mp = mp1; 4232 mp->b_datap->db_type = M_PROTO; 4233 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4234 mp->b_wptr = (uchar_t *)tudi + udi_size; 4235 tudi->PRIM_type = T_UNITDATA_IND; 4236 tudi->SRC_length = sizeof (sin6_t); 4237 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4238 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4239 sizeof (sin6_t); 4240 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4241 tudi->OPT_length = udi_size; 4242 sin6 = (sin6_t *)&tudi[1]; 4243 if (ipversion == IPV4_VERSION) { 4244 in6_addr_t v6dst; 4245 4246 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4247 &sin6->sin6_addr); 4248 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4249 &v6dst); 4250 sin6->sin6_flowinfo = 0; 4251 sin6->sin6_scope_id = 0; 4252 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4253 connp->conn_zoneid, us->us_netstack); 4254 } else { 4255 sin6->sin6_addr = ip6h->ip6_src; 4256 /* No sin6_flowinfo per API */ 4257 sin6->sin6_flowinfo = 0; 4258 /* For link-scope source pass up scope id */ 4259 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4260 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4261 sin6->sin6_scope_id = ipp.ipp_ifindex; 4262 else 4263 sin6->sin6_scope_id = 0; 4264 sin6->__sin6_src_id = ip_srcid_find_addr( 4265 &ip6h->ip6_dst, connp->conn_zoneid, 4266 us->us_netstack); 4267 } 4268 sin6->sin6_port = udpha->uha_src_port; 4269 sin6->sin6_family = udp->udp_family; 4270 4271 if (udi_size != 0) { 4272 uchar_t *dstopt; 4273 4274 dstopt = (uchar_t *)&sin6[1]; 4275 if ((udp_bits.udpb_ip_recvpktinfo) && 4276 (ipp.ipp_fields & IPPF_IFINDEX)) { 4277 struct T_opthdr *toh; 4278 struct in6_pktinfo *pkti; 4279 4280 toh = (struct T_opthdr *)dstopt; 4281 toh->level = IPPROTO_IPV6; 4282 toh->name = IPV6_PKTINFO; 4283 toh->len = sizeof (struct T_opthdr) + 4284 sizeof (*pkti); 4285 toh->status = 0; 4286 dstopt += sizeof (struct T_opthdr); 4287 pkti = (struct in6_pktinfo *)dstopt; 4288 if (ipversion == IPV6_VERSION) 4289 pkti->ipi6_addr = ip6h->ip6_dst; 4290 else 4291 IN6_IPADDR_TO_V4MAPPED( 4292 ((ipha_t *)rptr)->ipha_dst, 4293 &pkti->ipi6_addr); 4294 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4295 dstopt += sizeof (*pkti); 4296 udi_size -= toh->len; 4297 } 4298 if (udp_bits.udpb_ipv6_recvhoplimit) { 4299 struct T_opthdr *toh; 4300 4301 toh = (struct T_opthdr *)dstopt; 4302 toh->level = IPPROTO_IPV6; 4303 toh->name = IPV6_HOPLIMIT; 4304 toh->len = sizeof (struct T_opthdr) + 4305 sizeof (uint_t); 4306 toh->status = 0; 4307 dstopt += sizeof (struct T_opthdr); 4308 if (ipversion == IPV6_VERSION) 4309 *(uint_t *)dstopt = ip6h->ip6_hops; 4310 else 4311 *(uint_t *)dstopt = 4312 ((ipha_t *)rptr)->ipha_ttl; 4313 dstopt += sizeof (uint_t); 4314 udi_size -= toh->len; 4315 } 4316 if (udp_bits.udpb_ipv6_recvtclass) { 4317 struct T_opthdr *toh; 4318 4319 toh = (struct T_opthdr *)dstopt; 4320 
toh->level = IPPROTO_IPV6; 4321 toh->name = IPV6_TCLASS; 4322 toh->len = sizeof (struct T_opthdr) + 4323 sizeof (uint_t); 4324 toh->status = 0; 4325 dstopt += sizeof (struct T_opthdr); 4326 if (ipversion == IPV6_VERSION) { 4327 *(uint_t *)dstopt = 4328 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4329 } else { 4330 ipha_t *ipha = (ipha_t *)rptr; 4331 *(uint_t *)dstopt = 4332 ipha->ipha_type_of_service; 4333 } 4334 dstopt += sizeof (uint_t); 4335 udi_size -= toh->len; 4336 } 4337 if ((udp_bits.udpb_ipv6_recvhopopts) && 4338 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4339 size_t hlen; 4340 4341 hlen = copy_hop_opts(&ipp, dstopt); 4342 dstopt += hlen; 4343 udi_size -= hlen; 4344 } 4345 if ((udp_bits.udpb_ipv6_recvdstopts) && 4346 (udp_bits.udpb_ipv6_recvrthdr) && 4347 (ipp.ipp_fields & IPPF_RTHDR) && 4348 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4349 struct T_opthdr *toh; 4350 4351 toh = (struct T_opthdr *)dstopt; 4352 toh->level = IPPROTO_IPV6; 4353 toh->name = IPV6_DSTOPTS; 4354 toh->len = sizeof (struct T_opthdr) + 4355 ipp.ipp_rtdstoptslen; 4356 toh->status = 0; 4357 dstopt += sizeof (struct T_opthdr); 4358 bcopy(ipp.ipp_rtdstopts, dstopt, 4359 ipp.ipp_rtdstoptslen); 4360 dstopt += ipp.ipp_rtdstoptslen; 4361 udi_size -= toh->len; 4362 } 4363 if ((udp_bits.udpb_ipv6_recvrthdr) && 4364 (ipp.ipp_fields & IPPF_RTHDR)) { 4365 struct T_opthdr *toh; 4366 4367 toh = (struct T_opthdr *)dstopt; 4368 toh->level = IPPROTO_IPV6; 4369 toh->name = IPV6_RTHDR; 4370 toh->len = sizeof (struct T_opthdr) + 4371 ipp.ipp_rthdrlen; 4372 toh->status = 0; 4373 dstopt += sizeof (struct T_opthdr); 4374 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4375 dstopt += ipp.ipp_rthdrlen; 4376 udi_size -= toh->len; 4377 } 4378 if ((udp_bits.udpb_ipv6_recvdstopts) && 4379 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4380 struct T_opthdr *toh; 4381 4382 toh = (struct T_opthdr *)dstopt; 4383 toh->level = IPPROTO_IPV6; 4384 toh->name = IPV6_DSTOPTS; 4385 toh->len = sizeof (struct T_opthdr) + 4386 ipp.ipp_dstoptslen; 4387 toh->status = 0; 4388 dstopt += sizeof (struct T_opthdr); 4389 bcopy(ipp.ipp_dstopts, dstopt, 4390 ipp.ipp_dstoptslen); 4391 dstopt += ipp.ipp_dstoptslen; 4392 udi_size -= toh->len; 4393 } 4394 if (cr != NULL) { 4395 struct T_opthdr *toh; 4396 4397 toh = (struct T_opthdr *)dstopt; 4398 toh->level = SOL_SOCKET; 4399 toh->name = SCM_UCRED; 4400 toh->len = sizeof (struct T_opthdr) + ucredsize; 4401 toh->status = 0; 4402 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4403 dstopt += toh->len; 4404 udi_size -= toh->len; 4405 } 4406 if (udp_bits.udpb_timestamp) { 4407 struct T_opthdr *toh; 4408 4409 toh = (struct T_opthdr *)dstopt; 4410 toh->level = SOL_SOCKET; 4411 toh->name = SCM_TIMESTAMP; 4412 toh->len = sizeof (struct T_opthdr) + 4413 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4414 toh->status = 0; 4415 dstopt += sizeof (struct T_opthdr); 4416 /* Align for gethrestime() */ 4417 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4418 sizeof (intptr_t)); 4419 gethrestime((timestruc_t *)dstopt); 4420 dstopt = (uchar_t *)toh + toh->len; 4421 udi_size -= toh->len; 4422 } 4423 4424 /* Consumed all of allocated space */ 4425 ASSERT(udi_size == 0); 4426 } 4427 #undef sin6 4428 /* No IP_RECVDSTADDR for IPv6. 
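 * The destination address is delivered through IPV6_PKTINFO instead.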
*/ 4429 } 4430 4431 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4432 if (options_mp != NULL) 4433 freeb(options_mp); 4434 4435 if (IPCL_IS_NONSTR(connp)) { 4436 int error; 4437 4438 if ((*connp->conn_upcalls->su_recv) 4439 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 4440 NULL) < 0) { 4441 mutex_enter(&udp->udp_recv_lock); 4442 if (error == ENOSPC) { 4443 /* 4444 * let's confirm while holding the lock 4445 */ 4446 if ((*connp->conn_upcalls->su_recv) 4447 (connp->conn_upper_handle, NULL, 0, 0, 4448 &error, NULL) < 0) { 4449 if (error == ENOSPC) { 4450 connp->conn_flow_cntrld = 4451 B_TRUE; 4452 } else { 4453 ASSERT(error == EOPNOTSUPP); 4454 } 4455 } 4456 mutex_exit(&udp->udp_recv_lock); 4457 } else { 4458 ASSERT(error == EOPNOTSUPP); 4459 udp_queue_fallback(udp, mp); 4460 } 4461 } 4462 } else { 4463 putnext(connp->conn_rq, mp); 4464 } 4465 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 4466 return; 4467 4468 tossit: 4469 freemsg(mp); 4470 if (options_mp != NULL) 4471 freeb(options_mp); 4472 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4473 } 4474 4475 /* 4476 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 4477 * information that can be changing beneath us. 4478 */ 4479 mblk_t * 4480 udp_snmp_get(queue_t *q, mblk_t *mpctl) 4481 { 4482 mblk_t *mpdata; 4483 mblk_t *mp_conn_ctl; 4484 mblk_t *mp_attr_ctl; 4485 mblk_t *mp6_conn_ctl; 4486 mblk_t *mp6_attr_ctl; 4487 mblk_t *mp_conn_tail; 4488 mblk_t *mp_attr_tail; 4489 mblk_t *mp6_conn_tail; 4490 mblk_t *mp6_attr_tail; 4491 struct opthdr *optp; 4492 mib2_udpEntry_t ude; 4493 mib2_udp6Entry_t ude6; 4494 mib2_transportMLPEntry_t mlp; 4495 int state; 4496 zoneid_t zoneid; 4497 int i; 4498 connf_t *connfp; 4499 conn_t *connp = Q_TO_CONN(q); 4500 int v4_conn_idx; 4501 int v6_conn_idx; 4502 boolean_t needattr; 4503 udp_t *udp; 4504 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4505 udp_stack_t *us = connp->conn_netstack->netstack_udp; 4506 mblk_t *mp2ctl; 4507 4508 /* 4509 * make a copy of the original message 4510 */ 4511 mp2ctl = copymsg(mpctl); 4512 4513 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 4514 if (mpctl == NULL || 4515 (mpdata = mpctl->b_cont) == NULL || 4516 (mp_conn_ctl = copymsg(mpctl)) == NULL || 4517 (mp_attr_ctl = copymsg(mpctl)) == NULL || 4518 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 4519 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 4520 freemsg(mp_conn_ctl); 4521 freemsg(mp_attr_ctl); 4522 freemsg(mp6_conn_ctl); 4523 freemsg(mpctl); 4524 freemsg(mp2ctl); 4525 return (0); 4526 } 4527 4528 zoneid = connp->conn_zoneid; 4529 4530 /* fixed length structure for IPv4 and IPv6 counters */ 4531 SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 4532 SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 4533 /* synchronize 64- and 32-bit counters */ 4534 SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams); 4535 SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams); 4536 4537 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4538 optp->level = MIB2_UDP; 4539 optp->name = 0; 4540 (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib, 4541 sizeof (us->us_udp_mib)); 4542 optp->len = msgdsize(mpdata); 4543 qreply(q, mpctl); 4544 4545 mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 4546 v4_conn_idx = v6_conn_idx = 0; 4547 4548 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4549 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4550 connp = NULL; 4551 4552 while ((connp = ipcl_get_next_conn(connfp, connp, 4553 IPCL_UDPCONN))) 
{ 4554 udp = connp->conn_udp; 4555 if (zoneid != connp->conn_zoneid) 4556 continue; 4557 4558 /* 4559 * Note that the port numbers are sent in 4560 * host byte order 4561 */ 4562 4563 if (udp->udp_state == TS_UNBND) 4564 state = MIB2_UDP_unbound; 4565 else if (udp->udp_state == TS_IDLE) 4566 state = MIB2_UDP_idle; 4567 else if (udp->udp_state == TS_DATA_XFER) 4568 state = MIB2_UDP_connected; 4569 else 4570 state = MIB2_UDP_unknown; 4571 4572 needattr = B_FALSE; 4573 bzero(&mlp, sizeof (mlp)); 4574 if (connp->conn_mlp_type != mlptSingle) { 4575 if (connp->conn_mlp_type == mlptShared || 4576 connp->conn_mlp_type == mlptBoth) 4577 mlp.tme_flags |= MIB2_TMEF_SHARED; 4578 if (connp->conn_mlp_type == mlptPrivate || 4579 connp->conn_mlp_type == mlptBoth) 4580 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 4581 needattr = B_TRUE; 4582 } 4583 4584 /* 4585 * Create an IPv4 table entry for IPv4 entries and also 4586 * any IPv6 entries which are bound to in6addr_any 4587 * (i.e. anything a IPv4 peer could connect/send to). 4588 */ 4589 if (udp->udp_ipversion == IPV4_VERSION || 4590 (udp->udp_state <= TS_IDLE && 4591 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 4592 ude.udpEntryInfo.ue_state = state; 4593 /* 4594 * If in6addr_any this will set it to 4595 * INADDR_ANY 4596 */ 4597 ude.udpLocalAddress = 4598 V4_PART_OF_V6(udp->udp_v6src); 4599 ude.udpLocalPort = ntohs(udp->udp_port); 4600 if (udp->udp_state == TS_DATA_XFER) { 4601 /* 4602 * Can potentially get here for 4603 * v6 socket if another process 4604 * (say, ping) has just done a 4605 * sendto(), changing the state 4606 * from the TS_IDLE above to 4607 * TS_DATA_XFER by the time we hit 4608 * this part of the code. 4609 */ 4610 ude.udpEntryInfo.ue_RemoteAddress = 4611 V4_PART_OF_V6(udp->udp_v6dst); 4612 ude.udpEntryInfo.ue_RemotePort = 4613 ntohs(udp->udp_dstport); 4614 } else { 4615 ude.udpEntryInfo.ue_RemoteAddress = 0; 4616 ude.udpEntryInfo.ue_RemotePort = 0; 4617 } 4618 4619 /* 4620 * We make the assumption that all udp_t 4621 * structs will be created within an address 4622 * region no larger than 32-bits. 4623 */ 4624 ude.udpInstance = (uint32_t)(uintptr_t)udp; 4625 ude.udpCreationProcess = 4626 (udp->udp_open_pid < 0) ? 4627 MIB2_UNKNOWN_PROCESS : 4628 udp->udp_open_pid; 4629 ude.udpCreationTime = udp->udp_open_time; 4630 4631 (void) snmp_append_data2(mp_conn_ctl->b_cont, 4632 &mp_conn_tail, (char *)&ude, sizeof (ude)); 4633 mlp.tme_connidx = v4_conn_idx++; 4634 if (needattr) 4635 (void) snmp_append_data2( 4636 mp_attr_ctl->b_cont, &mp_attr_tail, 4637 (char *)&mlp, sizeof (mlp)); 4638 } 4639 if (udp->udp_ipversion == IPV6_VERSION) { 4640 ude6.udp6EntryInfo.ue_state = state; 4641 ude6.udp6LocalAddress = udp->udp_v6src; 4642 ude6.udp6LocalPort = ntohs(udp->udp_port); 4643 ude6.udp6IfIndex = udp->udp_bound_if; 4644 if (udp->udp_state == TS_DATA_XFER) { 4645 ude6.udp6EntryInfo.ue_RemoteAddress = 4646 udp->udp_v6dst; 4647 ude6.udp6EntryInfo.ue_RemotePort = 4648 ntohs(udp->udp_dstport); 4649 } else { 4650 ude6.udp6EntryInfo.ue_RemoteAddress = 4651 sin6_null.sin6_addr; 4652 ude6.udp6EntryInfo.ue_RemotePort = 0; 4653 } 4654 /* 4655 * We make the assumption that all udp_t 4656 * structs will be created within an address 4657 * region no larger than 32-bits. 4658 */ 4659 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 4660 ude6.udp6CreationProcess = 4661 (udp->udp_open_pid < 0) ? 
4662 MIB2_UNKNOWN_PROCESS : 4663 udp->udp_open_pid; 4664 ude6.udp6CreationTime = udp->udp_open_time; 4665 4666 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 4667 &mp6_conn_tail, (char *)&ude6, 4668 sizeof (ude6)); 4669 mlp.tme_connidx = v6_conn_idx++; 4670 if (needattr) 4671 (void) snmp_append_data2( 4672 mp6_attr_ctl->b_cont, 4673 &mp6_attr_tail, (char *)&mlp, 4674 sizeof (mlp)); 4675 } 4676 } 4677 } 4678 4679 /* IPv4 UDP endpoints */ 4680 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 4681 sizeof (struct T_optmgmt_ack)]; 4682 optp->level = MIB2_UDP; 4683 optp->name = MIB2_UDP_ENTRY; 4684 optp->len = msgdsize(mp_conn_ctl->b_cont); 4685 qreply(q, mp_conn_ctl); 4686 4687 /* table of MLP attributes... */ 4688 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 4689 sizeof (struct T_optmgmt_ack)]; 4690 optp->level = MIB2_UDP; 4691 optp->name = EXPER_XPORT_MLP; 4692 optp->len = msgdsize(mp_attr_ctl->b_cont); 4693 if (optp->len == 0) 4694 freemsg(mp_attr_ctl); 4695 else 4696 qreply(q, mp_attr_ctl); 4697 4698 /* IPv6 UDP endpoints */ 4699 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 4700 sizeof (struct T_optmgmt_ack)]; 4701 optp->level = MIB2_UDP6; 4702 optp->name = MIB2_UDP6_ENTRY; 4703 optp->len = msgdsize(mp6_conn_ctl->b_cont); 4704 qreply(q, mp6_conn_ctl); 4705 4706 /* table of MLP attributes... */ 4707 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 4708 sizeof (struct T_optmgmt_ack)]; 4709 optp->level = MIB2_UDP6; 4710 optp->name = EXPER_XPORT_MLP; 4711 optp->len = msgdsize(mp6_attr_ctl->b_cont); 4712 if (optp->len == 0) 4713 freemsg(mp6_attr_ctl); 4714 else 4715 qreply(q, mp6_attr_ctl); 4716 4717 return (mp2ctl); 4718 } 4719 4720 /* 4721 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 4722 * NOTE: Per MIB-II, UDP has no writable data. 4723 * TODO: If this ever actually tries to set anything, it needs to be 4724 * to do the appropriate locking. 
4725 */ 4726 /* ARGSUSED */ 4727 int 4728 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4729 uchar_t *ptr, int len) 4730 { 4731 switch (level) { 4732 case MIB2_UDP: 4733 return (0); 4734 default: 4735 return (1); 4736 } 4737 } 4738 4739 static void 4740 udp_report_item(mblk_t *mp, udp_t *udp) 4741 { 4742 char *state; 4743 char addrbuf1[INET6_ADDRSTRLEN]; 4744 char addrbuf2[INET6_ADDRSTRLEN]; 4745 uint_t print_len, buf_len; 4746 4747 buf_len = mp->b_datap->db_lim - mp->b_wptr; 4748 ASSERT(buf_len >= 0); 4749 if (buf_len == 0) 4750 return; 4751 4752 if (udp->udp_state == TS_UNBND) 4753 state = "UNBOUND"; 4754 else if (udp->udp_state == TS_IDLE) 4755 state = "IDLE"; 4756 else if (udp->udp_state == TS_DATA_XFER) 4757 state = "CONNECTED"; 4758 else 4759 state = "UnkState"; 4760 print_len = snprintf((char *)mp->b_wptr, buf_len, 4761 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 4762 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 4763 inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)), 4764 inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)), 4765 ntohs(udp->udp_dstport), state); 4766 if (print_len < buf_len) { 4767 mp->b_wptr += print_len; 4768 } else { 4769 mp->b_wptr += buf_len; 4770 } 4771 } 4772 4773 /* Report for ndd "udp_status" */ 4774 /* ARGSUSED */ 4775 static int 4776 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4777 { 4778 zoneid_t zoneid; 4779 connf_t *connfp; 4780 conn_t *connp = Q_TO_CONN(q); 4781 udp_t *udp = connp->conn_udp; 4782 int i; 4783 udp_stack_t *us = udp->udp_us; 4784 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4785 4786 /* 4787 * Because of the ndd constraint, at most we can have 64K buffer 4788 * to put in all UDP info. So to be more efficient, just 4789 * allocate a 64K buffer here, assuming we need that large buffer. 4790 * This may be a problem as any user can read udp_status. Therefore 4791 * we limit the rate of doing this using us_ndd_get_info_interval. 4792 * This should be OK as normal users should not do this too often. 4793 */ 4794 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 4795 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 4796 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 4797 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 4798 return (0); 4799 } 4800 } 4801 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 4802 /* The following may work even if we cannot get a large buf. */ 4803 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 4804 return (0); 4805 } 4806 (void) mi_mpprintf(mp, 4807 "UDP " MI_COL_HDRPAD_STR 4808 /* 12345678[89ABCDEF] */ 4809 " zone lport src addr dest addr port state"); 4810 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 4811 4812 zoneid = connp->conn_zoneid; 4813 4814 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4815 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4816 connp = NULL; 4817 4818 while ((connp = ipcl_get_next_conn(connfp, connp, 4819 IPCL_UDPCONN))) { 4820 udp = connp->conn_udp; 4821 if (zoneid != GLOBAL_ZONEID && 4822 zoneid != connp->conn_zoneid) 4823 continue; 4824 4825 udp_report_item(mp->b_cont, udp); 4826 } 4827 } 4828 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 4829 return (0); 4830 } 4831 4832 /* 4833 * This routine creates a T_UDERROR_IND message and passes it upstream. 4834 * The address and options are copied from the T_UNITDATA_REQ message 4835 * passed in mp. This message is freed. 
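 * For an M_DATA mblk (direct sockfs send) the caller supplies destaddr
 * and destlen; otherwise they are extracted from the T_unitdata_req
 * after checking that the DEST and OPT regions lie within the mblk.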
4836 */ 4837 static void 4838 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 4839 t_scalar_t err) 4840 { 4841 struct T_unitdata_req *tudr; 4842 mblk_t *mp1; 4843 uchar_t *optaddr; 4844 t_scalar_t optlen; 4845 4846 if (DB_TYPE(mp) == M_DATA) { 4847 ASSERT(destaddr != NULL && destlen != 0); 4848 optaddr = NULL; 4849 optlen = 0; 4850 } else { 4851 if ((mp->b_wptr < mp->b_rptr) || 4852 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 4853 goto done; 4854 } 4855 tudr = (struct T_unitdata_req *)mp->b_rptr; 4856 destaddr = mp->b_rptr + tudr->DEST_offset; 4857 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 4858 destaddr + tudr->DEST_length < mp->b_rptr || 4859 destaddr + tudr->DEST_length > mp->b_wptr) { 4860 goto done; 4861 } 4862 optaddr = mp->b_rptr + tudr->OPT_offset; 4863 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 4864 optaddr + tudr->OPT_length < mp->b_rptr || 4865 optaddr + tudr->OPT_length > mp->b_wptr) { 4866 goto done; 4867 } 4868 destlen = tudr->DEST_length; 4869 optlen = tudr->OPT_length; 4870 } 4871 4872 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 4873 (char *)optaddr, optlen, err); 4874 if (mp1 != NULL) 4875 qreply(q, mp1); 4876 4877 done: 4878 freemsg(mp); 4879 } 4880 4881 /* 4882 * This routine removes a port number association from a stream. It 4883 * is called by udp_wput to handle T_UNBIND_REQ messages. 4884 */ 4885 static void 4886 udp_tpi_unbind(queue_t *q, mblk_t *mp) 4887 { 4888 conn_t *connp = Q_TO_CONN(q); 4889 int error; 4890 4891 error = udp_do_unbind(connp); 4892 if (error) { 4893 if (error < 0) 4894 udp_err_ack(q, mp, -error, 0); 4895 else 4896 udp_err_ack(q, mp, TSYSERR, error); 4897 return; 4898 } 4899 4900 mp = mi_tpi_ok_ack_alloc(mp); 4901 ASSERT(mp != NULL); 4902 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4903 qreply(q, mp); 4904 } 4905 4906 /* 4907 * Don't let port fall into the privileged range. 4908 * Since the extra privileged ports can be arbitrary we also 4909 * ensure that we exclude those from consideration. 4910 * us->us_epriv_ports is not sorted thus we loop over it until 4911 * there are no changes. 4912 */ 4913 static in_port_t 4914 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 4915 { 4916 int i; 4917 in_port_t nextport; 4918 boolean_t restart = B_FALSE; 4919 udp_stack_t *us = udp->udp_us; 4920 4921 if (random && udp_random_anon_port != 0) { 4922 (void) random_get_pseudo_bytes((uint8_t *)&port, 4923 sizeof (in_port_t)); 4924 /* 4925 * Unless changed by a sys admin, the smallest anon port 4926 * is 32768 and the largest anon port is 65535. It is 4927 * very likely (50%) for the random port to be smaller 4928 * than the smallest anon port. When that happens, 4929 * add port % (anon port range) to the smallest anon 4930 * port to get the random port. It should fall into the 4931 * valid anon port range. 
4932 */ 4933 if (port < us->us_smallest_anon_port) { 4934 port = us->us_smallest_anon_port + 4935 port % (us->us_largest_anon_port - 4936 us->us_smallest_anon_port); 4937 } 4938 } 4939 4940 retry: 4941 if (port < us->us_smallest_anon_port) 4942 port = us->us_smallest_anon_port; 4943 4944 if (port > us->us_largest_anon_port) { 4945 port = us->us_smallest_anon_port; 4946 if (restart) 4947 return (0); 4948 restart = B_TRUE; 4949 } 4950 4951 if (port < us->us_smallest_nonpriv_port) 4952 port = us->us_smallest_nonpriv_port; 4953 4954 for (i = 0; i < us->us_num_epriv_ports; i++) { 4955 if (port == us->us_epriv_ports[i]) { 4956 port++; 4957 /* 4958 * Make sure that the port is in the 4959 * valid range. 4960 */ 4961 goto retry; 4962 } 4963 } 4964 4965 if (is_system_labeled() && 4966 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 4967 port, IPPROTO_UDP, B_TRUE)) != 0) { 4968 port = nextport; 4969 goto retry; 4970 } 4971 4972 return (port); 4973 } 4974 4975 static int 4976 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst, 4977 boolean_t *update_lastdst) 4978 { 4979 int err; 4980 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4981 udp_t *udp = Q_TO_UDP(wq); 4982 udp_stack_t *us = udp->udp_us; 4983 4984 err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst, 4985 opt_storage, udp->udp_connp->conn_mac_exempt, 4986 us->us_netstack->netstack_ip); 4987 if (err == 0) { 4988 err = tsol_update_options(&udp->udp_ip_snd_options, 4989 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 4990 opt_storage); 4991 } 4992 if (err != 0) { 4993 DTRACE_PROBE4( 4994 tx__ip__log__info__updatelabel__udp, 4995 char *, "queue(1) failed to update options(2) on mp(3)", 4996 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4997 } else { 4998 *update_lastdst = B_TRUE; 4999 } 5000 return (err); 5001 } 5002 5003 static mblk_t * 5004 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 5005 uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg, 5006 cred_t *cr, pid_t pid) 5007 { 5008 udp_t *udp = connp->conn_udp; 5009 mblk_t *mp1 = mp; 5010 mblk_t *mp2; 5011 ipha_t *ipha; 5012 int ip_hdr_length; 5013 uint32_t ip_len; 5014 udpha_t *udpha; 5015 boolean_t lock_held = B_FALSE; 5016 in_port_t uha_src_port; 5017 udpattrs_t attrs; 5018 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 5019 uint32_t ip_snd_opt_len = 0; 5020 ip4_pkt_t pktinfo; 5021 ip4_pkt_t *pktinfop = &pktinfo; 5022 ip_opt_info_t optinfo; 5023 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5024 udp_stack_t *us = udp->udp_us; 5025 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 5026 queue_t *q = connp->conn_wq; 5027 ire_t *ire; 5028 in6_addr_t v6dst; 5029 boolean_t update_lastdst = B_FALSE; 5030 5031 *error = 0; 5032 pktinfop->ip4_ill_index = 0; 5033 pktinfop->ip4_addr = INADDR_ANY; 5034 optinfo.ip_opt_flags = 0; 5035 optinfo.ip_opt_ill_index = 0; 5036 5037 if (v4dst == INADDR_ANY) 5038 v4dst = htonl(INADDR_LOOPBACK); 5039 5040 /* 5041 * If options passed in, feed it for verification and handling 5042 */ 5043 attrs.udpattr_credset = B_FALSE; 5044 if (IPCL_IS_NONSTR(connp)) { 5045 if (msg->msg_controllen != 0) { 5046 attrs.udpattr_ipp4 = pktinfop; 5047 attrs.udpattr_mb = mp; 5048 5049 rw_enter(&udp->udp_rwlock, RW_WRITER); 5050 *error = process_auxiliary_options(connp, 5051 msg->msg_control, msg->msg_controllen, 5052 &attrs, &udp_opt_obj, udp_opt_set); 5053 rw_exit(&udp->udp_rwlock); 5054 if (*error) 5055 goto done; 5056 } 5057 } else { 5058 if (DB_TYPE(mp) != M_DATA) { 5059 mp1 = mp->b_cont; 5060 if 
(((struct T_unitdata_req *) 5061 mp->b_rptr)->OPT_length != 0) { 5062 attrs.udpattr_ipp4 = pktinfop; 5063 attrs.udpattr_mb = mp; 5064 if (udp_unitdata_opt_process(q, mp, error, 5065 &attrs) < 0) 5066 goto done; 5067 /* 5068 * Note: success in processing options. 5069 * mp option buffer represented by 5070 * OPT_length/offset now potentially modified 5071 * and contain option setting results 5072 */ 5073 ASSERT(*error == 0); 5074 } 5075 } 5076 } 5077 5078 /* mp1 points to the M_DATA mblk carrying the packet */ 5079 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 5080 5081 /* 5082 * Determine whether we need to mark the mblk with the user's 5083 * credentials. 5084 */ 5085 ire = connp->conn_ire_cache; 5086 if (is_system_labeled() || CLASSD(v4dst) || (ire == NULL) || 5087 (ire->ire_addr != v4dst) || 5088 (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) { 5089 if (cr != NULL && DB_CRED(mp) == NULL) 5090 msg_setcredpid(mp, cr, pid); 5091 } 5092 5093 rw_enter(&udp->udp_rwlock, RW_READER); 5094 lock_held = B_TRUE; 5095 5096 /* 5097 * Cluster and TSOL note: 5098 * udp.udp_v6lastdst is shared by Cluster and TSOL 5099 * udp.udp_lastdstport is used by Cluster 5100 * 5101 * Both Cluster and TSOL need to update the dest addr and/or port. 5102 * Updating is done after both Cluster and TSOL checks, protected 5103 * by conn_lock. 5104 */ 5105 mutex_enter(&connp->conn_lock); 5106 5107 if (cl_inet_connect2 != NULL && 5108 (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5109 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5110 udp->udp_lastdstport != port)) { 5111 mutex_exit(&connp->conn_lock); 5112 *error = 0; 5113 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 5114 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error); 5115 if (*error != 0) { 5116 *error = EHOSTUNREACH; 5117 goto done; 5118 } 5119 update_lastdst = B_TRUE; 5120 mutex_enter(&connp->conn_lock); 5121 } 5122 5123 /* 5124 * Check if our saved options are valid; update if not. 5125 * TSOL Note: Since we are not in WRITER mode, UDP packets 5126 * to different destination may require different labels, 5127 * or worse, UDP packets to same IP address may require 5128 * different labels due to use of shared all-zones address. 5129 * We use conn_lock to ensure that lastdst, ip_snd_options, 5130 * and ip_snd_options_len are consistent for the current 5131 * destination and are updated atomically. 
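 * The label checks below may drop conn_lock and bail out on failure,
 * in which case udp_v6lastdst and udp_lastdstport are left untouched.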
5132 */ 5133 if (is_system_labeled()) { 5134 /* Using UDP MLP requires SCM_UCRED from user */ 5135 if (connp->conn_mlp_type != mlptSingle && 5136 !attrs.udpattr_credset) { 5137 mutex_exit(&connp->conn_lock); 5138 DTRACE_PROBE4( 5139 tx__ip__log__info__output__udp, 5140 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 5141 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 5142 *error = ECONNREFUSED; 5143 goto done; 5144 } 5145 /* 5146 * update label option for this UDP socket if 5147 * - the destination has changed, or 5148 * - the UDP socket is MLP 5149 */ 5150 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5151 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5152 connp->conn_mlp_type != mlptSingle) && 5153 (*error = udp_update_label(q, mp, v4dst, &update_lastdst)) 5154 != 0) { 5155 mutex_exit(&connp->conn_lock); 5156 goto done; 5157 } 5158 } 5159 if (update_lastdst) { 5160 IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst); 5161 udp->udp_lastdstport = port; 5162 } 5163 if (udp->udp_ip_snd_options_len > 0) { 5164 ip_snd_opt_len = udp->udp_ip_snd_options_len; 5165 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 5166 } 5167 mutex_exit(&connp->conn_lock); 5168 5169 /* Add an IP header */ 5170 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + 5171 (insert_spi ? sizeof (uint32_t) : 0); 5172 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5173 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5174 !OK_32PTR(ipha)) { 5175 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 5176 if (mp2 == NULL) { 5177 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5178 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5179 *error = ENOMEM; 5180 goto done; 5181 } 5182 mp2->b_wptr = DB_LIM(mp2); 5183 mp2->b_cont = mp1; 5184 mp1 = mp2; 5185 if (DB_TYPE(mp) != M_DATA) 5186 mp->b_cont = mp1; 5187 else 5188 mp = mp1; 5189 5190 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5191 } 5192 ip_hdr_length -= (UDPH_SIZE + (insert_spi ? sizeof (uint32_t) : 0)); 5193 #ifdef _BIG_ENDIAN 5194 /* Set version, header length, and tos */ 5195 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5196 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5197 udp->udp_type_of_service); 5198 /* Set ttl and protocol */ 5199 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5200 #else 5201 /* Set version, header length, and tos */ 5202 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5203 ((udp->udp_type_of_service << 8) | 5204 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5205 /* Set ttl and protocol */ 5206 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5207 #endif 5208 if (pktinfop->ip4_addr != INADDR_ANY) { 5209 ipha->ipha_src = pktinfop->ip4_addr; 5210 optinfo.ip_opt_flags = IP_VERIFY_SRC; 5211 } else { 5212 /* 5213 * Copy our address into the packet. If this is zero, 5214 * first look at __sin6_src_id for a hint. If we leave the 5215 * source as INADDR_ANY then ip will fill in the real source 5216 * address. 
5217 */ 5218 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5219 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5220 in6_addr_t v6src; 5221 5222 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 5223 us->us_netstack); 5224 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5225 } 5226 } 5227 uha_src_port = udp->udp_port; 5228 if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) { 5229 rw_exit(&udp->udp_rwlock); 5230 lock_held = B_FALSE; 5231 } 5232 5233 if (pktinfop->ip4_ill_index != 0) { 5234 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 5235 } 5236 5237 ipha->ipha_fragment_offset_and_flags = 0; 5238 ipha->ipha_ident = 0; 5239 5240 mp1->b_rptr = (uchar_t *)ipha; 5241 5242 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5243 (uintptr_t)UINT_MAX); 5244 5245 /* Determine length of packet */ 5246 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5247 if ((mp2 = mp1->b_cont) != NULL) { 5248 do { 5249 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5250 ip_len += (uint32_t)MBLKL(mp2); 5251 } while ((mp2 = mp2->b_cont) != NULL); 5252 } 5253 /* 5254 * If the size of the packet is greater than the maximum allowed by 5255 * ip, return an error. Passing this down could cause panics because 5256 * the size will have wrapped and be inconsistent with the msg size. 5257 */ 5258 if (ip_len > IP_MAXPACKET) { 5259 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5260 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5261 *error = EMSGSIZE; 5262 goto done; 5263 } 5264 ipha->ipha_length = htons((uint16_t)ip_len); 5265 ip_len -= ip_hdr_length; 5266 ip_len = htons((uint16_t)ip_len); 5267 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5268 5269 /* Insert all-0s SPI now. */ 5270 if (insert_spi) 5271 *((uint32_t *)(udpha + 1)) = 0; 5272 5273 /* 5274 * Copy in the destination address 5275 */ 5276 ipha->ipha_dst = v4dst; 5277 5278 /* 5279 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 5280 */ 5281 if (CLASSD(v4dst)) 5282 ipha->ipha_ttl = udp->udp_multicast_ttl; 5283 5284 udpha->uha_dst_port = port; 5285 udpha->uha_src_port = uha_src_port; 5286 5287 if (ip_snd_opt_len > 0) { 5288 uint32_t cksum; 5289 5290 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 5291 lock_held = B_FALSE; 5292 rw_exit(&udp->udp_rwlock); 5293 /* 5294 * Massage source route putting first source route in ipha_dst. 5295 * Ignore the destination in T_unitdata_req. 5296 * Create a checksum adjustment for a source route, if any. 5297 */ 5298 cksum = ip_massage_options(ipha, us->us_netstack); 5299 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5300 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5301 (ipha->ipha_dst & 0xFFFF); 5302 if ((int)cksum < 0) 5303 cksum--; 5304 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5305 /* 5306 * IP does the checksum if uha_checksum is non-zero, 5307 * We make it easy for IP to include our pseudo header 5308 * by putting our length in uha_checksum. 5309 */ 5310 cksum += ip_len; 5311 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5312 /* There might be a carry. */ 5313 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5314 #ifdef _LITTLE_ENDIAN 5315 if (us->us_do_checksum) 5316 ip_len = (cksum << 16) | ip_len; 5317 #else 5318 if (us->us_do_checksum) 5319 ip_len = (ip_len << 16) | cksum; 5320 else 5321 ip_len <<= 16; 5322 #endif 5323 } else { 5324 /* 5325 * IP does the checksum if uha_checksum is non-zero, 5326 * We make it easy for IP to include our pseudo header 5327 * by putting our length in uha_checksum. 
5328 */ 5329 if (us->us_do_checksum) 5330 ip_len |= (ip_len << 16); 5331 #ifndef _LITTLE_ENDIAN 5332 else 5333 ip_len <<= 16; 5334 #endif 5335 } 5336 ASSERT(!lock_held); 5337 /* Set UDP length and checksum */ 5338 *((uint32_t *)&udpha->uha_length) = ip_len; 5339 if (DB_CRED(mp) != NULL) 5340 mblk_setcred(mp1, DB_CRED(mp)); 5341 5342 if (DB_TYPE(mp) != M_DATA) { 5343 ASSERT(mp != mp1); 5344 freeb(mp); 5345 } 5346 5347 /* mp has been consumed and we'll return success */ 5348 ASSERT(*error == 0); 5349 mp = NULL; 5350 5351 /* We're done. Pass the packet to ip. */ 5352 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 5353 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5354 "udp_wput_end: q %p (%S)", q, "end"); 5355 5356 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 5357 CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || 5358 connp->conn_dontroute || 5359 connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || 5360 optinfo.ip_opt_ill_index != 0 || 5361 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 5362 IPP_ENABLED(IPP_LOCAL_OUT, ipst) || 5363 ipst->ips_ip_g_mrouter != NULL) { 5364 UDP_STAT(us, udp_ip_send); 5365 ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, 5366 &optinfo); 5367 } else { 5368 udp_send_data(udp, connp->conn_wq, mp1, ipha); 5369 } 5370 5371 done: 5372 if (lock_held) 5373 rw_exit(&udp->udp_rwlock); 5374 if (*error != 0) { 5375 ASSERT(mp != NULL); 5376 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5377 } 5378 return (mp); 5379 } 5380 5381 static void 5382 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 5383 { 5384 conn_t *connp = udp->udp_connp; 5385 ipaddr_t src, dst; 5386 ire_t *ire; 5387 ipif_t *ipif = NULL; 5388 mblk_t *ire_fp_mp; 5389 boolean_t retry_caching; 5390 udp_stack_t *us = udp->udp_us; 5391 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5392 5393 dst = ipha->ipha_dst; 5394 src = ipha->ipha_src; 5395 ASSERT(ipha->ipha_ident == 0); 5396 5397 if (CLASSD(dst)) { 5398 int err; 5399 5400 ipif = conn_get_held_ipif(connp, 5401 &connp->conn_multicast_ipif, &err); 5402 5403 if (ipif == NULL || ipif->ipif_isv6 || 5404 (ipif->ipif_ill->ill_phyint->phyint_flags & 5405 PHYI_LOOPBACK)) { 5406 if (ipif != NULL) 5407 ipif_refrele(ipif); 5408 UDP_STAT(us, udp_ip_send); 5409 ip_output(connp, mp, q, IP_WPUT); 5410 return; 5411 } 5412 } 5413 5414 retry_caching = B_FALSE; 5415 mutex_enter(&connp->conn_lock); 5416 ire = connp->conn_ire_cache; 5417 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 5418 5419 if (ire == NULL || ire->ire_addr != dst || 5420 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 5421 retry_caching = B_TRUE; 5422 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 5423 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 5424 5425 ASSERT(ipif != NULL); 5426 if (!IS_ON_SAME_LAN(stq_ill, ipif->ipif_ill)) 5427 retry_caching = B_TRUE; 5428 } 5429 5430 if (!retry_caching) { 5431 ASSERT(ire != NULL); 5432 IRE_REFHOLD(ire); 5433 mutex_exit(&connp->conn_lock); 5434 } else { 5435 boolean_t cached = B_FALSE; 5436 5437 connp->conn_ire_cache = NULL; 5438 mutex_exit(&connp->conn_lock); 5439 5440 /* Release the old ire */ 5441 if (ire != NULL) { 5442 IRE_REFRELE_NOTR(ire); 5443 ire = NULL; 5444 } 5445 5446 if (CLASSD(dst)) { 5447 ASSERT(ipif != NULL); 5448 ire = ire_ctable_lookup(dst, 0, 0, ipif, 5449 connp->conn_zoneid, MBLK_GETLABEL(mp), 5450 MATCH_IRE_ILL, ipst); 5451 } else { 5452 ASSERT(ipif == NULL); 5453 ire = ire_cache_lookup(dst, connp->conn_zoneid, 5454 MBLK_GETLABEL(mp), ipst); 5455 } 5456 5457 if (ire == NULL) { 5458 if (ipif != NULL) 5459 
ipif_refrele(ipif); 5460 UDP_STAT(us, udp_ire_null); 5461 ip_output(connp, mp, q, IP_WPUT); 5462 return; 5463 } 5464 IRE_REFHOLD_NOTR(ire); 5465 5466 mutex_enter(&connp->conn_lock); 5467 if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL && 5468 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 5469 irb_t *irb = ire->ire_bucket; 5470 5471 /* 5472 * IRE's created for non-connection oriented transports 5473 * are normally initialized with IRE_MARK_TEMPORARY set 5474 * in the ire_marks. These IRE's are preferentially 5475 * reaped when the hash chain length in the cache 5476 * bucket exceeds the maximum value specified in 5477 * ip[6]_ire_max_bucket_cnt. This can severely affect 5478 * UDP performance if IRE cache entries that we need 5479 * to reuse are continually removed. To remedy this, 5480 * when we cache the IRE in the conn_t, we remove the 5481 * IRE_MARK_TEMPORARY bit from the ire_marks if it was 5482 * set. 5483 */ 5484 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5485 rw_enter(&irb->irb_lock, RW_WRITER); 5486 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5487 ire->ire_marks &= ~IRE_MARK_TEMPORARY; 5488 irb->irb_tmp_ire_cnt--; 5489 } 5490 rw_exit(&irb->irb_lock); 5491 } 5492 connp->conn_ire_cache = ire; 5493 cached = B_TRUE; 5494 } 5495 mutex_exit(&connp->conn_lock); 5496 5497 /* 5498 * We can continue to use the ire but since it was not 5499 * cached, we should drop the extra reference. 5500 */ 5501 if (!cached) 5502 IRE_REFRELE_NOTR(ire); 5503 } 5504 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 5505 ASSERT(!CLASSD(dst) || ipif != NULL); 5506 5507 /* 5508 * Check if we can take the fast-path. 5509 * Note that "incomplete" ire's (where the link-layer for next hop 5510 * is not resolved, or where the fast-path header in nce_fp_mp is not 5511 * available yet) are sent down the legacy (slow) path 5512 */ 5513 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 5514 (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || 5515 (ire->ire_max_frag < ntohs(ipha->ipha_length)) || 5516 ((ire->ire_nce == NULL) || 5517 ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) || 5518 connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) { 5519 if (ipif != NULL) 5520 ipif_refrele(ipif); 5521 UDP_STAT(us, udp_ip_ire_send); 5522 IRE_REFRELE(ire); 5523 ip_output(connp, mp, q, IP_WPUT); 5524 return; 5525 } 5526 5527 if (src == INADDR_ANY && !connp->conn_unspec_src) { 5528 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 5529 ipha->ipha_src = ipif->ipif_src_addr; 5530 else 5531 ipha->ipha_src = ire->ire_src_addr; 5532 } 5533 5534 if (ipif != NULL) 5535 ipif_refrele(ipif); 5536 5537 udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid); 5538 } 5539 5540 static void 5541 udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) 5542 { 5543 ipaddr_t src, dst; 5544 ill_t *ill; 5545 mblk_t *ire_fp_mp; 5546 uint_t ire_fp_mp_len; 5547 uint16_t *up; 5548 uint32_t cksum, hcksum_txflags; 5549 queue_t *dev_q; 5550 udp_t *udp = connp->conn_udp; 5551 ipha_t *ipha = (ipha_t *)mp->b_rptr; 5552 udp_stack_t *us = udp->udp_us; 5553 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5554 boolean_t ll_multicast = B_FALSE; 5555 5556 dev_q = ire->ire_stq->q_next; 5557 ASSERT(dev_q != NULL); 5558 5559 ill = ire_to_ill(ire); 5560 ASSERT(ill != NULL); 5561 5562 /* is queue flow controlled? 
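 * If data is already queued on the stream, the conn is draining, or
 * the device queue is above its high-water mark, we queue the packet
 * when ips_ip_output_queue is set and otherwise drop it, counting an
 * output discard either way.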
*/ 5563 if (q->q_first != NULL || connp->conn_draining || 5564 DEV_Q_FLOW_BLOCKED(dev_q)) { 5565 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 5566 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 5567 5568 if (ipst->ips_ip_output_queue) 5569 (void) putq(connp->conn_wq, mp); 5570 else 5571 freemsg(mp); 5572 ire_refrele(ire); 5573 return; 5574 } 5575 5576 ire_fp_mp = ire->ire_nce->nce_fp_mp; 5577 ire_fp_mp_len = MBLKL(ire_fp_mp); 5578 ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len); 5579 5580 dst = ipha->ipha_dst; 5581 src = ipha->ipha_src; 5582 5583 5584 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 5585 5586 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 5587 #ifndef _BIG_ENDIAN 5588 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 5589 #endif 5590 5591 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 5592 ASSERT(ill->ill_hcksum_capab != NULL); 5593 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 5594 } else { 5595 hcksum_txflags = 0; 5596 } 5597 5598 /* pseudo-header checksum (do it in parts for IP header checksum) */ 5599 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 5600 5601 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 5602 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 5603 if (*up != 0) { 5604 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 5605 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 5606 ntohs(ipha->ipha_length), cksum); 5607 5608 /* Software checksum? */ 5609 if (DB_CKSUMFLAGS(mp) == 0) { 5610 UDP_STAT(us, udp_out_sw_cksum); 5611 UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, 5612 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 5613 } 5614 } 5615 5616 if (!CLASSD(dst)) { 5617 ipha->ipha_fragment_offset_and_flags |= 5618 (uint32_t)htons(ire->ire_frag_flag); 5619 } 5620 5621 /* Calculate IP header checksum if hardware isn't capable */ 5622 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 5623 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 5624 ((uint16_t *)ipha)[4]); 5625 } 5626 5627 if (CLASSD(dst)) { 5628 if (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL) { 5629 ip_multicast_loopback(q, ill, mp, 5630 connp->conn_multicast_loop ? 
0 : 5631 IP_FF_NO_MCAST_LOOP, zoneid); 5632 } 5633 5634 /* If multicast TTL is 0 then we are done */ 5635 if (ipha->ipha_ttl == 0) { 5636 freemsg(mp); 5637 ire_refrele(ire); 5638 return; 5639 } 5640 ll_multicast = B_TRUE; 5641 } 5642 5643 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 5644 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 5645 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 5646 5647 UPDATE_OB_PKT_COUNT(ire); 5648 ire->ire_last_used_time = lbolt; 5649 5650 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); 5651 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 5652 ntohs(ipha->ipha_length)); 5653 5654 DTRACE_PROBE4(ip4__physical__out__start, 5655 ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); 5656 FW_HOOKS(ipst->ips_ip4_physical_out_event, 5657 ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp, 5658 ll_multicast, ipst); 5659 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 5660 if (ipst->ips_ipobs_enabled && mp != NULL) { 5661 zoneid_t szone; 5662 5663 szone = ip_get_zoneid_v4(ipha->ipha_src, mp, 5664 ipst, ALL_ZONES); 5665 ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, 5666 ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst); 5667 } 5668 5669 if (mp != NULL) { 5670 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 5671 void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill, 5672 ipha_t *, ipha, ip6_t *, NULL, int, 0); 5673 5674 if (ILL_DIRECT_CAPABLE(ill)) { 5675 ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct; 5676 5677 (void) idd->idd_tx_df(idd->idd_tx_dh, mp, 5678 (uintptr_t)connp, 0); 5679 } else { 5680 putnext(ire->ire_stq, mp); 5681 } 5682 } 5683 IRE_REFRELE(ire); 5684 } 5685 5686 static boolean_t 5687 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst, 5688 boolean_t *update_lastdst) 5689 { 5690 udp_t *udp = Q_TO_UDP(wq); 5691 int err; 5692 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 5693 udp_stack_t *us = udp->udp_us; 5694 5695 err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred), 5696 dst, opt_storage, udp->udp_connp->conn_mac_exempt, 5697 us->us_netstack->netstack_ip); 5698 if (err == 0) { 5699 err = tsol_update_sticky(&udp->udp_sticky_ipp, 5700 &udp->udp_label_len_v6, opt_storage); 5701 } 5702 if (err != 0) { 5703 DTRACE_PROBE4( 5704 tx__ip__log__drop__updatelabel__udp6, 5705 char *, "queue(1) failed to update options(2) on mp(3)", 5706 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5707 } else { 5708 *update_lastdst = B_TRUE; 5709 } 5710 return (err); 5711 } 5712 5713 static int 5714 udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr, 5715 pid_t pid) 5716 { 5717 udp_t *udp = connp->conn_udp; 5718 udp_stack_t *us = udp->udp_us; 5719 ipaddr_t v4dst; 5720 in_port_t dstport; 5721 boolean_t mapped_addr; 5722 struct sockaddr_storage ss; 5723 sin_t *sin; 5724 sin6_t *sin6; 5725 struct sockaddr *addr; 5726 socklen_t addrlen; 5727 int error; 5728 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5729 5730 /* M_DATA for connected socket */ 5731 5732 ASSERT(udp->udp_issocket || IPCL_IS_NONSTR(connp)); 5733 UDP_DBGSTAT(us, udp_data_conn); 5734 5735 mutex_enter(&connp->conn_lock); 5736 if (udp->udp_state != TS_DATA_XFER) { 5737 mutex_exit(&connp->conn_lock); 5738 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5739 UDP_STAT(us, udp_out_err_notconn); 5740 freemsg(mp); 5741 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5742 "udp_wput_end: connp %p (%S)", connp, 5743 "not-connected; address required"); 5744 return (EDESTADDRREQ); 5745 } 5746 5747 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 5748 if (mapped_addr) 5749 
IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 5750 5751 /* Initialize addr and addrlen as if they're passed in */ 5752 if (udp->udp_family == AF_INET) { 5753 sin = (sin_t *)&ss; 5754 sin->sin_family = AF_INET; 5755 dstport = sin->sin_port = udp->udp_dstport; 5756 ASSERT(mapped_addr); 5757 sin->sin_addr.s_addr = v4dst; 5758 addr = (struct sockaddr *)sin; 5759 addrlen = sizeof (*sin); 5760 } else { 5761 sin6 = (sin6_t *)&ss; 5762 sin6->sin6_family = AF_INET6; 5763 dstport = sin6->sin6_port = udp->udp_dstport; 5764 sin6->sin6_flowinfo = udp->udp_flowinfo; 5765 sin6->sin6_addr = udp->udp_v6dst; 5766 sin6->sin6_scope_id = 0; 5767 sin6->__sin6_src_id = 0; 5768 addr = (struct sockaddr *)sin6; 5769 addrlen = sizeof (*sin6); 5770 } 5771 mutex_exit(&connp->conn_lock); 5772 5773 if (mapped_addr) { 5774 /* 5775 * Handle both AF_INET and AF_INET6; the latter 5776 * for IPV4 mapped destination addresses. Note 5777 * here that both addr and addrlen point to the 5778 * corresponding struct depending on the address 5779 * family of the socket. 5780 */ 5781 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 5782 insert_spi, msg, cr, pid); 5783 } else { 5784 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid); 5785 } 5786 if (error == 0) { 5787 ASSERT(mp == NULL); 5788 return (0); 5789 } 5790 5791 UDP_STAT(us, udp_out_err_output); 5792 ASSERT(mp != NULL); 5793 if (IPCL_IS_NONSTR(connp)) { 5794 freemsg(mp); 5795 return (error); 5796 } else { 5797 /* mp is freed by the following routine */ 5798 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, 5799 (t_scalar_t)addrlen, (t_scalar_t)error); 5800 return (0); 5801 } 5802 } 5803 5804 /* ARGSUSED */ 5805 static int 5806 udp_send_not_connected(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 5807 socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid) 5808 { 5809 5810 udp_t *udp = connp->conn_udp; 5811 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5812 int error = 0; 5813 sin6_t *sin6; 5814 sin_t *sin; 5815 uint_t srcid; 5816 uint16_t port; 5817 ipaddr_t v4dst; 5818 5819 5820 ASSERT(addr != NULL); 5821 5822 switch (udp->udp_family) { 5823 case AF_INET6: 5824 sin6 = (sin6_t *)addr; 5825 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 5826 /* 5827 * Destination is a non-IPv4-compatible IPv6 address. 5828 * Send out an IPv6 format packet. 5829 */ 5830 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, 5831 pid); 5832 if (error != 0) 5833 goto ud_error; 5834 5835 return (0); 5836 } 5837 /* 5838 * If the local address is not zero or a mapped address 5839 * return an error. It would be possible to send an IPv4 5840 * packet but the response would never make it back to the 5841 * application since it is bound to a non-mapped address. 
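 * EADDRNOTAVAIL is returned in that case.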
5842 */
5843 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) &&
5844 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
5845 error = EADDRNOTAVAIL;
5846 goto ud_error;
5847 }
5848 /* Send IPv4 packet without modifying udp_ipversion */
5849 /* Extract port and ipaddr */
5850 port = sin6->sin6_port;
5851 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst);
5852 srcid = sin6->__sin6_src_id;
5853 break;
5854
5855 case AF_INET:
5856 sin = (sin_t *)addr;
5857 /* Extract port and ipaddr */
5858 port = sin->sin_port;
5859 v4dst = sin->sin_addr.s_addr;
5860 srcid = 0;
5861 break;
5862 }
5863
5864 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi,
5865 msg, cr, pid);
5866
5867 if (error == 0) {
5868 ASSERT(mp == NULL);
5869 return (0);
5870 }
5871
5872 ud_error:
5873 ASSERT(mp != NULL);
5874
5875 return (error);
5876 }
5877
5878 /*
5879 * This routine handles all messages passed downstream. It either
5880 * consumes the message or passes it downstream; it never queues a
5881 * message.
5882 *
5883 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode
5884 * is valid when we are directly beneath the stream head, and thus sockfs
5885 * is able to bypass STREAMS and directly call us, passing along the sockaddr
5886 * structure without the cumbersome T_UNITDATA_REQ interface for the case of
5887 * connected endpoints.
5888 */
5889 void
5890 udp_wput(queue_t *q, mblk_t *mp)
5891 {
5892 conn_t *connp = Q_TO_CONN(q);
5893 udp_t *udp = connp->conn_udp;
5894 int error = 0;
5895 struct sockaddr *addr;
5896 socklen_t addrlen;
5897 udp_stack_t *us = udp->udp_us;
5898
5899 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START,
5900 "udp_wput_start: queue %p mp %p", q, mp);
5901
5902 /*
5903 * We directly handle several cases here: T_UNITDATA_REQ message
5904 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
5905 * socket.
5906 */
5907 switch (DB_TYPE(mp)) {
5908 case M_DATA:
5909 /*
5910 * Quick check for error cases. Checks will be done again
5911 * under the lock later on.
5912 */
5913 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) {
5914 /* Not connected; address is required */
5915 BUMP_MIB(&us->us_udp_mib, udpOutErrors);
5916 UDP_STAT(us, udp_out_err_notconn);
5917 freemsg(mp);
5918 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
5919 "udp_wput_end: connp %p (%S)", connp,
5920 "not-connected; address required");
5921 return;
5922 }
5923 (void) udp_send_connected(connp, mp, NULL, NULL, -1);
5924 return;
5925
5926 case M_PROTO:
5927 case M_PCPROTO: {
5928 struct T_unitdata_req *tudr;
5929
5930 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX);
5931 tudr = (struct T_unitdata_req *)mp->b_rptr;
5932
5933 /* Handle valid T_UNITDATA_REQ here */
5934 if (MBLKL(mp) >= sizeof (*tudr) &&
5935 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) {
5936 if (mp->b_cont == NULL) {
5937 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
5938 "udp_wput_end: q %p (%S)", q, "badaddr");
5939 error = EPROTO;
5940 goto ud_error;
5941 }
5942
5943 if (!MBLKIN(mp, 0, tudr->DEST_offset +
5944 tudr->DEST_length)) {
5945 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
5946 "udp_wput_end: q %p (%S)", q, "badaddr");
5947 error = EADDRNOTAVAIL;
5948 goto ud_error;
5949 }
5950 /*
5951 * If a port has not been bound to the stream, fail.
5952 * This is not a problem when sockfs is directly
5953 * above us, because it will ensure that the socket
5954 * is first bound before allowing data to be sent.
5955 */ 5956 if (udp->udp_state == TS_UNBND) { 5957 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5958 "udp_wput_end: q %p (%S)", q, "outstate"); 5959 error = EPROTO; 5960 goto ud_error; 5961 } 5962 addr = (struct sockaddr *) 5963 &mp->b_rptr[tudr->DEST_offset]; 5964 addrlen = tudr->DEST_length; 5965 if (tudr->OPT_length != 0) 5966 UDP_STAT(us, udp_out_opt); 5967 break; 5968 } 5969 /* FALLTHRU */ 5970 } 5971 default: 5972 udp_wput_other(q, mp); 5973 return; 5974 } 5975 ASSERT(addr != NULL); 5976 5977 error = udp_send_not_connected(connp, mp, addr, addrlen, NULL, NULL, 5978 -1); 5979 if (error != 0) { 5980 ud_error: 5981 UDP_STAT(us, udp_out_err_output); 5982 ASSERT(mp != NULL); 5983 /* mp is freed by the following routine */ 5984 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 5985 (t_scalar_t)error); 5986 } 5987 } 5988 5989 /* ARGSUSED */ 5990 static void 5991 udp_wput_fallback(queue_t *wq, mblk_t *mp) 5992 { 5993 #ifdef DEBUG 5994 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 5995 #endif 5996 freemsg(mp); 5997 } 5998 5999 6000 /* 6001 * udp_output_v6(): 6002 * Assumes that udp_wput did some sanity checking on the destination 6003 * address. 6004 */ 6005 static mblk_t * 6006 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, 6007 struct nmsghdr *msg, cred_t *cr, pid_t pid) 6008 { 6009 ip6_t *ip6h; 6010 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6011 mblk_t *mp1 = mp; 6012 mblk_t *mp2; 6013 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6014 size_t ip_len; 6015 udpha_t *udph; 6016 udp_t *udp = connp->conn_udp; 6017 udp_stack_t *us = udp->udp_us; 6018 queue_t *q = connp->conn_wq; 6019 ip6_pkt_t ipp_s; /* For ancillary data options */ 6020 ip6_pkt_t *ipp = &ipp_s; 6021 ip6_pkt_t *tipp; /* temporary ipp */ 6022 uint32_t csum = 0; 6023 uint_t ignore = 0; 6024 uint_t option_exists = 0, is_sticky = 0; 6025 uint8_t *cp; 6026 uint8_t *nxthdr_ptr; 6027 in6_addr_t ip6_dst; 6028 in_port_t port; 6029 udpattrs_t attrs; 6030 boolean_t opt_present; 6031 ip6_hbh_t *hopoptsptr = NULL; 6032 uint_t hopoptslen = 0; 6033 boolean_t is_ancillary = B_FALSE; 6034 size_t sth_wroff = 0; 6035 ire_t *ire; 6036 boolean_t update_lastdst = B_FALSE; 6037 6038 *error = 0; 6039 6040 /* 6041 * If the local address is a mapped address return 6042 * an error. 6043 * It would be possible to send an IPv6 packet but the 6044 * response would never make it back to the application 6045 * since it is bound to a mapped address. 
6046 */ 6047 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6048 *error = EADDRNOTAVAIL; 6049 goto done; 6050 } 6051 6052 ipp->ipp_fields = 0; 6053 ipp->ipp_sticky_ignored = 0; 6054 6055 /* 6056 * If TPI options passed in, feed it for verification and handling 6057 */ 6058 attrs.udpattr_credset = B_FALSE; 6059 opt_present = B_FALSE; 6060 if (IPCL_IS_NONSTR(connp)) { 6061 if (msg->msg_controllen != 0) { 6062 attrs.udpattr_ipp6 = ipp; 6063 attrs.udpattr_mb = mp; 6064 6065 rw_enter(&udp->udp_rwlock, RW_WRITER); 6066 *error = process_auxiliary_options(connp, 6067 msg->msg_control, msg->msg_controllen, 6068 &attrs, &udp_opt_obj, udp_opt_set); 6069 rw_exit(&udp->udp_rwlock); 6070 if (*error) 6071 goto done; 6072 ASSERT(*error == 0); 6073 opt_present = B_TRUE; 6074 } 6075 } else { 6076 if (DB_TYPE(mp) != M_DATA) { 6077 mp1 = mp->b_cont; 6078 if (((struct T_unitdata_req *) 6079 mp->b_rptr)->OPT_length != 0) { 6080 attrs.udpattr_ipp6 = ipp; 6081 attrs.udpattr_mb = mp; 6082 if (udp_unitdata_opt_process(q, mp, error, 6083 &attrs) < 0) { 6084 goto done; 6085 } 6086 ASSERT(*error == 0); 6087 opt_present = B_TRUE; 6088 } 6089 } 6090 } 6091 6092 /* 6093 * Determine whether we need to mark the mblk with the user's 6094 * credentials. 6095 */ 6096 ire = connp->conn_ire_cache; 6097 if (is_system_labeled() || IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || 6098 (ire == NULL) || 6099 (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) || 6100 (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) { 6101 if (cr != NULL && DB_CRED(mp) == NULL) 6102 msg_setcredpid(mp, cr, pid); 6103 } 6104 6105 rw_enter(&udp->udp_rwlock, RW_READER); 6106 ignore = ipp->ipp_sticky_ignored; 6107 6108 /* mp1 points to the M_DATA mblk carrying the packet */ 6109 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6110 6111 if (sin6->sin6_scope_id != 0 && 6112 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6113 /* 6114 * IPPF_SCOPE_ID is special. It's neither a sticky 6115 * option nor ancillary data. It needs to be 6116 * explicitly set in options_exists. 6117 */ 6118 option_exists |= IPPF_SCOPE_ID; 6119 } 6120 6121 /* 6122 * Compute the destination address 6123 */ 6124 ip6_dst = sin6->sin6_addr; 6125 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6126 ip6_dst = ipv6_loopback; 6127 6128 port = sin6->sin6_port; 6129 6130 /* 6131 * Cluster and TSOL notes, Cluster check: 6132 * see comments in udp_output_v4(). 6133 */ 6134 mutex_enter(&connp->conn_lock); 6135 6136 if (cl_inet_connect2 != NULL && 6137 (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) || 6138 port != udp->udp_lastdstport)) { 6139 mutex_exit(&connp->conn_lock); 6140 *error = 0; 6141 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error); 6142 if (*error != 0) { 6143 *error = EHOSTUNREACH; 6144 rw_exit(&udp->udp_rwlock); 6145 goto done; 6146 } 6147 update_lastdst = B_TRUE; 6148 mutex_enter(&connp->conn_lock); 6149 } 6150 6151 /* 6152 * If we're not going to the same destination as last time, then 6153 * recompute the label required. This is done in a separate routine to 6154 * avoid blowing up our stack here. 6155 * 6156 * TSOL Note: Since we are not in WRITER mode, UDP packets 6157 * to different destination may require different labels, 6158 * or worse, UDP packets to same IP address may require 6159 * different labels due to use of shared all-zones address. 6160 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6161 * and sticky ipp_hopoptslen are consistent for the current 6162 * destination and are updated atomically. 
6163 */ 6164 if (is_system_labeled()) { 6165 /* Using UDP MLP requires SCM_UCRED from user */ 6166 if (connp->conn_mlp_type != mlptSingle && 6167 !attrs.udpattr_credset) { 6168 DTRACE_PROBE4( 6169 tx__ip__log__info__output__udp6, 6170 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6171 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6172 *error = ECONNREFUSED; 6173 rw_exit(&udp->udp_rwlock); 6174 mutex_exit(&connp->conn_lock); 6175 goto done; 6176 } 6177 /* 6178 * update label option for this UDP socket if 6179 * - the destination has changed, or 6180 * - the UDP socket is MLP 6181 */ 6182 if ((opt_present || 6183 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6184 connp->conn_mlp_type != mlptSingle) && 6185 (*error = udp_update_label_v6(q, mp, &ip6_dst, 6186 &update_lastdst)) != 0) { 6187 rw_exit(&udp->udp_rwlock); 6188 mutex_exit(&connp->conn_lock); 6189 goto done; 6190 } 6191 } 6192 6193 if (update_lastdst) { 6194 udp->udp_v6lastdst = ip6_dst; 6195 udp->udp_lastdstport = port; 6196 } 6197 6198 /* 6199 * If there's a security label here, then we ignore any options the 6200 * user may try to set. We keep the peer's label as a hidden sticky 6201 * option. We make a private copy of this label before releasing the 6202 * lock so that label is kept consistent with the destination addr. 6203 */ 6204 if (udp->udp_label_len_v6 > 0) { 6205 ignore &= ~IPPF_HOPOPTS; 6206 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6207 } 6208 6209 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6210 /* No sticky options nor ancillary data. */ 6211 mutex_exit(&connp->conn_lock); 6212 goto no_options; 6213 } 6214 6215 /* 6216 * Go through the options figuring out where each is going to 6217 * come from and build two masks. The first mask indicates if 6218 * the option exists at all. The second mask indicates if the 6219 * option is sticky or ancillary. 
6220 */ 6221 if (!(ignore & IPPF_HOPOPTS)) { 6222 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6223 option_exists |= IPPF_HOPOPTS; 6224 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6225 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6226 option_exists |= IPPF_HOPOPTS; 6227 is_sticky |= IPPF_HOPOPTS; 6228 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6229 hopoptsptr = kmem_alloc( 6230 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6231 if (hopoptsptr == NULL) { 6232 *error = ENOMEM; 6233 mutex_exit(&connp->conn_lock); 6234 goto done; 6235 } 6236 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6237 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6238 hopoptslen); 6239 udp_ip_hdr_len += hopoptslen; 6240 } 6241 } 6242 mutex_exit(&connp->conn_lock); 6243 6244 if (!(ignore & IPPF_RTHDR)) { 6245 if (ipp->ipp_fields & IPPF_RTHDR) { 6246 option_exists |= IPPF_RTHDR; 6247 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6248 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6249 option_exists |= IPPF_RTHDR; 6250 is_sticky |= IPPF_RTHDR; 6251 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6252 } 6253 } 6254 6255 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6256 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6257 option_exists |= IPPF_RTDSTOPTS; 6258 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6259 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6260 option_exists |= IPPF_RTDSTOPTS; 6261 is_sticky |= IPPF_RTDSTOPTS; 6262 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6263 } 6264 } 6265 6266 if (!(ignore & IPPF_DSTOPTS)) { 6267 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6268 option_exists |= IPPF_DSTOPTS; 6269 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6270 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6271 option_exists |= IPPF_DSTOPTS; 6272 is_sticky |= IPPF_DSTOPTS; 6273 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6274 } 6275 } 6276 6277 if (!(ignore & IPPF_IFINDEX)) { 6278 if (ipp->ipp_fields & IPPF_IFINDEX) { 6279 option_exists |= IPPF_IFINDEX; 6280 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6281 option_exists |= IPPF_IFINDEX; 6282 is_sticky |= IPPF_IFINDEX; 6283 } 6284 } 6285 6286 if (!(ignore & IPPF_ADDR)) { 6287 if (ipp->ipp_fields & IPPF_ADDR) { 6288 option_exists |= IPPF_ADDR; 6289 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6290 option_exists |= IPPF_ADDR; 6291 is_sticky |= IPPF_ADDR; 6292 } 6293 } 6294 6295 if (!(ignore & IPPF_DONTFRAG)) { 6296 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6297 option_exists |= IPPF_DONTFRAG; 6298 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6299 option_exists |= IPPF_DONTFRAG; 6300 is_sticky |= IPPF_DONTFRAG; 6301 } 6302 } 6303 6304 if (!(ignore & IPPF_USE_MIN_MTU)) { 6305 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6306 option_exists |= IPPF_USE_MIN_MTU; 6307 } else if (udp->udp_sticky_ipp.ipp_fields & 6308 IPPF_USE_MIN_MTU) { 6309 option_exists |= IPPF_USE_MIN_MTU; 6310 is_sticky |= IPPF_USE_MIN_MTU; 6311 } 6312 } 6313 6314 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6315 option_exists |= IPPF_HOPLIMIT; 6316 /* IPV6_HOPLIMIT can never be sticky */ 6317 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6318 6319 if (!(ignore & IPPF_UNICAST_HOPS) && 6320 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6321 option_exists |= IPPF_UNICAST_HOPS; 6322 is_sticky |= IPPF_UNICAST_HOPS; 6323 } 6324 6325 if (!(ignore & IPPF_MULTICAST_HOPS) && 6326 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6327 option_exists |= 
IPPF_MULTICAST_HOPS;
6328 is_sticky |= IPPF_MULTICAST_HOPS;
6329 }
6330
6331 if (!(ignore & IPPF_TCLASS)) {
6332 if (ipp->ipp_fields & IPPF_TCLASS) {
6333 option_exists |= IPPF_TCLASS;
6334 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) {
6335 option_exists |= IPPF_TCLASS;
6336 is_sticky |= IPPF_TCLASS;
6337 }
6338 }
6339
6340 if (!(ignore & IPPF_NEXTHOP) &&
6341 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) {
6342 option_exists |= IPPF_NEXTHOP;
6343 is_sticky |= IPPF_NEXTHOP;
6344 }
6345
6346 no_options:
6347
6348 /*
6349 * If any options carried in the ip6i_t were specified, we
6350 * need to account for the ip6i_t in the data we'll be sending
6351 * down.
6352 */
6353 if (option_exists & IPPF_HAS_IP6I)
6354 udp_ip_hdr_len += sizeof (ip6i_t);
6355
6356 /* check/fix buffer config, setup pointers into it */
6357 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len];
6358 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) ||
6359 !OK_32PTR(ip6h)) {
6360
6361 /* Try to get everything in a single mblk next time */
6362 if (udp_ip_hdr_len > udp->udp_max_hdr_len) {
6363 udp->udp_max_hdr_len = udp_ip_hdr_len;
6364 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
6365 }
6366
6367 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO);
6368 if (mp2 == NULL) {
6369 *error = ENOMEM;
6370 rw_exit(&udp->udp_rwlock);
6371 goto done;
6372 }
6373 mp2->b_wptr = DB_LIM(mp2);
6374 mp2->b_cont = mp1;
6375 mp1 = mp2;
6376 if (DB_TYPE(mp) != M_DATA)
6377 mp->b_cont = mp1;
6378 else
6379 mp = mp1;
6380
6381 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len);
6382 }
6383 mp1->b_rptr = (unsigned char *)ip6h;
6384 ip6i = (ip6i_t *)ip6h;
6385
6386 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp)
6387 if (option_exists & IPPF_HAS_IP6I) {
6388 ip6h = (ip6_t *)&ip6i[1];
6389 ip6i->ip6i_flags = 0;
6390 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
6391
6392 /* sin6_scope_id takes precedence over IPPF_IFINDEX */
6393 if (option_exists & IPPF_SCOPE_ID) {
6394 ip6i->ip6i_flags |= IP6I_IFINDEX;
6395 ip6i->ip6i_ifindex = sin6->sin6_scope_id;
6396 } else if (option_exists & IPPF_IFINDEX) {
6397 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX);
6398 ASSERT(tipp->ipp_ifindex != 0);
6399 ip6i->ip6i_flags |= IP6I_IFINDEX;
6400 ip6i->ip6i_ifindex = tipp->ipp_ifindex;
6401 }
6402
6403 if (option_exists & IPPF_ADDR) {
6404 /*
6405 * Enable per-packet source address verification if
6406 * IPV6_PKTINFO specified the source address.
6407 * ip6_src is set in the transport's _wput function.
6408 */
6409 ip6i->ip6i_flags |= IP6I_VERIFY_SRC;
6410 }
6411
6412 if (option_exists & IPPF_DONTFRAG) {
6413 ip6i->ip6i_flags |= IP6I_DONTFRAG;
6414 }
6415
6416 if (option_exists & IPPF_USE_MIN_MTU) {
6417 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU(
6418 ip6i->ip6i_flags, ipp->ipp_use_min_mtu);
6419 }
6420
6421 if (option_exists & IPPF_NEXTHOP) {
6422 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP);
6423 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop));
6424 ip6i->ip6i_flags |= IP6I_NEXTHOP;
6425 ip6i->ip6i_nexthop = tipp->ipp_nexthop;
6426 }
6427
6428 /*
6429 * Tell IP this is an ip6i_t private header.
6430 */
6431 ip6i->ip6i_nxt = IPPROTO_RAW;
6432 }
6433
6434 /* Initialize IPv6 header */
6435 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
6436 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src));
6437
6438 /* Set the hoplimit of the outgoing packet. */
6439 if (option_exists & IPPF_HOPLIMIT) {
6440 /* IPV6_HOPLIMIT ancillary data overrides all other settings.
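 * When it is absent, the code below falls back to udp_multicast_ttl
 * for multicast destinations and to udp_ttl for unicast ones.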
*/ 6441 ip6h->ip6_hops = ipp->ipp_hoplimit; 6442 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6443 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6444 ip6h->ip6_hops = udp->udp_multicast_ttl; 6445 if (option_exists & IPPF_MULTICAST_HOPS) 6446 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6447 } else { 6448 ip6h->ip6_hops = udp->udp_ttl; 6449 if (option_exists & IPPF_UNICAST_HOPS) 6450 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6451 } 6452 6453 if (option_exists & IPPF_ADDR) { 6454 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6455 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6456 ip6h->ip6_src = tipp->ipp_addr; 6457 } else { 6458 /* 6459 * The source address was not set using IPV6_PKTINFO. 6460 * First look at the bound source. 6461 * If unspecified fallback to __sin6_src_id. 6462 */ 6463 ip6h->ip6_src = udp->udp_v6src; 6464 if (sin6->__sin6_src_id != 0 && 6465 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6466 ip_srcid_find_id(sin6->__sin6_src_id, 6467 &ip6h->ip6_src, connp->conn_zoneid, 6468 us->us_netstack); 6469 } 6470 } 6471 6472 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6473 cp = (uint8_t *)&ip6h[1]; 6474 6475 /* 6476 * Here's where we have to start stringing together 6477 * any extension headers in the right order: 6478 * Hop-by-hop, destination, routing, and final destination opts. 6479 */ 6480 if (option_exists & IPPF_HOPOPTS) { 6481 /* Hop-by-hop options */ 6482 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6483 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6484 if (hopoptslen == 0) { 6485 hopoptsptr = tipp->ipp_hopopts; 6486 hopoptslen = tipp->ipp_hopoptslen; 6487 is_ancillary = B_TRUE; 6488 } 6489 6490 *nxthdr_ptr = IPPROTO_HOPOPTS; 6491 nxthdr_ptr = &hbh->ip6h_nxt; 6492 6493 bcopy(hopoptsptr, cp, hopoptslen); 6494 cp += hopoptslen; 6495 6496 if (hopoptsptr != NULL && !is_ancillary) { 6497 kmem_free(hopoptsptr, hopoptslen); 6498 hopoptsptr = NULL; 6499 hopoptslen = 0; 6500 } 6501 } 6502 /* 6503 * En-route destination options 6504 * Only do them if there's a routing header as well 6505 */ 6506 if (option_exists & IPPF_RTDSTOPTS) { 6507 ip6_dest_t *dst = (ip6_dest_t *)cp; 6508 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6509 6510 *nxthdr_ptr = IPPROTO_DSTOPTS; 6511 nxthdr_ptr = &dst->ip6d_nxt; 6512 6513 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6514 cp += tipp->ipp_rtdstoptslen; 6515 } 6516 /* 6517 * Routing header next 6518 */ 6519 if (option_exists & IPPF_RTHDR) { 6520 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6521 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6522 6523 *nxthdr_ptr = IPPROTO_ROUTING; 6524 nxthdr_ptr = &rt->ip6r_nxt; 6525 6526 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6527 cp += tipp->ipp_rthdrlen; 6528 } 6529 /* 6530 * Do ultimate destination options 6531 */ 6532 if (option_exists & IPPF_DSTOPTS) { 6533 ip6_dest_t *dest = (ip6_dest_t *)cp; 6534 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6535 6536 *nxthdr_ptr = IPPROTO_DSTOPTS; 6537 nxthdr_ptr = &dest->ip6d_nxt; 6538 6539 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6540 cp += tipp->ipp_dstoptslen; 6541 } 6542 /* 6543 * Now set the last header pointer to the proto passed in 6544 */ 6545 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6546 *nxthdr_ptr = IPPROTO_UDP; 6547 6548 /* Update UDP header */ 6549 udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); 6550 udph->uha_dst_port = sin6->sin6_port; 6551 udph->uha_src_port = udp->udp_port; 6552 6553 /* 6554 * Copy in the destination address 6555 */ 6556 ip6h->ip6_dst = ip6_dst; 6557 6558 ip6h->ip6_vcf = 6559 (IPV6_DEFAULT_VERS_AND_FLOW & 
IPV6_VERS_AND_FLOW_MASK) | 6560 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6561 6562 if (option_exists & IPPF_TCLASS) { 6563 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6564 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6565 tipp->ipp_tclass); 6566 } 6567 rw_exit(&udp->udp_rwlock); 6568 6569 if (option_exists & IPPF_RTHDR) { 6570 ip6_rthdr_t *rth; 6571 6572 /* 6573 * Perform any processing needed for source routing. 6574 * We know that all extension headers will be in the same mblk 6575 * as the IPv6 header. 6576 */ 6577 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6578 if (rth != NULL && rth->ip6r_segleft != 0) { 6579 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6580 /* 6581 * Drop packet - only support Type 0 routing. 6582 * Notify the application as well. 6583 */ 6584 *error = EPROTO; 6585 goto done; 6586 } 6587 6588 /* 6589 * rth->ip6r_len is twice the number of 6590 * addresses in the header. Thus it must be even. 6591 */ 6592 if (rth->ip6r_len & 0x1) { 6593 *error = EPROTO; 6594 goto done; 6595 } 6596 /* 6597 * Shuffle the routing header and ip6_dst 6598 * addresses, and get the checksum difference 6599 * between the first hop (in ip6_dst) and 6600 * the destination (in the last routing hdr entry). 6601 */ 6602 csum = ip_massage_options_v6(ip6h, rth, 6603 us->us_netstack); 6604 /* 6605 * Verify that the first hop isn't a mapped address. 6606 * Routers along the path need to do this verification 6607 * for subsequent hops. 6608 */ 6609 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6610 *error = EADDRNOTAVAIL; 6611 goto done; 6612 } 6613 6614 cp += (rth->ip6r_len + 1)*8; 6615 } 6616 } 6617 6618 /* count up length of UDP packet */ 6619 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6620 if ((mp2 = mp1->b_cont) != NULL) { 6621 do { 6622 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6623 ip_len += (uint32_t)MBLKL(mp2); 6624 } while ((mp2 = mp2->b_cont) != NULL); 6625 } 6626 6627 /* 6628 * If the size of the packet is greater than the maximum allowed by 6629 * ip, return an error. Passing this down could cause panics because 6630 * the size will have wrapped and be inconsistent with the msg size. 6631 */ 6632 if (ip_len > IP_MAXPACKET) { 6633 *error = EMSGSIZE; 6634 goto done; 6635 } 6636 6637 /* Store the UDP length. Subtract length of extension hdrs */ 6638 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 6639 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 6640 6641 /* 6642 * We make it easy for IP to include our pseudo header 6643 * by putting our length in uh_checksum, modified (if 6644 * we have a routing header) by the checksum difference 6645 * between the ultimate destination and first hop addresses. 6646 * Note: UDP over IPv6 must always checksum the packet. 6647 */ 6648 csum += udph->uha_length; 6649 csum = (csum & 0xFFFF) + (csum >> 16); 6650 udph->uha_checksum = (uint16_t)csum; 6651 6652 #ifdef _LITTLE_ENDIAN 6653 ip_len = htons(ip_len); 6654 #endif 6655 ip6h->ip6_plen = ip_len; 6656 if (DB_CRED(mp) != NULL) 6657 mblk_setcred(mp1, DB_CRED(mp)); 6658 6659 if (DB_TYPE(mp) != M_DATA) { 6660 ASSERT(mp != mp1); 6661 freeb(mp); 6662 } 6663 6664 /* mp has been consumed and we'll return success */ 6665 ASSERT(*error == 0); 6666 mp = NULL; 6667 6668 /* We're done. 
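The IPv6 and UDP headers are complete and mp1 holds the finished datagram.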
Pass the packet to IP */ 6669 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 6670 ip_output_v6(connp, mp1, q, IP_WPUT); 6671 6672 done: 6673 if (sth_wroff != 0) { 6674 (void) proto_set_tx_wroff(RD(q), connp, 6675 udp->udp_max_hdr_len + us->us_wroff_extra); 6676 } 6677 if (hopoptsptr != NULL && !is_ancillary) { 6678 kmem_free(hopoptsptr, hopoptslen); 6679 hopoptsptr = NULL; 6680 } 6681 if (*error != 0) { 6682 ASSERT(mp != NULL); 6683 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6684 } 6685 return (mp); 6686 } 6687 6688 6689 static int 6690 i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6691 { 6692 sin_t *sin = (sin_t *)sa; 6693 sin6_t *sin6 = (sin6_t *)sa; 6694 6695 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6696 6697 if (udp->udp_state != TS_DATA_XFER) 6698 return (ENOTCONN); 6699 6700 switch (udp->udp_family) { 6701 case AF_INET: 6702 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6703 6704 if (*salenp < sizeof (sin_t)) 6705 return (EINVAL); 6706 6707 *salenp = sizeof (sin_t); 6708 *sin = sin_null; 6709 sin->sin_family = AF_INET; 6710 sin->sin_port = udp->udp_dstport; 6711 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 6712 break; 6713 6714 case AF_INET6: 6715 if (*salenp < sizeof (sin6_t)) 6716 return (EINVAL); 6717 6718 *salenp = sizeof (sin6_t); 6719 *sin6 = sin6_null; 6720 sin6->sin6_family = AF_INET6; 6721 sin6->sin6_port = udp->udp_dstport; 6722 sin6->sin6_addr = udp->udp_v6dst; 6723 sin6->sin6_flowinfo = udp->udp_flowinfo; 6724 break; 6725 } 6726 6727 return (0); 6728 } 6729 6730 static int 6731 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6732 { 6733 sin_t *sin = (sin_t *)sa; 6734 sin6_t *sin6 = (sin6_t *)sa; 6735 6736 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6737 6738 switch (udp->udp_family) { 6739 case AF_INET: 6740 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6741 6742 if (*salenp < sizeof (sin_t)) 6743 return (EINVAL); 6744 6745 *salenp = sizeof (sin_t); 6746 *sin = sin_null; 6747 sin->sin_family = AF_INET; 6748 sin->sin_port = udp->udp_port; 6749 6750 /* 6751 * If udp_v6src is unspecified, we might be bound to broadcast 6752 * / multicast. Use udp_bound_v6src as local address instead 6753 * (that could also still be unspecified). 6754 */ 6755 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 6756 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6757 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 6758 } else { 6759 sin->sin_addr.s_addr = 6760 V4_PART_OF_V6(udp->udp_bound_v6src); 6761 } 6762 break; 6763 6764 case AF_INET6: 6765 if (*salenp < sizeof (sin6_t)) 6766 return (EINVAL); 6767 6768 *salenp = sizeof (sin6_t); 6769 *sin6 = sin6_null; 6770 sin6->sin6_family = AF_INET6; 6771 sin6->sin6_port = udp->udp_port; 6772 sin6->sin6_flowinfo = udp->udp_flowinfo; 6773 6774 /* 6775 * If udp_v6src is unspecified, we might be bound to broadcast 6776 * / multicast. Use udp_bound_v6src as local address instead 6777 * (that could also still be unspecified). 6778 */ 6779 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) 6780 sin6->sin6_addr = udp->udp_v6src; 6781 else 6782 sin6->sin6_addr = udp->udp_bound_v6src; 6783 break; 6784 } 6785 6786 return (0); 6787 } 6788 6789 /* 6790 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
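 * The cmdblk_t at the start of the M_CMD block names the command
 * (TI_GETMYNAME or TI_GETPEERNAME); the requested address is written
 * into the attached data mblk and the result is returned through
 * cb_error and cb_len.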
6791 */ 6792 static void 6793 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 6794 { 6795 void *data; 6796 mblk_t *datamp = mp->b_cont; 6797 udp_t *udp = Q_TO_UDP(q); 6798 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 6799 6800 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 6801 cmdp->cb_error = EPROTO; 6802 qreply(q, mp); 6803 return; 6804 } 6805 data = datamp->b_rptr; 6806 6807 rw_enter(&udp->udp_rwlock, RW_READER); 6808 switch (cmdp->cb_cmd) { 6809 case TI_GETPEERNAME: 6810 cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len); 6811 break; 6812 case TI_GETMYNAME: 6813 cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len); 6814 break; 6815 default: 6816 cmdp->cb_error = EINVAL; 6817 break; 6818 } 6819 rw_exit(&udp->udp_rwlock); 6820 6821 qreply(q, mp); 6822 } 6823 6824 static void 6825 udp_disable_direct_sockfs(udp_t *udp) 6826 { 6827 udp->udp_issocket = B_FALSE; 6828 if (udp->udp_direct_sockfs) { 6829 /* 6830 * Disable read-side synchronous stream interface and 6831 * drain any queued data. 6832 */ 6833 udp_rcv_drain(udp->udp_connp->conn_rq, udp, B_FALSE); 6834 ASSERT(!udp->udp_direct_sockfs); 6835 UDP_STAT(udp->udp_us, udp_sock_fallback); 6836 } 6837 } 6838 6839 static void 6840 udp_wput_other(queue_t *q, mblk_t *mp) 6841 { 6842 uchar_t *rptr = mp->b_rptr; 6843 struct datab *db; 6844 struct iocblk *iocp; 6845 cred_t *cr; 6846 conn_t *connp = Q_TO_CONN(q); 6847 udp_t *udp = connp->conn_udp; 6848 udp_stack_t *us; 6849 6850 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 6851 "udp_wput_other_start: q %p", q); 6852 6853 us = udp->udp_us; 6854 db = mp->b_datap; 6855 6856 cr = DB_CREDDEF(mp, connp->conn_cred); 6857 6858 switch (db->db_type) { 6859 case M_CMD: 6860 udp_wput_cmdblk(q, mp); 6861 return; 6862 6863 case M_PROTO: 6864 case M_PCPROTO: 6865 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 6866 freemsg(mp); 6867 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6868 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 6869 return; 6870 } 6871 switch (((t_primp_t)rptr)->type) { 6872 case T_ADDR_REQ: 6873 udp_addr_req(q, mp); 6874 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6875 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 6876 return; 6877 case O_T_BIND_REQ: 6878 case T_BIND_REQ: 6879 udp_tpi_bind(q, mp); 6880 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6881 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 6882 return; 6883 case T_CONN_REQ: 6884 udp_tpi_connect(q, mp); 6885 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6886 "udp_wput_other_end: q %p (%S)", q, "connreq"); 6887 return; 6888 case T_CAPABILITY_REQ: 6889 udp_capability_req(q, mp); 6890 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6891 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 6892 return; 6893 case T_INFO_REQ: 6894 udp_info_req(q, mp); 6895 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6896 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 6897 return; 6898 case T_UNITDATA_REQ: 6899 /* 6900 * If a T_UNITDATA_REQ gets here, the address must 6901 * be bad. Valid T_UNITDATA_REQs are handled 6902 * in udp_wput. 
6903 */ 6904 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 6905 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6906 "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); 6907 return; 6908 case T_UNBIND_REQ: 6909 udp_tpi_unbind(q, mp); 6910 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6911 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 6912 return; 6913 case T_SVR4_OPTMGMT_REQ: 6914 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 6915 cr)) { 6916 (void) svr4_optcom_req(q, 6917 mp, cr, &udp_opt_obj, B_TRUE); 6918 } 6919 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6920 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6921 return; 6922 6923 case T_OPTMGMT_REQ: 6924 (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); 6925 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6926 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6927 return; 6928 6929 case T_DISCON_REQ: 6930 udp_tpi_disconnect(q, mp); 6931 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6932 "udp_wput_other_end: q %p (%S)", q, "disconreq"); 6933 return; 6934 6935 /* The following TPI message is not supported by udp. */ 6936 case O_T_CONN_RES: 6937 case T_CONN_RES: 6938 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6939 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6940 "udp_wput_other_end: q %p (%S)", q, 6941 "connres/disconreq"); 6942 return; 6943 6944 /* The following 3 TPI messages are illegal for udp. */ 6945 case T_DATA_REQ: 6946 case T_EXDATA_REQ: 6947 case T_ORDREL_REQ: 6948 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6949 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6950 "udp_wput_other_end: q %p (%S)", q, 6951 "data/exdata/ordrel"); 6952 return; 6953 default: 6954 break; 6955 } 6956 break; 6957 case M_FLUSH: 6958 if (*rptr & FLUSHW) 6959 flushq(q, FLUSHDATA); 6960 break; 6961 case M_IOCTL: 6962 iocp = (struct iocblk *)mp->b_rptr; 6963 switch (iocp->ioc_cmd) { 6964 case TI_GETPEERNAME: 6965 if (udp->udp_state != TS_DATA_XFER) { 6966 /* 6967 * If a default destination address has not 6968 * been associated with the stream, then we 6969 * don't know the peer's name. 6970 */ 6971 iocp->ioc_error = ENOTCONN; 6972 iocp->ioc_count = 0; 6973 mp->b_datap->db_type = M_IOCACK; 6974 qreply(q, mp); 6975 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6976 "udp_wput_other_end: q %p (%S)", q, 6977 "getpeername"); 6978 return; 6979 } 6980 /* FALLTHRU */ 6981 case TI_GETMYNAME: { 6982 /* 6983 * For TI_GETPEERNAME and TI_GETMYNAME, we first 6984 * need to copyin the user's strbuf structure. 6985 * Processing will continue in the M_IOCDATA case 6986 * below. 6987 */ 6988 mi_copyin(q, mp, NULL, 6989 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 6990 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6991 "udp_wput_other_end: q %p (%S)", q, "getmyname"); 6992 return; 6993 } 6994 case ND_SET: 6995 /* nd_getset performs the necessary checking */ 6996 case ND_GET: 6997 if (nd_getset(q, us->us_nd, mp)) { 6998 qreply(q, mp); 6999 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7000 "udp_wput_other_end: q %p (%S)", q, "get"); 7001 return; 7002 } 7003 break; 7004 case _SIOCSOCKFALLBACK: 7005 /* 7006 * Either sockmod is about to be popped and the 7007 * socket would now be treated as a plain stream, 7008 * or a module is about to be pushed so we could 7009 * no longer use read-side synchronous stream. 7010 * Drain any queued data and disable direct sockfs 7011 * interface from now on. 
7012 */ 7013 if (!udp->udp_issocket) { 7014 DB_TYPE(mp) = M_IOCNAK; 7015 iocp->ioc_error = EINVAL; 7016 } else { 7017 udp_disable_direct_sockfs(udp); 7018 7019 DB_TYPE(mp) = M_IOCACK; 7020 iocp->ioc_error = 0; 7021 } 7022 iocp->ioc_count = 0; 7023 iocp->ioc_rval = 0; 7024 qreply(q, mp); 7025 return; 7026 default: 7027 break; 7028 } 7029 break; 7030 case M_IOCDATA: 7031 udp_wput_iocdata(q, mp); 7032 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7033 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7034 return; 7035 default: 7036 /* Unrecognized messages are passed through without change. */ 7037 break; 7038 } 7039 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7040 "udp_wput_other_end: q %p (%S)", q, "end"); 7041 ip_output(connp, mp, q, IP_WPUT); 7042 } 7043 7044 /* 7045 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7046 * messages. 7047 */ 7048 static void 7049 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7050 { 7051 mblk_t *mp1; 7052 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 7053 STRUCT_HANDLE(strbuf, sb); 7054 udp_t *udp = Q_TO_UDP(q); 7055 int error; 7056 uint_t addrlen; 7057 7058 /* Make sure it is one of ours. */ 7059 switch (iocp->ioc_cmd) { 7060 case TI_GETMYNAME: 7061 case TI_GETPEERNAME: 7062 break; 7063 default: 7064 ip_output(udp->udp_connp, mp, q, IP_WPUT); 7065 return; 7066 } 7067 7068 switch (mi_copy_state(q, mp, &mp1)) { 7069 case -1: 7070 return; 7071 case MI_COPY_CASE(MI_COPY_IN, 1): 7072 break; 7073 case MI_COPY_CASE(MI_COPY_OUT, 1): 7074 /* 7075 * The address has been copied out, so now 7076 * copyout the strbuf. 7077 */ 7078 mi_copyout(q, mp); 7079 return; 7080 case MI_COPY_CASE(MI_COPY_OUT, 2): 7081 /* 7082 * The address and strbuf have been copied out. 7083 * We're done, so just acknowledge the original 7084 * M_IOCTL. 7085 */ 7086 mi_copy_done(q, mp, 0); 7087 return; 7088 default: 7089 /* 7090 * Something strange has happened, so acknowledge 7091 * the original M_IOCTL with an EPROTO error. 7092 */ 7093 mi_copy_done(q, mp, EPROTO); 7094 return; 7095 } 7096 7097 /* 7098 * Now we have the strbuf structure for TI_GETMYNAME 7099 * and TI_GETPEERNAME. Next we copyout the requested 7100 * address and then we'll copyout the strbuf. 7101 */ 7102 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 7103 addrlen = udp->udp_family == AF_INET ? 
sizeof (sin_t) : sizeof (sin6_t);
7104 if (STRUCT_FGET(sb, maxlen) < addrlen) {
7105 mi_copy_done(q, mp, EINVAL);
7106 return;
7107 }
7108
7109 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
7110
7111 if (mp1 == NULL)
7112 return;
7113
7114 rw_enter(&udp->udp_rwlock, RW_READER);
7115 switch (iocp->ioc_cmd) {
7116 case TI_GETMYNAME:
7117 error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen);
7118 break;
7119 case TI_GETPEERNAME:
7120 error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen);
7121 break;
7122 }
7123 rw_exit(&udp->udp_rwlock);
7124
7125 if (error != 0) {
7126 mi_copy_done(q, mp, error);
7127 } else {
7128 mp1->b_wptr += addrlen;
7129 STRUCT_FSET(sb, len, addrlen);
7130
7131 /* Copy out the address */
7132 mi_copyout(q, mp);
7133 }
7134 }
7135
7136 static int
7137 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
7138 udpattrs_t *udpattrs)
7139 {
7140 struct T_unitdata_req *udreqp;
7141 int is_absreq_failure;
7142 cred_t *cr;
7143 conn_t *connp = Q_TO_CONN(q);
7144
7145 ASSERT(((t_primp_t)mp->b_rptr)->type);
7146
7147 cr = DB_CREDDEF(mp, connp->conn_cred);
7148
7149 udreqp = (struct T_unitdata_req *)mp->b_rptr;
7150
7151 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length,
7152 udreqp->OPT_offset, cr, &udp_opt_obj,
7153 udpattrs, &is_absreq_failure);
7154
7155 if (*errorp != 0) {
7156 /*
7157 * Note: No special action needed in this
7158 * module for "is_absreq_failure".
7159 */
7160 return (-1); /* failure */
7161 }
7162 ASSERT(is_absreq_failure == 0);
7163 return (0); /* success */
7164 }
7165
7166 void
7167 udp_ddi_g_init(void)
7168 {
7169 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
7170 udp_opt_obj.odb_opt_arr_cnt);
7171
7172 /*
7173 * We want to be informed each time a stack is created or
7174 * destroyed in the kernel, so we can maintain the
7175 * set of udp_stack_t's.
7176 */
7177 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
7178 }
7179
7180 void
7181 udp_ddi_g_destroy(void)
7182 {
7183 netstack_unregister(NS_UDP);
7184 }
7185
7186 #define INET_NAME "ip"
7187
7188 /*
7189 * Initialize the UDP stack instance.
7190 */
7191 static void *
7192 udp_stack_init(netstackid_t stackid, netstack_t *ns)
7193 {
7194 udp_stack_t *us;
7195 udpparam_t *pa;
7196 int i;
7197 int error = 0;
7198 major_t major;
7199
7200 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
7201 us->us_netstack = ns;
7202
7203 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
7204 us->us_epriv_ports[0] = 2049;
7205 us->us_epriv_ports[1] = 4045;
7206
7207 /*
7208 * The smallest anonymous port in the privileged port range in which
7209 * UDP looks for a free port; used by the UDP_ANONPRIVBIND option.
7210 */
7211 us->us_min_anonpriv_port = 512;
7212
7213 us->us_bind_fanout_size = udp_bind_fanout_size;
7214
7215 /* Round up the value, which might have been modified in /etc/system */
7216 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) {
7217 /* Not a power of two.
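A value of 600, for example, is raised to 1024 by the loop below.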
Round up to nearest power of two */ 7218 for (i = 0; i < 31; i++) { 7219 if (us->us_bind_fanout_size < (1 << i)) 7220 break; 7221 } 7222 us->us_bind_fanout_size = 1 << i; 7223 } 7224 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 7225 sizeof (udp_fanout_t), KM_SLEEP); 7226 for (i = 0; i < us->us_bind_fanout_size; i++) { 7227 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7228 NULL); 7229 } 7230 7231 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 7232 7233 us->us_param_arr = pa; 7234 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 7235 7236 (void) udp_param_register(&us->us_nd, 7237 us->us_param_arr, A_CNT(udp_param_arr)); 7238 7239 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 7240 us->us_mibkp = udp_kstat_init(stackid); 7241 7242 major = mod_name_to_major(INET_NAME); 7243 error = ldi_ident_from_major(major, &us->us_ldi_ident); 7244 ASSERT(error == 0); 7245 return (us); 7246 } 7247 7248 /* 7249 * Free the UDP stack instance. 7250 */ 7251 static void 7252 udp_stack_fini(netstackid_t stackid, void *arg) 7253 { 7254 udp_stack_t *us = (udp_stack_t *)arg; 7255 int i; 7256 7257 for (i = 0; i < us->us_bind_fanout_size; i++) { 7258 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 7259 } 7260 7261 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 7262 sizeof (udp_fanout_t)); 7263 7264 us->us_bind_fanout = NULL; 7265 7266 nd_free(&us->us_nd); 7267 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 7268 us->us_param_arr = NULL; 7269 7270 udp_kstat_fini(stackid, us->us_mibkp); 7271 us->us_mibkp = NULL; 7272 7273 udp_kstat2_fini(stackid, us->us_kstat); 7274 us->us_kstat = NULL; 7275 bzero(&us->us_statistics, sizeof (us->us_statistics)); 7276 7277 ldi_ident_release(us->us_ldi_ident); 7278 kmem_free(us, sizeof (*us)); 7279 } 7280 7281 static void * 7282 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 7283 { 7284 kstat_t *ksp; 7285 7286 udp_stat_t template = { 7287 { "udp_ip_send", KSTAT_DATA_UINT64 }, 7288 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 7289 { "udp_ire_null", KSTAT_DATA_UINT64 }, 7290 { "udp_drain", KSTAT_DATA_UINT64 }, 7291 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 7292 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 7293 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 7294 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7295 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7296 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7297 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7298 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7299 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7300 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7301 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7302 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7303 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7304 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7305 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7306 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7307 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7308 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 7309 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7310 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7311 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 7312 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7313 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7314 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7315 #ifdef DEBUG 7316 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7317 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7318 #endif 7319 }; 7320 7321 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7322 KSTAT_TYPE_NAMED, sizeof 
(template) / sizeof (kstat_named_t), 7323 KSTAT_FLAG_VIRTUAL, stackid); 7324 7325 if (ksp == NULL) 7326 return (NULL); 7327 7328 bcopy(&template, us_statisticsp, sizeof (template)); 7329 ksp->ks_data = (void *)us_statisticsp; 7330 ksp->ks_private = (void *)(uintptr_t)stackid; 7331 7332 kstat_install(ksp); 7333 return (ksp); 7334 } 7335 7336 static void 7337 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 7338 { 7339 if (ksp != NULL) { 7340 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7341 kstat_delete_netstack(ksp, stackid); 7342 } 7343 } 7344 7345 static void * 7346 udp_kstat_init(netstackid_t stackid) 7347 { 7348 kstat_t *ksp; 7349 7350 udp_named_kstat_t template = { 7351 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 7352 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7353 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 7354 { "entrySize", KSTAT_DATA_INT32, 0 }, 7355 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7356 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7357 }; 7358 7359 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 7360 KSTAT_TYPE_NAMED, 7361 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 7362 7363 if (ksp == NULL || ksp->ks_data == NULL) 7364 return (NULL); 7365 7366 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7367 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7368 7369 bcopy(&template, ksp->ks_data, sizeof (template)); 7370 ksp->ks_update = udp_kstat_update; 7371 ksp->ks_private = (void *)(uintptr_t)stackid; 7372 7373 kstat_install(ksp); 7374 return (ksp); 7375 } 7376 7377 static void 7378 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 7379 { 7380 if (ksp != NULL) { 7381 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7382 kstat_delete_netstack(ksp, stackid); 7383 } 7384 } 7385 7386 static int 7387 udp_kstat_update(kstat_t *kp, int rw) 7388 { 7389 udp_named_kstat_t *udpkp; 7390 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 7391 netstack_t *ns; 7392 udp_stack_t *us; 7393 7394 if ((kp == NULL) || (kp->ks_data == NULL)) 7395 return (EIO); 7396 7397 if (rw == KSTAT_WRITE) 7398 return (EACCES); 7399 7400 ns = netstack_find_by_stackid(stackid); 7401 if (ns == NULL) 7402 return (-1); 7403 us = ns->netstack_udp; 7404 if (us == NULL) { 7405 netstack_rele(ns); 7406 return (-1); 7407 } 7408 udpkp = (udp_named_kstat_t *)kp->ks_data; 7409 7410 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 7411 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 7412 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 7413 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 7414 netstack_rele(ns); 7415 return (0); 7416 } 7417 7418 /* 7419 * Read-side synchronous stream info entry point, called as a 7420 * result of handling certain STREAMS ioctl operations. 7421 */ 7422 static int 7423 udp_rinfop(queue_t *q, infod_t *dp) 7424 { 7425 mblk_t *mp; 7426 uint_t cmd = dp->d_cmd; 7427 int res = 0; 7428 int error = 0; 7429 udp_t *udp = Q_TO_UDP(q); 7430 struct stdata *stp = STREAM(q); 7431 7432 mutex_enter(&udp->udp_drain_lock); 7433 /* If shutdown on read has happened, return nothing */ 7434 mutex_enter(&stp->sd_lock); 7435 if (stp->sd_flag & STREOF) { 7436 mutex_exit(&stp->sd_lock); 7437 goto done; 7438 } 7439 mutex_exit(&stp->sd_lock); 7440 7441 if ((mp = udp->udp_rcv_list_head) == NULL) 7442 goto done; 7443 7444 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7445 7446 if (cmd & INFOD_COUNT) { 7447 /* 7448 * Return the number of messages. 
7449 */ 7450 dp->d_count += udp->udp_rcv_msgcnt; 7451 res |= INFOD_COUNT; 7452 } 7453 if (cmd & INFOD_BYTES) { 7454 /* 7455 * Return size of all data messages. 7456 */ 7457 dp->d_bytes += udp->udp_rcv_cnt; 7458 res |= INFOD_BYTES; 7459 } 7460 if (cmd & INFOD_FIRSTBYTES) { 7461 /* 7462 * Return size of first data message. 7463 */ 7464 dp->d_bytes = msgdsize(mp); 7465 res |= INFOD_FIRSTBYTES; 7466 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7467 } 7468 if (cmd & INFOD_COPYOUT) { 7469 mblk_t *mp1 = mp->b_cont; 7470 int n; 7471 /* 7472 * Return data contents of first message. 7473 */ 7474 ASSERT(DB_TYPE(mp1) == M_DATA); 7475 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7476 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7477 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7478 UIO_READ, dp->d_uiop)) != 0) { 7479 goto done; 7480 } 7481 mp1 = mp1->b_cont; 7482 } 7483 res |= INFOD_COPYOUT; 7484 dp->d_cmd &= ~INFOD_COPYOUT; 7485 } 7486 done: 7487 mutex_exit(&udp->udp_drain_lock); 7488 7489 dp->d_res |= res; 7490 7491 return (error); 7492 } 7493 7494 /* 7495 * Read-side synchronous stream entry point. This is called as a result 7496 * of recv/read operation done at sockfs, and is guaranteed to execute 7497 * outside of the interrupt thread context. It returns a single datagram 7498 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7499 */ 7500 static int 7501 udp_rrw(queue_t *q, struiod_t *dp) 7502 { 7503 mblk_t *mp; 7504 udp_t *udp = Q_TO_UDP(q); 7505 udp_stack_t *us = udp->udp_us; 7506 7507 /* 7508 * Dequeue datagram from the head of the list and return 7509 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7510 * set/cleared depending on whether or not there's data 7511 * remaining in the list. 7512 */ 7513 mutex_enter(&udp->udp_drain_lock); 7514 if (!udp->udp_direct_sockfs) { 7515 mutex_exit(&udp->udp_drain_lock); 7516 UDP_STAT(us, udp_rrw_busy); 7517 return (EBUSY); 7518 } 7519 if ((mp = udp->udp_rcv_list_head) != NULL) { 7520 uint_t size = msgdsize(mp); 7521 7522 /* Last datagram in the list? */ 7523 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 7524 udp->udp_rcv_list_tail = NULL; 7525 mp->b_next = NULL; 7526 7527 udp->udp_rcv_cnt -= size; 7528 udp->udp_rcv_msgcnt--; 7529 UDP_STAT(us, udp_rrw_msgcnt); 7530 7531 /* No longer flow-controlling? */ 7532 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 7533 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 7534 udp->udp_drain_qfull = B_FALSE; 7535 } 7536 if (udp->udp_rcv_list_head == NULL) { 7537 /* 7538 * Either we just dequeued the last datagram or 7539 * we get here from sockfs and have nothing to 7540 * return; in this case clear RSLEEP. 7541 */ 7542 ASSERT(udp->udp_rcv_cnt == 0); 7543 ASSERT(udp->udp_rcv_msgcnt == 0); 7544 ASSERT(udp->udp_rcv_list_tail == NULL); 7545 STR_WAKEUP_CLEAR(STREAM(q)); 7546 } else { 7547 /* 7548 * More data follows; we need udp_rrw() to be 7549 * called in future to pick up the rest. 7550 */ 7551 STR_WAKEUP_SET(STREAM(q)); 7552 } 7553 mutex_exit(&udp->udp_drain_lock); 7554 dp->d_mp = mp; 7555 return (0); 7556 } 7557 7558 /* 7559 * Enqueue a completely-built T_UNITDATA_IND message into the receive 7560 * list; this is typically executed within the interrupt thread context 7561 * and so we do things as quickly as possible. 
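 * Each datagram is appended to the b_next-linked receive list and the
 * byte and message counts are updated; once either count reaches
 * udp_rcv_hiwat, udp_drain_qfull is set to request flow control.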
7562 */ 7563 static void 7564 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 7565 { 7566 ASSERT(q == RD(q)); 7567 ASSERT(pkt_len == msgdsize(mp)); 7568 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 7569 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 7570 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 7571 7572 mutex_enter(&udp->udp_drain_lock); 7573 /* 7574 * Wake up and signal the receiving app; it is okay to do this 7575 * before enqueueing the mp because we are holding the drain lock. 7576 * One of the advantages of synchronous stream is the ability for 7577 * us to find out when the application performs a read on the 7578 * socket by way of udp_rrw() entry point being called. We need 7579 * to generate SIGPOLL/SIGIO for each received data in the case 7580 * of asynchronous socket just as in the strrput() case. However, 7581 * we only wake the application up when necessary, i.e. during the 7582 * first enqueue. When udp_rrw() is called, we send up a single 7583 * datagram upstream and call STR_WAKEUP_SET() again when there 7584 * are still data remaining in our receive queue. 7585 */ 7586 STR_WAKEUP_SENDSIG(STREAM(q), udp->udp_rcv_list_head); 7587 if (udp->udp_rcv_list_head == NULL) 7588 udp->udp_rcv_list_head = mp; 7589 else 7590 udp->udp_rcv_list_tail->b_next = mp; 7591 udp->udp_rcv_list_tail = mp; 7592 udp->udp_rcv_cnt += pkt_len; 7593 udp->udp_rcv_msgcnt++; 7594 7595 /* Need to flow-control? */ 7596 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 7597 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 7598 udp->udp_drain_qfull = B_TRUE; 7599 7600 mutex_exit(&udp->udp_drain_lock); 7601 } 7602 7603 /* 7604 * Drain the contents of receive list to the module upstream; we do 7605 * this during close or when we fallback to the slow mode due to 7606 * sockmod being popped or a module being pushed on top of us. 7607 */ 7608 static void 7609 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 7610 { 7611 mblk_t *mp; 7612 udp_stack_t *us = udp->udp_us; 7613 7614 mutex_enter(&udp->udp_drain_lock); 7615 /* 7616 * There is no race with a concurrent udp_input() sending 7617 * up packets using putnext() after we have cleared the 7618 * udp_direct_sockfs flag but before we have completed 7619 * sending up the packets in udp_rcv_list, since we are 7620 * either a writer or we have quiesced the conn. 7621 */ 7622 udp->udp_direct_sockfs = B_FALSE; 7623 mutex_exit(&udp->udp_drain_lock); 7624 7625 if (udp->udp_rcv_list_head != NULL) 7626 UDP_STAT(us, udp_drain); 7627 7628 /* 7629 * Send up everything via putnext(); note here that we 7630 * don't need the udp_drain_lock to protect us since 7631 * nothing can enter udp_rrw() and that we currently 7632 * have exclusive access to this udp. 
7633 */ 7634 while ((mp = udp->udp_rcv_list_head) != NULL) { 7635 udp->udp_rcv_list_head = mp->b_next; 7636 mp->b_next = NULL; 7637 udp->udp_rcv_cnt -= msgdsize(mp); 7638 udp->udp_rcv_msgcnt--; 7639 if (closing) { 7640 freemsg(mp); 7641 } else { 7642 ASSERT(q == RD(q)); 7643 putnext(q, mp); 7644 } 7645 } 7646 ASSERT(udp->udp_rcv_cnt == 0); 7647 ASSERT(udp->udp_rcv_msgcnt == 0); 7648 ASSERT(udp->udp_rcv_list_head == NULL); 7649 udp->udp_rcv_list_tail = NULL; 7650 udp->udp_drain_qfull = B_FALSE; 7651 } 7652 7653 static size_t 7654 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7655 { 7656 udp_stack_t *us = udp->udp_us; 7657 7658 /* We add a bit of extra buffering */ 7659 size += size >> 1; 7660 if (size > us->us_max_buf) 7661 size = us->us_max_buf; 7662 7663 udp->udp_rcv_hiwat = size; 7664 return (size); 7665 } 7666 7667 /* 7668 * For the lower queue so that UDP can be a dummy mux. 7669 * Nobody should be sending 7670 * packets up this stream 7671 */ 7672 static void 7673 udp_lrput(queue_t *q, mblk_t *mp) 7674 { 7675 mblk_t *mp1; 7676 7677 switch (mp->b_datap->db_type) { 7678 case M_FLUSH: 7679 /* Turn around */ 7680 if (*mp->b_rptr & FLUSHW) { 7681 *mp->b_rptr &= ~FLUSHR; 7682 qreply(q, mp); 7683 return; 7684 } 7685 break; 7686 } 7687 /* Could receive messages that passed through ar_rput */ 7688 for (mp1 = mp; mp1; mp1 = mp1->b_cont) 7689 mp1->b_prev = mp1->b_next = NULL; 7690 freemsg(mp); 7691 } 7692 7693 /* 7694 * For the lower queue so that UDP can be a dummy mux. 7695 * Nobody should be sending packets down this stream. 7696 */ 7697 /* ARGSUSED */ 7698 void 7699 udp_lwput(queue_t *q, mblk_t *mp) 7700 { 7701 freemsg(mp); 7702 } 7703 7704 /* 7705 * Below routines for UDP socket module. 7706 */ 7707 7708 static conn_t * 7709 udp_do_open(cred_t *credp, boolean_t isv6, int flags) 7710 { 7711 udp_t *udp; 7712 conn_t *connp; 7713 zoneid_t zoneid; 7714 netstack_t *ns; 7715 udp_stack_t *us; 7716 7717 ns = netstack_find_by_cred(credp); 7718 ASSERT(ns != NULL); 7719 us = ns->netstack_udp; 7720 ASSERT(us != NULL); 7721 7722 /* 7723 * For exclusive stacks we set the zoneid to zero 7724 * to make UDP operate as if in the global zone. 7725 */ 7726 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 7727 zoneid = GLOBAL_ZONEID; 7728 else 7729 zoneid = crgetzoneid(credp); 7730 7731 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 7732 7733 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 7734 if (connp == NULL) { 7735 netstack_rele(ns); 7736 return (NULL); 7737 } 7738 udp = connp->conn_udp; 7739 7740 /* 7741 * ipcl_conn_create did a netstack_hold. Undo the hold that was 7742 * done by netstack_find_by_cred() 7743 */ 7744 netstack_rele(ns); 7745 7746 rw_enter(&udp->udp_rwlock, RW_WRITER); 7747 ASSERT(connp->conn_ulp == IPPROTO_UDP); 7748 ASSERT(connp->conn_udp == udp); 7749 ASSERT(udp->udp_connp == connp); 7750 7751 /* Set the initial state of the stream and the privilege status. 
*/ 7752 udp->udp_state = TS_UNBND; 7753 if (isv6) { 7754 udp->udp_family = AF_INET6; 7755 udp->udp_ipversion = IPV6_VERSION; 7756 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 7757 udp->udp_ttl = us->us_ipv6_hoplimit; 7758 connp->conn_af_isv6 = B_TRUE; 7759 connp->conn_flags |= IPCL_ISV6; 7760 } else { 7761 udp->udp_family = AF_INET; 7762 udp->udp_ipversion = IPV4_VERSION; 7763 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 7764 udp->udp_ttl = us->us_ipv4_ttl; 7765 connp->conn_af_isv6 = B_FALSE; 7766 connp->conn_flags &= ~IPCL_ISV6; 7767 } 7768 7769 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 7770 udp->udp_pending_op = -1; 7771 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 7772 connp->conn_zoneid = zoneid; 7773 7774 udp->udp_open_time = lbolt64; 7775 udp->udp_open_pid = curproc->p_pid; 7776 7777 /* 7778 * If the caller has the process-wide flag set, then default to MAC 7779 * exempt mode. This allows read-down to unlabeled hosts. 7780 */ 7781 if (getpflags(NET_MAC_AWARE, credp) != 0) 7782 connp->conn_mac_exempt = B_TRUE; 7783 7784 connp->conn_ulp_labeled = is_system_labeled(); 7785 7786 udp->udp_us = us; 7787 7788 connp->conn_recv = udp_input; 7789 crhold(credp); 7790 connp->conn_cred = credp; 7791 7792 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 7793 7794 rw_exit(&udp->udp_rwlock); 7795 7796 return (connp); 7797 } 7798 7799 /* ARGSUSED */ 7800 sock_lower_handle_t 7801 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 7802 uint_t *smodep, int *errorp, int flags, cred_t *credp) 7803 { 7804 udp_t *udp = NULL; 7805 udp_stack_t *us; 7806 conn_t *connp; 7807 boolean_t isv6; 7808 7809 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 7810 (proto != 0 && proto != IPPROTO_UDP)) { 7811 *errorp = EPROTONOSUPPORT; 7812 return (NULL); 7813 } 7814 7815 if (family == AF_INET6) 7816 isv6 = B_TRUE; 7817 else 7818 isv6 = B_FALSE; 7819 7820 connp = udp_do_open(credp, isv6, flags); 7821 if (connp == NULL) { 7822 *errorp = ENOMEM; 7823 return (NULL); 7824 } 7825 7826 udp = connp->conn_udp; 7827 ASSERT(udp != NULL); 7828 us = udp->udp_us; 7829 ASSERT(us != NULL); 7830 7831 connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET; 7832 7833 /* Set flow control */ 7834 rw_enter(&udp->udp_rwlock, RW_WRITER); 7835 (void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat); 7836 udp->udp_rcv_disply_hiwat = us->us_recv_hiwat; 7837 udp->udp_rcv_lowat = udp_mod_info.mi_lowat; 7838 udp->udp_xmit_hiwat = us->us_xmit_hiwat; 7839 udp->udp_xmit_lowat = us->us_xmit_lowat; 7840 7841 if (udp->udp_family == AF_INET6) { 7842 /* Build initial header template for transmit */ 7843 if ((*errorp = udp_build_hdrs(udp)) != 0) { 7844 rw_exit(&udp->udp_rwlock); 7845 ipcl_conn_destroy(connp); 7846 return (NULL); 7847 } 7848 } 7849 rw_exit(&udp->udp_rwlock); 7850 7851 connp->conn_flow_cntrld = B_FALSE; 7852 7853 ASSERT(us->us_ldi_ident != NULL); 7854 7855 if ((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) { 7856 ip1dbg(("udp_create: create of IP helper stream failed\n")); 7857 udp_do_close(connp); 7858 return (NULL); 7859 } 7860 7861 /* Set the send flow control */ 7862 connp->conn_wq->q_hiwat = us->us_xmit_hiwat; 7863 connp->conn_wq->q_lowat = us->us_xmit_lowat; 7864 7865 mutex_enter(&connp->conn_lock); 7866 connp->conn_state_flags &= ~CONN_INCIPIENT; 7867 mutex_exit(&connp->conn_lock); 7868 7869 *errorp = 0; 7870 *smodep = SM_ATOMIC; 7871 *sock_downcalls = &sock_udp_downcalls; 7872 return ((sock_lower_handle_t)connp); 7873 } 7874 7875 /* ARGSUSED */ 
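/*
 * udp_activate() is called by sockfs once the upper socket has been
 * created; it records the upcall vector and handle and pushes the
 * initial protocol properties (write offset, receive high-water mark,
 * maximum packet sizes) up via su_set_proto_props().
 */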
7876 void 7877 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 7878 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 7879 { 7880 conn_t *connp = (conn_t *)proto_handle; 7881 udp_t *udp = connp->conn_udp; 7882 udp_stack_t *us = udp->udp_us; 7883 struct sock_proto_props sopp; 7884 7885 connp->conn_upcalls = sock_upcalls; 7886 connp->conn_upper_handle = sock_handle; 7887 7888 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | 7889 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 7890 sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 7891 sopp.sopp_maxblk = INFPSZ; 7892 sopp.sopp_rxhiwat = udp->udp_rcv_hiwat; 7893 sopp.sopp_maxaddrlen = sizeof (sin6_t); 7894 sopp.sopp_maxpsz = 7895 (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 7896 UDP_MAXPACKET_IPV6; 7897 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 : 7898 udp_mod_info.mi_minpsz; 7899 7900 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 7901 &sopp); 7902 } 7903 7904 static void 7905 udp_do_close(conn_t *connp) 7906 { 7907 udp_t *udp; 7908 7909 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 7910 udp = connp->conn_udp; 7911 7912 udp_quiesce_conn(connp); 7913 ip_quiesce_conn(connp); 7914 7915 if (!IPCL_IS_NONSTR(connp)) { 7916 /* 7917 * Disable read-side synchronous stream 7918 * interface and drain any queued data. 7919 */ 7920 ASSERT(connp->conn_wq != NULL); 7921 udp_rcv_drain(connp->conn_wq, udp, B_TRUE); 7922 ASSERT(!udp->udp_direct_sockfs); 7923 7924 ASSERT(connp->conn_rq != NULL); 7925 qprocsoff(connp->conn_rq); 7926 } 7927 7928 ASSERT(udp->udp_rcv_cnt == 0); 7929 ASSERT(udp->udp_rcv_msgcnt == 0); 7930 ASSERT(udp->udp_rcv_list_head == NULL); 7931 ASSERT(udp->udp_rcv_list_tail == NULL); 7932 7933 udp_close_free(connp); 7934 7935 /* 7936 * Now we are truly single threaded on this stream, and can 7937 * delete the things hanging off the connp, and finally the connp. 7938 * We removed this connp from the fanout list, it cannot be 7939 * accessed thru the fanouts, and we already waited for the 7940 * conn_ref to drop to 0. We are already in close, so 7941 * there cannot be any other thread from the top. qprocsoff 7942 * has completed, and service has completed or won't run in 7943 * future. 
7944 */ 7945 ASSERT(connp->conn_ref == 1); 7946 if (!IPCL_IS_NONSTR(connp)) { 7947 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 7948 } else { 7949 ip_free_helper_stream(connp); 7950 } 7951 7952 connp->conn_ref--; 7953 ipcl_conn_destroy(connp); 7954 } 7955 7956 /* ARGSUSED */ 7957 int 7958 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 7959 { 7960 conn_t *connp = (conn_t *)proto_handle; 7961 7962 udp_do_close(connp); 7963 return (0); 7964 } 7965 7966 static int 7967 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 7968 boolean_t bind_to_req_port_only) 7969 { 7970 sin_t *sin; 7971 sin6_t *sin6; 7972 sin6_t sin6addr; 7973 in_port_t port; /* Host byte order */ 7974 in_port_t requested_port; /* Host byte order */ 7975 int count; 7976 in6_addr_t v6src; 7977 int loopmax; 7978 udp_fanout_t *udpf; 7979 in_port_t lport; /* Network byte order */ 7980 zoneid_t zoneid; 7981 udp_t *udp; 7982 boolean_t is_inaddr_any; 7983 mlp_type_t addrtype, mlptype; 7984 udp_stack_t *us; 7985 int error = 0; 7986 mblk_t *mp = NULL; 7987 7988 udp = connp->conn_udp; 7989 us = udp->udp_us; 7990 7991 if (udp->udp_state != TS_UNBND) { 7992 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 7993 "udp_bind: bad state, %u", udp->udp_state); 7994 return (-TOUTSTATE); 7995 } 7996 7997 switch (len) { 7998 case 0: 7999 if (udp->udp_family == AF_INET) { 8000 sin = (sin_t *)&sin6addr; 8001 *sin = sin_null; 8002 sin->sin_family = AF_INET; 8003 sin->sin_addr.s_addr = INADDR_ANY; 8004 udp->udp_ipversion = IPV4_VERSION; 8005 } else { 8006 ASSERT(udp->udp_family == AF_INET6); 8007 sin6 = (sin6_t *)&sin6addr; 8008 *sin6 = sin6_null; 8009 sin6->sin6_family = AF_INET6; 8010 V6_SET_ZERO(sin6->sin6_addr); 8011 udp->udp_ipversion = IPV6_VERSION; 8012 } 8013 port = 0; 8014 break; 8015 8016 case sizeof (sin_t): /* Complete IPv4 address */ 8017 sin = (sin_t *)sa; 8018 8019 if (sin == NULL || !OK_32PTR((char *)sin)) 8020 return (EINVAL); 8021 8022 if (udp->udp_family != AF_INET || 8023 sin->sin_family != AF_INET) { 8024 return (EAFNOSUPPORT); 8025 } 8026 port = ntohs(sin->sin_port); 8027 break; 8028 8029 case sizeof (sin6_t): /* complete IPv6 address */ 8030 sin6 = (sin6_t *)sa; 8031 8032 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 8033 return (EINVAL); 8034 8035 if (udp->udp_family != AF_INET6 || 8036 sin6->sin6_family != AF_INET6) { 8037 return (EAFNOSUPPORT); 8038 } 8039 port = ntohs(sin6->sin6_port); 8040 break; 8041 8042 default: /* Invalid request */ 8043 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8044 "udp_bind: bad ADDR_length length %u", len); 8045 return (-TBADADDR); 8046 } 8047 8048 requested_port = port; 8049 8050 if (requested_port == 0 || !bind_to_req_port_only) 8051 bind_to_req_port_only = B_FALSE; 8052 else /* T_BIND_REQ and requested_port != 0 */ 8053 bind_to_req_port_only = B_TRUE; 8054 8055 if (requested_port == 0) { 8056 /* 8057 * If the application passed in zero for the port number, it 8058 * doesn't care which port number we bind to. Get one in the 8059 * valid range. 8060 */ 8061 if (udp->udp_anon_priv_bind) { 8062 port = udp_get_next_priv_port(udp); 8063 } else { 8064 port = udp_update_next_port(udp, 8065 us->us_next_port_to_try, B_TRUE); 8066 } 8067 } else { 8068 /* 8069 * If the port is in the well-known privileged range, 8070 * make sure the caller was privileged. 
8071 */ 8072 int i; 8073 boolean_t priv = B_FALSE; 8074 8075 if (port < us->us_smallest_nonpriv_port) { 8076 priv = B_TRUE; 8077 } else { 8078 for (i = 0; i < us->us_num_epriv_ports; i++) { 8079 if (port == us->us_epriv_ports[i]) { 8080 priv = B_TRUE; 8081 break; 8082 } 8083 } 8084 } 8085 8086 if (priv) { 8087 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 8088 return (-TACCES); 8089 } 8090 } 8091 8092 if (port == 0) 8093 return (-TNOADDR); 8094 8095 /* 8096 * The state must be TS_UNBND. TPI mandates that users must send 8097 * TPI primitives only 1 at a time and wait for the response before 8098 * sending the next primitive. 8099 */ 8100 rw_enter(&udp->udp_rwlock, RW_WRITER); 8101 if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) { 8102 rw_exit(&udp->udp_rwlock); 8103 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8104 "udp_bind: bad state, %u", udp->udp_state); 8105 return (-TOUTSTATE); 8106 } 8107 /* XXX how to remove the T_BIND_REQ? Should set it before calling */ 8108 udp->udp_pending_op = T_BIND_REQ; 8109 /* 8110 * Copy the source address into our udp structure. This address 8111 * may still be zero; if so, IP will fill in the correct address 8112 * each time an outbound packet is passed to it. Since the udp is 8113 * not yet in the bind hash list, we don't grab the uf_lock to 8114 * change udp_ipversion 8115 */ 8116 if (udp->udp_family == AF_INET) { 8117 ASSERT(sin != NULL); 8118 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8119 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8120 udp->udp_ip_snd_options_len; 8121 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 8122 } else { 8123 ASSERT(sin6 != NULL); 8124 v6src = sin6->sin6_addr; 8125 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 8126 /* 8127 * no need to hold the uf_lock to set the udp_ipversion 8128 * since we are not yet in the fanout list 8129 */ 8130 udp->udp_ipversion = IPV4_VERSION; 8131 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 8132 UDPH_SIZE + udp->udp_ip_snd_options_len; 8133 } else { 8134 udp->udp_ipversion = IPV6_VERSION; 8135 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8136 } 8137 } 8138 8139 /* 8140 * If udp_reuseaddr is not set, then we have to make sure that 8141 * the IP address and port number the application requested 8142 * (or we selected for the application) is not being used by 8143 * another stream. If another stream is already using the 8144 * requested IP address and port, the behavior depends on 8145 * "bind_to_req_port_only". If set the bind fails; otherwise we 8146 * search for any an unused port to bind to the the stream. 8147 * 8148 * As per the BSD semantics, as modified by the Deering multicast 8149 * changes, if udp_reuseaddr is set, then we allow multiple binds 8150 * to the same port independent of the local IP address. 8151 * 8152 * This is slightly different than in SunOS 4.X which did not 8153 * support IP multicast. Note that the change implemented by the 8154 * Deering multicast code effects all binds - not only binding 8155 * to IP multicast addresses. 8156 * 8157 * Note that when binding to port zero we ignore SO_REUSEADDR in 8158 * order to guarantee a unique port. 
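 *
 * Illustrative only: a userland sketch of the multiple-bind behavior
 * described above, assuming s1 and s2 are already-created AF_INET UDP
 * sockets and 5353 is just an example port.  With SO_REUSEADDR set on
 * both sockets the second bind to the same port is allowed; without it
 * the second bind fails with EADDRINUSE:
 *
 *	int on = 1;
 *	struct sockaddr_in sin;
 *
 *	(void) setsockopt(s1, SOL_SOCKET, SO_REUSEADDR, &on, sizeof (on));
 *	(void) setsockopt(s2, SOL_SOCKET, SO_REUSEADDR, &on, sizeof (on));
 *	(void) memset(&sin, 0, sizeof (sin));
 *	sin.sin_family = AF_INET;
 *	sin.sin_port = htons(5353);
 *	sin.sin_addr.s_addr = htonl(INADDR_ANY);
 *	if (bind(s1, (struct sockaddr *)&sin, sizeof (sin)) == -1 ||
 *	    bind(s2, (struct sockaddr *)&sin, sizeof (sin)) == -1)
 *		perror("bind");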
8159 */ 8160 8161 count = 0; 8162 if (udp->udp_anon_priv_bind) { 8163 /* 8164 * loopmax = (IPPORT_RESERVED-1) - 8165 * us->us_min_anonpriv_port + 1 8166 */ 8167 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 8168 } else { 8169 loopmax = us->us_largest_anon_port - 8170 us->us_smallest_anon_port + 1; 8171 } 8172 8173 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 8174 zoneid = connp->conn_zoneid; 8175 8176 for (;;) { 8177 udp_t *udp1; 8178 boolean_t found_exclbind = B_FALSE; 8179 8180 /* 8181 * Walk through the list of udp streams bound to 8182 * requested port with the same IP address. 8183 */ 8184 lport = htons(port); 8185 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 8186 us->us_bind_fanout_size)]; 8187 mutex_enter(&udpf->uf_lock); 8188 for (udp1 = udpf->uf_udp; udp1 != NULL; 8189 udp1 = udp1->udp_bind_hash) { 8190 if (lport != udp1->udp_port) 8191 continue; 8192 8193 /* 8194 * On a labeled system, we must treat bindings to ports 8195 * on shared IP addresses by sockets with MAC exemption 8196 * privilege as being in all zones, as there's 8197 * otherwise no way to identify the right receiver. 8198 */ 8199 if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) || 8200 IPCL_ZONE_MATCH(connp, 8201 udp1->udp_connp->conn_zoneid)) && 8202 !connp->conn_mac_exempt && \ 8203 !udp1->udp_connp->conn_mac_exempt) 8204 continue; 8205 8206 /* 8207 * If UDP_EXCLBIND is set for either the bound or 8208 * binding endpoint, the semantics of bind 8209 * is changed according to the following chart. 8210 * 8211 * spec = specified address (v4 or v6) 8212 * unspec = unspecified address (v4 or v6) 8213 * A = specified addresses are different for endpoints 8214 * 8215 * bound bind to allowed? 8216 * ------------------------------------- 8217 * unspec unspec no 8218 * unspec spec no 8219 * spec unspec no 8220 * spec spec yes if A 8221 * 8222 * For labeled systems, SO_MAC_EXEMPT behaves the same 8223 * as UDP_EXCLBIND, except that zoneid is ignored. 8224 */ 8225 if (udp1->udp_exclbind || udp->udp_exclbind || 8226 udp1->udp_connp->conn_mac_exempt || 8227 connp->conn_mac_exempt) { 8228 if (V6_OR_V4_INADDR_ANY( 8229 udp1->udp_bound_v6src) || 8230 is_inaddr_any || 8231 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 8232 &v6src)) { 8233 found_exclbind = B_TRUE; 8234 break; 8235 } 8236 continue; 8237 } 8238 8239 /* 8240 * Check ipversion to allow IPv4 and IPv6 sockets to 8241 * have disjoint port number spaces. 8242 */ 8243 if (udp->udp_ipversion != udp1->udp_ipversion) { 8244 8245 /* 8246 * On the first time through the loop, if the 8247 * the user intentionally specified a 8248 * particular port number, then ignore any 8249 * bindings of the other protocol that may 8250 * conflict. This allows the user to bind IPv6 8251 * alone and get both v4 and v6, or bind both 8252 * both and get each seperately. On subsequent 8253 * times through the loop, we're checking a 8254 * port that we chose (not the user) and thus 8255 * we do not allow casual duplicate bindings. 8256 */ 8257 if (count == 0 && requested_port != 0) 8258 continue; 8259 } 8260 8261 /* 8262 * No difference depending on SO_REUSEADDR. 8263 * 8264 * If existing port is bound to a 8265 * non-wildcard IP address and 8266 * the requesting stream is bound to 8267 * a distinct different IP addresses 8268 * (non-wildcard, also), keep going. 
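 *
 * Illustrative only: a userland sketch of the "keep going" case above,
 * assuming s1 and s2 are already-created AF_INET UDP sockets and that
 * the documentation addresses 192.0.2.1 and 192.0.2.2 are both
 * configured locally.  Because each socket binds a different specific
 * address, sharing the port is not a conflict and SO_REUSEADDR is not
 * needed:
 *
 *	struct sockaddr_in a1, a2;
 *
 *	(void) memset(&a1, 0, sizeof (a1));
 *	a1.sin_family = AF_INET;
 *	a1.sin_port = htons(5353);
 *	(void) inet_pton(AF_INET, "192.0.2.1", &a1.sin_addr);
 *	a2 = a1;
 *	(void) inet_pton(AF_INET, "192.0.2.2", &a2.sin_addr);
 *	if (bind(s1, (struct sockaddr *)&a1, sizeof (a1)) == -1 ||
 *	    bind(s2, (struct sockaddr *)&a2, sizeof (a2)) == -1)
 *		perror("bind");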
8269 */ 8270 if (!is_inaddr_any && 8271 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 8272 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 8273 &v6src)) { 8274 continue; 8275 } 8276 break; 8277 } 8278 8279 if (!found_exclbind && 8280 (udp->udp_reuseaddr && requested_port != 0)) { 8281 break; 8282 } 8283 8284 if (udp1 == NULL) { 8285 /* 8286 * No other stream has this IP address 8287 * and port number. We can use it. 8288 */ 8289 break; 8290 } 8291 mutex_exit(&udpf->uf_lock); 8292 if (bind_to_req_port_only) { 8293 /* 8294 * We get here only when requested port 8295 * is bound (and only first of the for() 8296 * loop iteration). 8297 * 8298 * The semantics of this bind request 8299 * require it to fail so we return from 8300 * the routine (and exit the loop). 8301 * 8302 */ 8303 udp->udp_pending_op = -1; 8304 rw_exit(&udp->udp_rwlock); 8305 return (-TADDRBUSY); 8306 } 8307 8308 if (udp->udp_anon_priv_bind) { 8309 port = udp_get_next_priv_port(udp); 8310 } else { 8311 if ((count == 0) && (requested_port != 0)) { 8312 /* 8313 * If the application wants us to find 8314 * a port, get one to start with. Set 8315 * requested_port to 0, so that we will 8316 * update us->us_next_port_to_try below. 8317 */ 8318 port = udp_update_next_port(udp, 8319 us->us_next_port_to_try, B_TRUE); 8320 requested_port = 0; 8321 } else { 8322 port = udp_update_next_port(udp, port + 1, 8323 B_FALSE); 8324 } 8325 } 8326 8327 if (port == 0 || ++count >= loopmax) { 8328 /* 8329 * We've tried every possible port number and 8330 * there are none available, so send an error 8331 * to the user. 8332 */ 8333 udp->udp_pending_op = -1; 8334 rw_exit(&udp->udp_rwlock); 8335 return (-TNOADDR); 8336 } 8337 } 8338 8339 /* 8340 * Copy the source address into our udp structure. This address 8341 * may still be zero; if so, ip will fill in the correct address 8342 * each time an outbound packet is passed to it. 8343 * If we are binding to a broadcast or multicast address then 8344 * udp_post_ip_bind_connect will clear the source address 8345 * when udp_do_bind success. 8346 */ 8347 udp->udp_v6src = udp->udp_bound_v6src = v6src; 8348 udp->udp_port = lport; 8349 /* 8350 * Now reset the the next anonymous port if the application requested 8351 * an anonymous port, or we handed out the next anonymous port. 8352 */ 8353 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 8354 us->us_next_port_to_try = port + 1; 8355 } 8356 8357 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. 
*/ 8358 if (udp->udp_family == AF_INET) { 8359 sin->sin_port = udp->udp_port; 8360 } else { 8361 sin6->sin6_port = udp->udp_port; 8362 /* Rebuild the header template */ 8363 error = udp_build_hdrs(udp); 8364 if (error != 0) { 8365 udp->udp_pending_op = -1; 8366 rw_exit(&udp->udp_rwlock); 8367 mutex_exit(&udpf->uf_lock); 8368 return (error); 8369 } 8370 } 8371 udp->udp_state = TS_IDLE; 8372 udp_bind_hash_insert(udpf, udp); 8373 mutex_exit(&udpf->uf_lock); 8374 rw_exit(&udp->udp_rwlock); 8375 8376 if (cl_inet_bind) { 8377 /* 8378 * Running in cluster mode - register bind information 8379 */ 8380 if (udp->udp_ipversion == IPV4_VERSION) { 8381 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8382 IPPROTO_UDP, AF_INET, 8383 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8384 (in_port_t)udp->udp_port, NULL); 8385 } else { 8386 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8387 IPPROTO_UDP, AF_INET6, 8388 (uint8_t *)&(udp->udp_v6src), 8389 (in_port_t)udp->udp_port, NULL); 8390 } 8391 } 8392 8393 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 8394 if (is_system_labeled() && (!connp->conn_anon_port || 8395 connp->conn_anon_mlp)) { 8396 uint16_t mlpport; 8397 cred_t *cr = connp->conn_cred; 8398 zone_t *zone; 8399 8400 zone = crgetzone(cr); 8401 connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth : 8402 mlptSingle; 8403 addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION, 8404 &v6src, us->us_netstack->netstack_ip); 8405 if (addrtype == mlptSingle) { 8406 rw_enter(&udp->udp_rwlock, RW_WRITER); 8407 udp->udp_pending_op = -1; 8408 rw_exit(&udp->udp_rwlock); 8409 connp->conn_anon_port = B_FALSE; 8410 connp->conn_mlp_type = mlptSingle; 8411 return (-TNOADDR); 8412 } 8413 mlpport = connp->conn_anon_port ? PMAPPORT : port; 8414 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 8415 addrtype); 8416 if (mlptype != mlptSingle && 8417 (connp->conn_mlp_type == mlptSingle || 8418 secpolicy_net_bindmlp(cr) != 0)) { 8419 if (udp->udp_debug) { 8420 (void) strlog(UDP_MOD_ID, 0, 1, 8421 SL_ERROR|SL_TRACE, 8422 "udp_bind: no priv for multilevel port %d", 8423 mlpport); 8424 } 8425 rw_enter(&udp->udp_rwlock, RW_WRITER); 8426 udp->udp_pending_op = -1; 8427 rw_exit(&udp->udp_rwlock); 8428 connp->conn_anon_port = B_FALSE; 8429 connp->conn_mlp_type = mlptSingle; 8430 return (-TACCES); 8431 } 8432 8433 /* 8434 * If we're specifically binding a shared IP address and the 8435 * port is MLP on shared addresses, then check to see if this 8436 * zone actually owns the MLP. Reject if not. 8437 */ 8438 if (mlptype == mlptShared && addrtype == mlptShared) { 8439 /* 8440 * No need to handle exclusive-stack zones since 8441 * ALL_ZONES only applies to the shared stack. 
8442 */ 8443 zoneid_t mlpzone; 8444 8445 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 8446 htons(mlpport)); 8447 if (connp->conn_zoneid != mlpzone) { 8448 if (udp->udp_debug) { 8449 (void) strlog(UDP_MOD_ID, 0, 1, 8450 SL_ERROR|SL_TRACE, 8451 "udp_bind: attempt to bind port " 8452 "%d on shared addr in zone %d " 8453 "(should be %d)", 8454 mlpport, connp->conn_zoneid, 8455 mlpzone); 8456 } 8457 rw_enter(&udp->udp_rwlock, RW_WRITER); 8458 udp->udp_pending_op = -1; 8459 rw_exit(&udp->udp_rwlock); 8460 connp->conn_anon_port = B_FALSE; 8461 connp->conn_mlp_type = mlptSingle; 8462 return (-TACCES); 8463 } 8464 } 8465 if (connp->conn_anon_port) { 8466 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 8467 port, B_TRUE); 8468 if (error != 0) { 8469 if (udp->udp_debug) { 8470 (void) strlog(UDP_MOD_ID, 0, 1, 8471 SL_ERROR|SL_TRACE, 8472 "udp_bind: cannot establish anon " 8473 "MLP for port %d", port); 8474 } 8475 rw_enter(&udp->udp_rwlock, RW_WRITER); 8476 udp->udp_pending_op = -1; 8477 rw_exit(&udp->udp_rwlock); 8478 connp->conn_anon_port = B_FALSE; 8479 connp->conn_mlp_type = mlptSingle; 8480 return (-TACCES); 8481 } 8482 } 8483 connp->conn_mlp_type = mlptype; 8484 } 8485 8486 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8487 /* 8488 * Append a request for an IRE if udp_v6src not 8489 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 8490 */ 8491 mp = allocb(sizeof (ire_t), BPRI_HI); 8492 if (!mp) { 8493 rw_enter(&udp->udp_rwlock, RW_WRITER); 8494 udp->udp_pending_op = -1; 8495 rw_exit(&udp->udp_rwlock); 8496 return (ENOMEM); 8497 } 8498 mp->b_wptr += sizeof (ire_t); 8499 mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8500 } 8501 if (udp->udp_family == AF_INET6) { 8502 ASSERT(udp->udp_connp->conn_af_isv6); 8503 error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP, 8504 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 8505 } else { 8506 ASSERT(!udp->udp_connp->conn_af_isv6); 8507 error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP, 8508 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, 8509 B_TRUE); 8510 } 8511 8512 (void) udp_post_ip_bind_connect(udp, mp, error); 8513 return (error); 8514 } 8515 8516 int 8517 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 8518 socklen_t len, cred_t *cr) 8519 { 8520 int error; 8521 conn_t *connp; 8522 8523 connp = (conn_t *)proto_handle; 8524 8525 if (sa == NULL) 8526 error = udp_do_unbind(connp); 8527 else 8528 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 8529 8530 if (error < 0) { 8531 if (error == -TOUTSTATE) 8532 error = EINVAL; 8533 else 8534 error = proto_tlitosyserr(-error); 8535 } 8536 8537 return (error); 8538 } 8539 8540 static int 8541 udp_implicit_bind(conn_t *connp, cred_t *cr) 8542 { 8543 int error; 8544 8545 error = udp_do_bind(connp, NULL, 0, cr, B_FALSE); 8546 return ((error < 0) ? proto_tlitosyserr(-error) : error); 8547 } 8548 8549 /* 8550 * This routine removes a port number association from a stream. It 8551 * is called by udp_unbind and udp_tpi_unbind. 
8552 */ 8553 static int 8554 udp_do_unbind(conn_t *connp) 8555 { 8556 udp_t *udp = connp->conn_udp; 8557 udp_fanout_t *udpf; 8558 udp_stack_t *us = udp->udp_us; 8559 8560 if (cl_inet_unbind != NULL) { 8561 /* 8562 * Running in cluster mode - register unbind information 8563 */ 8564 if (udp->udp_ipversion == IPV4_VERSION) { 8565 (*cl_inet_unbind)( 8566 connp->conn_netstack->netstack_stackid, 8567 IPPROTO_UDP, AF_INET, 8568 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8569 (in_port_t)udp->udp_port, NULL); 8570 } else { 8571 (*cl_inet_unbind)( 8572 connp->conn_netstack->netstack_stackid, 8573 IPPROTO_UDP, AF_INET6, 8574 (uint8_t *)&(udp->udp_v6src), 8575 (in_port_t)udp->udp_port, NULL); 8576 } 8577 } 8578 8579 rw_enter(&udp->udp_rwlock, RW_WRITER); 8580 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8581 rw_exit(&udp->udp_rwlock); 8582 return (-TOUTSTATE); 8583 } 8584 udp->udp_pending_op = T_UNBIND_REQ; 8585 rw_exit(&udp->udp_rwlock); 8586 8587 /* 8588 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 8589 * and therefore ip_unbind must never return NULL. 8590 */ 8591 ip_unbind(connp); 8592 8593 /* 8594 * Once we're unbound from IP, the pending operation may be cleared 8595 * here. 8596 */ 8597 rw_enter(&udp->udp_rwlock, RW_WRITER); 8598 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8599 us->us_bind_fanout_size)]; 8600 8601 mutex_enter(&udpf->uf_lock); 8602 udp_bind_hash_remove(udp, B_TRUE); 8603 V6_SET_ZERO(udp->udp_v6src); 8604 V6_SET_ZERO(udp->udp_bound_v6src); 8605 udp->udp_port = 0; 8606 mutex_exit(&udpf->uf_lock); 8607 8608 udp->udp_pending_op = -1; 8609 udp->udp_state = TS_UNBND; 8610 if (udp->udp_family == AF_INET6) 8611 (void) udp_build_hdrs(udp); 8612 rw_exit(&udp->udp_rwlock); 8613 8614 return (0); 8615 } 8616 8617 static int 8618 udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error) 8619 { 8620 ire_t *ire; 8621 udp_fanout_t *udpf; 8622 udp_stack_t *us = udp->udp_us; 8623 8624 ASSERT(udp->udp_pending_op != -1); 8625 rw_enter(&udp->udp_rwlock, RW_WRITER); 8626 if (error == 0) { 8627 /* For udp_do_connect() success */ 8628 /* udp_do_bind() success will do nothing in here */ 8629 /* 8630 * If a broadcast/multicast address was bound, set 8631 * the source address to 0. 8632 * This ensures no datagrams with broadcast address 8633 * as source address are emitted (which would violate 8634 * RFC1122 - Hosts requirements) 8635 * 8636 * Note that when connecting the returned IRE is 8637 * for the destination address and we only perform 8638 * the broadcast check for the source address (it 8639 * is OK to connect to a broadcast/multicast address.) 8640 */ 8641 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 8642 ire = (ire_t *)ire_mp->b_rptr; 8643 8644 /* 8645 * Note: we get IRE_BROADCAST for IPv6 to "mark" a 8646 * multicast local address. 8647 */ 8648 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8649 us->us_bind_fanout_size)]; 8650 if (ire->ire_type == IRE_BROADCAST && 8651 udp->udp_state != TS_DATA_XFER) { 8652 ASSERT(udp->udp_pending_op == T_BIND_REQ || 8653 udp->udp_pending_op == O_T_BIND_REQ); 8654 /* 8655 * This was just a local bind to a broadcast 8656 * addr. 
8657 */ 8658 mutex_enter(&udpf->uf_lock); 8659 V6_SET_ZERO(udp->udp_v6src); 8660 mutex_exit(&udpf->uf_lock); 8661 if (udp->udp_family == AF_INET6) 8662 (void) udp_build_hdrs(udp); 8663 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8664 if (udp->udp_family == AF_INET6) 8665 (void) udp_build_hdrs(udp); 8666 } 8667 } 8668 } else { 8669 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8670 us->us_bind_fanout_size)]; 8671 mutex_enter(&udpf->uf_lock); 8672 8673 if (udp->udp_state == TS_DATA_XFER) { 8674 /* Connect failed */ 8675 /* Revert back to the bound source */ 8676 udp->udp_v6src = udp->udp_bound_v6src; 8677 udp->udp_state = TS_IDLE; 8678 } else { 8679 /* For udp_do_bind() failed */ 8680 V6_SET_ZERO(udp->udp_v6src); 8681 V6_SET_ZERO(udp->udp_bound_v6src); 8682 udp->udp_state = TS_UNBND; 8683 udp_bind_hash_remove(udp, B_TRUE); 8684 udp->udp_port = 0; 8685 } 8686 mutex_exit(&udpf->uf_lock); 8687 if (udp->udp_family == AF_INET6) 8688 (void) udp_build_hdrs(udp); 8689 } 8690 udp->udp_pending_op = -1; 8691 rw_exit(&udp->udp_rwlock); 8692 if (ire_mp != NULL) 8693 freeb(ire_mp); 8694 return (error); 8695 } 8696 8697 /* 8698 * It associates a default destination address with the stream. 8699 */ 8700 static int 8701 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len) 8702 { 8703 sin6_t *sin6; 8704 sin_t *sin; 8705 in6_addr_t v6dst; 8706 ipaddr_t v4dst; 8707 uint16_t dstport; 8708 uint32_t flowinfo; 8709 mblk_t *ire_mp; 8710 udp_fanout_t *udpf; 8711 udp_t *udp, *udp1; 8712 ushort_t ipversion; 8713 udp_stack_t *us; 8714 int error; 8715 8716 udp = connp->conn_udp; 8717 us = udp->udp_us; 8718 8719 /* 8720 * Address has been verified by the caller 8721 */ 8722 switch (len) { 8723 default: 8724 /* 8725 * Should never happen 8726 */ 8727 return (EINVAL); 8728 8729 case sizeof (sin_t): 8730 sin = (sin_t *)sa; 8731 v4dst = sin->sin_addr.s_addr; 8732 dstport = sin->sin_port; 8733 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8734 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8735 ipversion = IPV4_VERSION; 8736 break; 8737 8738 case sizeof (sin6_t): 8739 sin6 = (sin6_t *)sa; 8740 v6dst = sin6->sin6_addr; 8741 dstport = sin6->sin6_port; 8742 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 8743 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 8744 ipversion = IPV4_VERSION; 8745 flowinfo = 0; 8746 } else { 8747 ipversion = IPV6_VERSION; 8748 flowinfo = sin6->sin6_flowinfo; 8749 } 8750 break; 8751 } 8752 8753 if (dstport == 0) 8754 return (-TBADADDR); 8755 8756 rw_enter(&udp->udp_rwlock, RW_WRITER); 8757 8758 /* 8759 * This UDP must have bound to a port already before doing a connect. 8760 * TPI mandates that users must send TPI primitives only 1 at a time 8761 * and wait for the response before sending the next primitive. 
8762 */ 8763 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8764 rw_exit(&udp->udp_rwlock); 8765 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8766 "udp_connect: bad state, %u", udp->udp_state); 8767 return (-TOUTSTATE); 8768 } 8769 udp->udp_pending_op = T_CONN_REQ; 8770 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 8771 8772 if (ipversion == IPV4_VERSION) { 8773 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8774 udp->udp_ip_snd_options_len; 8775 } else { 8776 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8777 } 8778 8779 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8780 us->us_bind_fanout_size)]; 8781 8782 mutex_enter(&udpf->uf_lock); 8783 if (udp->udp_state == TS_DATA_XFER) { 8784 /* Already connected - clear out state */ 8785 udp->udp_v6src = udp->udp_bound_v6src; 8786 udp->udp_state = TS_IDLE; 8787 } 8788 8789 /* 8790 * Create a default IP header with no IP options. 8791 */ 8792 udp->udp_dstport = dstport; 8793 udp->udp_ipversion = ipversion; 8794 if (ipversion == IPV4_VERSION) { 8795 /* 8796 * Interpret a zero destination to mean loopback. 8797 * Update the T_CONN_REQ (sin/sin6) since it is used to 8798 * generate the T_CONN_CON. 8799 */ 8800 if (v4dst == INADDR_ANY) { 8801 v4dst = htonl(INADDR_LOOPBACK); 8802 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8803 if (udp->udp_family == AF_INET) { 8804 sin->sin_addr.s_addr = v4dst; 8805 } else { 8806 sin6->sin6_addr = v6dst; 8807 } 8808 } 8809 udp->udp_v6dst = v6dst; 8810 udp->udp_flowinfo = 0; 8811 8812 /* 8813 * If the destination address is multicast and 8814 * an outgoing multicast interface has been set, 8815 * use the address of that interface as our 8816 * source address if no source address has been set. 8817 */ 8818 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 8819 CLASSD(v4dst) && 8820 udp->udp_multicast_if_addr != INADDR_ANY) { 8821 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 8822 &udp->udp_v6src); 8823 } 8824 } else { 8825 ASSERT(udp->udp_ipversion == IPV6_VERSION); 8826 /* 8827 * Interpret a zero destination to mean loopback. 8828 * Update the T_CONN_REQ (sin/sin6) since it is used to 8829 * generate the T_CONN_CON. 8830 */ 8831 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 8832 v6dst = ipv6_loopback; 8833 sin6->sin6_addr = v6dst; 8834 } 8835 udp->udp_v6dst = v6dst; 8836 udp->udp_flowinfo = flowinfo; 8837 /* 8838 * If the destination address is multicast and 8839 * an outgoing multicast interface has been set, 8840 * then the ip bind logic will pick the correct source 8841 * address (i.e. matching the outgoing multicast interface). 
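 *
 * Illustrative only: a userland sketch of that case, assuming fd is an
 * AF_INET6 UDP socket bound to the unspecified address (or not yet
 * bound), "net0" is a hypothetical interface name, and ff05::123 with
 * port 5353 are just example values.  Selecting the outgoing multicast
 * interface first lets the bind logic choose the matching source address
 * when the socket is connected:
 *
 *	unsigned int ifindex = if_nametoindex("net0");
 *	struct sockaddr_in6 grp;
 *
 *	(void) setsockopt(fd, IPPROTO_IPV6, IPV6_MULTICAST_IF,
 *	    &ifindex, sizeof (ifindex));
 *	(void) memset(&grp, 0, sizeof (grp));
 *	grp.sin6_family = AF_INET6;
 *	grp.sin6_port = htons(5353);
 *	(void) inet_pton(AF_INET6, "ff05::123", &grp.sin6_addr);
 *	if (connect(fd, (struct sockaddr *)&grp, sizeof (grp)) == -1)
 *		perror("connect");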
8842 */ 8843 } 8844 8845 /* 8846 * Verify that the src/port/dst/port is unique for all 8847 * connections in TS_DATA_XFER 8848 */ 8849 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 8850 if (udp1->udp_state != TS_DATA_XFER) 8851 continue; 8852 if (udp->udp_port != udp1->udp_port || 8853 udp->udp_ipversion != udp1->udp_ipversion || 8854 dstport != udp1->udp_dstport || 8855 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 8856 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) || 8857 !(IPCL_ZONE_MATCH(udp->udp_connp, 8858 udp1->udp_connp->conn_zoneid) || 8859 IPCL_ZONE_MATCH(udp1->udp_connp, 8860 udp->udp_connp->conn_zoneid))) 8861 continue; 8862 mutex_exit(&udpf->uf_lock); 8863 udp->udp_pending_op = -1; 8864 rw_exit(&udp->udp_rwlock); 8865 return (-TBADADDR); 8866 } 8867 8868 if (cl_inet_connect2 != NULL) { 8869 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error); 8870 if (error != 0) { 8871 mutex_exit(&udpf->uf_lock); 8872 udp->udp_pending_op = -1; 8873 rw_exit(&udp->udp_rwlock); 8874 return (-TBADADDR); 8875 } 8876 } 8877 8878 udp->udp_state = TS_DATA_XFER; 8879 mutex_exit(&udpf->uf_lock); 8880 8881 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 8882 if (ire_mp == NULL) { 8883 mutex_enter(&udpf->uf_lock); 8884 udp->udp_state = TS_IDLE; 8885 udp->udp_pending_op = -1; 8886 mutex_exit(&udpf->uf_lock); 8887 rw_exit(&udp->udp_rwlock); 8888 return (ENOMEM); 8889 } 8890 8891 rw_exit(&udp->udp_rwlock); 8892 8893 ire_mp->b_wptr += sizeof (ire_t); 8894 ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8895 8896 if (udp->udp_family == AF_INET) { 8897 error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP, 8898 &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port, 8899 V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport, 8900 B_TRUE, B_TRUE); 8901 } else { 8902 error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP, 8903 &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst, 8904 &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE); 8905 } 8906 8907 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 8908 } 8909 8910 /* ARGSUSED */ 8911 static int 8912 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 8913 socklen_t len, sock_connid_t *id, cred_t *cr) 8914 { 8915 conn_t *connp = (conn_t *)proto_handle; 8916 udp_t *udp = connp->conn_udp; 8917 int error; 8918 boolean_t did_bind = B_FALSE; 8919 8920 if (sa == NULL) { 8921 /* 8922 * Disconnect 8923 * Make sure we are connected 8924 */ 8925 if (udp->udp_state != TS_DATA_XFER) 8926 return (EINVAL); 8927 8928 error = udp_disconnect(connp); 8929 return (error); 8930 } 8931 8932 error = proto_verify_ip_addr(udp->udp_family, sa, len); 8933 if (error != 0) 8934 goto done; 8935 8936 /* do an implicit bind if necessary */ 8937 if (udp->udp_state == TS_UNBND) { 8938 error = udp_implicit_bind(connp, cr); 8939 /* 8940 * We could be racing with an actual bind, in which case 8941 * we would see EPROTO. We cross our fingers and try 8942 * to connect. 
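 *
 * Illustrative only: seen from userland, connect(3SOCKET) on a socket
 * that was never bound takes exactly this implicit-bind path, and
 * getsockname(3SOCKET) then shows the port (and source address) that
 * were chosen.  A minimal sketch, assuming fd is an unbound AF_INET UDP
 * socket and 198.51.100.1 port 53 is just an example destination:
 *
 *	struct sockaddr_in dst, loc;
 *	socklen_t len = sizeof (loc);
 *
 *	(void) memset(&dst, 0, sizeof (dst));
 *	dst.sin_family = AF_INET;
 *	dst.sin_port = htons(53);
 *	(void) inet_pton(AF_INET, "198.51.100.1", &dst.sin_addr);
 *	if (connect(fd, (struct sockaddr *)&dst, sizeof (dst)) == 0 &&
 *	    getsockname(fd, (struct sockaddr *)&loc, &len) == 0)
 *		(void) printf("local port %d\n", (int)ntohs(loc.sin_port));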
8943 */ 8944 if (!(error == 0 || error == EPROTO)) 8945 goto done; 8946 did_bind = B_TRUE; 8947 } 8948 /* 8949 * set SO_DGRAM_ERRIND 8950 */ 8951 udp->udp_dgram_errind = B_TRUE; 8952 8953 error = udp_do_connect(connp, sa, len); 8954 8955 if (error != 0 && did_bind) { 8956 int unbind_err; 8957 8958 unbind_err = udp_do_unbind(connp); 8959 ASSERT(unbind_err == 0); 8960 } 8961 8962 if (error == 0) { 8963 *id = 0; 8964 (*connp->conn_upcalls->su_connected) 8965 (connp->conn_upper_handle, 0, NULL, -1); 8966 } else if (error < 0) { 8967 error = proto_tlitosyserr(-error); 8968 } 8969 8970 done: 8971 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 8972 /* 8973 * No need to hold locks to set state 8974 * after connect failure socket state is undefined 8975 * We set the state only to imitate old sockfs behavior 8976 */ 8977 udp->udp_state = TS_IDLE; 8978 } 8979 return (error); 8980 } 8981 8982 /* ARGSUSED */ 8983 int 8984 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 8985 cred_t *cr) 8986 { 8987 conn_t *connp = (conn_t *)proto_handle; 8988 udp_t *udp = connp->conn_udp; 8989 udp_stack_t *us = udp->udp_us; 8990 int error = 0; 8991 8992 ASSERT(DB_TYPE(mp) == M_DATA); 8993 8994 /* 8995 * If the socket is connected and no change in destination 8996 */ 8997 if (msg->msg_namelen == 0) { 8998 error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid); 8999 if (error == EDESTADDRREQ) 9000 return (error); 9001 else 9002 return (udp->udp_dgram_errind ? error : 0); 9003 } 9004 9005 /* 9006 * Do an implicit bind if necessary. 9007 */ 9008 if (udp->udp_state == TS_UNBND) { 9009 error = udp_implicit_bind(connp, cr); 9010 /* 9011 * We could be racing with an actual bind, in which case 9012 * we would see EPROTO. We cross our fingers and try 9013 * to send. 
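 *
 * Illustrative only: a single sendto(3SOCKET) on a never-bound UDP socket
 * goes through this same implicit bind before the datagram is passed on.
 * A minimal sketch, assuming fd is an unbound AF_INET UDP socket and
 * 198.51.100.1 port 9 is just an example destination:
 *
 *	struct sockaddr_in dst;
 *	char buf[] = "ping";
 *
 *	(void) memset(&dst, 0, sizeof (dst));
 *	dst.sin_family = AF_INET;
 *	dst.sin_port = htons(9);
 *	(void) inet_pton(AF_INET, "198.51.100.1", &dst.sin_addr);
 *	if (sendto(fd, buf, sizeof (buf) - 1, 0,
 *	    (struct sockaddr *)&dst, sizeof (dst)) == -1)
 *		perror("sendto");
 *
 * Once the socket is connected, a sendto() that names a destination is
 * rejected with EISCONN by the TS_DATA_XFER check further down.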
9014 */ 9015 if (!(error == 0 || error == EPROTO)) { 9016 freemsg(mp); 9017 return (error); 9018 } 9019 } 9020 9021 rw_enter(&udp->udp_rwlock, RW_WRITER); 9022 9023 if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) { 9024 rw_exit(&udp->udp_rwlock); 9025 freemsg(mp); 9026 return (EISCONN); 9027 } 9028 9029 9030 if (udp->udp_delayed_error != 0) { 9031 boolean_t match; 9032 9033 error = udp->udp_delayed_error; 9034 match = B_FALSE; 9035 udp->udp_delayed_error = 0; 9036 switch (udp->udp_family) { 9037 case AF_INET: { 9038 /* Compare just IP address and port */ 9039 sin_t *sin1 = (sin_t *)msg->msg_name; 9040 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 9041 9042 if (msg->msg_namelen == sizeof (sin_t) && 9043 sin1->sin_port == sin2->sin_port && 9044 sin1->sin_addr.s_addr == sin2->sin_addr.s_addr) 9045 match = B_TRUE; 9046 9047 break; 9048 } 9049 case AF_INET6: { 9050 sin6_t *sin1 = (sin6_t *)msg->msg_name; 9051 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 9052 9053 if (msg->msg_namelen == sizeof (sin6_t) && 9054 sin1->sin6_port == sin2->sin6_port && 9055 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 9056 &sin2->sin6_addr)) 9057 match = B_TRUE; 9058 break; 9059 } 9060 default: 9061 ASSERT(0); 9062 } 9063 9064 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 9065 9066 if (match) { 9067 rw_exit(&udp->udp_rwlock); 9068 freemsg(mp); 9069 return (error); 9070 } 9071 } 9072 9073 error = proto_verify_ip_addr(udp->udp_family, 9074 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 9075 rw_exit(&udp->udp_rwlock); 9076 9077 if (error != 0) { 9078 freemsg(mp); 9079 return (error); 9080 } 9081 9082 error = udp_send_not_connected(connp, mp, 9083 (struct sockaddr *)msg->msg_name, msg->msg_namelen, msg, cr, 9084 curproc->p_pid); 9085 if (error != 0) { 9086 UDP_STAT(us, udp_out_err_output); 9087 freemsg(mp); 9088 } 9089 return (udp->udp_dgram_errind ? 
error : 0); 9090 } 9091 9092 void 9093 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 9094 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb) 9095 { 9096 conn_t *connp = (conn_t *)proto_handle; 9097 udp_t *udp; 9098 struct T_capability_ack tca; 9099 struct sockaddr_in6 laddr, faddr; 9100 socklen_t laddrlen, faddrlen; 9101 short opts; 9102 struct stroptions *stropt; 9103 mblk_t *stropt_mp; 9104 int error; 9105 9106 udp = connp->conn_udp; 9107 9108 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 9109 9110 /* 9111 * Set up the fallback stream that was allocated 9112 */ 9113 connp->conn_dev = (dev_t)RD(q)->q_ptr; 9114 connp->conn_minor_arena = WR(q)->q_ptr; 9115 9116 RD(q)->q_ptr = WR(q)->q_ptr = connp; 9117 9118 WR(q)->q_qinfo = &udp_winit; 9119 9120 connp->conn_rq = RD(q); 9121 connp->conn_wq = WR(q); 9122 9123 /* Notify stream head about options before sending up data */ 9124 stropt_mp->b_datap->db_type = M_SETOPTS; 9125 stropt_mp->b_wptr += sizeof (*stropt); 9126 stropt = (struct stroptions *)stropt_mp->b_rptr; 9127 stropt->so_flags = SO_WROFF | SO_HIWAT; 9128 stropt->so_wroff = 9129 (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra); 9130 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 9131 putnext(RD(q), stropt_mp); 9132 9133 /* 9134 * Free the helper stream 9135 */ 9136 ip_free_helper_stream(connp); 9137 9138 if (!direct_sockfs) 9139 udp_disable_direct_sockfs(udp); 9140 9141 /* 9142 * Collect the information needed to sync with the sonode 9143 */ 9144 udp_do_capability_ack(udp, &tca, TC1_INFO); 9145 9146 laddrlen = faddrlen = sizeof (sin6_t); 9147 (void) udp_getsockname((sock_lower_handle_t)connp, 9148 (struct sockaddr *)&laddr, &laddrlen, NULL); 9149 error = udp_getpeername((sock_lower_handle_t)connp, 9150 (struct sockaddr *)&faddr, &faddrlen, NULL); 9151 if (error != 0) 9152 faddrlen = 0; 9153 9154 opts = 0; 9155 if (udp->udp_dgram_errind) 9156 opts |= SO_DGRAM_ERRIND; 9157 if (udp->udp_dontroute) 9158 opts |= SO_DONTROUTE; 9159 9160 /* 9161 * Once we grab the drain lock, no data will be sent up 9162 * to the socket. So we notify the socket that the endpoint 9163 * is quiescent and it's therefore safe to move data from 9164 * the socket to the stream head.
9165 */ 9166 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 9167 (struct sockaddr *)&laddr, laddrlen, 9168 (struct sockaddr *)&faddr, faddrlen, opts); 9169 9170 /* 9171 * push up any packets that were queued in udp_t 9172 */ 9173 9174 mutex_enter(&udp->udp_recv_lock); 9175 while (udp->udp_fallback_queue_head != NULL) { 9176 mblk_t *mp; 9177 mp = udp->udp_fallback_queue_head; 9178 udp->udp_fallback_queue_head = mp->b_next; 9179 mutex_exit(&udp->udp_recv_lock); 9180 mp->b_next = NULL; 9181 putnext(RD(q), mp); 9182 mutex_enter(&udp->udp_recv_lock); 9183 } 9184 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 9185 /* 9186 * No longer a streams less socket 9187 */ 9188 connp->conn_flags &= ~IPCL_NONSTR; 9189 mutex_exit(&udp->udp_recv_lock); 9190 9191 ASSERT(connp->conn_ref >= 1); 9192 } 9193 9194 static int 9195 udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9196 { 9197 sin_t *sin = (sin_t *)sa; 9198 sin6_t *sin6 = (sin6_t *)sa; 9199 9200 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9201 ASSERT(udp != NULL); 9202 9203 if (udp->udp_state != TS_DATA_XFER) 9204 return (ENOTCONN); 9205 9206 switch (udp->udp_family) { 9207 case AF_INET: 9208 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9209 9210 if (*salenp < sizeof (sin_t)) 9211 return (EINVAL); 9212 9213 *salenp = sizeof (sin_t); 9214 *sin = sin_null; 9215 sin->sin_family = AF_INET; 9216 sin->sin_port = udp->udp_dstport; 9217 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 9218 break; 9219 case AF_INET6: 9220 if (*salenp < sizeof (sin6_t)) 9221 return (EINVAL); 9222 9223 *salenp = sizeof (sin6_t); 9224 *sin6 = sin6_null; 9225 sin6->sin6_family = AF_INET6; 9226 sin6->sin6_port = udp->udp_dstport; 9227 sin6->sin6_addr = udp->udp_v6dst; 9228 sin6->sin6_flowinfo = udp->udp_flowinfo; 9229 break; 9230 } 9231 9232 return (0); 9233 } 9234 9235 /* ARGSUSED */ 9236 int 9237 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9238 socklen_t *salenp, cred_t *cr) 9239 { 9240 conn_t *connp = (conn_t *)proto_handle; 9241 udp_t *udp = connp->conn_udp; 9242 int error; 9243 9244 ASSERT(udp != NULL); 9245 9246 rw_enter(&udp->udp_rwlock, RW_READER); 9247 9248 error = udp_do_getpeername(udp, sa, salenp); 9249 9250 rw_exit(&udp->udp_rwlock); 9251 9252 return (error); 9253 } 9254 9255 static int 9256 udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9257 { 9258 sin_t *sin = (sin_t *)sa; 9259 sin6_t *sin6 = (sin6_t *)sa; 9260 9261 ASSERT(udp != NULL); 9262 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9263 9264 switch (udp->udp_family) { 9265 case AF_INET: 9266 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9267 9268 if (*salenp < sizeof (sin_t)) 9269 return (EINVAL); 9270 9271 *salenp = sizeof (sin_t); 9272 *sin = sin_null; 9273 sin->sin_family = AF_INET; 9274 if (udp->udp_state == TS_UNBND) { 9275 break; 9276 } 9277 sin->sin_port = udp->udp_port; 9278 9279 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 9280 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9281 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 9282 } else { 9283 /* 9284 * INADDR_ANY 9285 * udp_v6src is not set, we might be bound to 9286 * broadcast/multicast. 
Use udp_bound_v6src as 9287 * local address instead (that could 9288 * also still be INADDR_ANY) 9289 */ 9290 sin->sin_addr.s_addr = 9291 V4_PART_OF_V6(udp->udp_bound_v6src); 9292 } 9293 break; 9294 9295 case AF_INET6: 9296 if (*salenp < sizeof (sin6_t)) 9297 return (EINVAL); 9298 9299 *salenp = sizeof (sin6_t); 9300 *sin6 = sin6_null; 9301 sin6->sin6_family = AF_INET6; 9302 if (udp->udp_state == TS_UNBND) { 9303 break; 9304 } 9305 sin6->sin6_port = udp->udp_port; 9306 9307 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9308 sin6->sin6_addr = udp->udp_v6src; 9309 } else { 9310 /* 9311 * UNSPECIFIED 9312 * udp_v6src is not set, we might be bound to 9313 * broadcast/multicast. Use udp_bound_v6src as 9314 * local address instead (that could 9315 * also still be UNSPECIFIED) 9316 */ 9317 sin6->sin6_addr = udp->udp_bound_v6src; 9318 } 9319 } 9320 return (0); 9321 } 9322 9323 /* ARGSUSED */ 9324 int 9325 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9326 socklen_t *salenp, cred_t *cr) 9327 { 9328 conn_t *connp = (conn_t *)proto_handle; 9329 udp_t *udp = connp->conn_udp; 9330 int error; 9331 9332 ASSERT(udp != NULL); 9333 rw_enter(&udp->udp_rwlock, RW_READER); 9334 9335 error = udp_do_getsockname(udp, sa, salenp); 9336 9337 rw_exit(&udp->udp_rwlock); 9338 9339 return (error); 9340 } 9341 9342 int 9343 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9344 void *optvalp, socklen_t *optlen, cred_t *cr) 9345 { 9346 conn_t *connp = (conn_t *)proto_handle; 9347 udp_t *udp = connp->conn_udp; 9348 int error; 9349 t_uscalar_t max_optbuf_len; 9350 void *optvalp_buf; 9351 int len; 9352 9353 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 9354 udp_opt_obj.odb_opt_des_arr, 9355 udp_opt_obj.odb_opt_arr_cnt, 9356 udp_opt_obj.odb_topmost_tpiprovider, 9357 B_FALSE, B_TRUE, cr); 9358 if (error != 0) { 9359 if (error < 0) 9360 error = proto_tlitosyserr(-error); 9361 return (error); 9362 } 9363 9364 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 9365 rw_enter(&udp->udp_rwlock, RW_READER); 9366 len = udp_opt_get(connp, level, option_name, optvalp_buf); 9367 rw_exit(&udp->udp_rwlock); 9368 9369 if (len < 0) { 9370 /* 9371 * Pass on to IP 9372 */ 9373 kmem_free(optvalp_buf, max_optbuf_len); 9374 return (ip_get_options(connp, level, option_name, 9375 optvalp, optlen, cr)); 9376 } else { 9377 /* 9378 * update optlen and copy option value 9379 */ 9380 t_uscalar_t size = MIN(len, *optlen); 9381 bcopy(optvalp_buf, optvalp, size); 9382 bcopy(&size, optlen, sizeof (size)); 9383 9384 kmem_free(optvalp_buf, max_optbuf_len); 9385 return (0); 9386 } 9387 } 9388 9389 int 9390 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9391 const void *optvalp, socklen_t optlen, cred_t *cr) 9392 { 9393 conn_t *connp = (conn_t *)proto_handle; 9394 udp_t *udp = connp->conn_udp; 9395 int error; 9396 9397 error = proto_opt_check(level, option_name, optlen, NULL, 9398 udp_opt_obj.odb_opt_des_arr, 9399 udp_opt_obj.odb_opt_arr_cnt, 9400 udp_opt_obj.odb_topmost_tpiprovider, 9401 B_TRUE, B_FALSE, cr); 9402 9403 if (error != 0) { 9404 if (error < 0) 9405 error = proto_tlitosyserr(-error); 9406 return (error); 9407 } 9408 9409 rw_enter(&udp->udp_rwlock, RW_WRITER); 9410 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 9411 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 9412 NULL, cr); 9413 rw_exit(&udp->udp_rwlock); 9414 9415 if (error < 0) { 9416 /* 9417 * Pass on to ip 9418 */ 9419 error = 
ip_set_options(connp, level, option_name, optvalp, 9420 optlen, cr); 9421 } 9422 9423 return (error); 9424 } 9425 9426 void 9427 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 9428 { 9429 conn_t *connp = (conn_t *)proto_handle; 9430 udp_t *udp = connp->conn_udp; 9431 9432 mutex_enter(&udp->udp_recv_lock); 9433 connp->conn_flow_cntrld = B_FALSE; 9434 mutex_exit(&udp->udp_recv_lock); 9435 } 9436 9437 /* ARGSUSED */ 9438 int 9439 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 9440 { 9441 conn_t *connp = (conn_t *)proto_handle; 9442 9443 /* shut down the send side */ 9444 if (how != SHUT_RD) 9445 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9446 SOCK_OPCTL_SHUT_SEND, 0); 9447 /* shut down the recv side */ 9448 if (how != SHUT_WR) 9449 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9450 SOCK_OPCTL_SHUT_RECV, 0); 9451 return (0); 9452 } 9453 9454 int 9455 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 9456 int mode, int32_t *rvalp, cred_t *cr) 9457 { 9458 conn_t *connp = (conn_t *)proto_handle; 9459 int error; 9460 9461 switch (cmd) { 9462 case ND_SET: 9463 case ND_GET: 9464 case _SIOCSOCKFALLBACK: 9465 case TI_GETPEERNAME: 9466 case TI_GETMYNAME: 9467 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 9468 cmd)); 9469 error = EINVAL; 9470 break; 9471 default: 9472 /* 9473 * Pass on to IP using helper stream 9474 */ 9475 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 9476 cmd, arg, mode, cr, rvalp); 9477 break; 9478 } 9479 return (error); 9480 } 9481 9482 /* ARGSUSED */ 9483 int 9484 udp_accept(sock_lower_handle_t lproto_handle, 9485 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 9486 cred_t *cr) 9487 { 9488 return (EOPNOTSUPP); 9489 } 9490 9491 /* ARGSUSED */ 9492 int 9493 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 9494 { 9495 return (EOPNOTSUPP); 9496 } 9497 9498 sock_downcalls_t sock_udp_downcalls = { 9499 udp_activate, /* sd_activate */ 9500 udp_accept, /* sd_accept */ 9501 udp_bind, /* sd_bind */ 9502 udp_listen, /* sd_listen */ 9503 udp_connect, /* sd_connect */ 9504 udp_getpeername, /* sd_getpeername */ 9505 udp_getsockname, /* sd_getsockname */ 9506 udp_getsockopt, /* sd_getsockopt */ 9507 udp_setsockopt, /* sd_setsockopt */ 9508 udp_send, /* sd_send */ 9509 NULL, /* sd_send_uio */ 9510 NULL, /* sd_recv_uio */ 9511 NULL, /* sd_poll */ 9512 udp_shutdown, /* sd_shutdown */ 9513 udp_clr_flowctrl, /* sd_setflowctrl */ 9514 udp_ioctl, /* sd_ioctl */ 9515 udp_close /* sd_close */ 9516 }; 9517
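/*
 * Illustrative only: a rough userland-to-downcall map for the table
 * above, assuming a plain AF_INET datagram socket.  This is a sketch of
 * the expected dispatch, not a statement of everything sockfs may do in
 * between:
 *
 *	socket(AF_INET, SOCK_DGRAM, 0)			udp_create / udp_activate
 *	bind(fd, (struct sockaddr *)&sin, sizeof (sin))	udp_bind
 *	connect(fd, (struct sockaddr *)&dst, sizeof (dst))	udp_connect
 *	send(fd, buf, nbytes, 0)			udp_send
 *	getsockname(fd, (struct sockaddr *)&loc, &len)	udp_getsockname
 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &sz, sizeof (sz))	udp_setsockopt
 *	close(fd)					udp_close
 */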