1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/pattr.h> 31 #include <sys/stropts.h> 32 #include <sys/strlog.h> 33 #include <sys/strsun.h> 34 #include <sys/time.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/suntpi.h> 42 #include <sys/xti_inet.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/ucred.h> 46 #include <sys/zone.h> 47 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/sockio.h> 51 #include <sys/vtrace.h> 52 #include <sys/sdt.h> 53 #include <sys/debug.h> 54 #include <sys/isa_defs.h> 55 #include <sys/random.h> 56 #include <netinet/in.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 #include <netinet/udp.h> 60 #include <net/if.h> 61 #include <net/route.h> 62 63 #include <inet/common.h> 64 #include <inet/ip.h> 65 #include <inet/ip_impl.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_ire.h> 68 #include <inet/ip_if.h> 69 #include <inet/ip_multi.h> 70 #include <inet/ip_ndp.h> 71 #include <inet/proto_set.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/optcom.h> 75 #include <inet/snmpcom.h> 76 #include <inet/kstatcom.h> 77 #include <inet/udp_impl.h> 78 #include <inet/ipclassifier.h> 79 #include <inet/ipsec_impl.h> 80 #include <inet/ipp_common.h> 81 #include <sys/squeue_impl.h> 82 #include <inet/ipnet.h> 83 84 /* 85 * The ipsec_info.h header file is here since it has the definition for the 86 * M_CTL message types used by IP to convey information to the ULP. The 87 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 88 */ 89 #include <net/pfkeyv2.h> 90 #include <inet/ipsec_info.h> 91 92 #include <sys/tsol/label.h> 93 #include <sys/tsol/tnet.h> 94 #include <rpc/pmap_prot.h> 95 96 /* 97 * Synchronization notes: 98 * 99 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 100 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock. 101 * We also use conn_lock when updating things that affect the IP classifier 102 * lookup. 103 * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock. 104 * 105 * The fanout lock uf_lock: 106 * When a UDP endpoint is bound to a local port, it is inserted into 107 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 108 * The size of the array is controlled by the udp_bind_fanout_size variable. 
109 * This variable can be changed in /etc/system if the default value is 110 * not large enough. Each bind hash bucket is protected by a per bucket 111 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 112 * structure and a few other fields in the udp_t. A UDP endpoint is removed 113 * from the bind hash list only when it is being unbound or being closed. 114 * The per bucket lock also protects a UDP endpoint's state changes. 115 * 116 * The udp_rwlock: 117 * This protects most of the other fields in the udp_t. The exact list of 118 * fields which are protected by each of the above locks is documented in 119 * the udp_t structure definition. 120 * 121 * Plumbing notes: 122 * UDP is always a device driver. For compatibility with mibopen() code 123 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 124 * dummy module. 125 * 126 * The above implies that we don't support any intermediate module to 127 * reside in between /dev/ip and udp -- in fact, we never supported such 128 * scenario in the past as the inter-layer communication semantics have 129 * always been private. 130 */ 131 132 /* For /etc/system control */ 133 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 134 135 #define NDD_TOO_QUICK_MSG \ 136 "ndd get info rate too high for non-privileged users, try again " \ 137 "later.\n" 138 #define NDD_OUT_OF_BUF_MSG "<< Out of buffer >>\n" 139 140 /* Option processing attrs */ 141 typedef struct udpattrs_s { 142 union { 143 ip6_pkt_t *udpattr_ipp6; /* For V6 */ 144 ip4_pkt_t *udpattr_ipp4; /* For V4 */ 145 } udpattr_ippu; 146 #define udpattr_ipp6 udpattr_ippu.udpattr_ipp6 147 #define udpattr_ipp4 udpattr_ippu.udpattr_ipp4 148 mblk_t *udpattr_mb; 149 boolean_t udpattr_credset; 150 } udpattrs_t; 151 152 static void udp_addr_req(queue_t *q, mblk_t *mp); 153 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 154 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 155 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 156 static int udp_build_hdrs(udp_t *udp); 157 static void udp_capability_req(queue_t *q, mblk_t *mp); 158 static int udp_tpi_close(queue_t *q, int flags); 159 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 160 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 161 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 162 int sys_error); 163 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 164 t_scalar_t tlierr, int unixerr); 165 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 166 cred_t *cr); 167 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 168 char *value, caddr_t cp, cred_t *cr); 169 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 170 char *value, caddr_t cp, cred_t *cr); 171 static void udp_icmp_error(conn_t *, mblk_t *); 172 static void udp_icmp_error_ipv6(conn_t *, mblk_t *); 173 static void udp_info_req(queue_t *q, mblk_t *mp); 174 static void udp_input(void *, mblk_t *, void *); 175 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 176 t_scalar_t addr_length); 177 static void udp_lrput(queue_t *, mblk_t *); 178 static void udp_lwput(queue_t *, mblk_t *); 179 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 180 cred_t *credp, boolean_t isv6); 181 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 182 cred_t *credp); 183 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 184 cred_t *credp); 185 static int 
udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 186 int *errorp, udpattrs_t *udpattrs); 187 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 188 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 189 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 190 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 191 cred_t *cr); 192 static void udp_report_item(mblk_t *mp, udp_t *udp); 193 static int udp_rinfop(queue_t *q, infod_t *dp); 194 static int udp_rrw(queue_t *q, struiod_t *dp); 195 static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 196 cred_t *cr); 197 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, 198 ipha_t *ipha); 199 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 200 t_scalar_t destlen, t_scalar_t err); 201 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 202 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 203 boolean_t random); 204 static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, 205 int *, boolean_t, struct nmsghdr *, cred_t *, pid_t); 206 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 207 int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid); 208 static void udp_wput_other(queue_t *q, mblk_t *mp); 209 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 210 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 211 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 212 213 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 214 static void udp_stack_fini(netstackid_t stackid, void *arg); 215 216 static void *udp_kstat_init(netstackid_t stackid); 217 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 218 static void *udp_kstat2_init(netstackid_t, udp_stat_t *); 219 static void udp_kstat2_fini(netstackid_t, kstat_t *); 220 static int udp_kstat_update(kstat_t *kp, int rw); 221 222 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 223 uint_t pkt_len); 224 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 225 static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t); 226 227 static int udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *, 228 cred_t *, pid_t); 229 230 /* Common routine for TPI and socket module */ 231 static conn_t *udp_do_open(cred_t *, boolean_t, int); 232 static void udp_do_close(conn_t *); 233 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 234 boolean_t); 235 static int udp_do_unbind(conn_t *); 236 static int udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *); 237 static int udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *); 238 239 int udp_getsockname(sock_lower_handle_t, 240 struct sockaddr *, socklen_t *, cred_t *); 241 int udp_getpeername(sock_lower_handle_t, 242 struct sockaddr *, socklen_t *, cred_t *); 243 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t); 244 static int udp_post_ip_bind_connect(udp_t *, mblk_t *, int); 245 246 #define UDP_RECV_HIWATER (56 * 1024) 247 #define UDP_RECV_LOWATER 128 248 #define UDP_XMIT_HIWATER (56 * 1024) 249 #define UDP_XMIT_LOWATER 1024 250 251 /* 252 * The following is defined in tcp.c 253 */ 254 extern int (*cl_inet_connect2)(netstackid_t stack_id, 255 uint8_t protocol, boolean_t is_outgoing, 256 sa_family_t addr_family, 257 uint8_t *laddrp, in_port_t lport, 258 uint8_t *faddrp, in_port_t fport, void *args); 259 260 /* 261 * Checks if the given destination 
addr/port is allowed out. 262 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 263 * Called for each connect() and for sendto()/sendmsg() to a different 264 * destination. 265 * For connect(), called in udp_connect(). 266 * For sendto()/sendmsg(), called in udp_output_v{4,6}(). 267 * 268 * This macro assumes that the cl_inet_connect2 hook is not NULL. 269 * Please check this before calling this macro. 270 * 271 * void 272 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 273 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 274 */ 275 #define CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) { \ 276 (err) = 0; \ 277 /* \ 278 * Running in cluster mode - check and register active \ 279 * "connection" information \ 280 */ \ 281 if ((udp)->udp_ipversion == IPV4_VERSION) \ 282 (err) = (*cl_inet_connect2)( \ 283 (cp)->conn_netstack->netstack_stackid, \ 284 IPPROTO_UDP, is_outgoing, AF_INET, \ 285 (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]), \ 286 (udp)->udp_port, \ 287 (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]), \ 288 (in_port_t)(fport), NULL); \ 289 else \ 290 (err) = (*cl_inet_connect2)( \ 291 (cp)->conn_netstack->netstack_stackid, \ 292 IPPROTO_UDP, is_outgoing, AF_INET6, \ 293 (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port, \ 294 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 295 } 296 297 static struct module_info udp_mod_info = { 298 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 299 }; 300 301 /* 302 * Entry points for UDP as a device. 303 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 304 */ 305 static struct qinit udp_rinitv4 = { 306 NULL, NULL, udp_openv4, udp_tpi_close, NULL, 307 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 308 }; 309 310 static struct qinit udp_rinitv6 = { 311 NULL, NULL, udp_openv6, udp_tpi_close, NULL, 312 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 313 }; 314 315 static struct qinit udp_winit = { 316 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, 317 &udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE 318 }; 319 320 /* UDP entry point during fallback */ 321 struct qinit udp_fallback_sock_winit = { 322 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 323 }; 324 325 /* 326 * UDP needs to handle I_LINK and I_PLINK since ifconfig 327 * likes to use it as a place to hang the various streams. 328 */ 329 static struct qinit udp_lrinit = { 330 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, 331 &udp_mod_info 332 }; 333 334 static struct qinit udp_lwinit = { 335 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, 336 &udp_mod_info 337 }; 338 339 /* For AF_INET aka /dev/udp */ 340 struct streamtab udpinfov4 = { 341 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 342 }; 343 344 /* For AF_INET6 aka /dev/udp6 */ 345 struct streamtab udpinfov6 = { 346 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 347 }; 348 349 static sin_t sin_null; /* Zero address for quick clears */ 350 static sin6_t sin6_null; /* Zero address for quick clears */ 351 352 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 353 354 /* Default structure copied into T_INFO_ACK messages */ 355 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 356 T_INFO_ACK, 357 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 358 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 359 T_INVALID, /* CDATA_size. udp does not support connect data. */ 360 T_INVALID, /* DDATA_size. 
udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp.  These are loaded into us_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
	/* min		max		value		name */
	{ 0L,		256,		32,		"udp_wroff_extra" },
	{ 1L,		255,		255,		"udp_ipv4_ttl" },
	{ 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
	{ 1024,		(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
	{ 0,		1,		1,		"udp_do_checksum" },
	{ 1024,		UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
	{ 1024,		UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
	{ UDP_XMIT_LOWATER, (1<<30),	UDP_XMIT_HIWATER, "udp_xmit_hiwat"},
	{ 0,		(1<<30),	UDP_XMIT_LOWATER, "udp_xmit_lowat"},
	{ UDP_RECV_LOWATER, (1<<30),	UDP_RECV_HIWATER, "udp_recv_hiwat"},
	{ 65536,	(1<<30),	2*1024*1024,	"udp_max_buf"},
	{ 100,		60000,		1000,		"udp_ndd_get_info_interval"},
};
/* END CSTYLED */

/* Settable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL.
 */

void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;
void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;

typedef union T_primitives *t_primp_t;

/*
 * Return the next anonymous port in the privileged port range for
 * bind checking.
 *
 * Trusted Extension (TX) notes: TX allows administrator to mark or
 * reserve ports as Multilevel ports (MLP).  MLP has special function
 * on TX systems.  Once a port is made MLP, it's not available as
 * ordinary port.  This creates "holes" in the port name space.  It
 * may be necessary to skip the "holes" to find a suitable anon port.
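 *
 * As a rough illustration (a hedged sketch only, not the kernel routine
 * that follows), the selection amounts to a descending scan with a
 * single wraparound and a "skip this port" predicate supplied by the
 * labeling code:
 *
 *	in_port_t
 *	next_priv_port_sketch(in_port_t cur, in_port_t lo, in_port_t hi,
 *	    boolean_t (*is_hole)(in_port_t))
 *	{
 *		boolean_t restarted = B_FALSE;
 *
 *		for (;;) {
 *			if (cur < lo || cur >= hi) {
 *				if (restarted)
 *					return (0);
 *				cur = hi - 1;
 *				restarted = B_TRUE;
 *			}
 *			if (is_hole != NULL && is_hole(cur)) {
 *				cur--;
 *				continue;
 *			}
 *			return (cur);
 *		}
 *	}
 *
 * Returning 0 means the whole range was exhausted; a "hole" is a port
 * reserved as an MLP.  The real routine below differs in that
 * tsol_next_port() hands back the next candidate port itself rather
 * than acting as a simple predicate.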
438 */ 439 static in_port_t 440 udp_get_next_priv_port(udp_t *udp) 441 { 442 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 443 in_port_t nextport; 444 boolean_t restart = B_FALSE; 445 udp_stack_t *us = udp->udp_us; 446 447 retry: 448 if (next_priv_port < us->us_min_anonpriv_port || 449 next_priv_port >= IPPORT_RESERVED) { 450 next_priv_port = IPPORT_RESERVED - 1; 451 if (restart) 452 return (0); 453 restart = B_TRUE; 454 } 455 456 if (is_system_labeled() && 457 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 458 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 459 next_priv_port = nextport; 460 goto retry; 461 } 462 463 return (next_priv_port--); 464 } 465 466 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 467 /* ARGSUSED */ 468 static int 469 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 470 { 471 udp_fanout_t *udpf; 472 int i; 473 zoneid_t zoneid; 474 conn_t *connp; 475 udp_t *udp; 476 udp_stack_t *us; 477 478 connp = Q_TO_CONN(q); 479 udp = connp->conn_udp; 480 us = udp->udp_us; 481 482 /* Refer to comments in udp_status_report(). */ 483 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 484 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 485 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 486 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 487 return (0); 488 } 489 } 490 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 491 /* The following may work even if we cannot get a large buf. */ 492 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 493 return (0); 494 } 495 496 (void) mi_mpprintf(mp, 497 "UDP " MI_COL_HDRPAD_STR 498 /* 12345678[89ABCDEF] */ 499 " zone lport src addr dest addr port state"); 500 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 501 502 zoneid = connp->conn_zoneid; 503 504 for (i = 0; i < us->us_bind_fanout_size; i++) { 505 udpf = &us->us_bind_fanout[i]; 506 mutex_enter(&udpf->uf_lock); 507 508 /* Print the hash index. */ 509 udp = udpf->uf_udp; 510 if (zoneid != GLOBAL_ZONEID) { 511 /* skip to first entry in this zone; might be none */ 512 while (udp != NULL && 513 udp->udp_connp->conn_zoneid != zoneid) 514 udp = udp->udp_bind_hash; 515 } 516 if (udp != NULL) { 517 uint_t print_len, buf_len; 518 519 buf_len = mp->b_cont->b_datap->db_lim - 520 mp->b_cont->b_wptr; 521 print_len = snprintf((char *)mp->b_cont->b_wptr, 522 buf_len, "%d\n", i); 523 if (print_len < buf_len) { 524 mp->b_cont->b_wptr += print_len; 525 } else { 526 mp->b_cont->b_wptr += buf_len; 527 } 528 for (; udp != NULL; udp = udp->udp_bind_hash) { 529 if (zoneid == GLOBAL_ZONEID || 530 zoneid == udp->udp_connp->conn_zoneid) 531 udp_report_item(mp->b_cont, udp); 532 } 533 } 534 mutex_exit(&udpf->uf_lock); 535 } 536 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 537 return (0); 538 } 539 540 /* 541 * Hash list removal routine for udp_t structures. 542 */ 543 static void 544 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 545 { 546 udp_t *udpnext; 547 kmutex_t *lockp; 548 udp_stack_t *us = udp->udp_us; 549 550 if (udp->udp_ptpbhn == NULL) 551 return; 552 553 /* 554 * Extract the lock pointer in case there are concurrent 555 * hash_remove's for this instance. 
556 */ 557 ASSERT(udp->udp_port != 0); 558 if (!caller_holds_lock) { 559 lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 560 us->us_bind_fanout_size)].uf_lock; 561 ASSERT(lockp != NULL); 562 mutex_enter(lockp); 563 } 564 if (udp->udp_ptpbhn != NULL) { 565 udpnext = udp->udp_bind_hash; 566 if (udpnext != NULL) { 567 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 568 udp->udp_bind_hash = NULL; 569 } 570 *udp->udp_ptpbhn = udpnext; 571 udp->udp_ptpbhn = NULL; 572 } 573 if (!caller_holds_lock) { 574 mutex_exit(lockp); 575 } 576 } 577 578 static void 579 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 580 { 581 udp_t **udpp; 582 udp_t *udpnext; 583 584 ASSERT(MUTEX_HELD(&uf->uf_lock)); 585 ASSERT(udp->udp_ptpbhn == NULL); 586 udpp = &uf->uf_udp; 587 udpnext = udpp[0]; 588 if (udpnext != NULL) { 589 /* 590 * If the new udp bound to the INADDR_ANY address 591 * and the first one in the list is not bound to 592 * INADDR_ANY we skip all entries until we find the 593 * first one bound to INADDR_ANY. 594 * This makes sure that applications binding to a 595 * specific address get preference over those binding to 596 * INADDR_ANY. 597 */ 598 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 599 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 600 while ((udpnext = udpp[0]) != NULL && 601 !V6_OR_V4_INADDR_ANY( 602 udpnext->udp_bound_v6src)) { 603 udpp = &(udpnext->udp_bind_hash); 604 } 605 if (udpnext != NULL) 606 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 607 } else { 608 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 609 } 610 } 611 udp->udp_bind_hash = udpnext; 612 udp->udp_ptpbhn = udpp; 613 udpp[0] = udp; 614 } 615 616 /* 617 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 618 * passed to udp_wput. 619 * It associates a port number and local address with the stream. 620 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 621 * protocol type (IPPROTO_UDP) placed in the message following the address. 622 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 623 * (Called as writer.) 624 * 625 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 626 * without setting SO_REUSEADDR. This is needed so that they 627 * can be viewed as two independent transport protocols. 628 * However, anonymouns ports are allocated from the same range to avoid 629 * duplicating the us->us_next_port_to_try. 630 */ 631 static void 632 udp_tpi_bind(queue_t *q, mblk_t *mp) 633 { 634 sin_t *sin; 635 sin6_t *sin6; 636 mblk_t *mp1; 637 struct T_bind_req *tbr; 638 conn_t *connp; 639 udp_t *udp; 640 int error; 641 struct sockaddr *sa; 642 643 connp = Q_TO_CONN(q); 644 udp = connp->conn_udp; 645 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 646 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 647 "udp_bind: bad req, len %u", 648 (uint_t)(mp->b_wptr - mp->b_rptr)); 649 udp_err_ack(q, mp, TPROTO, 0); 650 return; 651 } 652 if (udp->udp_state != TS_UNBND) { 653 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 654 "udp_bind: bad state, %u", udp->udp_state); 655 udp_err_ack(q, mp, TOUTSTATE, 0); 656 return; 657 } 658 /* 659 * Reallocate the message to make sure we have enough room for an 660 * address and the protocol type. 661 */ 662 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 663 if (!mp1) { 664 udp_err_ack(q, mp, TSYSERR, ENOMEM); 665 return; 666 } 667 668 mp = mp1; 669 670 /* Reset the message type in preparation for shipping it back. 
*/ 671 DB_TYPE(mp) = M_PCPROTO; 672 673 tbr = (struct T_bind_req *)mp->b_rptr; 674 switch (tbr->ADDR_length) { 675 case 0: /* Request for a generic port */ 676 tbr->ADDR_offset = sizeof (struct T_bind_req); 677 if (udp->udp_family == AF_INET) { 678 tbr->ADDR_length = sizeof (sin_t); 679 sin = (sin_t *)&tbr[1]; 680 *sin = sin_null; 681 sin->sin_family = AF_INET; 682 mp->b_wptr = (uchar_t *)&sin[1]; 683 sa = (struct sockaddr *)sin; 684 } else { 685 ASSERT(udp->udp_family == AF_INET6); 686 tbr->ADDR_length = sizeof (sin6_t); 687 sin6 = (sin6_t *)&tbr[1]; 688 *sin6 = sin6_null; 689 sin6->sin6_family = AF_INET6; 690 mp->b_wptr = (uchar_t *)&sin6[1]; 691 sa = (struct sockaddr *)sin6; 692 } 693 break; 694 695 case sizeof (sin_t): /* Complete IPv4 address */ 696 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 697 sizeof (sin_t)); 698 if (sa == NULL || !OK_32PTR((char *)sa)) { 699 udp_err_ack(q, mp, TSYSERR, EINVAL); 700 return; 701 } 702 if (udp->udp_family != AF_INET || 703 sa->sa_family != AF_INET) { 704 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 705 return; 706 } 707 break; 708 709 case sizeof (sin6_t): /* complete IPv6 address */ 710 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 711 sizeof (sin6_t)); 712 if (sa == NULL || !OK_32PTR((char *)sa)) { 713 udp_err_ack(q, mp, TSYSERR, EINVAL); 714 return; 715 } 716 if (udp->udp_family != AF_INET6 || 717 sa->sa_family != AF_INET6) { 718 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 719 return; 720 } 721 break; 722 723 default: /* Invalid request */ 724 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 725 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 726 udp_err_ack(q, mp, TBADADDR, 0); 727 return; 728 } 729 730 731 cred_t *cr = DB_CREDDEF(mp, connp->conn_cred); 732 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 733 tbr->PRIM_type != O_T_BIND_REQ); 734 735 if (error != 0) { 736 if (error > 0) { 737 udp_err_ack(q, mp, TSYSERR, error); 738 } else { 739 udp_err_ack(q, mp, -error, 0); 740 } 741 } else { 742 tbr->PRIM_type = T_BIND_ACK; 743 qreply(q, mp); 744 } 745 } 746 747 /* 748 * This routine handles each T_CONN_REQ message passed to udp. It 749 * associates a default destination address with the stream. 750 * 751 * This routine sends down a T_BIND_REQ to IP with the following mblks: 752 * T_BIND_REQ - specifying local and remote address/port 753 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 754 * T_OK_ACK - for the T_CONN_REQ 755 * T_CONN_CON - to keep the TPI user happy 756 * 757 * The connect completes in udp_do_connect. 758 * When a T_BIND_ACK is received information is extracted from the IRE 759 * and the two appended messages are sent to the TPI user. 760 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 761 * convert it to an error ack for the appropriate primitive. 762 */ 763 static void 764 udp_tpi_connect(queue_t *q, mblk_t *mp) 765 { 766 mblk_t *mp1; 767 udp_t *udp; 768 conn_t *connp = Q_TO_CONN(q); 769 int error; 770 socklen_t len; 771 struct sockaddr *sa; 772 struct T_conn_req *tcr; 773 774 udp = connp->conn_udp; 775 tcr = (struct T_conn_req *)mp->b_rptr; 776 777 /* A bit of sanity checking */ 778 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 779 udp_err_ack(q, mp, TPROTO, 0); 780 return; 781 } 782 783 if (tcr->OPT_length != 0) { 784 udp_err_ack(q, mp, TBADOPT, 0); 785 return; 786 } 787 788 /* 789 * Determine packet type based on type of address passed in 790 * the request should contain an IPv4 or IPv6 address. 
791 * Make sure that address family matches the type of 792 * family of the the address passed down 793 */ 794 len = tcr->DEST_length; 795 switch (tcr->DEST_length) { 796 default: 797 udp_err_ack(q, mp, TBADADDR, 0); 798 return; 799 800 case sizeof (sin_t): 801 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 802 sizeof (sin_t)); 803 break; 804 805 case sizeof (sin6_t): 806 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 807 sizeof (sin6_t)); 808 break; 809 } 810 811 error = proto_verify_ip_addr(udp->udp_family, sa, len); 812 if (error != 0) { 813 udp_err_ack(q, mp, TSYSERR, error); 814 return; 815 } 816 817 /* 818 * We have to send a connection confirmation to 819 * keep TLI happy. 820 */ 821 if (udp->udp_family == AF_INET) { 822 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 823 sizeof (sin_t), NULL, 0); 824 } else { 825 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 826 sizeof (sin6_t), NULL, 0); 827 } 828 if (mp1 == NULL) { 829 udp_err_ack(q, mp, TSYSERR, ENOMEM); 830 return; 831 } 832 833 /* 834 * ok_ack for T_CONN_REQ 835 */ 836 mp = mi_tpi_ok_ack_alloc(mp); 837 if (mp == NULL) { 838 /* Unable to reuse the T_CONN_REQ for the ack. */ 839 freemsg(mp1); 840 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 841 return; 842 } 843 844 error = udp_do_connect(connp, sa, len); 845 if (error != 0) { 846 freeb(mp1); 847 if (error < 0) 848 udp_err_ack(q, mp, -error, 0); 849 else 850 udp_err_ack(q, mp, TSYSERR, error); 851 } else { 852 putnext(connp->conn_rq, mp); 853 putnext(connp->conn_rq, mp1); 854 } 855 } 856 857 static int 858 udp_tpi_close(queue_t *q, int flags) 859 { 860 conn_t *connp; 861 862 if (flags & SO_FALLBACK) { 863 /* 864 * stream is being closed while in fallback 865 * simply free the resources that were allocated 866 */ 867 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 868 qprocsoff(q); 869 goto done; 870 } 871 872 connp = Q_TO_CONN(q); 873 udp_do_close(connp); 874 done: 875 q->q_ptr = WR(q)->q_ptr = NULL; 876 return (0); 877 } 878 879 /* 880 * Called in the close path to quiesce the conn 881 */ 882 void 883 udp_quiesce_conn(conn_t *connp) 884 { 885 udp_t *udp = connp->conn_udp; 886 887 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 888 /* 889 * Running in cluster mode - register unbind information 890 */ 891 if (udp->udp_ipversion == IPV4_VERSION) { 892 (*cl_inet_unbind)( 893 connp->conn_netstack->netstack_stackid, 894 IPPROTO_UDP, AF_INET, 895 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 896 (in_port_t)udp->udp_port, NULL); 897 } else { 898 (*cl_inet_unbind)( 899 connp->conn_netstack->netstack_stackid, 900 IPPROTO_UDP, AF_INET6, 901 (uint8_t *)(&(udp->udp_v6src)), 902 (in_port_t)udp->udp_port, NULL); 903 } 904 } 905 906 udp_bind_hash_remove(udp, B_FALSE); 907 908 } 909 910 void 911 udp_close_free(conn_t *connp) 912 { 913 udp_t *udp = connp->conn_udp; 914 915 /* If there are any options associated with the stream, free them. 
*/ 916 if (udp->udp_ip_snd_options != NULL) { 917 mi_free((char *)udp->udp_ip_snd_options); 918 udp->udp_ip_snd_options = NULL; 919 udp->udp_ip_snd_options_len = 0; 920 } 921 922 if (udp->udp_ip_rcv_options != NULL) { 923 mi_free((char *)udp->udp_ip_rcv_options); 924 udp->udp_ip_rcv_options = NULL; 925 udp->udp_ip_rcv_options_len = 0; 926 } 927 928 /* Free memory associated with sticky options */ 929 if (udp->udp_sticky_hdrs_len != 0) { 930 kmem_free(udp->udp_sticky_hdrs, 931 udp->udp_sticky_hdrs_len); 932 udp->udp_sticky_hdrs = NULL; 933 udp->udp_sticky_hdrs_len = 0; 934 } 935 936 ip6_pkt_free(&udp->udp_sticky_ipp); 937 938 /* 939 * Clear any fields which the kmem_cache constructor clears. 940 * Only udp_connp needs to be preserved. 941 * TBD: We should make this more efficient to avoid clearing 942 * everything. 943 */ 944 ASSERT(udp->udp_connp == connp); 945 bzero(udp, sizeof (udp_t)); 946 udp->udp_connp = connp; 947 } 948 949 static int 950 udp_do_disconnect(conn_t *connp) 951 { 952 udp_t *udp; 953 mblk_t *ire_mp; 954 udp_fanout_t *udpf; 955 udp_stack_t *us; 956 int error; 957 958 udp = connp->conn_udp; 959 us = udp->udp_us; 960 rw_enter(&udp->udp_rwlock, RW_WRITER); 961 if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) { 962 rw_exit(&udp->udp_rwlock); 963 return (-TOUTSTATE); 964 } 965 udp->udp_pending_op = T_DISCON_REQ; 966 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 967 us->us_bind_fanout_size)]; 968 mutex_enter(&udpf->uf_lock); 969 udp->udp_v6src = udp->udp_bound_v6src; 970 udp->udp_state = TS_IDLE; 971 mutex_exit(&udpf->uf_lock); 972 973 if (udp->udp_family == AF_INET6) { 974 /* Rebuild the header template */ 975 error = udp_build_hdrs(udp); 976 if (error != 0) { 977 udp->udp_pending_op = -1; 978 rw_exit(&udp->udp_rwlock); 979 return (error); 980 } 981 } 982 983 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 984 if (ire_mp == NULL) { 985 mutex_enter(&udpf->uf_lock); 986 udp->udp_pending_op = -1; 987 mutex_exit(&udpf->uf_lock); 988 rw_exit(&udp->udp_rwlock); 989 return (ENOMEM); 990 } 991 992 rw_exit(&udp->udp_rwlock); 993 994 if (udp->udp_family == AF_INET6) { 995 error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP, 996 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 997 } else { 998 error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP, 999 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE); 1000 } 1001 1002 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 1003 } 1004 1005 1006 static void 1007 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 1008 { 1009 conn_t *connp = Q_TO_CONN(q); 1010 int error; 1011 1012 /* 1013 * Allocate the largest primitive we need to send back 1014 * T_error_ack is > than T_ok_ack 1015 */ 1016 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 1017 if (mp == NULL) { 1018 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 1019 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 1020 return; 1021 } 1022 1023 error = udp_do_disconnect(connp); 1024 1025 if (error != 0) { 1026 if (error < 0) { 1027 udp_err_ack(q, mp, -error, 0); 1028 } else { 1029 udp_err_ack(q, mp, TSYSERR, error); 1030 } 1031 } else { 1032 mp = mi_tpi_ok_ack_alloc(mp); 1033 ASSERT(mp != NULL); 1034 qreply(q, mp); 1035 } 1036 } 1037 1038 int 1039 udp_disconnect(conn_t *connp) 1040 { 1041 int error; 1042 udp_t *udp = connp->conn_udp; 1043 1044 udp->udp_dgram_errind = B_FALSE; 1045 1046 error = udp_do_disconnect(connp); 1047 1048 if (error < 0) 1049 error = proto_tlitosyserr(-error); 1050 1051 return (error); 1052 } 1053 1054 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 1055 static void 1056 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1057 { 1058 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1059 qreply(q, mp); 1060 } 1061 1062 /* Shorthand to generate and send TPI error acks to our client */ 1063 static void 1064 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 1065 int sys_error) 1066 { 1067 struct T_error_ack *teackp; 1068 1069 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1070 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1071 teackp = (struct T_error_ack *)mp->b_rptr; 1072 teackp->ERROR_prim = primitive; 1073 teackp->TLI_error = t_error; 1074 teackp->UNIX_error = sys_error; 1075 qreply(q, mp); 1076 } 1077 } 1078 1079 /*ARGSUSED*/ 1080 static int 1081 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 1082 { 1083 int i; 1084 udp_t *udp = Q_TO_UDP(q); 1085 udp_stack_t *us = udp->udp_us; 1086 1087 for (i = 0; i < us->us_num_epriv_ports; i++) { 1088 if (us->us_epriv_ports[i] != 0) 1089 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 1090 } 1091 return (0); 1092 } 1093 1094 /* ARGSUSED */ 1095 static int 1096 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1097 cred_t *cr) 1098 { 1099 long new_value; 1100 int i; 1101 udp_t *udp = Q_TO_UDP(q); 1102 udp_stack_t *us = udp->udp_us; 1103 1104 /* 1105 * Fail the request if the new value does not lie within the 1106 * port number limits. 1107 */ 1108 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1109 new_value <= 0 || new_value >= 65536) { 1110 return (EINVAL); 1111 } 1112 1113 /* Check if the value is already in the list */ 1114 for (i = 0; i < us->us_num_epriv_ports; i++) { 1115 if (new_value == us->us_epriv_ports[i]) { 1116 return (EEXIST); 1117 } 1118 } 1119 /* Find an empty slot */ 1120 for (i = 0; i < us->us_num_epriv_ports; i++) { 1121 if (us->us_epriv_ports[i] == 0) 1122 break; 1123 } 1124 if (i == us->us_num_epriv_ports) { 1125 return (EOVERFLOW); 1126 } 1127 1128 /* Set the new value */ 1129 us->us_epriv_ports[i] = (in_port_t)new_value; 1130 return (0); 1131 } 1132 1133 /* ARGSUSED */ 1134 static int 1135 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1136 cred_t *cr) 1137 { 1138 long new_value; 1139 int i; 1140 udp_t *udp = Q_TO_UDP(q); 1141 udp_stack_t *us = udp->udp_us; 1142 1143 /* 1144 * Fail the request if the new value does not lie within the 1145 * port number limits. 
1146 */ 1147 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1148 new_value <= 0 || new_value >= 65536) { 1149 return (EINVAL); 1150 } 1151 1152 /* Check that the value is already in the list */ 1153 for (i = 0; i < us->us_num_epriv_ports; i++) { 1154 if (us->us_epriv_ports[i] == new_value) 1155 break; 1156 } 1157 if (i == us->us_num_epriv_ports) { 1158 return (ESRCH); 1159 } 1160 1161 /* Clear the value */ 1162 us->us_epriv_ports[i] = 0; 1163 return (0); 1164 } 1165 1166 /* At minimum we need 4 bytes of UDP header */ 1167 #define ICMP_MIN_UDP_HDR 4 1168 1169 /* 1170 * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP. 1171 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1172 * Assumes that IP has pulled up everything up to and including the ICMP header. 1173 */ 1174 static void 1175 udp_icmp_error(conn_t *connp, mblk_t *mp) 1176 { 1177 icmph_t *icmph; 1178 ipha_t *ipha; 1179 int iph_hdr_length; 1180 udpha_t *udpha; 1181 sin_t sin; 1182 sin6_t sin6; 1183 mblk_t *mp1; 1184 int error = 0; 1185 udp_t *udp = connp->conn_udp; 1186 1187 mp1 = NULL; 1188 ipha = (ipha_t *)mp->b_rptr; 1189 1190 ASSERT(OK_32PTR(mp->b_rptr)); 1191 1192 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1193 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1194 udp_icmp_error_ipv6(connp, mp); 1195 return; 1196 } 1197 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 1198 1199 /* Skip past the outer IP and ICMP headers */ 1200 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1201 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1202 ipha = (ipha_t *)&icmph[1]; 1203 1204 /* Skip past the inner IP and find the ULP header */ 1205 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1206 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1207 1208 switch (icmph->icmph_type) { 1209 case ICMP_DEST_UNREACHABLE: 1210 switch (icmph->icmph_code) { 1211 case ICMP_FRAGMENTATION_NEEDED: 1212 /* 1213 * IP has already adjusted the path MTU. 1214 */ 1215 break; 1216 case ICMP_PORT_UNREACHABLE: 1217 case ICMP_PROTOCOL_UNREACHABLE: 1218 error = ECONNREFUSED; 1219 break; 1220 default: 1221 /* Transient errors */ 1222 break; 1223 } 1224 break; 1225 default: 1226 /* Transient errors */ 1227 break; 1228 } 1229 if (error == 0) { 1230 freemsg(mp); 1231 return; 1232 } 1233 1234 /* 1235 * Deliver T_UDERROR_IND when the application has asked for it. 1236 * The socket layer enables this automatically when connected. 
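 *
 * From an application's point of view (a hedged user-level sketch, not
 * code from this file), a connected UDP socket typically sees the
 * queued error on a later socket call:
 *
 *	int s = socket(AF_INET, SOCK_DGRAM, 0);
 *	(void) connect(s, (struct sockaddr *)&dst, sizeof (dst));
 *	(void) send(s, buf, len, 0);
 *	if (recv(s, buf, sizeof (buf), 0) < 0 && errno == ECONNREFUSED)
 *		perror("destination port unreachable");
 *
 * (s, dst, buf and len are assumed to be set up by the caller.)  The
 * recv() fails with ECONNREFUSED when an ICMP port unreachable came
 * back for the earlier send.  An unconnected endpoint is only told
 * about the failure if it explicitly enabled SO_DGRAM_ERRIND.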
1237 */ 1238 if (!udp->udp_dgram_errind) { 1239 freemsg(mp); 1240 return; 1241 } 1242 1243 1244 switch (udp->udp_family) { 1245 case AF_INET: 1246 sin = sin_null; 1247 sin.sin_family = AF_INET; 1248 sin.sin_addr.s_addr = ipha->ipha_dst; 1249 sin.sin_port = udpha->uha_dst_port; 1250 if (IPCL_IS_NONSTR(connp)) { 1251 rw_enter(&udp->udp_rwlock, RW_WRITER); 1252 if (udp->udp_state == TS_DATA_XFER) { 1253 if (sin.sin_port == udp->udp_dstport && 1254 sin.sin_addr.s_addr == 1255 V4_PART_OF_V6(udp->udp_v6dst)) { 1256 1257 rw_exit(&udp->udp_rwlock); 1258 (*connp->conn_upcalls->su_set_error) 1259 (connp->conn_upper_handle, error); 1260 goto done; 1261 } 1262 } else { 1263 udp->udp_delayed_error = error; 1264 *((sin_t *)&udp->udp_delayed_addr) = sin; 1265 } 1266 rw_exit(&udp->udp_rwlock); 1267 } else { 1268 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1269 NULL, 0, error); 1270 } 1271 break; 1272 case AF_INET6: 1273 sin6 = sin6_null; 1274 sin6.sin6_family = AF_INET6; 1275 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1276 sin6.sin6_port = udpha->uha_dst_port; 1277 if (IPCL_IS_NONSTR(connp)) { 1278 rw_enter(&udp->udp_rwlock, RW_WRITER); 1279 if (udp->udp_state == TS_DATA_XFER) { 1280 if (sin6.sin6_port == udp->udp_dstport && 1281 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1282 &udp->udp_v6dst)) { 1283 rw_exit(&udp->udp_rwlock); 1284 (*connp->conn_upcalls->su_set_error) 1285 (connp->conn_upper_handle, error); 1286 goto done; 1287 } 1288 } else { 1289 udp->udp_delayed_error = error; 1290 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1291 } 1292 rw_exit(&udp->udp_rwlock); 1293 } else { 1294 1295 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1296 NULL, 0, error); 1297 } 1298 break; 1299 } 1300 if (mp1 != NULL) 1301 putnext(connp->conn_rq, mp1); 1302 done: 1303 freemsg(mp); 1304 } 1305 1306 /* 1307 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1308 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1309 * Assumes that IP has pulled up all the extension headers as well as the 1310 * ICMPv6 header. 
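 *
 * Locating the UDP header inside the offending packet echoed in the
 * ICMPv6 body means walking the IPv6 extension header chain.  A
 * simplified, hedged sketch of that walk (assuming the headers are
 * contiguous and the chain is well formed; ip_hdr_length_nexthdr_v6()
 * is the real helper used below):
 *
 *	uint8_t *p = (uint8_t *)ip6h + IPV6_HDR_LEN;
 *	uint8_t nxt = ip6h->ip6_nxt;
 *
 *	while (nxt == IPPROTO_HOPOPTS || nxt == IPPROTO_ROUTING ||
 *	    nxt == IPPROTO_DSTOPTS) {
 *		struct ip6_ext *ext = (struct ip6_ext *)p;
 *
 *		nxt = ext->ip6e_nxt;
 *		p += 8 * (ext->ip6e_len + 1);
 *	}
 *	if (nxt == IPPROTO_UDP)
 *		udpha = (udpha_t *)p;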
1311 */ 1312 static void 1313 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1314 { 1315 icmp6_t *icmp6; 1316 ip6_t *ip6h, *outer_ip6h; 1317 uint16_t iph_hdr_length; 1318 uint8_t *nexthdrp; 1319 udpha_t *udpha; 1320 sin6_t sin6; 1321 mblk_t *mp1; 1322 int error = 0; 1323 udp_t *udp = connp->conn_udp; 1324 udp_stack_t *us = udp->udp_us; 1325 1326 outer_ip6h = (ip6_t *)mp->b_rptr; 1327 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1328 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1329 else 1330 iph_hdr_length = IPV6_HDR_LEN; 1331 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1332 ip6h = (ip6_t *)&icmp6[1]; 1333 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1334 freemsg(mp); 1335 return; 1336 } 1337 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1338 1339 switch (icmp6->icmp6_type) { 1340 case ICMP6_DST_UNREACH: 1341 switch (icmp6->icmp6_code) { 1342 case ICMP6_DST_UNREACH_NOPORT: 1343 error = ECONNREFUSED; 1344 break; 1345 case ICMP6_DST_UNREACH_ADMIN: 1346 case ICMP6_DST_UNREACH_NOROUTE: 1347 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1348 case ICMP6_DST_UNREACH_ADDR: 1349 /* Transient errors */ 1350 break; 1351 default: 1352 break; 1353 } 1354 break; 1355 case ICMP6_PACKET_TOO_BIG: { 1356 struct T_unitdata_ind *tudi; 1357 struct T_opthdr *toh; 1358 size_t udi_size; 1359 mblk_t *newmp; 1360 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1361 sizeof (struct ip6_mtuinfo); 1362 sin6_t *sin6; 1363 struct ip6_mtuinfo *mtuinfo; 1364 1365 /* 1366 * If the application has requested to receive path mtu 1367 * information, send up an empty message containing an 1368 * IPV6_PATHMTU ancillary data item. 1369 */ 1370 if (!udp->udp_ipv6_recvpathmtu) 1371 break; 1372 1373 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1374 opt_length; 1375 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1376 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1377 break; 1378 } 1379 1380 /* 1381 * newmp->b_cont is left to NULL on purpose. This is an 1382 * empty message containing only ancillary data. 1383 */ 1384 newmp->b_datap->db_type = M_PROTO; 1385 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1386 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1387 tudi->PRIM_type = T_UNITDATA_IND; 1388 tudi->SRC_length = sizeof (sin6_t); 1389 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1390 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1391 tudi->OPT_length = opt_length; 1392 1393 sin6 = (sin6_t *)&tudi[1]; 1394 bzero(sin6, sizeof (sin6_t)); 1395 sin6->sin6_family = AF_INET6; 1396 sin6->sin6_addr = udp->udp_v6dst; 1397 1398 toh = (struct T_opthdr *)&sin6[1]; 1399 toh->level = IPPROTO_IPV6; 1400 toh->name = IPV6_PATHMTU; 1401 toh->len = opt_length; 1402 toh->status = 0; 1403 1404 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1405 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1406 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1407 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1408 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1409 /* 1410 * We've consumed everything we need from the original 1411 * message. Free it, then send our empty message. 
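 *
 * On the application side the empty message shows up as a zero-length
 * datagram whose ancillary data carries the new path MTU.  A hedged
 * consumer sketch using the RFC 3542 interfaces (not code from this
 * file; s and the msghdr buffers are assumed to be set up already):
 *
 *	int on = 1;
 *	struct msghdr msg;
 *	struct cmsghdr *cm;
 *	struct ip6_mtuinfo mtu;
 *
 *	(void) setsockopt(s, IPPROTO_IPV6, IPV6_RECVPATHMTU, &on,
 *	    sizeof (on));
 *	... fill in msg.msg_control / msg.msg_controllen, call recvmsg() ...
 *	for (cm = CMSG_FIRSTHDR(&msg); cm != NULL;
 *	    cm = CMSG_NXTHDR(&msg, cm)) {
 *		if (cm->cmsg_level == IPPROTO_IPV6 &&
 *		    cm->cmsg_type == IPV6_PATHMTU) {
 *			bcopy(CMSG_DATA(cm), &mtu, sizeof (mtu));
 *			break;
 *		}
 *	}
 *
 * after which mtu.ip6m_mtu holds the value taken from icmp6_mtu below
 * and mtu.ip6m_addr identifies the destination it applies to.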
1412 */ 1413 freemsg(mp); 1414 if (!IPCL_IS_NONSTR(connp)) { 1415 putnext(connp->conn_rq, newmp); 1416 } else { 1417 (*connp->conn_upcalls->su_recv) 1418 (connp->conn_upper_handle, newmp, 0, 0, &error, 1419 NULL); 1420 } 1421 return; 1422 } 1423 case ICMP6_TIME_EXCEEDED: 1424 /* Transient errors */ 1425 break; 1426 case ICMP6_PARAM_PROB: 1427 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1428 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1429 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1430 (uchar_t *)nexthdrp) { 1431 error = ECONNREFUSED; 1432 break; 1433 } 1434 break; 1435 } 1436 if (error == 0) { 1437 freemsg(mp); 1438 return; 1439 } 1440 1441 /* 1442 * Deliver T_UDERROR_IND when the application has asked for it. 1443 * The socket layer enables this automatically when connected. 1444 */ 1445 if (!udp->udp_dgram_errind) { 1446 freemsg(mp); 1447 return; 1448 } 1449 1450 sin6 = sin6_null; 1451 sin6.sin6_family = AF_INET6; 1452 sin6.sin6_addr = ip6h->ip6_dst; 1453 sin6.sin6_port = udpha->uha_dst_port; 1454 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1455 1456 if (IPCL_IS_NONSTR(connp)) { 1457 rw_enter(&udp->udp_rwlock, RW_WRITER); 1458 if (udp->udp_state == TS_DATA_XFER) { 1459 if (sin6.sin6_port == udp->udp_dstport && 1460 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1461 &udp->udp_v6dst)) { 1462 rw_exit(&udp->udp_rwlock); 1463 (*connp->conn_upcalls->su_set_error) 1464 (connp->conn_upper_handle, error); 1465 goto done; 1466 } 1467 } else { 1468 udp->udp_delayed_error = error; 1469 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1470 } 1471 rw_exit(&udp->udp_rwlock); 1472 } else { 1473 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1474 NULL, 0, error); 1475 if (mp1 != NULL) 1476 putnext(connp->conn_rq, mp1); 1477 } 1478 1479 done: 1480 freemsg(mp); 1481 } 1482 1483 /* 1484 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1485 * The local address is filled in if endpoint is bound. The remote address 1486 * is filled in if remote address has been precified ("connected endpoint") 1487 * (The concept of connected CLTS sockets is alien to published TPI 1488 * but we support it anyway). 1489 */ 1490 static void 1491 udp_addr_req(queue_t *q, mblk_t *mp) 1492 { 1493 sin_t *sin; 1494 sin6_t *sin6; 1495 mblk_t *ackmp; 1496 struct T_addr_ack *taa; 1497 udp_t *udp = Q_TO_UDP(q); 1498 1499 /* Make it large enough for worst case */ 1500 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1501 2 * sizeof (sin6_t), 1); 1502 if (ackmp == NULL) { 1503 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1504 return; 1505 } 1506 taa = (struct T_addr_ack *)ackmp->b_rptr; 1507 1508 bzero(taa, sizeof (struct T_addr_ack)); 1509 ackmp->b_wptr = (uchar_t *)&taa[1]; 1510 1511 taa->PRIM_type = T_ADDR_ACK; 1512 ackmp->b_datap->db_type = M_PCPROTO; 1513 rw_enter(&udp->udp_rwlock, RW_READER); 1514 /* 1515 * Note: Following code assumes 32 bit alignment of basic 1516 * data structures like sin_t and struct T_addr_ack. 
1517 */ 1518 if (udp->udp_state != TS_UNBND) { 1519 /* 1520 * Fill in local address first 1521 */ 1522 taa->LOCADDR_offset = sizeof (*taa); 1523 if (udp->udp_family == AF_INET) { 1524 taa->LOCADDR_length = sizeof (sin_t); 1525 sin = (sin_t *)&taa[1]; 1526 /* Fill zeroes and then initialize non-zero fields */ 1527 *sin = sin_null; 1528 sin->sin_family = AF_INET; 1529 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 1530 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1531 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 1532 sin->sin_addr.s_addr); 1533 } else { 1534 /* 1535 * INADDR_ANY 1536 * udp_v6src is not set, we might be bound to 1537 * broadcast/multicast. Use udp_bound_v6src as 1538 * local address instead (that could 1539 * also still be INADDR_ANY) 1540 */ 1541 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 1542 sin->sin_addr.s_addr); 1543 } 1544 sin->sin_port = udp->udp_port; 1545 ackmp->b_wptr = (uchar_t *)&sin[1]; 1546 if (udp->udp_state == TS_DATA_XFER) { 1547 /* 1548 * connected, fill remote address too 1549 */ 1550 taa->REMADDR_length = sizeof (sin_t); 1551 /* assumed 32-bit alignment */ 1552 taa->REMADDR_offset = taa->LOCADDR_offset + 1553 taa->LOCADDR_length; 1554 1555 sin = (sin_t *)(ackmp->b_rptr + 1556 taa->REMADDR_offset); 1557 /* initialize */ 1558 *sin = sin_null; 1559 sin->sin_family = AF_INET; 1560 sin->sin_addr.s_addr = 1561 V4_PART_OF_V6(udp->udp_v6dst); 1562 sin->sin_port = udp->udp_dstport; 1563 ackmp->b_wptr = (uchar_t *)&sin[1]; 1564 } 1565 } else { 1566 taa->LOCADDR_length = sizeof (sin6_t); 1567 sin6 = (sin6_t *)&taa[1]; 1568 /* Fill zeroes and then initialize non-zero fields */ 1569 *sin6 = sin6_null; 1570 sin6->sin6_family = AF_INET6; 1571 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1572 sin6->sin6_addr = udp->udp_v6src; 1573 } else { 1574 /* 1575 * UNSPECIFIED 1576 * udp_v6src is not set, we might be bound to 1577 * broadcast/multicast. Use udp_bound_v6src as 1578 * local address instead (that could 1579 * also still be UNSPECIFIED) 1580 */ 1581 sin6->sin6_addr = 1582 udp->udp_bound_v6src; 1583 } 1584 sin6->sin6_port = udp->udp_port; 1585 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1586 if (udp->udp_state == TS_DATA_XFER) { 1587 /* 1588 * connected, fill remote address too 1589 */ 1590 taa->REMADDR_length = sizeof (sin6_t); 1591 /* assumed 32-bit alignment */ 1592 taa->REMADDR_offset = taa->LOCADDR_offset + 1593 taa->LOCADDR_length; 1594 1595 sin6 = (sin6_t *)(ackmp->b_rptr + 1596 taa->REMADDR_offset); 1597 /* initialize */ 1598 *sin6 = sin6_null; 1599 sin6->sin6_family = AF_INET6; 1600 sin6->sin6_addr = udp->udp_v6dst; 1601 sin6->sin6_port = udp->udp_dstport; 1602 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1603 } 1604 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1605 } 1606 } 1607 rw_exit(&udp->udp_rwlock); 1608 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1609 qreply(q, ackmp); 1610 } 1611 1612 static void 1613 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1614 { 1615 if (udp->udp_family == AF_INET) { 1616 *tap = udp_g_t_info_ack_ipv4; 1617 } else { 1618 *tap = udp_g_t_info_ack_ipv6; 1619 } 1620 tap->CURRENT_state = udp->udp_state; 1621 tap->OPT_size = udp_max_optsize; 1622 } 1623 1624 static void 1625 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1626 t_uscalar_t cap_bits1) 1627 { 1628 tcap->CAP_bits1 = 0; 1629 1630 if (cap_bits1 & TC1_INFO) { 1631 udp_copy_info(&tcap->INFO_ack, udp); 1632 tcap->CAP_bits1 |= TC1_INFO; 1633 } 1634 } 1635 1636 /* 1637 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1638 * udp_wput. 
Much of the T_CAPABILITY_ACK information is copied from
 * udp_g_t_info_ack.  The current state of the stream is copied from
 * udp_state.
 */
static void
udp_capability_req(queue_t *q, mblk_t *mp)
{
	t_uscalar_t		cap_bits1;
	struct T_capability_ack	*tcap;
	udp_t			*udp = Q_TO_UDP(q);

	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;

	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
	    mp->b_datap->db_type, T_CAPABILITY_ACK);
	if (!mp)
		return;

	tcap = (struct T_capability_ack *)mp->b_rptr;
	udp_do_capability_ack(udp, tcap, cap_bits1);

	qreply(q, mp);
}

/*
 * This routine responds to T_INFO_REQ messages.  It is called by udp_wput.
 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
 * The current state of the stream is copied from udp_state.
 */
static void
udp_info_req(queue_t *q, mblk_t *mp)
{
	udp_t *udp = Q_TO_UDP(q);

	/* Create a T_INFO_ACK message. */
	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
	    T_INFO_ACK);
	if (!mp)
		return;
	udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
	qreply(q, mp);
}

/*
 * IP recognizes seven kinds of bind requests:
 *
 * - A zero-length address binds only to the protocol number.
 *
 * - A 4-byte address is treated as a request to
 *   validate that the address is a valid local IPv4
 *   address, appropriate for an application to bind to.
 *   IP does the verification, but does not make any note
 *   of the address at this time.
 *
 * - A 16-byte address is treated as a request
 *   to validate a local IPv6 address, as the 4-byte
 *   address case above.
 *
 * - A 16-byte sockaddr_in to validate the local IPv4 address and also
 *   use it for the inbound fanout of packets.
 *
 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also
 *   use it for the inbound fanout of packets.
 *
 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout
 *   information consisting of local and remote addresses
 *   and ports.  In this case, the addresses are both
 *   validated as appropriate for this operation, and, if
 *   so, the information is retained for use in the
 *   inbound fanout.
 *
 * - A 36-byte address (ipa6_conn_t) containing complete IPv6
 *   fanout information, like the 12-byte case above.
 *
 * IP will also fill in the IRE request mblk with information
 * regarding our peer.  In all cases, we notify IP of our protocol
 * type by appending a single protocol byte to the bind request.
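 *
 * Putting the pieces together, the request that udp_ip_bind_mp() builds
 * below is a single M_PROTO mblk laid out as
 *
 *	[ struct T_bind_req | address of ADDR_length bytes | proto byte ]
 *
 * optionally followed (via b_cont) by an IRE_DB_REQ_TYPE mblk sized to
 * hold the ire_t that IP returns.  A hedged sketch of how a consumer
 * could pick such a request apart (illustrative only, not code from IP):
 *
 *	struct T_bind_req *tbr = (struct T_bind_req *)mp->b_rptr;
 *	uchar_t *addr = mp->b_rptr + tbr->ADDR_offset;
 *	uint8_t proto = addr[tbr->ADDR_length];
 *	boolean_t wants_ire = (mp->b_cont != NULL &&
 *	    DB_TYPE(mp->b_cont) == IRE_DB_REQ_TYPE);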
1715 */ 1716 static mblk_t * 1717 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 1718 { 1719 char *cp; 1720 mblk_t *mp; 1721 struct T_bind_req *tbr; 1722 ipa_conn_t *ac; 1723 ipa6_conn_t *ac6; 1724 sin_t *sin; 1725 sin6_t *sin6; 1726 1727 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1728 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 1729 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1730 if (!mp) 1731 return (mp); 1732 mp->b_datap->db_type = M_PROTO; 1733 tbr = (struct T_bind_req *)mp->b_rptr; 1734 tbr->PRIM_type = bind_prim; 1735 tbr->ADDR_offset = sizeof (*tbr); 1736 tbr->CONIND_number = 0; 1737 tbr->ADDR_length = addr_length; 1738 cp = (char *)&tbr[1]; 1739 switch (addr_length) { 1740 case sizeof (ipa_conn_t): 1741 ASSERT(udp->udp_family == AF_INET); 1742 /* Append a request for an IRE */ 1743 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1744 if (!mp->b_cont) { 1745 freemsg(mp); 1746 return (NULL); 1747 } 1748 mp->b_cont->b_wptr += sizeof (ire_t); 1749 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1750 1751 /* cp known to be 32 bit aligned */ 1752 ac = (ipa_conn_t *)cp; 1753 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 1754 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 1755 ac->ac_fport = udp->udp_dstport; 1756 ac->ac_lport = udp->udp_port; 1757 break; 1758 1759 case sizeof (ipa6_conn_t): 1760 ASSERT(udp->udp_family == AF_INET6); 1761 /* Append a request for an IRE */ 1762 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1763 if (!mp->b_cont) { 1764 freemsg(mp); 1765 return (NULL); 1766 } 1767 mp->b_cont->b_wptr += sizeof (ire_t); 1768 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1769 1770 /* cp known to be 32 bit aligned */ 1771 ac6 = (ipa6_conn_t *)cp; 1772 ac6->ac6_laddr = udp->udp_v6src; 1773 ac6->ac6_faddr = udp->udp_v6dst; 1774 ac6->ac6_fport = udp->udp_dstport; 1775 ac6->ac6_lport = udp->udp_port; 1776 break; 1777 1778 case sizeof (sin_t): 1779 ASSERT(udp->udp_family == AF_INET); 1780 /* Append a request for an IRE */ 1781 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1782 if (!mp->b_cont) { 1783 freemsg(mp); 1784 return (NULL); 1785 } 1786 mp->b_cont->b_wptr += sizeof (ire_t); 1787 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1788 1789 sin = (sin_t *)cp; 1790 *sin = sin_null; 1791 sin->sin_family = AF_INET; 1792 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 1793 sin->sin_port = udp->udp_port; 1794 break; 1795 1796 case sizeof (sin6_t): 1797 ASSERT(udp->udp_family == AF_INET6); 1798 /* Append a request for an IRE */ 1799 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1800 if (!mp->b_cont) { 1801 freemsg(mp); 1802 return (NULL); 1803 } 1804 mp->b_cont->b_wptr += sizeof (ire_t); 1805 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1806 1807 sin6 = (sin6_t *)cp; 1808 *sin6 = sin6_null; 1809 sin6->sin6_family = AF_INET6; 1810 sin6->sin6_addr = udp->udp_bound_v6src; 1811 sin6->sin6_port = udp->udp_port; 1812 break; 1813 } 1814 /* Add protocol number to end */ 1815 cp[addr_length] = (char)IPPROTO_UDP; 1816 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1817 return (mp); 1818 } 1819 1820 /* For /dev/udp aka AF_INET open */ 1821 static int 1822 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1823 { 1824 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1825 } 1826 1827 /* For /dev/udp6 aka AF_INET6 open */ 1828 static int 1829 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1830 { 1831 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1832 } 1833 1834 /* 1835 * 
This is the open routine for udp. It allocates a udp_t structure for 1836 * the stream and, on the first open of the module, creates an ND table. 1837 */ 1838 /*ARGSUSED2*/ 1839 static int 1840 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1841 boolean_t isv6) 1842 { 1843 int error; 1844 udp_t *udp; 1845 conn_t *connp; 1846 dev_t conn_dev; 1847 udp_stack_t *us; 1848 vmem_t *minor_arena; 1849 1850 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 1851 1852 /* If the stream is already open, return immediately. */ 1853 if (q->q_ptr != NULL) 1854 return (0); 1855 1856 if (sflag == MODOPEN) 1857 return (EINVAL); 1858 1859 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1860 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1861 minor_arena = ip_minor_arena_la; 1862 } else { 1863 /* 1864 * Either minor numbers in the large arena were exhausted 1865 * or a non socket application is doing the open. 1866 * Try to allocate from the small arena. 1867 */ 1868 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1869 return (EBUSY); 1870 1871 minor_arena = ip_minor_arena_sa; 1872 } 1873 1874 if (flag & SO_FALLBACK) { 1875 /* 1876 * Non streams socket needs a stream to fallback to 1877 */ 1878 RD(q)->q_ptr = (void *)conn_dev; 1879 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1880 WR(q)->q_ptr = (void *)minor_arena; 1881 qprocson(q); 1882 return (0); 1883 } 1884 1885 connp = udp_do_open(credp, isv6, KM_SLEEP); 1886 if (connp == NULL) { 1887 inet_minor_free(minor_arena, conn_dev); 1888 return (ENOMEM); 1889 } 1890 udp = connp->conn_udp; 1891 us = udp->udp_us; 1892 1893 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1894 connp->conn_dev = conn_dev; 1895 connp->conn_minor_arena = minor_arena; 1896 1897 /* 1898 * Initialize the udp_t structure for this stream. 1899 */ 1900 q->q_ptr = connp; 1901 WR(q)->q_ptr = connp; 1902 connp->conn_rq = q; 1903 connp->conn_wq = WR(q); 1904 1905 rw_enter(&udp->udp_rwlock, RW_WRITER); 1906 ASSERT(connp->conn_ulp == IPPROTO_UDP); 1907 ASSERT(connp->conn_udp == udp); 1908 ASSERT(udp->udp_connp == connp); 1909 1910 if (flag & SO_SOCKSTR) { 1911 connp->conn_flags |= IPCL_SOCKET; 1912 udp->udp_issocket = B_TRUE; 1913 udp->udp_direct_sockfs = B_TRUE; 1914 } 1915 1916 q->q_hiwat = us->us_recv_hiwat; 1917 WR(q)->q_hiwat = us->us_xmit_hiwat; 1918 WR(q)->q_lowat = us->us_xmit_lowat; 1919 1920 qprocson(q); 1921 1922 if (udp->udp_family == AF_INET6) { 1923 /* Build initial header template for transmit */ 1924 if ((error = udp_build_hdrs(udp)) != 0) { 1925 rw_exit(&udp->udp_rwlock); 1926 qprocsoff(q); 1927 inet_minor_free(minor_arena, conn_dev); 1928 ipcl_conn_destroy(connp); 1929 return (error); 1930 } 1931 } 1932 rw_exit(&udp->udp_rwlock); 1933 1934 /* Set the Stream head write offset and high watermark. */ 1935 (void) proto_set_tx_wroff(q, connp, 1936 udp->udp_max_hdr_len + us->us_wroff_extra); 1937 /* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? */ 1938 (void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat)); 1939 1940 mutex_enter(&connp->conn_lock); 1941 connp->conn_state_flags &= ~CONN_INCIPIENT; 1942 mutex_exit(&connp->conn_lock); 1943 return (0); 1944 } 1945 1946 /* 1947 * Which UDP options OK to set through T_UNITDATA_REQ... 
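 * (At present this filter lets every option through: udp_opt_allow_udr_set()
 * below unconditionally returns B_TRUE and only exists as a hook for future
 * restrictions.)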
1948 */ 1949 /* ARGSUSED */ 1950 static boolean_t 1951 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1952 { 1953 return (B_TRUE); 1954 } 1955 1956 /* 1957 * This routine gets default values of certain options whose default 1958 * values are maintained by protcol specific code 1959 */ 1960 /* ARGSUSED */ 1961 int 1962 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1963 { 1964 udp_t *udp = Q_TO_UDP(q); 1965 udp_stack_t *us = udp->udp_us; 1966 int *i1 = (int *)ptr; 1967 1968 switch (level) { 1969 case IPPROTO_IP: 1970 switch (name) { 1971 case IP_MULTICAST_TTL: 1972 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1973 return (sizeof (uchar_t)); 1974 case IP_MULTICAST_LOOP: 1975 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1976 return (sizeof (uchar_t)); 1977 } 1978 break; 1979 case IPPROTO_IPV6: 1980 switch (name) { 1981 case IPV6_MULTICAST_HOPS: 1982 *i1 = IP_DEFAULT_MULTICAST_TTL; 1983 return (sizeof (int)); 1984 case IPV6_MULTICAST_LOOP: 1985 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1986 return (sizeof (int)); 1987 case IPV6_UNICAST_HOPS: 1988 *i1 = us->us_ipv6_hoplimit; 1989 return (sizeof (int)); 1990 } 1991 break; 1992 } 1993 return (-1); 1994 } 1995 1996 /* 1997 * This routine retrieves the current status of socket options. 1998 * It returns the size of the option retrieved. 1999 */ 2000 static int 2001 udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 2002 { 2003 udp_t *udp = connp->conn_udp; 2004 udp_stack_t *us = udp->udp_us; 2005 int *i1 = (int *)ptr; 2006 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 2007 int len; 2008 2009 ASSERT(RW_READ_HELD(&udp->udp_rwlock)); 2010 switch (level) { 2011 case SOL_SOCKET: 2012 switch (name) { 2013 case SO_DEBUG: 2014 *i1 = udp->udp_debug; 2015 break; /* goto sizeof (int) option return */ 2016 case SO_REUSEADDR: 2017 *i1 = udp->udp_reuseaddr; 2018 break; /* goto sizeof (int) option return */ 2019 case SO_TYPE: 2020 *i1 = SOCK_DGRAM; 2021 break; /* goto sizeof (int) option return */ 2022 2023 /* 2024 * The following three items are available here, 2025 * but are only meaningful to IP. 2026 */ 2027 case SO_DONTROUTE: 2028 *i1 = udp->udp_dontroute; 2029 break; /* goto sizeof (int) option return */ 2030 case SO_USELOOPBACK: 2031 *i1 = udp->udp_useloopback; 2032 break; /* goto sizeof (int) option return */ 2033 case SO_BROADCAST: 2034 *i1 = udp->udp_broadcast; 2035 break; /* goto sizeof (int) option return */ 2036 2037 case SO_SNDBUF: 2038 *i1 = udp->udp_xmit_hiwat; 2039 break; /* goto sizeof (int) option return */ 2040 case SO_RCVBUF: 2041 *i1 = udp->udp_rcv_disply_hiwat; 2042 break; /* goto sizeof (int) option return */ 2043 case SO_DGRAM_ERRIND: 2044 *i1 = udp->udp_dgram_errind; 2045 break; /* goto sizeof (int) option return */ 2046 case SO_RECVUCRED: 2047 *i1 = udp->udp_recvucred; 2048 break; /* goto sizeof (int) option return */ 2049 case SO_TIMESTAMP: 2050 *i1 = udp->udp_timestamp; 2051 break; /* goto sizeof (int) option return */ 2052 case SO_ANON_MLP: 2053 *i1 = connp->conn_anon_mlp; 2054 break; /* goto sizeof (int) option return */ 2055 case SO_MAC_EXEMPT: 2056 *i1 = connp->conn_mac_exempt; 2057 break; /* goto sizeof (int) option return */ 2058 case SO_ALLZONES: 2059 *i1 = connp->conn_allzones; 2060 break; /* goto sizeof (int) option return */ 2061 case SO_EXCLBIND: 2062 *i1 = udp->udp_exclbind ? 
SO_EXCLBIND : 0; 2063 break; 2064 case SO_PROTOTYPE: 2065 *i1 = IPPROTO_UDP; 2066 break; 2067 case SO_DOMAIN: 2068 *i1 = udp->udp_family; 2069 break; 2070 default: 2071 return (-1); 2072 } 2073 break; 2074 case IPPROTO_IP: 2075 if (udp->udp_family != AF_INET) 2076 return (-1); 2077 switch (name) { 2078 case IP_OPTIONS: 2079 case T_IP_OPTIONS: 2080 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 2081 if (len > 0) { 2082 bcopy(udp->udp_ip_rcv_options + 2083 udp->udp_label_len, ptr, len); 2084 } 2085 return (len); 2086 case IP_TOS: 2087 case T_IP_TOS: 2088 *i1 = (int)udp->udp_type_of_service; 2089 break; /* goto sizeof (int) option return */ 2090 case IP_TTL: 2091 *i1 = (int)udp->udp_ttl; 2092 break; /* goto sizeof (int) option return */ 2093 case IP_DHCPINIT_IF: 2094 return (-EINVAL); 2095 case IP_NEXTHOP: 2096 case IP_RECVPKTINFO: 2097 /* 2098 * This also handles IP_PKTINFO. 2099 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2100 * Differentiation is based on the size of the argument 2101 * passed in. 2102 * This option is handled in IP which will return an 2103 * error for IP_PKTINFO as it's not supported as a 2104 * sticky option. 2105 */ 2106 return (-EINVAL); 2107 case IP_MULTICAST_IF: 2108 /* 0 address if not set */ 2109 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 2110 return (sizeof (ipaddr_t)); 2111 case IP_MULTICAST_TTL: 2112 *(uchar_t *)ptr = udp->udp_multicast_ttl; 2113 return (sizeof (uchar_t)); 2114 case IP_MULTICAST_LOOP: 2115 *ptr = connp->conn_multicast_loop; 2116 return (sizeof (uint8_t)); 2117 case IP_RECVOPTS: 2118 *i1 = udp->udp_recvopts; 2119 break; /* goto sizeof (int) option return */ 2120 case IP_RECVDSTADDR: 2121 *i1 = udp->udp_recvdstaddr; 2122 break; /* goto sizeof (int) option return */ 2123 case IP_RECVIF: 2124 *i1 = udp->udp_recvif; 2125 break; /* goto sizeof (int) option return */ 2126 case IP_RECVSLLA: 2127 *i1 = udp->udp_recvslla; 2128 break; /* goto sizeof (int) option return */ 2129 case IP_RECVTTL: 2130 *i1 = udp->udp_recvttl; 2131 break; /* goto sizeof (int) option return */ 2132 case IP_ADD_MEMBERSHIP: 2133 case IP_DROP_MEMBERSHIP: 2134 case IP_BLOCK_SOURCE: 2135 case IP_UNBLOCK_SOURCE: 2136 case IP_ADD_SOURCE_MEMBERSHIP: 2137 case IP_DROP_SOURCE_MEMBERSHIP: 2138 case MCAST_JOIN_GROUP: 2139 case MCAST_LEAVE_GROUP: 2140 case MCAST_BLOCK_SOURCE: 2141 case MCAST_UNBLOCK_SOURCE: 2142 case MCAST_JOIN_SOURCE_GROUP: 2143 case MCAST_LEAVE_SOURCE_GROUP: 2144 case IP_DONTFAILOVER_IF: 2145 /* cannot "get" the value for these */ 2146 return (-1); 2147 case IP_BOUND_IF: 2148 /* Zero if not set */ 2149 *i1 = udp->udp_bound_if; 2150 break; /* goto sizeof (int) option return */ 2151 case IP_UNSPEC_SRC: 2152 *i1 = udp->udp_unspec_source; 2153 break; /* goto sizeof (int) option return */ 2154 case IP_BROADCAST_TTL: 2155 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 2156 return (sizeof (uchar_t)); 2157 default: 2158 return (-1); 2159 } 2160 break; 2161 case IPPROTO_IPV6: 2162 if (udp->udp_family != AF_INET6) 2163 return (-1); 2164 switch (name) { 2165 case IPV6_UNICAST_HOPS: 2166 *i1 = (unsigned int)udp->udp_ttl; 2167 break; /* goto sizeof (int) option return */ 2168 case IPV6_MULTICAST_IF: 2169 /* 0 index if not set */ 2170 *i1 = udp->udp_multicast_if_index; 2171 break; /* goto sizeof (int) option return */ 2172 case IPV6_MULTICAST_HOPS: 2173 *i1 = udp->udp_multicast_ttl; 2174 break; /* goto sizeof (int) option return */ 2175 case IPV6_MULTICAST_LOOP: 2176 *i1 = connp->conn_multicast_loop; 2177 break; /* goto sizeof (int) option return */ 2178 case 
IPV6_JOIN_GROUP: 2179 case IPV6_LEAVE_GROUP: 2180 case MCAST_JOIN_GROUP: 2181 case MCAST_LEAVE_GROUP: 2182 case MCAST_BLOCK_SOURCE: 2183 case MCAST_UNBLOCK_SOURCE: 2184 case MCAST_JOIN_SOURCE_GROUP: 2185 case MCAST_LEAVE_SOURCE_GROUP: 2186 /* cannot "get" the value for these */ 2187 return (-1); 2188 case IPV6_BOUND_IF: 2189 /* Zero if not set */ 2190 *i1 = udp->udp_bound_if; 2191 break; /* goto sizeof (int) option return */ 2192 case IPV6_UNSPEC_SRC: 2193 *i1 = udp->udp_unspec_source; 2194 break; /* goto sizeof (int) option return */ 2195 case IPV6_RECVPKTINFO: 2196 *i1 = udp->udp_ip_recvpktinfo; 2197 break; /* goto sizeof (int) option return */ 2198 case IPV6_RECVTCLASS: 2199 *i1 = udp->udp_ipv6_recvtclass; 2200 break; /* goto sizeof (int) option return */ 2201 case IPV6_RECVPATHMTU: 2202 *i1 = udp->udp_ipv6_recvpathmtu; 2203 break; /* goto sizeof (int) option return */ 2204 case IPV6_RECVHOPLIMIT: 2205 *i1 = udp->udp_ipv6_recvhoplimit; 2206 break; /* goto sizeof (int) option return */ 2207 case IPV6_RECVHOPOPTS: 2208 *i1 = udp->udp_ipv6_recvhopopts; 2209 break; /* goto sizeof (int) option return */ 2210 case IPV6_RECVDSTOPTS: 2211 *i1 = udp->udp_ipv6_recvdstopts; 2212 break; /* goto sizeof (int) option return */ 2213 case _OLD_IPV6_RECVDSTOPTS: 2214 *i1 = udp->udp_old_ipv6_recvdstopts; 2215 break; /* goto sizeof (int) option return */ 2216 case IPV6_RECVRTHDRDSTOPTS: 2217 *i1 = udp->udp_ipv6_recvrthdrdstopts; 2218 break; /* goto sizeof (int) option return */ 2219 case IPV6_RECVRTHDR: 2220 *i1 = udp->udp_ipv6_recvrthdr; 2221 break; /* goto sizeof (int) option return */ 2222 case IPV6_PKTINFO: { 2223 /* XXX assumes that caller has room for max size! */ 2224 struct in6_pktinfo *pkti; 2225 2226 pkti = (struct in6_pktinfo *)ptr; 2227 if (ipp->ipp_fields & IPPF_IFINDEX) 2228 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2229 else 2230 pkti->ipi6_ifindex = 0; 2231 if (ipp->ipp_fields & IPPF_ADDR) 2232 pkti->ipi6_addr = ipp->ipp_addr; 2233 else 2234 pkti->ipi6_addr = ipv6_all_zeros; 2235 return (sizeof (struct in6_pktinfo)); 2236 } 2237 case IPV6_TCLASS: 2238 if (ipp->ipp_fields & IPPF_TCLASS) 2239 *i1 = ipp->ipp_tclass; 2240 else 2241 *i1 = IPV6_FLOW_TCLASS( 2242 IPV6_DEFAULT_VERS_AND_FLOW); 2243 break; /* goto sizeof (int) option return */ 2244 case IPV6_NEXTHOP: { 2245 sin6_t *sin6 = (sin6_t *)ptr; 2246 2247 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2248 return (0); 2249 *sin6 = sin6_null; 2250 sin6->sin6_family = AF_INET6; 2251 sin6->sin6_addr = ipp->ipp_nexthop; 2252 return (sizeof (sin6_t)); 2253 } 2254 case IPV6_HOPOPTS: 2255 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2256 return (0); 2257 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 2258 return (0); 2259 /* 2260 * The cipso/label option is added by kernel. 2261 * User is not usually aware of this option. 2262 * We copy out the hbh opt after the label option. 
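 * The copy below therefore starts udp_label_len_v6 bytes into the stored
 * option and, when a label is present, rewrites the first two bytes
 * (next-header and length) so that the returned hop-by-hop header describes
 * only the user-visible options.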
2263 */ 2264 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 2265 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2266 if (udp->udp_label_len_v6 > 0) { 2267 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2268 ptr[1] = (ipp->ipp_hopoptslen - 2269 udp->udp_label_len_v6 + 7) / 8 - 1; 2270 } 2271 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2272 case IPV6_RTHDRDSTOPTS: 2273 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2274 return (0); 2275 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2276 return (ipp->ipp_rtdstoptslen); 2277 case IPV6_RTHDR: 2278 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2279 return (0); 2280 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2281 return (ipp->ipp_rthdrlen); 2282 case IPV6_DSTOPTS: 2283 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 2284 return (0); 2285 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2286 return (ipp->ipp_dstoptslen); 2287 case IPV6_PATHMTU: 2288 return (ip_fill_mtuinfo(&udp->udp_v6dst, 2289 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 2290 us->us_netstack)); 2291 default: 2292 return (-1); 2293 } 2294 break; 2295 case IPPROTO_UDP: 2296 switch (name) { 2297 case UDP_ANONPRIVBIND: 2298 *i1 = udp->udp_anon_priv_bind; 2299 break; 2300 case UDP_EXCLBIND: 2301 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 2302 break; 2303 case UDP_RCVHDR: 2304 *i1 = udp->udp_rcvhdr ? 1 : 0; 2305 break; 2306 case UDP_NAT_T_ENDPOINT: 2307 *i1 = udp->udp_nat_t_endpoint; 2308 break; 2309 default: 2310 return (-1); 2311 } 2312 break; 2313 default: 2314 return (-1); 2315 } 2316 return (sizeof (int)); 2317 } 2318 2319 int 2320 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2321 { 2322 udp_t *udp; 2323 int err; 2324 2325 udp = Q_TO_UDP(q); 2326 2327 rw_enter(&udp->udp_rwlock, RW_READER); 2328 err = udp_opt_get(Q_TO_CONN(q), level, name, ptr); 2329 rw_exit(&udp->udp_rwlock); 2330 return (err); 2331 } 2332 2333 /* 2334 * This routine sets socket options. 2335 */ 2336 /* ARGSUSED */ 2337 static int 2338 udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2339 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2340 void *thisdg_attrs, boolean_t checkonly) 2341 { 2342 udpattrs_t *attrs = thisdg_attrs; 2343 int *i1 = (int *)invalp; 2344 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2345 udp_t *udp = connp->conn_udp; 2346 udp_stack_t *us = udp->udp_us; 2347 int error; 2348 uint_t newlen; 2349 size_t sth_wroff; 2350 2351 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 2352 /* 2353 * For fixed length options, no sanity check 2354 * of passed in length is done. It is assumed *_optcom_req() 2355 * routines do the right thing. 2356 */ 2357 switch (level) { 2358 case SOL_SOCKET: 2359 switch (name) { 2360 case SO_REUSEADDR: 2361 if (!checkonly) { 2362 udp->udp_reuseaddr = onoff; 2363 PASS_OPT_TO_IP(connp); 2364 } 2365 break; 2366 case SO_DEBUG: 2367 if (!checkonly) 2368 udp->udp_debug = onoff; 2369 break; 2370 /* 2371 * The following three items are available here, 2372 * but are only meaningful to IP. 
2373 */ 2374 case SO_DONTROUTE: 2375 if (!checkonly) { 2376 udp->udp_dontroute = onoff; 2377 PASS_OPT_TO_IP(connp); 2378 } 2379 break; 2380 case SO_USELOOPBACK: 2381 if (!checkonly) { 2382 udp->udp_useloopback = onoff; 2383 PASS_OPT_TO_IP(connp); 2384 } 2385 break; 2386 case SO_BROADCAST: 2387 if (!checkonly) { 2388 udp->udp_broadcast = onoff; 2389 PASS_OPT_TO_IP(connp); 2390 } 2391 break; 2392 2393 case SO_SNDBUF: 2394 if (*i1 > us->us_max_buf) { 2395 *outlenp = 0; 2396 return (ENOBUFS); 2397 } 2398 if (!checkonly) { 2399 udp->udp_xmit_hiwat = *i1; 2400 connp->conn_wq->q_hiwat = *i1; 2401 } 2402 break; 2403 case SO_RCVBUF: 2404 if (*i1 > us->us_max_buf) { 2405 *outlenp = 0; 2406 return (ENOBUFS); 2407 } 2408 if (!checkonly) { 2409 int size; 2410 2411 udp->udp_rcv_disply_hiwat = *i1; 2412 size = udp_set_rcv_hiwat(udp, *i1); 2413 rw_exit(&udp->udp_rwlock); 2414 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2415 size); 2416 rw_enter(&udp->udp_rwlock, RW_WRITER); 2417 } 2418 break; 2419 case SO_DGRAM_ERRIND: 2420 if (!checkonly) 2421 udp->udp_dgram_errind = onoff; 2422 break; 2423 case SO_RECVUCRED: 2424 if (!checkonly) 2425 udp->udp_recvucred = onoff; 2426 break; 2427 case SO_ALLZONES: 2428 /* 2429 * "soft" error (negative) 2430 * option not handled at this level 2431 * Do not modify *outlenp. 2432 */ 2433 return (-EINVAL); 2434 case SO_TIMESTAMP: 2435 if (!checkonly) 2436 udp->udp_timestamp = onoff; 2437 break; 2438 case SO_ANON_MLP: 2439 if (!checkonly) { 2440 connp->conn_anon_mlp = onoff; 2441 PASS_OPT_TO_IP(connp); 2442 } 2443 break; 2444 case SO_MAC_EXEMPT: 2445 if (secpolicy_net_mac_aware(cr) != 0 || 2446 udp->udp_state != TS_UNBND) 2447 return (EACCES); 2448 if (!checkonly) { 2449 connp->conn_mac_exempt = onoff; 2450 PASS_OPT_TO_IP(connp); 2451 } 2452 break; 2453 case SCM_UCRED: { 2454 struct ucred_s *ucr; 2455 cred_t *cr, *newcr; 2456 ts_label_t *tsl; 2457 2458 /* 2459 * Only sockets that have proper privileges and are 2460 * bound to MLPs will have any other value here, so 2461 * this implicitly tests for privilege to set label. 2462 */ 2463 if (connp->conn_mlp_type == mlptSingle) 2464 break; 2465 ucr = (struct ucred_s *)invalp; 2466 if (inlen != ucredsize || 2467 ucr->uc_labeloff < sizeof (*ucr) || 2468 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 2469 return (EINVAL); 2470 if (!checkonly) { 2471 mblk_t *mb; 2472 2473 if (attrs == NULL || 2474 (mb = attrs->udpattr_mb) == NULL) 2475 return (EINVAL); 2476 if ((cr = DB_CRED(mb)) == NULL) 2477 cr = udp->udp_connp->conn_cred; 2478 ASSERT(cr != NULL); 2479 if ((tsl = crgetlabel(cr)) == NULL) 2480 return (EINVAL); 2481 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 2482 tsl->tsl_doi, KM_NOSLEEP); 2483 if (newcr == NULL) 2484 return (ENOSR); 2485 mblk_setcred(mb, newcr); 2486 attrs->udpattr_credset = B_TRUE; 2487 crfree(newcr); 2488 } 2489 break; 2490 } 2491 case SO_EXCLBIND: 2492 if (!checkonly) 2493 udp->udp_exclbind = onoff; 2494 break; 2495 default: 2496 *outlenp = 0; 2497 return (EINVAL); 2498 } 2499 break; 2500 case IPPROTO_IP: 2501 if (udp->udp_family != AF_INET) { 2502 *outlenp = 0; 2503 return (ENOPROTOOPT); 2504 } 2505 switch (name) { 2506 case IP_OPTIONS: 2507 case T_IP_OPTIONS: 2508 /* Save options for use by IP. */ 2509 newlen = inlen + udp->udp_label_len; 2510 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 2511 *outlenp = 0; 2512 return (EINVAL); 2513 } 2514 if (checkonly) 2515 break; 2516 2517 /* 2518 * Update the stored options taking into account 2519 * any CIPSO option which we should not overwrite. 
2520 */ 2521 if (!tsol_option_set(&udp->udp_ip_snd_options, 2522 &udp->udp_ip_snd_options_len, 2523 udp->udp_label_len, invalp, inlen)) { 2524 *outlenp = 0; 2525 return (ENOMEM); 2526 } 2527 2528 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2529 UDPH_SIZE + udp->udp_ip_snd_options_len; 2530 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 2531 rw_exit(&udp->udp_rwlock); 2532 (void) proto_set_tx_wroff(connp->conn_rq, connp, 2533 sth_wroff); 2534 rw_enter(&udp->udp_rwlock, RW_WRITER); 2535 break; 2536 2537 case IP_TTL: 2538 if (!checkonly) { 2539 udp->udp_ttl = (uchar_t)*i1; 2540 } 2541 break; 2542 case IP_TOS: 2543 case T_IP_TOS: 2544 if (!checkonly) { 2545 udp->udp_type_of_service = (uchar_t)*i1; 2546 } 2547 break; 2548 case IP_MULTICAST_IF: { 2549 /* 2550 * TODO should check OPTMGMT reply and undo this if 2551 * there is an error. 2552 */ 2553 struct in_addr *inap = (struct in_addr *)invalp; 2554 if (!checkonly) { 2555 udp->udp_multicast_if_addr = 2556 inap->s_addr; 2557 PASS_OPT_TO_IP(connp); 2558 } 2559 break; 2560 } 2561 case IP_MULTICAST_TTL: 2562 if (!checkonly) 2563 udp->udp_multicast_ttl = *invalp; 2564 break; 2565 case IP_MULTICAST_LOOP: 2566 if (!checkonly) { 2567 connp->conn_multicast_loop = *invalp; 2568 PASS_OPT_TO_IP(connp); 2569 } 2570 break; 2571 case IP_RECVOPTS: 2572 if (!checkonly) 2573 udp->udp_recvopts = onoff; 2574 break; 2575 case IP_RECVDSTADDR: 2576 if (!checkonly) 2577 udp->udp_recvdstaddr = onoff; 2578 break; 2579 case IP_RECVIF: 2580 if (!checkonly) { 2581 udp->udp_recvif = onoff; 2582 PASS_OPT_TO_IP(connp); 2583 } 2584 break; 2585 case IP_RECVSLLA: 2586 if (!checkonly) { 2587 udp->udp_recvslla = onoff; 2588 PASS_OPT_TO_IP(connp); 2589 } 2590 break; 2591 case IP_RECVTTL: 2592 if (!checkonly) 2593 udp->udp_recvttl = onoff; 2594 break; 2595 case IP_PKTINFO: { 2596 /* 2597 * This also handles IP_RECVPKTINFO. 2598 * IP_PKTINFO and IP_RECVPKTINFO have same value. 2599 * Differentiation is based on the size of the 2600 * argument passed in. 2601 */ 2602 struct in_pktinfo *pktinfop; 2603 ip4_pkt_t *attr_pktinfop; 2604 2605 if (checkonly) 2606 break; 2607 2608 if (inlen == sizeof (int)) { 2609 /* 2610 * This is IP_RECVPKTINFO option. 2611 * Keep a local copy of whether this option is 2612 * set or not and pass it down to IP for 2613 * processing. 2614 */ 2615 2616 udp->udp_ip_recvpktinfo = onoff; 2617 return (-EINVAL); 2618 } 2619 2620 if (attrs == NULL || 2621 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 2622 /* 2623 * sticky option or no buffer to return 2624 * the results. 
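 * (The struct in_pktinfo form of IP_PKTINFO is accepted only as sendmsg()
 * ancillary data; setting it as a sticky option fails here. For
 * illustration, a user-level sketch -- "fd" is hypothetical:
 *	int on = 1;
 *	setsockopt(fd, IPPROTO_IP, IP_RECVPKTINFO, &on, sizeof (on));
 * is the sizeof (int) case handled just above, while a cmsg carrying a
 * struct in_pktinfo takes the udpattr_ipp4 path below.)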
2625 */ 2626 return (EINVAL); 2627 } 2628 2629 if (inlen != sizeof (struct in_pktinfo)) 2630 return (EINVAL); 2631 2632 pktinfop = (struct in_pktinfo *)invalp; 2633 2634 /* 2635 * At least one of the values should be specified 2636 */ 2637 if (pktinfop->ipi_ifindex == 0 && 2638 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2639 return (EINVAL); 2640 } 2641 2642 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2643 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2644 2645 break; 2646 } 2647 case IP_ADD_MEMBERSHIP: 2648 case IP_DROP_MEMBERSHIP: 2649 case IP_BLOCK_SOURCE: 2650 case IP_UNBLOCK_SOURCE: 2651 case IP_ADD_SOURCE_MEMBERSHIP: 2652 case IP_DROP_SOURCE_MEMBERSHIP: 2653 case MCAST_JOIN_GROUP: 2654 case MCAST_LEAVE_GROUP: 2655 case MCAST_BLOCK_SOURCE: 2656 case MCAST_UNBLOCK_SOURCE: 2657 case MCAST_JOIN_SOURCE_GROUP: 2658 case MCAST_LEAVE_SOURCE_GROUP: 2659 case IP_SEC_OPT: 2660 case IP_NEXTHOP: 2661 case IP_DHCPINIT_IF: 2662 /* 2663 * "soft" error (negative) 2664 * option not handled at this level 2665 * Do not modify *outlenp. 2666 */ 2667 return (-EINVAL); 2668 case IP_BOUND_IF: 2669 if (!checkonly) { 2670 udp->udp_bound_if = *i1; 2671 PASS_OPT_TO_IP(connp); 2672 } 2673 break; 2674 case IP_UNSPEC_SRC: 2675 if (!checkonly) { 2676 udp->udp_unspec_source = onoff; 2677 PASS_OPT_TO_IP(connp); 2678 } 2679 break; 2680 case IP_BROADCAST_TTL: 2681 if (!checkonly) 2682 connp->conn_broadcast_ttl = *invalp; 2683 break; 2684 default: 2685 *outlenp = 0; 2686 return (EINVAL); 2687 } 2688 break; 2689 case IPPROTO_IPV6: { 2690 ip6_pkt_t *ipp; 2691 boolean_t sticky; 2692 2693 if (udp->udp_family != AF_INET6) { 2694 *outlenp = 0; 2695 return (ENOPROTOOPT); 2696 } 2697 /* 2698 * Deal with both sticky options and ancillary data 2699 */ 2700 sticky = B_FALSE; 2701 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 2702 NULL) { 2703 /* sticky options, or none */ 2704 ipp = &udp->udp_sticky_ipp; 2705 sticky = B_TRUE; 2706 } 2707 2708 switch (name) { 2709 case IPV6_MULTICAST_IF: 2710 if (!checkonly) { 2711 udp->udp_multicast_if_index = *i1; 2712 PASS_OPT_TO_IP(connp); 2713 } 2714 break; 2715 case IPV6_UNICAST_HOPS: 2716 /* -1 means use default */ 2717 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2718 *outlenp = 0; 2719 return (EINVAL); 2720 } 2721 if (!checkonly) { 2722 if (*i1 == -1) { 2723 udp->udp_ttl = ipp->ipp_unicast_hops = 2724 us->us_ipv6_hoplimit; 2725 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2726 /* Pass modified value to IP. */ 2727 *i1 = udp->udp_ttl; 2728 } else { 2729 udp->udp_ttl = ipp->ipp_unicast_hops = 2730 (uint8_t)*i1; 2731 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2732 } 2733 /* Rebuild the header template */ 2734 error = udp_build_hdrs(udp); 2735 if (error != 0) { 2736 *outlenp = 0; 2737 return (error); 2738 } 2739 } 2740 break; 2741 case IPV6_MULTICAST_HOPS: 2742 /* -1 means use default */ 2743 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2744 *outlenp = 0; 2745 return (EINVAL); 2746 } 2747 if (!checkonly) { 2748 if (*i1 == -1) { 2749 udp->udp_multicast_ttl = 2750 ipp->ipp_multicast_hops = 2751 IP_DEFAULT_MULTICAST_TTL; 2752 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2753 /* Pass modified value to IP. 
*/ 2754 *i1 = udp->udp_multicast_ttl; 2755 } else { 2756 udp->udp_multicast_ttl = 2757 ipp->ipp_multicast_hops = 2758 (uint8_t)*i1; 2759 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2760 } 2761 } 2762 break; 2763 case IPV6_MULTICAST_LOOP: 2764 if (*i1 != 0 && *i1 != 1) { 2765 *outlenp = 0; 2766 return (EINVAL); 2767 } 2768 if (!checkonly) { 2769 connp->conn_multicast_loop = *i1; 2770 PASS_OPT_TO_IP(connp); 2771 } 2772 break; 2773 case IPV6_JOIN_GROUP: 2774 case IPV6_LEAVE_GROUP: 2775 case MCAST_JOIN_GROUP: 2776 case MCAST_LEAVE_GROUP: 2777 case MCAST_BLOCK_SOURCE: 2778 case MCAST_UNBLOCK_SOURCE: 2779 case MCAST_JOIN_SOURCE_GROUP: 2780 case MCAST_LEAVE_SOURCE_GROUP: 2781 /* 2782 * "soft" error (negative) 2783 * option not handled at this level 2784 * Note: Do not modify *outlenp 2785 */ 2786 return (-EINVAL); 2787 case IPV6_BOUND_IF: 2788 if (!checkonly) { 2789 udp->udp_bound_if = *i1; 2790 PASS_OPT_TO_IP(connp); 2791 } 2792 break; 2793 case IPV6_UNSPEC_SRC: 2794 if (!checkonly) { 2795 udp->udp_unspec_source = onoff; 2796 PASS_OPT_TO_IP(connp); 2797 } 2798 break; 2799 /* 2800 * Set boolean switches for ancillary data delivery 2801 */ 2802 case IPV6_RECVPKTINFO: 2803 if (!checkonly) { 2804 udp->udp_ip_recvpktinfo = onoff; 2805 PASS_OPT_TO_IP(connp); 2806 } 2807 break; 2808 case IPV6_RECVTCLASS: 2809 if (!checkonly) { 2810 udp->udp_ipv6_recvtclass = onoff; 2811 PASS_OPT_TO_IP(connp); 2812 } 2813 break; 2814 case IPV6_RECVPATHMTU: 2815 if (!checkonly) { 2816 udp->udp_ipv6_recvpathmtu = onoff; 2817 PASS_OPT_TO_IP(connp); 2818 } 2819 break; 2820 case IPV6_RECVHOPLIMIT: 2821 if (!checkonly) { 2822 udp->udp_ipv6_recvhoplimit = onoff; 2823 PASS_OPT_TO_IP(connp); 2824 } 2825 break; 2826 case IPV6_RECVHOPOPTS: 2827 if (!checkonly) { 2828 udp->udp_ipv6_recvhopopts = onoff; 2829 PASS_OPT_TO_IP(connp); 2830 } 2831 break; 2832 case IPV6_RECVDSTOPTS: 2833 if (!checkonly) { 2834 udp->udp_ipv6_recvdstopts = onoff; 2835 PASS_OPT_TO_IP(connp); 2836 } 2837 break; 2838 case _OLD_IPV6_RECVDSTOPTS: 2839 if (!checkonly) 2840 udp->udp_old_ipv6_recvdstopts = onoff; 2841 break; 2842 case IPV6_RECVRTHDRDSTOPTS: 2843 if (!checkonly) { 2844 udp->udp_ipv6_recvrthdrdstopts = onoff; 2845 PASS_OPT_TO_IP(connp); 2846 } 2847 break; 2848 case IPV6_RECVRTHDR: 2849 if (!checkonly) { 2850 udp->udp_ipv6_recvrthdr = onoff; 2851 PASS_OPT_TO_IP(connp); 2852 } 2853 break; 2854 /* 2855 * Set sticky options or ancillary data. 2856 * If sticky options, (re)build any extension headers 2857 * that might be needed as a result. 2858 */ 2859 case IPV6_PKTINFO: 2860 /* 2861 * The source address and ifindex are verified 2862 * in ip_opt_set(). For ancillary data the 2863 * source address is checked in ip_wput_v6. 
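 *
 * For illustration only -- a user-level sketch, not part of this module
 * ("fd" and "pi6" are hypothetical):
 *
 *	struct in6_pktinfo pi6 = { 0 };
 *	pi6.ipi6_ifindex = 2;
 *	setsockopt(fd, IPPROTO_IPV6, IPV6_PKTINFO, &pi6, sizeof (pi6));
 *
 * makes the selection sticky (udp_sticky_ipp is updated and the header
 * template rebuilt), whereas the same option supplied as sendmsg() ancillary
 * data applies to that one datagram via udpattr_ipp6.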
2864 */ 2865 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2866 return (EINVAL); 2867 if (checkonly) 2868 break; 2869 2870 if (inlen == 0) { 2871 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2872 ipp->ipp_sticky_ignored |= 2873 (IPPF_IFINDEX|IPPF_ADDR); 2874 } else { 2875 struct in6_pktinfo *pkti; 2876 2877 pkti = (struct in6_pktinfo *)invalp; 2878 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2879 ipp->ipp_addr = pkti->ipi6_addr; 2880 if (ipp->ipp_ifindex != 0) 2881 ipp->ipp_fields |= IPPF_IFINDEX; 2882 else 2883 ipp->ipp_fields &= ~IPPF_IFINDEX; 2884 if (!IN6_IS_ADDR_UNSPECIFIED( 2885 &ipp->ipp_addr)) 2886 ipp->ipp_fields |= IPPF_ADDR; 2887 else 2888 ipp->ipp_fields &= ~IPPF_ADDR; 2889 } 2890 if (sticky) { 2891 error = udp_build_hdrs(udp); 2892 if (error != 0) 2893 return (error); 2894 PASS_OPT_TO_IP(connp); 2895 } 2896 break; 2897 case IPV6_HOPLIMIT: 2898 if (sticky) 2899 return (EINVAL); 2900 if (inlen != 0 && inlen != sizeof (int)) 2901 return (EINVAL); 2902 if (checkonly) 2903 break; 2904 2905 if (inlen == 0) { 2906 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2907 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2908 } else { 2909 if (*i1 > 255 || *i1 < -1) 2910 return (EINVAL); 2911 if (*i1 == -1) 2912 ipp->ipp_hoplimit = 2913 us->us_ipv6_hoplimit; 2914 else 2915 ipp->ipp_hoplimit = *i1; 2916 ipp->ipp_fields |= IPPF_HOPLIMIT; 2917 } 2918 break; 2919 case IPV6_TCLASS: 2920 if (inlen != 0 && inlen != sizeof (int)) 2921 return (EINVAL); 2922 if (checkonly) 2923 break; 2924 2925 if (inlen == 0) { 2926 ipp->ipp_fields &= ~IPPF_TCLASS; 2927 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2928 } else { 2929 if (*i1 > 255 || *i1 < -1) 2930 return (EINVAL); 2931 if (*i1 == -1) 2932 ipp->ipp_tclass = 0; 2933 else 2934 ipp->ipp_tclass = *i1; 2935 ipp->ipp_fields |= IPPF_TCLASS; 2936 } 2937 if (sticky) { 2938 error = udp_build_hdrs(udp); 2939 if (error != 0) 2940 return (error); 2941 } 2942 break; 2943 case IPV6_NEXTHOP: 2944 /* 2945 * IP will verify that the nexthop is reachable 2946 * and fail for sticky options. 2947 */ 2948 if (inlen != 0 && inlen != sizeof (sin6_t)) 2949 return (EINVAL); 2950 if (checkonly) 2951 break; 2952 2953 if (inlen == 0) { 2954 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2955 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2956 } else { 2957 sin6_t *sin6 = (sin6_t *)invalp; 2958 2959 if (sin6->sin6_family != AF_INET6) { 2960 return (EAFNOSUPPORT); 2961 } 2962 if (IN6_IS_ADDR_V4MAPPED( 2963 &sin6->sin6_addr)) 2964 return (EADDRNOTAVAIL); 2965 ipp->ipp_nexthop = sin6->sin6_addr; 2966 if (!IN6_IS_ADDR_UNSPECIFIED( 2967 &ipp->ipp_nexthop)) 2968 ipp->ipp_fields |= IPPF_NEXTHOP; 2969 else 2970 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2971 } 2972 if (sticky) { 2973 error = udp_build_hdrs(udp); 2974 if (error != 0) 2975 return (error); 2976 PASS_OPT_TO_IP(connp); 2977 } 2978 break; 2979 case IPV6_HOPOPTS: { 2980 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2981 /* 2982 * Sanity checks - minimum size, size a multiple of 2983 * eight bytes, and matching size passed in. 2984 */ 2985 if (inlen != 0 && 2986 inlen != (8 * (hopts->ip6h_len + 1))) 2987 return (EINVAL); 2988 2989 if (checkonly) 2990 break; 2991 2992 error = optcom_pkt_set(invalp, inlen, sticky, 2993 (uchar_t **)&ipp->ipp_hopopts, 2994 &ipp->ipp_hopoptslen, 2995 sticky ? 
udp->udp_label_len_v6 : 0); 2996 if (error != 0) 2997 return (error); 2998 if (ipp->ipp_hopoptslen == 0) { 2999 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3000 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3001 } else { 3002 ipp->ipp_fields |= IPPF_HOPOPTS; 3003 } 3004 if (sticky) { 3005 error = udp_build_hdrs(udp); 3006 if (error != 0) 3007 return (error); 3008 } 3009 break; 3010 } 3011 case IPV6_RTHDRDSTOPTS: { 3012 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3013 3014 /* 3015 * Sanity checks - minimum size, size a multiple of 3016 * eight bytes, and matching size passed in. 3017 */ 3018 if (inlen != 0 && 3019 inlen != (8 * (dopts->ip6d_len + 1))) 3020 return (EINVAL); 3021 3022 if (checkonly) 3023 break; 3024 3025 if (inlen == 0) { 3026 if (sticky && 3027 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 3028 kmem_free(ipp->ipp_rtdstopts, 3029 ipp->ipp_rtdstoptslen); 3030 ipp->ipp_rtdstopts = NULL; 3031 ipp->ipp_rtdstoptslen = 0; 3032 } 3033 3034 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 3035 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 3036 } else { 3037 error = optcom_pkt_set(invalp, inlen, sticky, 3038 (uchar_t **)&ipp->ipp_rtdstopts, 3039 &ipp->ipp_rtdstoptslen, 0); 3040 if (error != 0) 3041 return (error); 3042 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3043 } 3044 if (sticky) { 3045 error = udp_build_hdrs(udp); 3046 if (error != 0) 3047 return (error); 3048 } 3049 break; 3050 } 3051 case IPV6_DSTOPTS: { 3052 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3053 3054 /* 3055 * Sanity checks - minimum size, size a multiple of 3056 * eight bytes, and matching size passed in. 3057 */ 3058 if (inlen != 0 && 3059 inlen != (8 * (dopts->ip6d_len + 1))) 3060 return (EINVAL); 3061 3062 if (checkonly) 3063 break; 3064 3065 if (inlen == 0) { 3066 if (sticky && 3067 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3068 kmem_free(ipp->ipp_dstopts, 3069 ipp->ipp_dstoptslen); 3070 ipp->ipp_dstopts = NULL; 3071 ipp->ipp_dstoptslen = 0; 3072 } 3073 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3074 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3075 } else { 3076 error = optcom_pkt_set(invalp, inlen, sticky, 3077 (uchar_t **)&ipp->ipp_dstopts, 3078 &ipp->ipp_dstoptslen, 0); 3079 if (error != 0) 3080 return (error); 3081 ipp->ipp_fields |= IPPF_DSTOPTS; 3082 } 3083 if (sticky) { 3084 error = udp_build_hdrs(udp); 3085 if (error != 0) 3086 return (error); 3087 } 3088 break; 3089 } 3090 case IPV6_RTHDR: { 3091 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3092 3093 /* 3094 * Sanity checks - minimum size, size a multiple of 3095 * eight bytes, and matching size passed in. 
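 * (ip6r_len counts 8-byte units beyond the first 8 bytes, so the header
 * occupies 8 * (ip6r_len + 1) bytes in total; e.g. ip6r_len == 2 means a
 * 24-byte routing header, and inlen must match that exactly.)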
3096 */ 3097 if (inlen != 0 && 3098 inlen != (8 * (rt->ip6r_len + 1))) 3099 return (EINVAL); 3100 3101 if (checkonly) 3102 break; 3103 3104 if (inlen == 0) { 3105 if (sticky && 3106 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3107 kmem_free(ipp->ipp_rthdr, 3108 ipp->ipp_rthdrlen); 3109 ipp->ipp_rthdr = NULL; 3110 ipp->ipp_rthdrlen = 0; 3111 } 3112 ipp->ipp_fields &= ~IPPF_RTHDR; 3113 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3114 } else { 3115 error = optcom_pkt_set(invalp, inlen, sticky, 3116 (uchar_t **)&ipp->ipp_rthdr, 3117 &ipp->ipp_rthdrlen, 0); 3118 if (error != 0) 3119 return (error); 3120 ipp->ipp_fields |= IPPF_RTHDR; 3121 } 3122 if (sticky) { 3123 error = udp_build_hdrs(udp); 3124 if (error != 0) 3125 return (error); 3126 } 3127 break; 3128 } 3129 3130 case IPV6_DONTFRAG: 3131 if (checkonly) 3132 break; 3133 3134 if (onoff) { 3135 ipp->ipp_fields |= IPPF_DONTFRAG; 3136 } else { 3137 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3138 } 3139 break; 3140 3141 case IPV6_USE_MIN_MTU: 3142 if (inlen != sizeof (int)) 3143 return (EINVAL); 3144 3145 if (*i1 < -1 || *i1 > 1) 3146 return (EINVAL); 3147 3148 if (checkonly) 3149 break; 3150 3151 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3152 ipp->ipp_use_min_mtu = *i1; 3153 break; 3154 3155 case IPV6_BOUND_PIF: 3156 case IPV6_SEC_OPT: 3157 case IPV6_DONTFAILOVER_IF: 3158 case IPV6_SRC_PREFERENCES: 3159 case IPV6_V6ONLY: 3160 /* Handled at the IP level */ 3161 return (-EINVAL); 3162 default: 3163 *outlenp = 0; 3164 return (EINVAL); 3165 } 3166 break; 3167 } /* end IPPROTO_IPV6 */ 3168 case IPPROTO_UDP: 3169 switch (name) { 3170 case UDP_ANONPRIVBIND: 3171 if ((error = secpolicy_net_privaddr(cr, 0, 3172 IPPROTO_UDP)) != 0) { 3173 *outlenp = 0; 3174 return (error); 3175 } 3176 if (!checkonly) { 3177 udp->udp_anon_priv_bind = onoff; 3178 } 3179 break; 3180 case UDP_EXCLBIND: 3181 if (!checkonly) 3182 udp->udp_exclbind = onoff; 3183 break; 3184 case UDP_RCVHDR: 3185 if (!checkonly) 3186 udp->udp_rcvhdr = onoff; 3187 break; 3188 case UDP_NAT_T_ENDPOINT: 3189 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 3190 *outlenp = 0; 3191 return (error); 3192 } 3193 3194 /* 3195 * Use udp_family instead so we can avoid ambiguitites 3196 * with AF_INET6 sockets that may switch from IPv4 3197 * to IPv6. 3198 */ 3199 if (udp->udp_family != AF_INET) { 3200 *outlenp = 0; 3201 return (EAFNOSUPPORT); 3202 } 3203 3204 if (!checkonly) { 3205 int size; 3206 3207 udp->udp_nat_t_endpoint = onoff; 3208 3209 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3210 UDPH_SIZE + udp->udp_ip_snd_options_len; 3211 3212 /* Also, adjust wroff */ 3213 if (onoff) { 3214 udp->udp_max_hdr_len += 3215 sizeof (uint32_t); 3216 } 3217 size = udp->udp_max_hdr_len + 3218 us->us_wroff_extra; 3219 (void) proto_set_tx_wroff(connp->conn_rq, connp, 3220 size); 3221 } 3222 break; 3223 default: 3224 *outlenp = 0; 3225 return (EINVAL); 3226 } 3227 break; 3228 default: 3229 *outlenp = 0; 3230 return (EINVAL); 3231 } 3232 /* 3233 * Common case of OK return with outval same as inval. 
3234 */ 3235 if (invalp != outvalp) { 3236 /* don't trust bcopy for identical src/dst */ 3237 (void) bcopy(invalp, outvalp, inlen); 3238 } 3239 *outlenp = inlen; 3240 return (0); 3241 } 3242 3243 int 3244 udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3245 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3246 void *thisdg_attrs, cred_t *cr) 3247 { 3248 int error; 3249 boolean_t checkonly; 3250 3251 error = 0; 3252 switch (optset_context) { 3253 case SETFN_OPTCOM_CHECKONLY: 3254 checkonly = B_TRUE; 3255 /* 3256 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3257 * inlen != 0 implies value supplied and 3258 * we have to "pretend" to set it. 3259 * inlen == 0 implies that there is no 3260 * value part in T_CHECK request and just validation 3261 * done elsewhere should be enough, we just return here. 3262 */ 3263 if (inlen == 0) { 3264 *outlenp = 0; 3265 goto done; 3266 } 3267 break; 3268 case SETFN_OPTCOM_NEGOTIATE: 3269 checkonly = B_FALSE; 3270 break; 3271 case SETFN_UD_NEGOTIATE: 3272 case SETFN_CONN_NEGOTIATE: 3273 checkonly = B_FALSE; 3274 /* 3275 * Negotiating local and "association-related" options 3276 * through T_UNITDATA_REQ. 3277 * 3278 * Following routine can filter out ones we do not 3279 * want to be "set" this way. 3280 */ 3281 if (!udp_opt_allow_udr_set(level, name)) { 3282 *outlenp = 0; 3283 error = EINVAL; 3284 goto done; 3285 } 3286 break; 3287 default: 3288 /* 3289 * We should never get here 3290 */ 3291 *outlenp = 0; 3292 error = EINVAL; 3293 goto done; 3294 } 3295 3296 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3297 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3298 3299 error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3300 outvalp, cr, thisdg_attrs, checkonly); 3301 done: 3302 return (error); 3303 } 3304 3305 /* ARGSUSED */ 3306 int 3307 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3308 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3309 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3310 { 3311 conn_t *connp = Q_TO_CONN(q); 3312 int error; 3313 udp_t *udp = connp->conn_udp; 3314 3315 rw_enter(&udp->udp_rwlock, RW_WRITER); 3316 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 3317 outlenp, outvalp, thisdg_attrs, cr); 3318 rw_exit(&udp->udp_rwlock); 3319 return (error); 3320 } 3321 3322 /* 3323 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3324 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3325 * headers, and the udp header. 3326 * Returns failure if can't allocate memory. 
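 * Only fields that are invariant for this endpoint are filled in here: the
 * source address (unless a sticky IPPF_ADDR overrides it) and the UDP source
 * port. If the rebuilt template is larger than before, the stream head write
 * offset is grown to match.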
3327 */ 3328 static int 3329 udp_build_hdrs(udp_t *udp) 3330 { 3331 udp_stack_t *us = udp->udp_us; 3332 uchar_t *hdrs; 3333 uint_t hdrs_len; 3334 ip6_t *ip6h; 3335 ip6i_t *ip6i; 3336 udpha_t *udpha; 3337 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3338 size_t sth_wroff; 3339 conn_t *connp = udp->udp_connp; 3340 3341 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 3342 ASSERT(connp != NULL); 3343 3344 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3345 ASSERT(hdrs_len != 0); 3346 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3347 /* Need to reallocate */ 3348 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3349 if (hdrs == NULL) 3350 return (ENOMEM); 3351 3352 if (udp->udp_sticky_hdrs_len != 0) { 3353 kmem_free(udp->udp_sticky_hdrs, 3354 udp->udp_sticky_hdrs_len); 3355 } 3356 udp->udp_sticky_hdrs = hdrs; 3357 udp->udp_sticky_hdrs_len = hdrs_len; 3358 } 3359 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3360 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3361 3362 /* Set header fields not in ipp */ 3363 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3364 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3365 ip6h = (ip6_t *)&ip6i[1]; 3366 } else { 3367 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3368 } 3369 3370 if (!(ipp->ipp_fields & IPPF_ADDR)) 3371 ip6h->ip6_src = udp->udp_v6src; 3372 3373 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3374 udpha->uha_src_port = udp->udp_port; 3375 3376 /* Try to get everything in a single mblk */ 3377 if (hdrs_len > udp->udp_max_hdr_len) { 3378 udp->udp_max_hdr_len = hdrs_len; 3379 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3380 rw_exit(&udp->udp_rwlock); 3381 (void) proto_set_tx_wroff(udp->udp_connp->conn_rq, 3382 udp->udp_connp, sth_wroff); 3383 rw_enter(&udp->udp_rwlock, RW_WRITER); 3384 } 3385 return (0); 3386 } 3387 3388 /* 3389 * This routine retrieves the value of an ND variable in a udpparam_t 3390 * structure. It is called through nd_getset when a user reads the 3391 * variable. 3392 */ 3393 /* ARGSUSED */ 3394 static int 3395 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3396 { 3397 udpparam_t *udppa = (udpparam_t *)cp; 3398 3399 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 3400 return (0); 3401 } 3402 3403 /* 3404 * Walk through the param array specified registering each element with the 3405 * named dispatch (ND) handler. 3406 */ 3407 static boolean_t 3408 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3409 { 3410 for (; cnt-- > 0; udppa++) { 3411 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3412 if (!nd_load(ndp, udppa->udp_param_name, 3413 udp_param_get, udp_param_set, 3414 (caddr_t)udppa)) { 3415 nd_free(ndp); 3416 return (B_FALSE); 3417 } 3418 } 3419 } 3420 if (!nd_load(ndp, "udp_extra_priv_ports", 3421 udp_extra_priv_ports_get, NULL, NULL)) { 3422 nd_free(ndp); 3423 return (B_FALSE); 3424 } 3425 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3426 NULL, udp_extra_priv_ports_add, NULL)) { 3427 nd_free(ndp); 3428 return (B_FALSE); 3429 } 3430 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3431 NULL, udp_extra_priv_ports_del, NULL)) { 3432 nd_free(ndp); 3433 return (B_FALSE); 3434 } 3435 if (!nd_load(ndp, "udp_status", udp_status_report, NULL, 3436 NULL)) { 3437 nd_free(ndp); 3438 return (B_FALSE); 3439 } 3440 if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL, 3441 NULL)) { 3442 nd_free(ndp); 3443 return (B_FALSE); 3444 } 3445 return (B_TRUE); 3446 } 3447 3448 /* This routine sets an ND variable in a udpparam_t structure. 
*/ 3449 /* ARGSUSED */ 3450 static int 3451 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3452 { 3453 long new_value; 3454 udpparam_t *udppa = (udpparam_t *)cp; 3455 3456 /* 3457 * Fail the request if the new value does not lie within the 3458 * required bounds. 3459 */ 3460 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3461 new_value < udppa->udp_param_min || 3462 new_value > udppa->udp_param_max) { 3463 return (EINVAL); 3464 } 3465 3466 /* Set the new value */ 3467 udppa->udp_param_value = new_value; 3468 return (0); 3469 } 3470 3471 /* 3472 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 3473 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to 3474 * just count the length needed for allocation. If 'dbuf' is non-NULL, 3475 * then it's assumed to be allocated to be large enough. 3476 * 3477 * Returns zero if trimming of the security option causes all options to go 3478 * away. 3479 */ 3480 static size_t 3481 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 3482 { 3483 struct T_opthdr *toh; 3484 size_t hol = ipp->ipp_hopoptslen; 3485 ip6_hbh_t *dstopt = NULL; 3486 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 3487 size_t tlen, olen, plen; 3488 boolean_t deleting; 3489 const struct ip6_opt *sopt, *lastpad; 3490 struct ip6_opt *dopt; 3491 3492 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 3493 toh->level = IPPROTO_IPV6; 3494 toh->name = IPV6_HOPOPTS; 3495 toh->status = 0; 3496 dstopt = (ip6_hbh_t *)(toh + 1); 3497 } 3498 3499 /* 3500 * If labeling is enabled, then skip the label option 3501 * but get other options if there are any. 3502 */ 3503 if (is_system_labeled()) { 3504 dopt = NULL; 3505 if (dstopt != NULL) { 3506 /* will fill in ip6h_len later */ 3507 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 3508 dopt = (struct ip6_opt *)(dstopt + 1); 3509 } 3510 sopt = (const struct ip6_opt *)(srcopt + 1); 3511 hol -= sizeof (*srcopt); 3512 tlen = sizeof (*dstopt); 3513 lastpad = NULL; 3514 deleting = B_FALSE; 3515 /* 3516 * This loop finds the first (lastpad pointer) of any number of 3517 * pads that preceeds the security option, then treats the 3518 * security option as though it were a pad, and then finds the 3519 * next non-pad option (or end of list). 3520 * 3521 * It then treats the entire block as one big pad. To preserve 3522 * alignment of any options that follow, or just the end of the 3523 * list, it computes a minimal new padding size that keeps the 3524 * same alignment for the next option. 3525 * 3526 * If it encounters just a sequence of pads with no security 3527 * option, those are copied as-is rather than collapsed. 3528 * 3529 * Note that to handle the end of list case, the code makes one 3530 * loop with 'hol' set to zero. 
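 *
 * For example (an illustrative layout, not taken from a real packet): given
 * stored options [PadN][label][PadN][option X], the label and the pads around
 * it form one deleted block that is replaced by the smallest Pad1/PadN that
 * keeps option X at the same offset modulo 8; a run of pads with no label
 * among them is copied through unchanged.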
3531 */ 3532 for (;;) { 3533 if (hol > 0) { 3534 if (sopt->ip6o_type == IP6OPT_PAD1) { 3535 if (lastpad == NULL) 3536 lastpad = sopt; 3537 sopt = (const struct ip6_opt *) 3538 &sopt->ip6o_len; 3539 hol--; 3540 continue; 3541 } 3542 olen = sopt->ip6o_len + sizeof (*sopt); 3543 if (olen > hol) 3544 olen = hol; 3545 if (sopt->ip6o_type == IP6OPT_PADN || 3546 sopt->ip6o_type == ip6opt_ls) { 3547 if (sopt->ip6o_type == ip6opt_ls) 3548 deleting = B_TRUE; 3549 if (lastpad == NULL) 3550 lastpad = sopt; 3551 sopt = (const struct ip6_opt *) 3552 ((const char *)sopt + olen); 3553 hol -= olen; 3554 continue; 3555 } 3556 } else { 3557 /* if nothing was copied at all, then delete */ 3558 if (tlen == sizeof (*dstopt)) 3559 return (0); 3560 /* last pass; pick up any trailing padding */ 3561 olen = 0; 3562 } 3563 if (deleting) { 3564 /* 3565 * compute aligning effect of deleted material 3566 * to reproduce with pad. 3567 */ 3568 plen = ((const char *)sopt - 3569 (const char *)lastpad) & 7; 3570 tlen += plen; 3571 if (dopt != NULL) { 3572 if (plen == 1) { 3573 dopt->ip6o_type = IP6OPT_PAD1; 3574 } else if (plen > 1) { 3575 plen -= sizeof (*dopt); 3576 dopt->ip6o_type = IP6OPT_PADN; 3577 dopt->ip6o_len = plen; 3578 if (plen > 0) 3579 bzero(dopt + 1, plen); 3580 } 3581 dopt = (struct ip6_opt *) 3582 ((char *)dopt + plen); 3583 } 3584 deleting = B_FALSE; 3585 lastpad = NULL; 3586 } 3587 /* if there's uncopied padding, then copy that now */ 3588 if (lastpad != NULL) { 3589 olen += (const char *)sopt - 3590 (const char *)lastpad; 3591 sopt = lastpad; 3592 lastpad = NULL; 3593 } 3594 if (dopt != NULL && olen > 0) { 3595 bcopy(sopt, dopt, olen); 3596 dopt = (struct ip6_opt *)((char *)dopt + olen); 3597 } 3598 if (hol == 0) 3599 break; 3600 tlen += olen; 3601 sopt = (const struct ip6_opt *) 3602 ((const char *)sopt + olen); 3603 hol -= olen; 3604 } 3605 /* go back and patch up the length value, rounded upward */ 3606 if (dstopt != NULL) 3607 dstopt->ip6h_len = (tlen - 1) >> 3; 3608 } else { 3609 tlen = hol; 3610 if (dstopt != NULL) 3611 bcopy(srcopt, dstopt, hol); 3612 } 3613 3614 tlen += sizeof (*toh); 3615 if (toh != NULL) 3616 toh->len = tlen; 3617 3618 return (tlen); 3619 } 3620 3621 /* 3622 * Update udp_rcv_opt_len from the packet. 3623 * Called when options received, and when no options received but 3624 * udp_ip_recv_opt_len has previously recorded options. 
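 * The buffer is reallocated only when the new options are larger than the
 * space already held; otherwise the existing buffer is reused and just the
 * recorded length is updated. A packet that arrives without options clears
 * any previously saved options.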
3625 */ 3626 static void 3627 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) 3628 { 3629 /* Save the options if any */ 3630 if (opt_len > 0) { 3631 if (opt_len > udp->udp_ip_rcv_options_len) { 3632 /* Need to allocate larger buffer */ 3633 if (udp->udp_ip_rcv_options_len != 0) 3634 mi_free((char *)udp->udp_ip_rcv_options); 3635 udp->udp_ip_rcv_options_len = 0; 3636 udp->udp_ip_rcv_options = 3637 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 3638 if (udp->udp_ip_rcv_options != NULL) 3639 udp->udp_ip_rcv_options_len = opt_len; 3640 } 3641 if (udp->udp_ip_rcv_options_len != 0) { 3642 bcopy(opt, udp->udp_ip_rcv_options, opt_len); 3643 /* Adjust length if we are resusing the space */ 3644 udp->udp_ip_rcv_options_len = opt_len; 3645 } 3646 } else if (udp->udp_ip_rcv_options_len != 0) { 3647 /* Clear out previously recorded options */ 3648 mi_free((char *)udp->udp_ip_rcv_options); 3649 udp->udp_ip_rcv_options = NULL; 3650 udp->udp_ip_rcv_options_len = 0; 3651 } 3652 } 3653 3654 static void 3655 udp_queue_fallback(udp_t *udp, mblk_t *mp) 3656 { 3657 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 3658 if (IPCL_IS_NONSTR(udp->udp_connp)) { 3659 /* 3660 * fallback has started but messages have not been moved yet 3661 */ 3662 if (udp->udp_fallback_queue_head == NULL) { 3663 ASSERT(udp->udp_fallback_queue_tail == NULL); 3664 udp->udp_fallback_queue_head = mp; 3665 udp->udp_fallback_queue_tail = mp; 3666 } else { 3667 ASSERT(udp->udp_fallback_queue_tail != NULL); 3668 udp->udp_fallback_queue_tail->b_next = mp; 3669 udp->udp_fallback_queue_tail = mp; 3670 } 3671 mutex_exit(&udp->udp_recv_lock); 3672 } else { 3673 /* 3674 * no more fallbacks possible, ok to drop lock. 3675 */ 3676 mutex_exit(&udp->udp_recv_lock); 3677 putnext(udp->udp_connp->conn_rq, mp); 3678 } 3679 } 3680 3681 /* ARGSUSED2 */ 3682 static void 3683 udp_input(void *arg1, mblk_t *mp, void *arg2) 3684 { 3685 conn_t *connp = (conn_t *)arg1; 3686 struct T_unitdata_ind *tudi; 3687 uchar_t *rptr; /* Pointer to IP header */ 3688 int hdr_length; /* Length of IP+UDP headers */ 3689 int opt_len; 3690 int udi_size; /* Size of T_unitdata_ind */ 3691 int mp_len; 3692 udp_t *udp; 3693 udpha_t *udpha; 3694 int ipversion; 3695 ip6_pkt_t ipp; 3696 ip6_t *ip6h; 3697 ip6i_t *ip6i; 3698 mblk_t *mp1; 3699 mblk_t *options_mp = NULL; 3700 ip_pktinfo_t *pinfo = NULL; 3701 cred_t *cr = NULL; 3702 pid_t cpid; 3703 uint32_t udp_ip_rcv_options_len; 3704 udp_bits_t udp_bits; 3705 cred_t *rcr = connp->conn_cred; 3706 udp_stack_t *us; 3707 3708 ASSERT(connp->conn_flags & IPCL_UDPCONN); 3709 3710 udp = connp->conn_udp; 3711 us = udp->udp_us; 3712 rptr = mp->b_rptr; 3713 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3714 ASSERT(OK_32PTR(rptr)); 3715 3716 /* 3717 * IP should have prepended the options data in an M_CTL 3718 * Check M_CTL "type" to make sure are not here bcos of 3719 * a valid ICMP message 3720 */ 3721 if (DB_TYPE(mp) == M_CTL) { 3722 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3723 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3724 IN_PKTINFO) { 3725 /* 3726 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3727 * has been prepended to the packet by IP. We need to 3728 * extract the mblk and adjust the rptr 3729 */ 3730 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3731 options_mp = mp; 3732 mp = mp->b_cont; 3733 rptr = mp->b_rptr; 3734 UDP_STAT(us, udp_in_pktinfo); 3735 } else { 3736 /* 3737 * ICMP messages. 3738 */ 3739 udp_icmp_error(connp, mp); 3740 return; 3741 } 3742 } 3743 3744 mp_len = msgdsize(mp); 3745 /* 3746 * This is the inbound data path. 
3747 * First, we check to make sure the IP version number is correct, 3748 * and then pull the IP and UDP headers into the first mblk. 3749 */ 3750 3751 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 3752 ipp.ipp_fields = 0; 3753 3754 ipversion = IPH_HDR_VERSION(rptr); 3755 3756 rw_enter(&udp->udp_rwlock, RW_READER); 3757 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 3758 udp_bits = udp->udp_bits; 3759 rw_exit(&udp->udp_rwlock); 3760 3761 switch (ipversion) { 3762 case IPV4_VERSION: 3763 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 3764 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 3765 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 3766 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 3767 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 3768 udp->udp_family == AF_INET) { 3769 /* 3770 * Record/update udp_ip_rcv_options with the lock 3771 * held. Not needed for AF_INET6 sockets 3772 * since they don't support a getsockopt of IP_OPTIONS. 3773 */ 3774 rw_enter(&udp->udp_rwlock, RW_WRITER); 3775 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 3776 opt_len); 3777 rw_exit(&udp->udp_rwlock); 3778 } 3779 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. */ 3780 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 3781 udp->udp_ip_recvpktinfo) { 3782 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 3783 ipp.ipp_fields |= IPPF_IFINDEX; 3784 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 3785 } 3786 } 3787 break; 3788 case IPV6_VERSION: 3789 /* 3790 * IPv6 packets can only be received by applications 3791 * that are prepared to receive IPv6 addresses. 3792 * The IP fanout must ensure this. 3793 */ 3794 ASSERT(udp->udp_family == AF_INET6); 3795 3796 ip6h = (ip6_t *)rptr; 3797 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 3798 3799 if (ip6h->ip6_nxt != IPPROTO_UDP) { 3800 uint8_t nexthdrp; 3801 /* Look for ifindex information */ 3802 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3803 ip6i = (ip6i_t *)ip6h; 3804 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 3805 goto tossit; 3806 3807 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3808 ASSERT(ip6i->ip6i_ifindex != 0); 3809 ipp.ipp_fields |= IPPF_IFINDEX; 3810 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3811 } 3812 rptr = (uchar_t *)&ip6i[1]; 3813 mp->b_rptr = rptr; 3814 if (rptr == mp->b_wptr) { 3815 mp1 = mp->b_cont; 3816 freeb(mp); 3817 mp = mp1; 3818 rptr = mp->b_rptr; 3819 } 3820 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 3821 goto tossit; 3822 ip6h = (ip6_t *)rptr; 3823 mp_len = msgdsize(mp); 3824 } 3825 /* 3826 * Find any potentially interesting extension headers 3827 * as well as the length of the IPv6 + extension 3828 * headers. 3829 */ 3830 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 3831 UDPH_SIZE; 3832 ASSERT(nexthdrp == IPPROTO_UDP); 3833 } else { 3834 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 3835 ip6i = NULL; 3836 } 3837 break; 3838 default: 3839 ASSERT(0); 3840 } 3841 3842 /* 3843 * IP inspected the UDP header thus all of it must be in the mblk. 3844 * UDP length check is performed for IPv6 packets and IPv4 packets 3845 * to check if the size of the packet as specified 3846 * by the header is the same as the physical size of the packet. 3847 * FIXME? Didn't IP already check this? 3848 */ 3849 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 3850 if ((MBLKL(mp) < hdr_length) || 3851 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 3852 goto tossit; 3853 } 3854 3855 3856 /* Walk past the headers unless IP_RECVHDR was set. 
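 * ("IP_RECVHDR" here refers to the UDP_RCVHDR socket option, i.e.
 * udpb_rcvhdr; when it is set the IP and UDP headers are left in place for
 * the application.)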
*/ 3857 if (!udp_bits.udpb_rcvhdr) { 3858 mp->b_rptr = rptr + hdr_length; 3859 mp_len -= hdr_length; 3860 } 3861 3862 /* 3863 * This is the inbound data path. Packets are passed upstream as 3864 * T_UNITDATA_IND messages with full IP headers still attached. 3865 */ 3866 if (udp->udp_family == AF_INET) { 3867 sin_t *sin; 3868 3869 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 3870 3871 /* 3872 * Normally only send up the source address. 3873 * If IP_RECVDSTADDR is set we include the destination IP 3874 * address as an option. With IP_RECVOPTS we include all 3875 * the IP options. 3876 */ 3877 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3878 if (udp_bits.udpb_recvdstaddr) { 3879 udi_size += sizeof (struct T_opthdr) + 3880 sizeof (struct in_addr); 3881 UDP_STAT(us, udp_in_recvdstaddr); 3882 } 3883 3884 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 3885 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3886 udi_size += sizeof (struct T_opthdr) + 3887 sizeof (struct in_pktinfo); 3888 UDP_STAT(us, udp_ip_rcvpktinfo); 3889 } 3890 3891 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 3892 udi_size += sizeof (struct T_opthdr) + opt_len; 3893 UDP_STAT(us, udp_in_recvopts); 3894 } 3895 3896 /* 3897 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 3898 * space accordingly 3899 */ 3900 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 3901 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3902 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 3903 UDP_STAT(us, udp_in_recvif); 3904 } 3905 3906 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 3907 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 3908 udi_size += sizeof (struct T_opthdr) + 3909 sizeof (struct sockaddr_dl); 3910 UDP_STAT(us, udp_in_recvslla); 3911 } 3912 3913 if ((udp_bits.udpb_recvucred) && 3914 (cr = DB_CRED(mp)) != NULL) { 3915 udi_size += sizeof (struct T_opthdr) + ucredsize; 3916 cpid = DB_CPID(mp); 3917 UDP_STAT(us, udp_in_recvucred); 3918 } 3919 3920 /* 3921 * If SO_TIMESTAMP is set allocate the appropriate sized 3922 * buffer. Since gethrestime() expects a pointer aligned 3923 * argument, we allocate space necessary for extra 3924 * alignment (even though it might not be used). 3925 */ 3926 if (udp_bits.udpb_timestamp) { 3927 udi_size += sizeof (struct T_opthdr) + 3928 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3929 UDP_STAT(us, udp_in_timestamp); 3930 } 3931 3932 /* 3933 * If IP_RECVTTL is set allocate the appropriate sized buffer 3934 */ 3935 if (udp_bits.udpb_recvttl) { 3936 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 3937 UDP_STAT(us, udp_in_recvttl); 3938 } 3939 3940 /* Allocate a message block for the T_UNITDATA_IND structure. 
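 * udi_size was accumulated above to cover the T_unitdata_ind itself, the
 * sin_t source address, and one T_opthdr-prefixed entry for each enabled
 * option; the ASSERT(udi_size == 0) after the option copies checks that the
 * whole allocation is consumed.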
*/ 3941 mp1 = allocb(udi_size, BPRI_MED); 3942 if (mp1 == NULL) { 3943 freemsg(mp); 3944 if (options_mp != NULL) 3945 freeb(options_mp); 3946 BUMP_MIB(&us->us_udp_mib, udpInErrors); 3947 return; 3948 } 3949 mp1->b_cont = mp; 3950 mp = mp1; 3951 mp->b_datap->db_type = M_PROTO; 3952 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3953 mp->b_wptr = (uchar_t *)tudi + udi_size; 3954 tudi->PRIM_type = T_UNITDATA_IND; 3955 tudi->SRC_length = sizeof (sin_t); 3956 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3957 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3958 sizeof (sin_t); 3959 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3960 tudi->OPT_length = udi_size; 3961 sin = (sin_t *)&tudi[1]; 3962 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 3963 sin->sin_port = udpha->uha_src_port; 3964 sin->sin_family = udp->udp_family; 3965 *(uint32_t *)&sin->sin_zero[0] = 0; 3966 *(uint32_t *)&sin->sin_zero[4] = 0; 3967 3968 /* 3969 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 3970 * IP_RECVTTL has been set. 3971 */ 3972 if (udi_size != 0) { 3973 /* 3974 * Copy in destination address before options to avoid 3975 * any padding issues. 3976 */ 3977 char *dstopt; 3978 3979 dstopt = (char *)&sin[1]; 3980 if (udp_bits.udpb_recvdstaddr) { 3981 struct T_opthdr *toh; 3982 ipaddr_t *dstptr; 3983 3984 toh = (struct T_opthdr *)dstopt; 3985 toh->level = IPPROTO_IP; 3986 toh->name = IP_RECVDSTADDR; 3987 toh->len = sizeof (struct T_opthdr) + 3988 sizeof (ipaddr_t); 3989 toh->status = 0; 3990 dstopt += sizeof (struct T_opthdr); 3991 dstptr = (ipaddr_t *)dstopt; 3992 *dstptr = ((ipha_t *)rptr)->ipha_dst; 3993 dstopt += sizeof (ipaddr_t); 3994 udi_size -= toh->len; 3995 } 3996 3997 if (udp_bits.udpb_recvopts && opt_len > 0) { 3998 struct T_opthdr *toh; 3999 4000 toh = (struct T_opthdr *)dstopt; 4001 toh->level = IPPROTO_IP; 4002 toh->name = IP_RECVOPTS; 4003 toh->len = sizeof (struct T_opthdr) + opt_len; 4004 toh->status = 0; 4005 dstopt += sizeof (struct T_opthdr); 4006 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 4007 opt_len); 4008 dstopt += opt_len; 4009 udi_size -= toh->len; 4010 } 4011 4012 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 4013 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4014 struct T_opthdr *toh; 4015 struct in_pktinfo *pktinfop; 4016 4017 toh = (struct T_opthdr *)dstopt; 4018 toh->level = IPPROTO_IP; 4019 toh->name = IP_PKTINFO; 4020 toh->len = sizeof (struct T_opthdr) + 4021 sizeof (*pktinfop); 4022 toh->status = 0; 4023 dstopt += sizeof (struct T_opthdr); 4024 pktinfop = (struct in_pktinfo *)dstopt; 4025 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 4026 pktinfop->ipi_spec_dst = 4027 pinfo->ip_pkt_match_addr; 4028 pktinfop->ipi_addr.s_addr = 4029 ((ipha_t *)rptr)->ipha_dst; 4030 4031 dstopt += sizeof (struct in_pktinfo); 4032 udi_size -= toh->len; 4033 } 4034 4035 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4036 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4037 4038 struct T_opthdr *toh; 4039 struct sockaddr_dl *dstptr; 4040 4041 toh = (struct T_opthdr *)dstopt; 4042 toh->level = IPPROTO_IP; 4043 toh->name = IP_RECVSLLA; 4044 toh->len = sizeof (struct T_opthdr) + 4045 sizeof (struct sockaddr_dl); 4046 toh->status = 0; 4047 dstopt += sizeof (struct T_opthdr); 4048 dstptr = (struct sockaddr_dl *)dstopt; 4049 bcopy(&pinfo->ip_pkt_slla, dstptr, 4050 sizeof (struct sockaddr_dl)); 4051 dstopt += sizeof (struct sockaddr_dl); 4052 udi_size -= toh->len; 4053 } 4054 4055 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4056 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 
4057 4058 struct T_opthdr *toh; 4059 uint_t *dstptr; 4060 4061 toh = (struct T_opthdr *)dstopt; 4062 toh->level = IPPROTO_IP; 4063 toh->name = IP_RECVIF; 4064 toh->len = sizeof (struct T_opthdr) + 4065 sizeof (uint_t); 4066 toh->status = 0; 4067 dstopt += sizeof (struct T_opthdr); 4068 dstptr = (uint_t *)dstopt; 4069 *dstptr = pinfo->ip_pkt_ifindex; 4070 dstopt += sizeof (uint_t); 4071 udi_size -= toh->len; 4072 } 4073 4074 if (cr != NULL) { 4075 struct T_opthdr *toh; 4076 4077 toh = (struct T_opthdr *)dstopt; 4078 toh->level = SOL_SOCKET; 4079 toh->name = SCM_UCRED; 4080 toh->len = sizeof (struct T_opthdr) + ucredsize; 4081 toh->status = 0; 4082 dstopt += sizeof (struct T_opthdr); 4083 (void) cred2ucred(cr, cpid, dstopt, rcr); 4084 dstopt += ucredsize; 4085 udi_size -= toh->len; 4086 } 4087 4088 if (udp_bits.udpb_timestamp) { 4089 struct T_opthdr *toh; 4090 4091 toh = (struct T_opthdr *)dstopt; 4092 toh->level = SOL_SOCKET; 4093 toh->name = SCM_TIMESTAMP; 4094 toh->len = sizeof (struct T_opthdr) + 4095 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4096 toh->status = 0; 4097 dstopt += sizeof (struct T_opthdr); 4098 /* Align for gethrestime() */ 4099 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4100 sizeof (intptr_t)); 4101 gethrestime((timestruc_t *)dstopt); 4102 dstopt = (char *)toh + toh->len; 4103 udi_size -= toh->len; 4104 } 4105 4106 /* 4107 * CAUTION: 4108 * Due to alignment issues, 4109 * processing of the IP_RECVTTL option 4110 * must always come last. Adding 4111 * any option processing after this will 4112 * cause an alignment panic. 4113 */ 4114 if (udp_bits.udpb_recvttl) { 4115 struct T_opthdr *toh; 4116 uint8_t *dstptr; 4117 4118 toh = (struct T_opthdr *)dstopt; 4119 toh->level = IPPROTO_IP; 4120 toh->name = IP_RECVTTL; 4121 toh->len = sizeof (struct T_opthdr) + 4122 sizeof (uint8_t); 4123 toh->status = 0; 4124 dstopt += sizeof (struct T_opthdr); 4125 dstptr = (uint8_t *)dstopt; 4126 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4127 dstopt += sizeof (uint8_t); 4128 udi_size -= toh->len; 4129 } 4130 4131 /* Consumed all of allocated space */ 4132 ASSERT(udi_size == 0); 4133 } 4134 } else { 4135 sin6_t *sin6; 4136 4137 /* 4138 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4139 * 4140 * Normally we only send up the address. If receiving of any 4141 * optional receive side information is enabled, we also send 4142 * that up as options.
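 * For an IPv4 packet arriving on an AF_INET6 socket the source and
 * destination are presented as IPv4-mapped IPv6 addresses (see the
 * IN6_IPADDR_TO_V4MAPPED conversions below).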
4143 */ 4144 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4145 4146 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4147 IPPF_RTHDR|IPPF_IFINDEX)) { 4148 if ((udp_bits.udpb_ipv6_recvhopopts) && 4149 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4150 size_t hlen; 4151 4152 UDP_STAT(us, udp_in_recvhopopts); 4153 hlen = copy_hop_opts(&ipp, NULL); 4154 if (hlen == 0) 4155 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4156 udi_size += hlen; 4157 } 4158 if (((udp_bits.udpb_ipv6_recvdstopts) || 4159 udp_bits.udpb_old_ipv6_recvdstopts) && 4160 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4161 udi_size += sizeof (struct T_opthdr) + 4162 ipp.ipp_dstoptslen; 4163 UDP_STAT(us, udp_in_recvdstopts); 4164 } 4165 if ((((udp_bits.udpb_ipv6_recvdstopts) && 4166 udp_bits.udpb_ipv6_recvrthdr && 4167 (ipp.ipp_fields & IPPF_RTHDR)) || 4168 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 4169 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4170 udi_size += sizeof (struct T_opthdr) + 4171 ipp.ipp_rtdstoptslen; 4172 UDP_STAT(us, udp_in_recvrtdstopts); 4173 } 4174 if ((udp_bits.udpb_ipv6_recvrthdr) && 4175 (ipp.ipp_fields & IPPF_RTHDR)) { 4176 udi_size += sizeof (struct T_opthdr) + 4177 ipp.ipp_rthdrlen; 4178 UDP_STAT(us, udp_in_recvrthdr); 4179 } 4180 if ((udp_bits.udpb_ip_recvpktinfo) && 4181 (ipp.ipp_fields & IPPF_IFINDEX)) { 4182 udi_size += sizeof (struct T_opthdr) + 4183 sizeof (struct in6_pktinfo); 4184 UDP_STAT(us, udp_in_recvpktinfo); 4185 } 4186 4187 } 4188 if ((udp_bits.udpb_recvucred) && 4189 (cr = DB_CRED(mp)) != NULL) { 4190 udi_size += sizeof (struct T_opthdr) + ucredsize; 4191 cpid = DB_CPID(mp); 4192 UDP_STAT(us, udp_in_recvucred); 4193 } 4194 4195 /* 4196 * If SO_TIMESTAMP is set allocate the appropriate sized 4197 * buffer. Since gethrestime() expects a pointer aligned 4198 * argument, we allocate space necessary for extra 4199 * alignment (even though it might not be used). 
4200 */ 4201 if (udp_bits.udpb_timestamp) { 4202 udi_size += sizeof (struct T_opthdr) + 4203 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4204 UDP_STAT(us, udp_in_timestamp); 4205 } 4206 4207 if (udp_bits.udpb_ipv6_recvhoplimit) { 4208 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4209 UDP_STAT(us, udp_in_recvhoplimit); 4210 } 4211 4212 if (udp_bits.udpb_ipv6_recvtclass) { 4213 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4214 UDP_STAT(us, udp_in_recvtclass); 4215 } 4216 4217 mp1 = allocb(udi_size, BPRI_MED); 4218 if (mp1 == NULL) { 4219 freemsg(mp); 4220 if (options_mp != NULL) 4221 freeb(options_mp); 4222 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4223 return; 4224 } 4225 mp1->b_cont = mp; 4226 mp = mp1; 4227 mp->b_datap->db_type = M_PROTO; 4228 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4229 mp->b_wptr = (uchar_t *)tudi + udi_size; 4230 tudi->PRIM_type = T_UNITDATA_IND; 4231 tudi->SRC_length = sizeof (sin6_t); 4232 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4233 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4234 sizeof (sin6_t); 4235 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4236 tudi->OPT_length = udi_size; 4237 sin6 = (sin6_t *)&tudi[1]; 4238 if (ipversion == IPV4_VERSION) { 4239 in6_addr_t v6dst; 4240 4241 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4242 &sin6->sin6_addr); 4243 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4244 &v6dst); 4245 sin6->sin6_flowinfo = 0; 4246 sin6->sin6_scope_id = 0; 4247 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4248 connp->conn_zoneid, us->us_netstack); 4249 } else { 4250 sin6->sin6_addr = ip6h->ip6_src; 4251 /* No sin6_flowinfo per API */ 4252 sin6->sin6_flowinfo = 0; 4253 /* For link-scope source pass up scope id */ 4254 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4255 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4256 sin6->sin6_scope_id = ipp.ipp_ifindex; 4257 else 4258 sin6->sin6_scope_id = 0; 4259 sin6->__sin6_src_id = ip_srcid_find_addr( 4260 &ip6h->ip6_dst, connp->conn_zoneid, 4261 us->us_netstack); 4262 } 4263 sin6->sin6_port = udpha->uha_src_port; 4264 sin6->sin6_family = udp->udp_family; 4265 4266 if (udi_size != 0) { 4267 uchar_t *dstopt; 4268 4269 dstopt = (uchar_t *)&sin6[1]; 4270 if ((udp_bits.udpb_ip_recvpktinfo) && 4271 (ipp.ipp_fields & IPPF_IFINDEX)) { 4272 struct T_opthdr *toh; 4273 struct in6_pktinfo *pkti; 4274 4275 toh = (struct T_opthdr *)dstopt; 4276 toh->level = IPPROTO_IPV6; 4277 toh->name = IPV6_PKTINFO; 4278 toh->len = sizeof (struct T_opthdr) + 4279 sizeof (*pkti); 4280 toh->status = 0; 4281 dstopt += sizeof (struct T_opthdr); 4282 pkti = (struct in6_pktinfo *)dstopt; 4283 if (ipversion == IPV6_VERSION) 4284 pkti->ipi6_addr = ip6h->ip6_dst; 4285 else 4286 IN6_IPADDR_TO_V4MAPPED( 4287 ((ipha_t *)rptr)->ipha_dst, 4288 &pkti->ipi6_addr); 4289 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4290 dstopt += sizeof (*pkti); 4291 udi_size -= toh->len; 4292 } 4293 if (udp_bits.udpb_ipv6_recvhoplimit) { 4294 struct T_opthdr *toh; 4295 4296 toh = (struct T_opthdr *)dstopt; 4297 toh->level = IPPROTO_IPV6; 4298 toh->name = IPV6_HOPLIMIT; 4299 toh->len = sizeof (struct T_opthdr) + 4300 sizeof (uint_t); 4301 toh->status = 0; 4302 dstopt += sizeof (struct T_opthdr); 4303 if (ipversion == IPV6_VERSION) 4304 *(uint_t *)dstopt = ip6h->ip6_hops; 4305 else 4306 *(uint_t *)dstopt = 4307 ((ipha_t *)rptr)->ipha_ttl; 4308 dstopt += sizeof (uint_t); 4309 udi_size -= toh->len; 4310 } 4311 if (udp_bits.udpb_ipv6_recvtclass) { 4312 struct T_opthdr *toh; 4313 4314 toh = (struct T_opthdr *)dstopt; 4315 
toh->level = IPPROTO_IPV6; 4316 toh->name = IPV6_TCLASS; 4317 toh->len = sizeof (struct T_opthdr) + 4318 sizeof (uint_t); 4319 toh->status = 0; 4320 dstopt += sizeof (struct T_opthdr); 4321 if (ipversion == IPV6_VERSION) { 4322 *(uint_t *)dstopt = 4323 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4324 } else { 4325 ipha_t *ipha = (ipha_t *)rptr; 4326 *(uint_t *)dstopt = 4327 ipha->ipha_type_of_service; 4328 } 4329 dstopt += sizeof (uint_t); 4330 udi_size -= toh->len; 4331 } 4332 if ((udp_bits.udpb_ipv6_recvhopopts) && 4333 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4334 size_t hlen; 4335 4336 hlen = copy_hop_opts(&ipp, dstopt); 4337 dstopt += hlen; 4338 udi_size -= hlen; 4339 } 4340 if ((udp_bits.udpb_ipv6_recvdstopts) && 4341 (udp_bits.udpb_ipv6_recvrthdr) && 4342 (ipp.ipp_fields & IPPF_RTHDR) && 4343 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4344 struct T_opthdr *toh; 4345 4346 toh = (struct T_opthdr *)dstopt; 4347 toh->level = IPPROTO_IPV6; 4348 toh->name = IPV6_DSTOPTS; 4349 toh->len = sizeof (struct T_opthdr) + 4350 ipp.ipp_rtdstoptslen; 4351 toh->status = 0; 4352 dstopt += sizeof (struct T_opthdr); 4353 bcopy(ipp.ipp_rtdstopts, dstopt, 4354 ipp.ipp_rtdstoptslen); 4355 dstopt += ipp.ipp_rtdstoptslen; 4356 udi_size -= toh->len; 4357 } 4358 if ((udp_bits.udpb_ipv6_recvrthdr) && 4359 (ipp.ipp_fields & IPPF_RTHDR)) { 4360 struct T_opthdr *toh; 4361 4362 toh = (struct T_opthdr *)dstopt; 4363 toh->level = IPPROTO_IPV6; 4364 toh->name = IPV6_RTHDR; 4365 toh->len = sizeof (struct T_opthdr) + 4366 ipp.ipp_rthdrlen; 4367 toh->status = 0; 4368 dstopt += sizeof (struct T_opthdr); 4369 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4370 dstopt += ipp.ipp_rthdrlen; 4371 udi_size -= toh->len; 4372 } 4373 if ((udp_bits.udpb_ipv6_recvdstopts) && 4374 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4375 struct T_opthdr *toh; 4376 4377 toh = (struct T_opthdr *)dstopt; 4378 toh->level = IPPROTO_IPV6; 4379 toh->name = IPV6_DSTOPTS; 4380 toh->len = sizeof (struct T_opthdr) + 4381 ipp.ipp_dstoptslen; 4382 toh->status = 0; 4383 dstopt += sizeof (struct T_opthdr); 4384 bcopy(ipp.ipp_dstopts, dstopt, 4385 ipp.ipp_dstoptslen); 4386 dstopt += ipp.ipp_dstoptslen; 4387 udi_size -= toh->len; 4388 } 4389 if (cr != NULL) { 4390 struct T_opthdr *toh; 4391 4392 toh = (struct T_opthdr *)dstopt; 4393 toh->level = SOL_SOCKET; 4394 toh->name = SCM_UCRED; 4395 toh->len = sizeof (struct T_opthdr) + ucredsize; 4396 toh->status = 0; 4397 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4398 dstopt += toh->len; 4399 udi_size -= toh->len; 4400 } 4401 if (udp_bits.udpb_timestamp) { 4402 struct T_opthdr *toh; 4403 4404 toh = (struct T_opthdr *)dstopt; 4405 toh->level = SOL_SOCKET; 4406 toh->name = SCM_TIMESTAMP; 4407 toh->len = sizeof (struct T_opthdr) + 4408 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4409 toh->status = 0; 4410 dstopt += sizeof (struct T_opthdr); 4411 /* Align for gethrestime() */ 4412 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4413 sizeof (intptr_t)); 4414 gethrestime((timestruc_t *)dstopt); 4415 dstopt = (uchar_t *)toh + toh->len; 4416 udi_size -= toh->len; 4417 } 4418 4419 /* Consumed all of allocated space */ 4420 ASSERT(udi_size == 0); 4421 } 4422 #undef sin6 4423 /* No IP_RECVDSTADDR for IPv6. 
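 * The destination address is instead available through the
 * IPV6_PKTINFO option built above when the socket has enabled
 * receipt of packet info (and the interface index is known).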
*/ 4424 } 4425 4426 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4427 if (options_mp != NULL) 4428 freeb(options_mp); 4429 4430 if (IPCL_IS_NONSTR(connp)) { 4431 int error; 4432 4433 if ((*connp->conn_upcalls->su_recv) 4434 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 4435 NULL) < 0) { 4436 mutex_enter(&udp->udp_recv_lock); 4437 if (error == ENOSPC) { 4438 /* 4439 * let's confirm while holding the lock 4440 */ 4441 if ((*connp->conn_upcalls->su_recv) 4442 (connp->conn_upper_handle, NULL, 0, 0, 4443 &error, NULL) < 0) { 4444 if (error == ENOSPC) { 4445 connp->conn_flow_cntrld = 4446 B_TRUE; 4447 } else { 4448 ASSERT(error == EOPNOTSUPP); 4449 } 4450 } 4451 mutex_exit(&udp->udp_recv_lock); 4452 } else { 4453 ASSERT(error == EOPNOTSUPP); 4454 udp_queue_fallback(udp, mp); 4455 } 4456 } 4457 } else { 4458 putnext(connp->conn_rq, mp); 4459 } 4460 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 4461 return; 4462 4463 tossit: 4464 freemsg(mp); 4465 if (options_mp != NULL) 4466 freeb(options_mp); 4467 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4468 } 4469 4470 /* 4471 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 4472 * information that can be changing beneath us. 4473 */ 4474 mblk_t * 4475 udp_snmp_get(queue_t *q, mblk_t *mpctl) 4476 { 4477 mblk_t *mpdata; 4478 mblk_t *mp_conn_ctl; 4479 mblk_t *mp_attr_ctl; 4480 mblk_t *mp6_conn_ctl; 4481 mblk_t *mp6_attr_ctl; 4482 mblk_t *mp_conn_tail; 4483 mblk_t *mp_attr_tail; 4484 mblk_t *mp6_conn_tail; 4485 mblk_t *mp6_attr_tail; 4486 struct opthdr *optp; 4487 mib2_udpEntry_t ude; 4488 mib2_udp6Entry_t ude6; 4489 mib2_transportMLPEntry_t mlp; 4490 int state; 4491 zoneid_t zoneid; 4492 int i; 4493 connf_t *connfp; 4494 conn_t *connp = Q_TO_CONN(q); 4495 int v4_conn_idx; 4496 int v6_conn_idx; 4497 boolean_t needattr; 4498 udp_t *udp; 4499 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4500 udp_stack_t *us = connp->conn_netstack->netstack_udp; 4501 mblk_t *mp2ctl; 4502 4503 /* 4504 * make a copy of the original message 4505 */ 4506 mp2ctl = copymsg(mpctl); 4507 4508 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 4509 if (mpctl == NULL || 4510 (mpdata = mpctl->b_cont) == NULL || 4511 (mp_conn_ctl = copymsg(mpctl)) == NULL || 4512 (mp_attr_ctl = copymsg(mpctl)) == NULL || 4513 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 4514 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 4515 freemsg(mp_conn_ctl); 4516 freemsg(mp_attr_ctl); 4517 freemsg(mp6_conn_ctl); 4518 freemsg(mpctl); 4519 freemsg(mp2ctl); 4520 return (0); 4521 } 4522 4523 zoneid = connp->conn_zoneid; 4524 4525 /* fixed length structure for IPv4 and IPv6 counters */ 4526 SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 4527 SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 4528 /* synchronize 64- and 32-bit counters */ 4529 SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams); 4530 SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams); 4531 4532 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4533 optp->level = MIB2_UDP; 4534 optp->name = 0; 4535 (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib, 4536 sizeof (us->us_udp_mib)); 4537 optp->len = msgdsize(mpdata); 4538 qreply(q, mpctl); 4539 4540 mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 4541 v4_conn_idx = v6_conn_idx = 0; 4542 4543 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4544 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4545 connp = NULL; 4546 4547 while ((connp = ipcl_get_next_conn(connfp, connp, 4548 IPCL_UDPCONN))) 
{ 4549 udp = connp->conn_udp; 4550 if (zoneid != connp->conn_zoneid) 4551 continue; 4552 4553 /* 4554 * Note that the port numbers are sent in 4555 * host byte order 4556 */ 4557 4558 if (udp->udp_state == TS_UNBND) 4559 state = MIB2_UDP_unbound; 4560 else if (udp->udp_state == TS_IDLE) 4561 state = MIB2_UDP_idle; 4562 else if (udp->udp_state == TS_DATA_XFER) 4563 state = MIB2_UDP_connected; 4564 else 4565 state = MIB2_UDP_unknown; 4566 4567 needattr = B_FALSE; 4568 bzero(&mlp, sizeof (mlp)); 4569 if (connp->conn_mlp_type != mlptSingle) { 4570 if (connp->conn_mlp_type == mlptShared || 4571 connp->conn_mlp_type == mlptBoth) 4572 mlp.tme_flags |= MIB2_TMEF_SHARED; 4573 if (connp->conn_mlp_type == mlptPrivate || 4574 connp->conn_mlp_type == mlptBoth) 4575 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 4576 needattr = B_TRUE; 4577 } 4578 4579 /* 4580 * Create an IPv4 table entry for IPv4 entries and also 4581 * any IPv6 entries which are bound to in6addr_any 4582 * (i.e. anything a IPv4 peer could connect/send to). 4583 */ 4584 if (udp->udp_ipversion == IPV4_VERSION || 4585 (udp->udp_state <= TS_IDLE && 4586 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 4587 ude.udpEntryInfo.ue_state = state; 4588 /* 4589 * If in6addr_any this will set it to 4590 * INADDR_ANY 4591 */ 4592 ude.udpLocalAddress = 4593 V4_PART_OF_V6(udp->udp_v6src); 4594 ude.udpLocalPort = ntohs(udp->udp_port); 4595 if (udp->udp_state == TS_DATA_XFER) { 4596 /* 4597 * Can potentially get here for 4598 * v6 socket if another process 4599 * (say, ping) has just done a 4600 * sendto(), changing the state 4601 * from the TS_IDLE above to 4602 * TS_DATA_XFER by the time we hit 4603 * this part of the code. 4604 */ 4605 ude.udpEntryInfo.ue_RemoteAddress = 4606 V4_PART_OF_V6(udp->udp_v6dst); 4607 ude.udpEntryInfo.ue_RemotePort = 4608 ntohs(udp->udp_dstport); 4609 } else { 4610 ude.udpEntryInfo.ue_RemoteAddress = 0; 4611 ude.udpEntryInfo.ue_RemotePort = 0; 4612 } 4613 4614 /* 4615 * We make the assumption that all udp_t 4616 * structs will be created within an address 4617 * region no larger than 32-bits. 4618 */ 4619 ude.udpInstance = (uint32_t)(uintptr_t)udp; 4620 ude.udpCreationProcess = 4621 (udp->udp_open_pid < 0) ? 4622 MIB2_UNKNOWN_PROCESS : 4623 udp->udp_open_pid; 4624 ude.udpCreationTime = udp->udp_open_time; 4625 4626 (void) snmp_append_data2(mp_conn_ctl->b_cont, 4627 &mp_conn_tail, (char *)&ude, sizeof (ude)); 4628 mlp.tme_connidx = v4_conn_idx++; 4629 if (needattr) 4630 (void) snmp_append_data2( 4631 mp_attr_ctl->b_cont, &mp_attr_tail, 4632 (char *)&mlp, sizeof (mlp)); 4633 } 4634 if (udp->udp_ipversion == IPV6_VERSION) { 4635 ude6.udp6EntryInfo.ue_state = state; 4636 ude6.udp6LocalAddress = udp->udp_v6src; 4637 ude6.udp6LocalPort = ntohs(udp->udp_port); 4638 ude6.udp6IfIndex = udp->udp_bound_if; 4639 if (udp->udp_state == TS_DATA_XFER) { 4640 ude6.udp6EntryInfo.ue_RemoteAddress = 4641 udp->udp_v6dst; 4642 ude6.udp6EntryInfo.ue_RemotePort = 4643 ntohs(udp->udp_dstport); 4644 } else { 4645 ude6.udp6EntryInfo.ue_RemoteAddress = 4646 sin6_null.sin6_addr; 4647 ude6.udp6EntryInfo.ue_RemotePort = 0; 4648 } 4649 /* 4650 * We make the assumption that all udp_t 4651 * structs will be created within an address 4652 * region no larger than 32-bits. 4653 */ 4654 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 4655 ude6.udp6CreationProcess = 4656 (udp->udp_open_pid < 0) ? 
4657 MIB2_UNKNOWN_PROCESS : 4658 udp->udp_open_pid; 4659 ude6.udp6CreationTime = udp->udp_open_time; 4660 4661 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 4662 &mp6_conn_tail, (char *)&ude6, 4663 sizeof (ude6)); 4664 mlp.tme_connidx = v6_conn_idx++; 4665 if (needattr) 4666 (void) snmp_append_data2( 4667 mp6_attr_ctl->b_cont, 4668 &mp6_attr_tail, (char *)&mlp, 4669 sizeof (mlp)); 4670 } 4671 } 4672 } 4673 4674 /* IPv4 UDP endpoints */ 4675 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 4676 sizeof (struct T_optmgmt_ack)]; 4677 optp->level = MIB2_UDP; 4678 optp->name = MIB2_UDP_ENTRY; 4679 optp->len = msgdsize(mp_conn_ctl->b_cont); 4680 qreply(q, mp_conn_ctl); 4681 4682 /* table of MLP attributes... */ 4683 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 4684 sizeof (struct T_optmgmt_ack)]; 4685 optp->level = MIB2_UDP; 4686 optp->name = EXPER_XPORT_MLP; 4687 optp->len = msgdsize(mp_attr_ctl->b_cont); 4688 if (optp->len == 0) 4689 freemsg(mp_attr_ctl); 4690 else 4691 qreply(q, mp_attr_ctl); 4692 4693 /* IPv6 UDP endpoints */ 4694 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 4695 sizeof (struct T_optmgmt_ack)]; 4696 optp->level = MIB2_UDP6; 4697 optp->name = MIB2_UDP6_ENTRY; 4698 optp->len = msgdsize(mp6_conn_ctl->b_cont); 4699 qreply(q, mp6_conn_ctl); 4700 4701 /* table of MLP attributes... */ 4702 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 4703 sizeof (struct T_optmgmt_ack)]; 4704 optp->level = MIB2_UDP6; 4705 optp->name = EXPER_XPORT_MLP; 4706 optp->len = msgdsize(mp6_attr_ctl->b_cont); 4707 if (optp->len == 0) 4708 freemsg(mp6_attr_ctl); 4709 else 4710 qreply(q, mp6_attr_ctl); 4711 4712 return (mp2ctl); 4713 } 4714 4715 /* 4716 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 4717 * NOTE: Per MIB-II, UDP has no writable data. 4718 * TODO: If this ever actually tries to set anything, it needs to be 4719 * changed to do the appropriate locking.
4720 */ 4721 /* ARGSUSED */ 4722 int 4723 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4724 uchar_t *ptr, int len) 4725 { 4726 switch (level) { 4727 case MIB2_UDP: 4728 return (0); 4729 default: 4730 return (1); 4731 } 4732 } 4733 4734 static void 4735 udp_report_item(mblk_t *mp, udp_t *udp) 4736 { 4737 char *state; 4738 char addrbuf1[INET6_ADDRSTRLEN]; 4739 char addrbuf2[INET6_ADDRSTRLEN]; 4740 uint_t print_len, buf_len; 4741 4742 buf_len = mp->b_datap->db_lim - mp->b_wptr; 4743 ASSERT(buf_len >= 0); 4744 if (buf_len == 0) 4745 return; 4746 4747 if (udp->udp_state == TS_UNBND) 4748 state = "UNBOUND"; 4749 else if (udp->udp_state == TS_IDLE) 4750 state = "IDLE"; 4751 else if (udp->udp_state == TS_DATA_XFER) 4752 state = "CONNECTED"; 4753 else 4754 state = "UnkState"; 4755 print_len = snprintf((char *)mp->b_wptr, buf_len, 4756 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 4757 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 4758 inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)), 4759 inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)), 4760 ntohs(udp->udp_dstport), state); 4761 if (print_len < buf_len) { 4762 mp->b_wptr += print_len; 4763 } else { 4764 mp->b_wptr += buf_len; 4765 } 4766 } 4767 4768 /* Report for ndd "udp_status" */ 4769 /* ARGSUSED */ 4770 static int 4771 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4772 { 4773 zoneid_t zoneid; 4774 connf_t *connfp; 4775 conn_t *connp = Q_TO_CONN(q); 4776 udp_t *udp = connp->conn_udp; 4777 int i; 4778 udp_stack_t *us = udp->udp_us; 4779 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4780 4781 /* 4782 * Because of the ndd constraint, at most we can have 64K buffer 4783 * to put in all UDP info. So to be more efficient, just 4784 * allocate a 64K buffer here, assuming we need that large buffer. 4785 * This may be a problem as any user can read udp_status. Therefore 4786 * we limit the rate of doing this using us_ndd_get_info_interval. 4787 * This should be OK as normal users should not do this too often. 4788 */ 4789 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 4790 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 4791 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 4792 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 4793 return (0); 4794 } 4795 } 4796 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 4797 /* The following may work even if we cannot get a large buf. */ 4798 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 4799 return (0); 4800 } 4801 (void) mi_mpprintf(mp, 4802 "UDP " MI_COL_HDRPAD_STR 4803 /* 12345678[89ABCDEF] */ 4804 " zone lport src addr dest addr port state"); 4805 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 4806 4807 zoneid = connp->conn_zoneid; 4808 4809 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4810 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4811 connp = NULL; 4812 4813 while ((connp = ipcl_get_next_conn(connfp, connp, 4814 IPCL_UDPCONN))) { 4815 udp = connp->conn_udp; 4816 if (zoneid != GLOBAL_ZONEID && 4817 zoneid != connp->conn_zoneid) 4818 continue; 4819 4820 udp_report_item(mp->b_cont, udp); 4821 } 4822 } 4823 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 4824 return (0); 4825 } 4826 4827 /* 4828 * This routine creates a T_UDERROR_IND message and passes it upstream. 4829 * The address and options are copied from the T_UNITDATA_REQ message 4830 * passed in mp. This message is freed. 
4831 */ 4832 static void 4833 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 4834 t_scalar_t err) 4835 { 4836 struct T_unitdata_req *tudr; 4837 mblk_t *mp1; 4838 uchar_t *optaddr; 4839 t_scalar_t optlen; 4840 4841 if (DB_TYPE(mp) == M_DATA) { 4842 ASSERT(destaddr != NULL && destlen != 0); 4843 optaddr = NULL; 4844 optlen = 0; 4845 } else { 4846 if ((mp->b_wptr < mp->b_rptr) || 4847 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 4848 goto done; 4849 } 4850 tudr = (struct T_unitdata_req *)mp->b_rptr; 4851 destaddr = mp->b_rptr + tudr->DEST_offset; 4852 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 4853 destaddr + tudr->DEST_length < mp->b_rptr || 4854 destaddr + tudr->DEST_length > mp->b_wptr) { 4855 goto done; 4856 } 4857 optaddr = mp->b_rptr + tudr->OPT_offset; 4858 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 4859 optaddr + tudr->OPT_length < mp->b_rptr || 4860 optaddr + tudr->OPT_length > mp->b_wptr) { 4861 goto done; 4862 } 4863 destlen = tudr->DEST_length; 4864 optlen = tudr->OPT_length; 4865 } 4866 4867 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 4868 (char *)optaddr, optlen, err); 4869 if (mp1 != NULL) 4870 qreply(q, mp1); 4871 4872 done: 4873 freemsg(mp); 4874 } 4875 4876 /* 4877 * This routine removes a port number association from a stream. It 4878 * is called by udp_wput to handle T_UNBIND_REQ messages. 4879 */ 4880 static void 4881 udp_tpi_unbind(queue_t *q, mblk_t *mp) 4882 { 4883 conn_t *connp = Q_TO_CONN(q); 4884 int error; 4885 4886 error = udp_do_unbind(connp); 4887 if (error) { 4888 if (error < 0) 4889 udp_err_ack(q, mp, -error, 0); 4890 else 4891 udp_err_ack(q, mp, TSYSERR, error); 4892 return; 4893 } 4894 4895 mp = mi_tpi_ok_ack_alloc(mp); 4896 ASSERT(mp != NULL); 4897 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4898 qreply(q, mp); 4899 } 4900 4901 /* 4902 * Don't let port fall into the privileged range. 4903 * Since the extra privileged ports can be arbitrary we also 4904 * ensure that we exclude those from consideration. 4905 * us->us_epriv_ports is not sorted thus we loop over it until 4906 * there are no changes. 4907 */ 4908 static in_port_t 4909 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 4910 { 4911 int i; 4912 in_port_t nextport; 4913 boolean_t restart = B_FALSE; 4914 udp_stack_t *us = udp->udp_us; 4915 4916 if (random && udp_random_anon_port != 0) { 4917 (void) random_get_pseudo_bytes((uint8_t *)&port, 4918 sizeof (in_port_t)); 4919 /* 4920 * Unless changed by a sys admin, the smallest anon port 4921 * is 32768 and the largest anon port is 65535. It is 4922 * very likely (50%) for the random port to be smaller 4923 * than the smallest anon port. When that happens, 4924 * add port % (anon port range) to the smallest anon 4925 * port to get the random port. It should fall into the 4926 * valid anon port range. 
4927 */ 4928 if (port < us->us_smallest_anon_port) { 4929 port = us->us_smallest_anon_port + 4930 port % (us->us_largest_anon_port - 4931 us->us_smallest_anon_port); 4932 } 4933 } 4934 4935 retry: 4936 if (port < us->us_smallest_anon_port) 4937 port = us->us_smallest_anon_port; 4938 4939 if (port > us->us_largest_anon_port) { 4940 port = us->us_smallest_anon_port; 4941 if (restart) 4942 return (0); 4943 restart = B_TRUE; 4944 } 4945 4946 if (port < us->us_smallest_nonpriv_port) 4947 port = us->us_smallest_nonpriv_port; 4948 4949 for (i = 0; i < us->us_num_epriv_ports; i++) { 4950 if (port == us->us_epriv_ports[i]) { 4951 port++; 4952 /* 4953 * Make sure that the port is in the 4954 * valid range. 4955 */ 4956 goto retry; 4957 } 4958 } 4959 4960 if (is_system_labeled() && 4961 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 4962 port, IPPROTO_UDP, B_TRUE)) != 0) { 4963 port = nextport; 4964 goto retry; 4965 } 4966 4967 return (port); 4968 } 4969 4970 static int 4971 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst, 4972 boolean_t *update_lastdst) 4973 { 4974 int err; 4975 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4976 udp_t *udp = Q_TO_UDP(wq); 4977 udp_stack_t *us = udp->udp_us; 4978 4979 err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst, 4980 opt_storage, udp->udp_connp->conn_mac_exempt, 4981 us->us_netstack->netstack_ip); 4982 if (err == 0) { 4983 err = tsol_update_options(&udp->udp_ip_snd_options, 4984 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 4985 opt_storage); 4986 } 4987 if (err != 0) { 4988 DTRACE_PROBE4( 4989 tx__ip__log__info__updatelabel__udp, 4990 char *, "queue(1) failed to update options(2) on mp(3)", 4991 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4992 } else { 4993 *update_lastdst = B_TRUE; 4994 } 4995 return (err); 4996 } 4997 4998 static mblk_t * 4999 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 5000 uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg, 5001 cred_t *cr, pid_t pid) 5002 { 5003 udp_t *udp = connp->conn_udp; 5004 mblk_t *mp1 = mp; 5005 mblk_t *mp2; 5006 ipha_t *ipha; 5007 int ip_hdr_length; 5008 uint32_t ip_len; 5009 udpha_t *udpha; 5010 boolean_t lock_held = B_FALSE; 5011 in_port_t uha_src_port; 5012 udpattrs_t attrs; 5013 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 5014 uint32_t ip_snd_opt_len = 0; 5015 ip4_pkt_t pktinfo; 5016 ip4_pkt_t *pktinfop = &pktinfo; 5017 ip_opt_info_t optinfo; 5018 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5019 udp_stack_t *us = udp->udp_us; 5020 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 5021 queue_t *q = connp->conn_wq; 5022 ire_t *ire; 5023 in6_addr_t v6dst; 5024 boolean_t update_lastdst = B_FALSE; 5025 5026 *error = 0; 5027 pktinfop->ip4_ill_index = 0; 5028 pktinfop->ip4_addr = INADDR_ANY; 5029 optinfo.ip_opt_flags = 0; 5030 optinfo.ip_opt_ill_index = 0; 5031 5032 if (v4dst == INADDR_ANY) 5033 v4dst = htonl(INADDR_LOOPBACK); 5034 5035 /* 5036 * If options passed in, feed it for verification and handling 5037 */ 5038 attrs.udpattr_credset = B_FALSE; 5039 if (IPCL_IS_NONSTR(connp)) { 5040 if (msg->msg_controllen != 0) { 5041 attrs.udpattr_ipp4 = pktinfop; 5042 attrs.udpattr_mb = mp; 5043 5044 rw_enter(&udp->udp_rwlock, RW_WRITER); 5045 *error = process_auxiliary_options(connp, 5046 msg->msg_control, msg->msg_controllen, 5047 &attrs, &udp_opt_obj, udp_opt_set); 5048 rw_exit(&udp->udp_rwlock); 5049 if (*error) 5050 goto done; 5051 } 5052 } else { 5053 if (DB_TYPE(mp) != M_DATA) { 5054 mp1 = mp->b_cont; 5055 if 
(((struct T_unitdata_req *) 5056 mp->b_rptr)->OPT_length != 0) { 5057 attrs.udpattr_ipp4 = pktinfop; 5058 attrs.udpattr_mb = mp; 5059 if (udp_unitdata_opt_process(q, mp, error, 5060 &attrs) < 0) 5061 goto done; 5062 /* 5063 * Note: success in processing options. 5064 * mp option buffer represented by 5065 * OPT_length/offset now potentially modified 5066 * and contain option setting results 5067 */ 5068 ASSERT(*error == 0); 5069 } 5070 } 5071 } 5072 5073 /* mp1 points to the M_DATA mblk carrying the packet */ 5074 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 5075 5076 /* 5077 * Determine whether we need to mark the mblk with the user's 5078 * credentials. 5079 */ 5080 ire = connp->conn_ire_cache; 5081 if (is_system_labeled() || CLASSD(v4dst) || (ire == NULL) || 5082 (ire->ire_addr != v4dst) || 5083 (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) { 5084 if (cr != NULL && DB_CRED(mp) == NULL) 5085 msg_setcredpid(mp, cr, pid); 5086 } 5087 5088 rw_enter(&udp->udp_rwlock, RW_READER); 5089 lock_held = B_TRUE; 5090 5091 /* 5092 * Cluster and TSOL note: 5093 * udp.udp_v6lastdst is shared by Cluster and TSOL 5094 * udp.udp_lastdstport is used by Cluster 5095 * 5096 * Both Cluster and TSOL need to update the dest addr and/or port. 5097 * Updating is done after both Cluster and TSOL checks, protected 5098 * by conn_lock. 5099 */ 5100 mutex_enter(&connp->conn_lock); 5101 5102 if (cl_inet_connect2 != NULL && 5103 (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5104 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5105 udp->udp_lastdstport != port)) { 5106 mutex_exit(&connp->conn_lock); 5107 *error = 0; 5108 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 5109 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error); 5110 if (*error != 0) { 5111 *error = EHOSTUNREACH; 5112 goto done; 5113 } 5114 update_lastdst = B_TRUE; 5115 mutex_enter(&connp->conn_lock); 5116 } 5117 5118 /* 5119 * Check if our saved options are valid; update if not. 5120 * TSOL Note: Since we are not in WRITER mode, UDP packets 5121 * to different destination may require different labels, 5122 * or worse, UDP packets to same IP address may require 5123 * different labels due to use of shared all-zones address. 5124 * We use conn_lock to ensure that lastdst, ip_snd_options, 5125 * and ip_snd_options_len are consistent for the current 5126 * destination and are updated atomically. 
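 * update_lastdst records whether the Cluster or TSOL checks decided
 * that udp_v6lastdst/udp_lastdstport must be refreshed; the update
 * itself is done further below while conn_lock is still held.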
5127 */ 5128 if (is_system_labeled()) { 5129 /* Using UDP MLP requires SCM_UCRED from user */ 5130 if (connp->conn_mlp_type != mlptSingle && 5131 !attrs.udpattr_credset) { 5132 mutex_exit(&connp->conn_lock); 5133 DTRACE_PROBE4( 5134 tx__ip__log__info__output__udp, 5135 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 5136 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 5137 *error = ECONNREFUSED; 5138 goto done; 5139 } 5140 /* 5141 * update label option for this UDP socket if 5142 * - the destination has changed, or 5143 * - the UDP socket is MLP 5144 */ 5145 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5146 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5147 connp->conn_mlp_type != mlptSingle) && 5148 (*error = udp_update_label(q, mp, v4dst, &update_lastdst)) 5149 != 0) { 5150 mutex_exit(&connp->conn_lock); 5151 goto done; 5152 } 5153 } 5154 if (update_lastdst) { 5155 IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst); 5156 udp->udp_lastdstport = port; 5157 } 5158 if (udp->udp_ip_snd_options_len > 0) { 5159 ip_snd_opt_len = udp->udp_ip_snd_options_len; 5160 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 5161 } 5162 mutex_exit(&connp->conn_lock); 5163 5164 /* Add an IP header */ 5165 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + 5166 (insert_spi ? sizeof (uint32_t) : 0); 5167 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5168 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5169 !OK_32PTR(ipha)) { 5170 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 5171 if (mp2 == NULL) { 5172 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5173 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5174 *error = ENOMEM; 5175 goto done; 5176 } 5177 mp2->b_wptr = DB_LIM(mp2); 5178 mp2->b_cont = mp1; 5179 mp1 = mp2; 5180 if (DB_TYPE(mp) != M_DATA) 5181 mp->b_cont = mp1; 5182 else 5183 mp = mp1; 5184 5185 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5186 } 5187 ip_hdr_length -= (UDPH_SIZE + (insert_spi ? sizeof (uint32_t) : 0)); 5188 #ifdef _BIG_ENDIAN 5189 /* Set version, header length, and tos */ 5190 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5191 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5192 udp->udp_type_of_service); 5193 /* Set ttl and protocol */ 5194 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5195 #else 5196 /* Set version, header length, and tos */ 5197 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5198 ((udp->udp_type_of_service << 8) | 5199 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5200 /* Set ttl and protocol */ 5201 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5202 #endif 5203 if (pktinfop->ip4_addr != INADDR_ANY) { 5204 ipha->ipha_src = pktinfop->ip4_addr; 5205 optinfo.ip_opt_flags = IP_VERIFY_SRC; 5206 } else { 5207 /* 5208 * Copy our address into the packet. If this is zero, 5209 * first look at __sin6_src_id for a hint. If we leave the 5210 * source as INADDR_ANY then ip will fill in the real source 5211 * address. 
5212 */ 5213 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5214 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5215 in6_addr_t v6src; 5216 5217 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 5218 us->us_netstack); 5219 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5220 } 5221 } 5222 uha_src_port = udp->udp_port; 5223 if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) { 5224 rw_exit(&udp->udp_rwlock); 5225 lock_held = B_FALSE; 5226 } 5227 5228 if (pktinfop->ip4_ill_index != 0) { 5229 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 5230 } 5231 5232 ipha->ipha_fragment_offset_and_flags = 0; 5233 ipha->ipha_ident = 0; 5234 5235 mp1->b_rptr = (uchar_t *)ipha; 5236 5237 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5238 (uintptr_t)UINT_MAX); 5239 5240 /* Determine length of packet */ 5241 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5242 if ((mp2 = mp1->b_cont) != NULL) { 5243 do { 5244 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5245 ip_len += (uint32_t)MBLKL(mp2); 5246 } while ((mp2 = mp2->b_cont) != NULL); 5247 } 5248 /* 5249 * If the size of the packet is greater than the maximum allowed by 5250 * ip, return an error. Passing this down could cause panics because 5251 * the size will have wrapped and be inconsistent with the msg size. 5252 */ 5253 if (ip_len > IP_MAXPACKET) { 5254 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5255 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5256 *error = EMSGSIZE; 5257 goto done; 5258 } 5259 ipha->ipha_length = htons((uint16_t)ip_len); 5260 ip_len -= ip_hdr_length; 5261 ip_len = htons((uint16_t)ip_len); 5262 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5263 5264 /* Insert all-0s SPI now. */ 5265 if (insert_spi) 5266 *((uint32_t *)(udpha + 1)) = 0; 5267 5268 /* 5269 * Copy in the destination address 5270 */ 5271 ipha->ipha_dst = v4dst; 5272 5273 /* 5274 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 5275 */ 5276 if (CLASSD(v4dst)) 5277 ipha->ipha_ttl = udp->udp_multicast_ttl; 5278 5279 udpha->uha_dst_port = port; 5280 udpha->uha_src_port = uha_src_port; 5281 5282 if (ip_snd_opt_len > 0) { 5283 uint32_t cksum; 5284 5285 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 5286 lock_held = B_FALSE; 5287 rw_exit(&udp->udp_rwlock); 5288 /* 5289 * Massage source route putting first source route in ipha_dst. 5290 * Ignore the destination in T_unitdata_req. 5291 * Create a checksum adjustment for a source route, if any. 5292 */ 5293 cksum = ip_massage_options(ipha, us->us_netstack); 5294 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5295 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5296 (ipha->ipha_dst & 0xFFFF); 5297 if ((int)cksum < 0) 5298 cksum--; 5299 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5300 /* 5301 * IP does the checksum if uha_checksum is non-zero, 5302 * We make it easy for IP to include our pseudo header 5303 * by putting our length in uha_checksum. 5304 */ 5305 cksum += ip_len; 5306 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5307 /* There might be a carry. */ 5308 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5309 #ifdef _LITTLE_ENDIAN 5310 if (us->us_do_checksum) 5311 ip_len = (cksum << 16) | ip_len; 5312 #else 5313 if (us->us_do_checksum) 5314 ip_len = (ip_len << 16) | cksum; 5315 else 5316 ip_len <<= 16; 5317 #endif 5318 } else { 5319 /* 5320 * IP does the checksum if uha_checksum is non-zero, 5321 * We make it easy for IP to include our pseudo header 5322 * by putting our length in uha_checksum. 
5323 */ 5324 if (us->us_do_checksum) 5325 ip_len |= (ip_len << 16); 5326 #ifndef _LITTLE_ENDIAN 5327 else 5328 ip_len <<= 16; 5329 #endif 5330 } 5331 ASSERT(!lock_held); 5332 /* Set UDP length and checksum */ 5333 *((uint32_t *)&udpha->uha_length) = ip_len; 5334 if (DB_CRED(mp) != NULL) 5335 mblk_setcred(mp1, DB_CRED(mp)); 5336 5337 if (DB_TYPE(mp) != M_DATA) { 5338 ASSERT(mp != mp1); 5339 freeb(mp); 5340 } 5341 5342 /* mp has been consumed and we'll return success */ 5343 ASSERT(*error == 0); 5344 mp = NULL; 5345 5346 /* We're done. Pass the packet to ip. */ 5347 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 5348 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5349 "udp_wput_end: q %p (%S)", q, "end"); 5350 5351 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 5352 CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || 5353 connp->conn_dontroute || 5354 connp->conn_nofailover_ill != NULL || 5355 connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || 5356 optinfo.ip_opt_ill_index != 0 || 5357 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 5358 IPP_ENABLED(IPP_LOCAL_OUT, ipst) || 5359 ipst->ips_ip_g_mrouter != NULL) { 5360 UDP_STAT(us, udp_ip_send); 5361 ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, 5362 &optinfo); 5363 } else { 5364 udp_send_data(udp, connp->conn_wq, mp1, ipha); 5365 } 5366 5367 done: 5368 if (lock_held) 5369 rw_exit(&udp->udp_rwlock); 5370 if (*error != 0) { 5371 ASSERT(mp != NULL); 5372 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5373 } 5374 return (mp); 5375 } 5376 5377 static void 5378 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 5379 { 5380 conn_t *connp = udp->udp_connp; 5381 ipaddr_t src, dst; 5382 ire_t *ire; 5383 ipif_t *ipif = NULL; 5384 mblk_t *ire_fp_mp; 5385 boolean_t retry_caching; 5386 udp_stack_t *us = udp->udp_us; 5387 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5388 5389 dst = ipha->ipha_dst; 5390 src = ipha->ipha_src; 5391 ASSERT(ipha->ipha_ident == 0); 5392 5393 if (CLASSD(dst)) { 5394 int err; 5395 5396 ipif = conn_get_held_ipif(connp, 5397 &connp->conn_multicast_ipif, &err); 5398 5399 if (ipif == NULL || ipif->ipif_isv6 || 5400 (ipif->ipif_ill->ill_phyint->phyint_flags & 5401 PHYI_LOOPBACK)) { 5402 if (ipif != NULL) 5403 ipif_refrele(ipif); 5404 UDP_STAT(us, udp_ip_send); 5405 ip_output(connp, mp, q, IP_WPUT); 5406 return; 5407 } 5408 } 5409 5410 retry_caching = B_FALSE; 5411 mutex_enter(&connp->conn_lock); 5412 ire = connp->conn_ire_cache; 5413 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 5414 5415 if (ire == NULL || ire->ire_addr != dst || 5416 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 5417 retry_caching = B_TRUE; 5418 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 5419 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 5420 5421 ASSERT(ipif != NULL); 5422 if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL || 5423 stq_ill->ill_group != ipif->ipif_ill->ill_group)) 5424 retry_caching = B_TRUE; 5425 } 5426 5427 if (!retry_caching) { 5428 ASSERT(ire != NULL); 5429 IRE_REFHOLD(ire); 5430 mutex_exit(&connp->conn_lock); 5431 } else { 5432 boolean_t cached = B_FALSE; 5433 5434 connp->conn_ire_cache = NULL; 5435 mutex_exit(&connp->conn_lock); 5436 5437 /* Release the old ire */ 5438 if (ire != NULL) { 5439 IRE_REFRELE_NOTR(ire); 5440 ire = NULL; 5441 } 5442 5443 if (CLASSD(dst)) { 5444 ASSERT(ipif != NULL); 5445 ire = ire_ctable_lookup(dst, 0, 0, ipif, 5446 connp->conn_zoneid, MBLK_GETLABEL(mp), 5447 MATCH_IRE_ILL_GROUP, ipst); 5448 } else { 5449 ASSERT(ipif == NULL); 5450 ire = 
ire_cache_lookup(dst, connp->conn_zoneid, 5451 MBLK_GETLABEL(mp), ipst); 5452 } 5453 5454 if (ire == NULL) { 5455 if (ipif != NULL) 5456 ipif_refrele(ipif); 5457 UDP_STAT(us, udp_ire_null); 5458 ip_output(connp, mp, q, IP_WPUT); 5459 return; 5460 } 5461 IRE_REFHOLD_NOTR(ire); 5462 5463 mutex_enter(&connp->conn_lock); 5464 if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL && 5465 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 5466 irb_t *irb = ire->ire_bucket; 5467 5468 /* 5469 * IRE's created for non-connection oriented transports 5470 * are normally initialized with IRE_MARK_TEMPORARY set 5471 * in the ire_marks. These IRE's are preferentially 5472 * reaped when the hash chain length in the cache 5473 * bucket exceeds the maximum value specified in 5474 * ip[6]_ire_max_bucket_cnt. This can severely affect 5475 * UDP performance if IRE cache entries that we need 5476 * to reuse are continually removed. To remedy this, 5477 * when we cache the IRE in the conn_t, we remove the 5478 * IRE_MARK_TEMPORARY bit from the ire_marks if it was 5479 * set. 5480 */ 5481 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5482 rw_enter(&irb->irb_lock, RW_WRITER); 5483 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5484 ire->ire_marks &= ~IRE_MARK_TEMPORARY; 5485 irb->irb_tmp_ire_cnt--; 5486 } 5487 rw_exit(&irb->irb_lock); 5488 } 5489 connp->conn_ire_cache = ire; 5490 cached = B_TRUE; 5491 } 5492 mutex_exit(&connp->conn_lock); 5493 5494 /* 5495 * We can continue to use the ire but since it was not 5496 * cached, we should drop the extra reference. 5497 */ 5498 if (!cached) 5499 IRE_REFRELE_NOTR(ire); 5500 } 5501 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 5502 ASSERT(!CLASSD(dst) || ipif != NULL); 5503 5504 /* 5505 * Check if we can take the fast-path. 5506 * Note that "incomplete" ire's (where the link-layer for next hop 5507 * is not resolved, or where the fast-path header in nce_fp_mp is not 5508 * available yet) are sent down the legacy (slow) path 5509 */ 5510 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 5511 (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || 5512 (ire->ire_max_frag < ntohs(ipha->ipha_length)) || 5513 ((ire->ire_nce == NULL) || 5514 ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) || 5515 connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) { 5516 if (ipif != NULL) 5517 ipif_refrele(ipif); 5518 UDP_STAT(us, udp_ip_ire_send); 5519 IRE_REFRELE(ire); 5520 ip_output(connp, mp, q, IP_WPUT); 5521 return; 5522 } 5523 5524 if (src == INADDR_ANY && !connp->conn_unspec_src) { 5525 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 5526 ipha->ipha_src = ipif->ipif_src_addr; 5527 else 5528 ipha->ipha_src = ire->ire_src_addr; 5529 } 5530 5531 if (ipif != NULL) 5532 ipif_refrele(ipif); 5533 5534 udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid); 5535 } 5536 5537 static void 5538 udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) 5539 { 5540 ipaddr_t src, dst; 5541 ill_t *ill; 5542 mblk_t *ire_fp_mp; 5543 uint_t ire_fp_mp_len; 5544 uint16_t *up; 5545 uint32_t cksum, hcksum_txflags; 5546 queue_t *dev_q; 5547 udp_t *udp = connp->conn_udp; 5548 ipha_t *ipha = (ipha_t *)mp->b_rptr; 5549 udp_stack_t *us = udp->udp_us; 5550 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5551 boolean_t ll_multicast = B_FALSE; 5552 5553 dev_q = ire->ire_stq->q_next; 5554 ASSERT(dev_q != NULL); 5555 5556 ill = ire_to_ill(ire); 5557 ASSERT(ill != NULL); 5558 5559 /* is queue flow controlled? 
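 * If it is, the datagram is either requeued on our write queue (when
 * ip_output_queue is set) or freed, and the ire reference is released.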
*/ 5560 if (q->q_first != NULL || connp->conn_draining || 5561 DEV_Q_FLOW_BLOCKED(dev_q)) { 5562 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 5563 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 5564 5565 if (ipst->ips_ip_output_queue) 5566 (void) putq(connp->conn_wq, mp); 5567 else 5568 freemsg(mp); 5569 ire_refrele(ire); 5570 return; 5571 } 5572 5573 ire_fp_mp = ire->ire_nce->nce_fp_mp; 5574 ire_fp_mp_len = MBLKL(ire_fp_mp); 5575 ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len); 5576 5577 dst = ipha->ipha_dst; 5578 src = ipha->ipha_src; 5579 5580 5581 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 5582 5583 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 5584 #ifndef _BIG_ENDIAN 5585 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 5586 #endif 5587 5588 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 5589 ASSERT(ill->ill_hcksum_capab != NULL); 5590 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 5591 } else { 5592 hcksum_txflags = 0; 5593 } 5594 5595 /* pseudo-header checksum (do it in parts for IP header checksum) */ 5596 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 5597 5598 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 5599 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 5600 if (*up != 0) { 5601 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 5602 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 5603 ntohs(ipha->ipha_length), cksum); 5604 5605 /* Software checksum? */ 5606 if (DB_CKSUMFLAGS(mp) == 0) { 5607 UDP_STAT(us, udp_out_sw_cksum); 5608 UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, 5609 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 5610 } 5611 } 5612 5613 if (!CLASSD(dst)) { 5614 ipha->ipha_fragment_offset_and_flags |= 5615 (uint32_t)htons(ire->ire_frag_flag); 5616 } 5617 5618 /* Calculate IP header checksum if hardware isn't capable */ 5619 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 5620 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 5621 ((uint16_t *)ipha)[4]); 5622 } 5623 5624 if (CLASSD(dst)) { 5625 boolean_t ilm_exists; 5626 5627 ILM_WALKER_HOLD(ill); 5628 ilm_exists = (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL); 5629 ILM_WALKER_RELE(ill); 5630 if (ilm_exists) { 5631 ip_multicast_loopback(q, ill, mp, 5632 connp->conn_multicast_loop ? 
0 : 5633 IP_FF_NO_MCAST_LOOP, zoneid); 5634 } 5635 5636 /* If multicast TTL is 0 then we are done */ 5637 if (ipha->ipha_ttl == 0) { 5638 freemsg(mp); 5639 ire_refrele(ire); 5640 return; 5641 } 5642 ll_multicast = B_TRUE; 5643 } 5644 5645 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 5646 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 5647 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 5648 5649 UPDATE_OB_PKT_COUNT(ire); 5650 ire->ire_last_used_time = lbolt; 5651 5652 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); 5653 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 5654 ntohs(ipha->ipha_length)); 5655 5656 DTRACE_PROBE4(ip4__physical__out__start, 5657 ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); 5658 FW_HOOKS(ipst->ips_ip4_physical_out_event, 5659 ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp, 5660 ll_multicast, ipst); 5661 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 5662 if (ipst->ips_ipobs_enabled && mp != NULL) { 5663 zoneid_t szone; 5664 5665 szone = ip_get_zoneid_v4(ipha->ipha_src, mp, 5666 ipst, ALL_ZONES); 5667 ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, 5668 ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst); 5669 } 5670 5671 if (mp != NULL) { 5672 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 5673 void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill, 5674 ipha_t *, ipha, ip6_t *, NULL, int, 0); 5675 5676 if (ILL_DIRECT_CAPABLE(ill)) { 5677 ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct; 5678 5679 (void) idd->idd_tx_df(idd->idd_tx_dh, mp, 5680 (uintptr_t)connp, 0); 5681 } else { 5682 putnext(ire->ire_stq, mp); 5683 } 5684 } 5685 IRE_REFRELE(ire); 5686 } 5687 5688 static boolean_t 5689 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst, 5690 boolean_t *update_lastdst) 5691 { 5692 udp_t *udp = Q_TO_UDP(wq); 5693 int err; 5694 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 5695 udp_stack_t *us = udp->udp_us; 5696 5697 err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred), 5698 dst, opt_storage, udp->udp_connp->conn_mac_exempt, 5699 us->us_netstack->netstack_ip); 5700 if (err == 0) { 5701 err = tsol_update_sticky(&udp->udp_sticky_ipp, 5702 &udp->udp_label_len_v6, opt_storage); 5703 } 5704 if (err != 0) { 5705 DTRACE_PROBE4( 5706 tx__ip__log__drop__updatelabel__udp6, 5707 char *, "queue(1) failed to update options(2) on mp(3)", 5708 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5709 } else { 5710 *update_lastdst = B_TRUE; 5711 } 5712 return (err); 5713 } 5714 5715 static int 5716 udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr, 5717 pid_t pid) 5718 { 5719 udp_t *udp = connp->conn_udp; 5720 udp_stack_t *us = udp->udp_us; 5721 ipaddr_t v4dst; 5722 in_port_t dstport; 5723 boolean_t mapped_addr; 5724 struct sockaddr_storage ss; 5725 sin_t *sin; 5726 sin6_t *sin6; 5727 struct sockaddr *addr; 5728 socklen_t addrlen; 5729 int error; 5730 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5731 5732 /* M_DATA for connected socket */ 5733 5734 ASSERT(udp->udp_issocket || IPCL_IS_NONSTR(connp)); 5735 UDP_DBGSTAT(us, udp_data_conn); 5736 5737 mutex_enter(&connp->conn_lock); 5738 if (udp->udp_state != TS_DATA_XFER) { 5739 mutex_exit(&connp->conn_lock); 5740 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5741 UDP_STAT(us, udp_out_err_notconn); 5742 freemsg(mp); 5743 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5744 "udp_wput_end: connp %p (%S)", connp, 5745 "not-connected; address required"); 5746 return (EDESTADDRREQ); 5747 } 5748 5749 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 5750 if (mapped_addr) 5751 
IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 5752 5753 /* Initialize addr and addrlen as if they're passed in */ 5754 if (udp->udp_family == AF_INET) { 5755 sin = (sin_t *)&ss; 5756 sin->sin_family = AF_INET; 5757 dstport = sin->sin_port = udp->udp_dstport; 5758 ASSERT(mapped_addr); 5759 sin->sin_addr.s_addr = v4dst; 5760 addr = (struct sockaddr *)sin; 5761 addrlen = sizeof (*sin); 5762 } else { 5763 sin6 = (sin6_t *)&ss; 5764 sin6->sin6_family = AF_INET6; 5765 dstport = sin6->sin6_port = udp->udp_dstport; 5766 sin6->sin6_flowinfo = udp->udp_flowinfo; 5767 sin6->sin6_addr = udp->udp_v6dst; 5768 sin6->sin6_scope_id = 0; 5769 sin6->__sin6_src_id = 0; 5770 addr = (struct sockaddr *)sin6; 5771 addrlen = sizeof (*sin6); 5772 } 5773 mutex_exit(&connp->conn_lock); 5774 5775 if (mapped_addr) { 5776 /* 5777 * Handle both AF_INET and AF_INET6; the latter 5778 * for IPV4 mapped destination addresses. Note 5779 * here that both addr and addrlen point to the 5780 * corresponding struct depending on the address 5781 * family of the socket. 5782 */ 5783 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 5784 insert_spi, msg, cr, pid); 5785 } else { 5786 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid); 5787 } 5788 if (error == 0) { 5789 ASSERT(mp == NULL); 5790 return (0); 5791 } 5792 5793 UDP_STAT(us, udp_out_err_output); 5794 ASSERT(mp != NULL); 5795 if (IPCL_IS_NONSTR(connp)) { 5796 freemsg(mp); 5797 return (error); 5798 } else { 5799 /* mp is freed by the following routine */ 5800 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, 5801 (t_scalar_t)addrlen, (t_scalar_t)error); 5802 return (0); 5803 } 5804 } 5805 5806 /* ARGSUSED */ 5807 static int 5808 udp_send_not_connected(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 5809 socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid) 5810 { 5811 5812 udp_t *udp = connp->conn_udp; 5813 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5814 int error = 0; 5815 sin6_t *sin6; 5816 sin_t *sin; 5817 uint_t srcid; 5818 uint16_t port; 5819 ipaddr_t v4dst; 5820 5821 5822 ASSERT(addr != NULL); 5823 5824 switch (udp->udp_family) { 5825 case AF_INET6: 5826 sin6 = (sin6_t *)addr; 5827 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 5828 /* 5829 * Destination is a non-IPv4-compatible IPv6 address. 5830 * Send out an IPv6 format packet. 5831 */ 5832 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, 5833 pid); 5834 if (error != 0) 5835 goto ud_error; 5836 5837 return (0); 5838 } 5839 /* 5840 * If the local address is not zero or a mapped address 5841 * return an error. It would be possible to send an IPv4 5842 * packet but the response would never make it back to the 5843 * application since it is bound to a non-mapped address. 
5844 */ 5845 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 5846 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 5847 error = EADDRNOTAVAIL; 5848 goto ud_error; 5849 } 5850 /* Send IPv4 packet without modifying udp_ipversion */ 5851 /* Extract port and ipaddr */ 5852 port = sin6->sin6_port; 5853 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 5854 srcid = sin6->__sin6_src_id; 5855 break; 5856 5857 case AF_INET: 5858 sin = (sin_t *)addr; 5859 /* Extract port and ipaddr */ 5860 port = sin->sin_port; 5861 v4dst = sin->sin_addr.s_addr; 5862 srcid = 0; 5863 break; 5864 } 5865 5866 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi, 5867 msg, cr, pid); 5868 5869 if (error == 0) { 5870 ASSERT(mp == NULL); 5871 return (0); 5872 } 5873 5874 ud_error: 5875 ASSERT(mp != NULL); 5876 5877 return (error); 5878 } 5879 5880 /* 5881 * This routine handles all messages passed downstream. It either 5882 * consumes the message or passes it downstream; it never queues a 5883 * a message. 5884 * 5885 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 5886 * is valid when we are directly beneath the stream head, and thus sockfs 5887 * is able to bypass STREAMS and directly call us, passing along the sockaddr 5888 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 5889 * connected endpoints. 5890 */ 5891 void 5892 udp_wput(queue_t *q, mblk_t *mp) 5893 { 5894 conn_t *connp = Q_TO_CONN(q); 5895 udp_t *udp = connp->conn_udp; 5896 int error = 0; 5897 struct sockaddr *addr; 5898 socklen_t addrlen; 5899 udp_stack_t *us = udp->udp_us; 5900 5901 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 5902 "udp_wput_start: queue %p mp %p", q, mp); 5903 5904 /* 5905 * We directly handle several cases here: T_UNITDATA_REQ message 5906 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 5907 * socket. 5908 */ 5909 switch (DB_TYPE(mp)) { 5910 case M_DATA: 5911 /* 5912 * Quick check for error cases. Checks will be done again 5913 * under the lock later on 5914 */ 5915 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 5916 /* Not connected; address is required */ 5917 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5918 UDP_STAT(us, udp_out_err_notconn); 5919 freemsg(mp); 5920 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5921 "udp_wput_end: connp %p (%S)", connp, 5922 "not-connected; address required"); 5923 return; 5924 } 5925 (void) udp_send_connected(connp, mp, NULL, NULL, -1); 5926 return; 5927 5928 case M_PROTO: 5929 case M_PCPROTO: { 5930 struct T_unitdata_req *tudr; 5931 5932 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 5933 tudr = (struct T_unitdata_req *)mp->b_rptr; 5934 5935 /* Handle valid T_UNITDATA_REQ here */ 5936 if (MBLKL(mp) >= sizeof (*tudr) && 5937 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 5938 if (mp->b_cont == NULL) { 5939 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5940 "udp_wput_end: q %p (%S)", q, "badaddr"); 5941 error = EPROTO; 5942 goto ud_error; 5943 } 5944 5945 if (!MBLKIN(mp, 0, tudr->DEST_offset + 5946 tudr->DEST_length)) { 5947 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5948 "udp_wput_end: q %p (%S)", q, "badaddr"); 5949 error = EADDRNOTAVAIL; 5950 goto ud_error; 5951 } 5952 /* 5953 * If a port has not been bound to the stream, fail. 5954 * This is not a problem when sockfs is directly 5955 * above us, because it will ensure that the socket 5956 * is first bound before allowing data to be sent. 
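 *
 * For reference, once these checks pass the destination is simply the
 * DEST_length bytes starting at DEST_offset within the same mblk; a
 * sketch of the extraction performed below (the helper name tudr_dest
 * is illustrative):
 *
 *	static struct sockaddr *
 *	tudr_dest(mblk_t *mp, socklen_t *lenp)
 *	{
 *		struct T_unitdata_req *tudr =
 *		    (struct T_unitdata_req *)mp->b_rptr;
 *
 *		*lenp = tudr->DEST_length;
 *		return ((struct sockaddr *)&mp->b_rptr[tudr->DEST_offset]);
 *	}
 *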
5957 */ 5958 if (udp->udp_state == TS_UNBND) { 5959 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5960 "udp_wput_end: q %p (%S)", q, "outstate"); 5961 error = EPROTO; 5962 goto ud_error; 5963 } 5964 addr = (struct sockaddr *) 5965 &mp->b_rptr[tudr->DEST_offset]; 5966 addrlen = tudr->DEST_length; 5967 if (tudr->OPT_length != 0) 5968 UDP_STAT(us, udp_out_opt); 5969 break; 5970 } 5971 /* FALLTHRU */ 5972 } 5973 default: 5974 udp_wput_other(q, mp); 5975 return; 5976 } 5977 ASSERT(addr != NULL); 5978 5979 error = udp_send_not_connected(connp, mp, addr, addrlen, NULL, NULL, 5980 -1); 5981 if (error != 0) { 5982 ud_error: 5983 UDP_STAT(us, udp_out_err_output); 5984 ASSERT(mp != NULL); 5985 /* mp is freed by the following routine */ 5986 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 5987 (t_scalar_t)error); 5988 } 5989 } 5990 5991 /* ARGSUSED */ 5992 static void 5993 udp_wput_fallback(queue_t *wq, mblk_t *mp) 5994 { 5995 #ifdef DEBUG 5996 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 5997 #endif 5998 freemsg(mp); 5999 } 6000 6001 6002 /* 6003 * udp_output_v6(): 6004 * Assumes that udp_wput did some sanity checking on the destination 6005 * address. 6006 */ 6007 static mblk_t * 6008 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, 6009 struct nmsghdr *msg, cred_t *cr, pid_t pid) 6010 { 6011 ip6_t *ip6h; 6012 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6013 mblk_t *mp1 = mp; 6014 mblk_t *mp2; 6015 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6016 size_t ip_len; 6017 udpha_t *udph; 6018 udp_t *udp = connp->conn_udp; 6019 udp_stack_t *us = udp->udp_us; 6020 queue_t *q = connp->conn_wq; 6021 ip6_pkt_t ipp_s; /* For ancillary data options */ 6022 ip6_pkt_t *ipp = &ipp_s; 6023 ip6_pkt_t *tipp; /* temporary ipp */ 6024 uint32_t csum = 0; 6025 uint_t ignore = 0; 6026 uint_t option_exists = 0, is_sticky = 0; 6027 uint8_t *cp; 6028 uint8_t *nxthdr_ptr; 6029 in6_addr_t ip6_dst; 6030 in_port_t port; 6031 udpattrs_t attrs; 6032 boolean_t opt_present; 6033 ip6_hbh_t *hopoptsptr = NULL; 6034 uint_t hopoptslen = 0; 6035 boolean_t is_ancillary = B_FALSE; 6036 size_t sth_wroff = 0; 6037 ire_t *ire; 6038 boolean_t update_lastdst = B_FALSE; 6039 6040 *error = 0; 6041 6042 /* 6043 * If the local address is a mapped address return 6044 * an error. 6045 * It would be possible to send an IPv6 packet but the 6046 * response would never make it back to the application 6047 * since it is bound to a mapped address. 
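 *
 * From the application's point of view this is an AF_INET6 socket that
 * was bound to an IPv4-mapped address and then sends to a plain IPv6
 * destination.  A hedged userland illustration (addresses are examples
 * only and assume the mapped address is configured on the host):
 *
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *	struct sockaddr_in6 src, dst;
 *
 *	bzero(&src, sizeof (src));
 *	src.sin6_family = AF_INET6;
 *	(void) inet_pton(AF_INET6, "::ffff:192.0.2.1", &src.sin6_addr);
 *	(void) bind(fd, (struct sockaddr *)&src, sizeof (src));
 *
 *	bzero(&dst, sizeof (dst));
 *	dst.sin6_family = AF_INET6;
 *	dst.sin6_port = htons(9);
 *	(void) inet_pton(AF_INET6, "2001:db8::1", &dst.sin6_addr);
 *
 * A subsequent sendto() towards dst would typically fail with
 * EADDRNOTAVAIL because of the check below.
 *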
6048 */ 6049 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6050 *error = EADDRNOTAVAIL; 6051 goto done; 6052 } 6053 6054 ipp->ipp_fields = 0; 6055 ipp->ipp_sticky_ignored = 0; 6056 6057 /* 6058 * If TPI options passed in, feed it for verification and handling 6059 */ 6060 attrs.udpattr_credset = B_FALSE; 6061 opt_present = B_FALSE; 6062 if (IPCL_IS_NONSTR(connp)) { 6063 if (msg->msg_controllen != 0) { 6064 attrs.udpattr_ipp6 = ipp; 6065 attrs.udpattr_mb = mp; 6066 6067 rw_enter(&udp->udp_rwlock, RW_WRITER); 6068 *error = process_auxiliary_options(connp, 6069 msg->msg_control, msg->msg_controllen, 6070 &attrs, &udp_opt_obj, udp_opt_set); 6071 rw_exit(&udp->udp_rwlock); 6072 if (*error) 6073 goto done; 6074 ASSERT(*error == 0); 6075 opt_present = B_TRUE; 6076 } 6077 } else { 6078 if (DB_TYPE(mp) != M_DATA) { 6079 mp1 = mp->b_cont; 6080 if (((struct T_unitdata_req *) 6081 mp->b_rptr)->OPT_length != 0) { 6082 attrs.udpattr_ipp6 = ipp; 6083 attrs.udpattr_mb = mp; 6084 if (udp_unitdata_opt_process(q, mp, error, 6085 &attrs) < 0) { 6086 goto done; 6087 } 6088 ASSERT(*error == 0); 6089 opt_present = B_TRUE; 6090 } 6091 } 6092 } 6093 6094 /* 6095 * Determine whether we need to mark the mblk with the user's 6096 * credentials. 6097 */ 6098 ire = connp->conn_ire_cache; 6099 if (is_system_labeled() || IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || 6100 (ire == NULL) || 6101 (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) || 6102 (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) { 6103 if (cr != NULL && DB_CRED(mp) == NULL) 6104 msg_setcredpid(mp, cr, pid); 6105 } 6106 6107 rw_enter(&udp->udp_rwlock, RW_READER); 6108 ignore = ipp->ipp_sticky_ignored; 6109 6110 /* mp1 points to the M_DATA mblk carrying the packet */ 6111 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6112 6113 if (sin6->sin6_scope_id != 0 && 6114 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6115 /* 6116 * IPPF_SCOPE_ID is special. It's neither a sticky 6117 * option nor ancillary data. It needs to be 6118 * explicitly set in options_exists. 6119 */ 6120 option_exists |= IPPF_SCOPE_ID; 6121 } 6122 6123 /* 6124 * Compute the destination address 6125 */ 6126 ip6_dst = sin6->sin6_addr; 6127 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6128 ip6_dst = ipv6_loopback; 6129 6130 port = sin6->sin6_port; 6131 6132 /* 6133 * Cluster and TSOL notes, Cluster check: 6134 * see comments in udp_output_v4(). 6135 */ 6136 mutex_enter(&connp->conn_lock); 6137 6138 if (cl_inet_connect2 != NULL && 6139 (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) || 6140 port != udp->udp_lastdstport)) { 6141 mutex_exit(&connp->conn_lock); 6142 *error = 0; 6143 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error); 6144 if (*error != 0) { 6145 *error = EHOSTUNREACH; 6146 rw_exit(&udp->udp_rwlock); 6147 goto done; 6148 } 6149 update_lastdst = B_TRUE; 6150 mutex_enter(&connp->conn_lock); 6151 } 6152 6153 /* 6154 * If we're not going to the same destination as last time, then 6155 * recompute the label required. This is done in a separate routine to 6156 * avoid blowing up our stack here. 6157 * 6158 * TSOL Note: Since we are not in WRITER mode, UDP packets 6159 * to different destination may require different labels, 6160 * or worse, UDP packets to same IP address may require 6161 * different labels due to use of shared all-zones address. 6162 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6163 * and sticky ipp_hopoptslen are consistent for the current 6164 * destination and are updated atomically. 
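 *
 * On a labeled system the expensive work is therefore gated on a
 * comparison with the cached last destination, roughly (sketch only;
 * the MLP and ancillary-option cases are handled as in the code
 * below):
 *
 *	mutex_enter(&connp->conn_lock);
 *	if (!IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) ||
 *	    udp->udp_lastdstport != port) {
 *		*error = udp_update_label_v6(q, mp, &ip6_dst,
 *		    &update_lastdst);
 *	}
 *	if (update_lastdst) {
 *		udp->udp_v6lastdst = ip6_dst;
 *		udp->udp_lastdstport = port;
 *	}
 *	mutex_exit(&connp->conn_lock);
 *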
6165 */ 6166 if (is_system_labeled()) { 6167 /* Using UDP MLP requires SCM_UCRED from user */ 6168 if (connp->conn_mlp_type != mlptSingle && 6169 !attrs.udpattr_credset) { 6170 DTRACE_PROBE4( 6171 tx__ip__log__info__output__udp6, 6172 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6173 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6174 *error = ECONNREFUSED; 6175 rw_exit(&udp->udp_rwlock); 6176 mutex_exit(&connp->conn_lock); 6177 goto done; 6178 } 6179 /* 6180 * update label option for this UDP socket if 6181 * - the destination has changed, or 6182 * - the UDP socket is MLP 6183 */ 6184 if ((opt_present || 6185 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6186 connp->conn_mlp_type != mlptSingle) && 6187 (*error = udp_update_label_v6(q, mp, &ip6_dst, 6188 &update_lastdst)) != 0) { 6189 rw_exit(&udp->udp_rwlock); 6190 mutex_exit(&connp->conn_lock); 6191 goto done; 6192 } 6193 } 6194 6195 if (update_lastdst) { 6196 udp->udp_v6lastdst = ip6_dst; 6197 udp->udp_lastdstport = port; 6198 } 6199 6200 /* 6201 * If there's a security label here, then we ignore any options the 6202 * user may try to set. We keep the peer's label as a hidden sticky 6203 * option. We make a private copy of this label before releasing the 6204 * lock so that label is kept consistent with the destination addr. 6205 */ 6206 if (udp->udp_label_len_v6 > 0) { 6207 ignore &= ~IPPF_HOPOPTS; 6208 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6209 } 6210 6211 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6212 /* No sticky options nor ancillary data. */ 6213 mutex_exit(&connp->conn_lock); 6214 goto no_options; 6215 } 6216 6217 /* 6218 * Go through the options figuring out where each is going to 6219 * come from and build two masks. The first mask indicates if 6220 * the option exists at all. The second mask indicates if the 6221 * option is sticky or ancillary. 
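 *
 * Every option follows the same pattern; for a hypothetical option
 * IPPF_FOO contributing foolen bytes of extension header the shape is
 * (sketch only, IPPF_FOO and ipp_foolen do not exist in this file):
 *
 *	if (!(ignore & IPPF_FOO)) {
 *		if (ipp->ipp_fields & IPPF_FOO) {
 *			option_exists |= IPPF_FOO;
 *			udp_ip_hdr_len += ipp->ipp_foolen;
 *		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_FOO) {
 *			option_exists |= IPPF_FOO;
 *			is_sticky |= IPPF_FOO;
 *			udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_foolen;
 *		}
 *	}
 *
 * so that ANCIL_OR_STICKY_PTR() can later pick the right ip6_pkt_t to
 * copy the option data from.
 *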
6222 */ 6223 if (!(ignore & IPPF_HOPOPTS)) { 6224 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6225 option_exists |= IPPF_HOPOPTS; 6226 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6227 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6228 option_exists |= IPPF_HOPOPTS; 6229 is_sticky |= IPPF_HOPOPTS; 6230 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6231 hopoptsptr = kmem_alloc( 6232 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6233 if (hopoptsptr == NULL) { 6234 *error = ENOMEM; 6235 mutex_exit(&connp->conn_lock); 6236 goto done; 6237 } 6238 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6239 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6240 hopoptslen); 6241 udp_ip_hdr_len += hopoptslen; 6242 } 6243 } 6244 mutex_exit(&connp->conn_lock); 6245 6246 if (!(ignore & IPPF_RTHDR)) { 6247 if (ipp->ipp_fields & IPPF_RTHDR) { 6248 option_exists |= IPPF_RTHDR; 6249 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6250 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6251 option_exists |= IPPF_RTHDR; 6252 is_sticky |= IPPF_RTHDR; 6253 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6254 } 6255 } 6256 6257 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6258 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6259 option_exists |= IPPF_RTDSTOPTS; 6260 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6261 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6262 option_exists |= IPPF_RTDSTOPTS; 6263 is_sticky |= IPPF_RTDSTOPTS; 6264 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6265 } 6266 } 6267 6268 if (!(ignore & IPPF_DSTOPTS)) { 6269 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6270 option_exists |= IPPF_DSTOPTS; 6271 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6272 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6273 option_exists |= IPPF_DSTOPTS; 6274 is_sticky |= IPPF_DSTOPTS; 6275 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6276 } 6277 } 6278 6279 if (!(ignore & IPPF_IFINDEX)) { 6280 if (ipp->ipp_fields & IPPF_IFINDEX) { 6281 option_exists |= IPPF_IFINDEX; 6282 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6283 option_exists |= IPPF_IFINDEX; 6284 is_sticky |= IPPF_IFINDEX; 6285 } 6286 } 6287 6288 if (!(ignore & IPPF_ADDR)) { 6289 if (ipp->ipp_fields & IPPF_ADDR) { 6290 option_exists |= IPPF_ADDR; 6291 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6292 option_exists |= IPPF_ADDR; 6293 is_sticky |= IPPF_ADDR; 6294 } 6295 } 6296 6297 if (!(ignore & IPPF_DONTFRAG)) { 6298 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6299 option_exists |= IPPF_DONTFRAG; 6300 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6301 option_exists |= IPPF_DONTFRAG; 6302 is_sticky |= IPPF_DONTFRAG; 6303 } 6304 } 6305 6306 if (!(ignore & IPPF_USE_MIN_MTU)) { 6307 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6308 option_exists |= IPPF_USE_MIN_MTU; 6309 } else if (udp->udp_sticky_ipp.ipp_fields & 6310 IPPF_USE_MIN_MTU) { 6311 option_exists |= IPPF_USE_MIN_MTU; 6312 is_sticky |= IPPF_USE_MIN_MTU; 6313 } 6314 } 6315 6316 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6317 option_exists |= IPPF_HOPLIMIT; 6318 /* IPV6_HOPLIMIT can never be sticky */ 6319 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6320 6321 if (!(ignore & IPPF_UNICAST_HOPS) && 6322 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6323 option_exists |= IPPF_UNICAST_HOPS; 6324 is_sticky |= IPPF_UNICAST_HOPS; 6325 } 6326 6327 if (!(ignore & IPPF_MULTICAST_HOPS) && 6328 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6329 option_exists |= 
IPPF_MULTICAST_HOPS; 6330 is_sticky |= IPPF_MULTICAST_HOPS; 6331 } 6332 6333 if (!(ignore & IPPF_TCLASS)) { 6334 if (ipp->ipp_fields & IPPF_TCLASS) { 6335 option_exists |= IPPF_TCLASS; 6336 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6337 option_exists |= IPPF_TCLASS; 6338 is_sticky |= IPPF_TCLASS; 6339 } 6340 } 6341 6342 if (!(ignore & IPPF_NEXTHOP) && 6343 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 6344 option_exists |= IPPF_NEXTHOP; 6345 is_sticky |= IPPF_NEXTHOP; 6346 } 6347 6348 no_options: 6349 6350 /* 6351 * If any options carried in the ip6i_t were specified, we 6352 * need to account for the ip6i_t in the data we'll be sending 6353 * down. 6354 */ 6355 if (option_exists & IPPF_HAS_IP6I) 6356 udp_ip_hdr_len += sizeof (ip6i_t); 6357 6358 /* check/fix buffer config, setup pointers into it */ 6359 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6360 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6361 !OK_32PTR(ip6h)) { 6362 6363 /* Try to get everything in a single mblk next time */ 6364 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6365 udp->udp_max_hdr_len = udp_ip_hdr_len; 6366 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 6367 } 6368 6369 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 6370 if (mp2 == NULL) { 6371 *error = ENOMEM; 6372 rw_exit(&udp->udp_rwlock); 6373 goto done; 6374 } 6375 mp2->b_wptr = DB_LIM(mp2); 6376 mp2->b_cont = mp1; 6377 mp1 = mp2; 6378 if (DB_TYPE(mp) != M_DATA) 6379 mp->b_cont = mp1; 6380 else 6381 mp = mp1; 6382 6383 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6384 } 6385 mp1->b_rptr = (unsigned char *)ip6h; 6386 ip6i = (ip6i_t *)ip6h; 6387 6388 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp) 6389 if (option_exists & IPPF_HAS_IP6I) { 6390 ip6h = (ip6_t *)&ip6i[1]; 6391 ip6i->ip6i_flags = 0; 6392 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6393 6394 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6395 if (option_exists & IPPF_SCOPE_ID) { 6396 ip6i->ip6i_flags |= IP6I_IFINDEX; 6397 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6398 } else if (option_exists & IPPF_IFINDEX) { 6399 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6400 ASSERT(tipp->ipp_ifindex != 0); 6401 ip6i->ip6i_flags |= IP6I_IFINDEX; 6402 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6403 } 6404 6405 if (option_exists & IPPF_ADDR) { 6406 /* 6407 * Enable per-packet source address verification if 6408 * IPV6_PKTINFO specified the source address. 6409 * ip6_src is set in the transport's _wput function. 6410 */ 6411 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6412 } 6413 6414 if (option_exists & IPPF_DONTFRAG) { 6415 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6416 } 6417 6418 if (option_exists & IPPF_USE_MIN_MTU) { 6419 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6420 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6421 } 6422 6423 if (option_exists & IPPF_NEXTHOP) { 6424 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6425 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6426 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6427 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6428 } 6429 6430 /* 6431 * tell IP this is an ip6i_t private header 6432 */ 6433 ip6i->ip6i_nxt = IPPROTO_RAW; 6434 } 6435 6436 /* Initialize IPv6 header */ 6437 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6438 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6439 6440 /* Set the hoplimit of the outgoing packet. */ 6441 if (option_exists & IPPF_HOPLIMIT) { 6442 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
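 * The resulting precedence is: ancillary IPV6_HOPLIMIT first, then the
 * multicast or unicast default kept in the udp_t.  A condensed sketch
 * of the selection that follows (hops is just an illustrative local):
 *
 *	uint8_t hops;
 *
 *	if (option_exists & IPPF_HOPLIMIT)
 *		hops = ipp->ipp_hoplimit;
 *	else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
 *		hops = udp->udp_multicast_ttl;
 *	else
 *		hops = udp->udp_ttl;
 *	ip6h->ip6_hops = hops;
 *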
*/ 6443 ip6h->ip6_hops = ipp->ipp_hoplimit; 6444 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6445 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6446 ip6h->ip6_hops = udp->udp_multicast_ttl; 6447 if (option_exists & IPPF_MULTICAST_HOPS) 6448 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6449 } else { 6450 ip6h->ip6_hops = udp->udp_ttl; 6451 if (option_exists & IPPF_UNICAST_HOPS) 6452 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6453 } 6454 6455 if (option_exists & IPPF_ADDR) { 6456 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6457 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6458 ip6h->ip6_src = tipp->ipp_addr; 6459 } else { 6460 /* 6461 * The source address was not set using IPV6_PKTINFO. 6462 * First look at the bound source. 6463 * If unspecified fallback to __sin6_src_id. 6464 */ 6465 ip6h->ip6_src = udp->udp_v6src; 6466 if (sin6->__sin6_src_id != 0 && 6467 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6468 ip_srcid_find_id(sin6->__sin6_src_id, 6469 &ip6h->ip6_src, connp->conn_zoneid, 6470 us->us_netstack); 6471 } 6472 } 6473 6474 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6475 cp = (uint8_t *)&ip6h[1]; 6476 6477 /* 6478 * Here's where we have to start stringing together 6479 * any extension headers in the right order: 6480 * Hop-by-hop, destination, routing, and final destination opts. 6481 */ 6482 if (option_exists & IPPF_HOPOPTS) { 6483 /* Hop-by-hop options */ 6484 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6485 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6486 if (hopoptslen == 0) { 6487 hopoptsptr = tipp->ipp_hopopts; 6488 hopoptslen = tipp->ipp_hopoptslen; 6489 is_ancillary = B_TRUE; 6490 } 6491 6492 *nxthdr_ptr = IPPROTO_HOPOPTS; 6493 nxthdr_ptr = &hbh->ip6h_nxt; 6494 6495 bcopy(hopoptsptr, cp, hopoptslen); 6496 cp += hopoptslen; 6497 6498 if (hopoptsptr != NULL && !is_ancillary) { 6499 kmem_free(hopoptsptr, hopoptslen); 6500 hopoptsptr = NULL; 6501 hopoptslen = 0; 6502 } 6503 } 6504 /* 6505 * En-route destination options 6506 * Only do them if there's a routing header as well 6507 */ 6508 if (option_exists & IPPF_RTDSTOPTS) { 6509 ip6_dest_t *dst = (ip6_dest_t *)cp; 6510 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6511 6512 *nxthdr_ptr = IPPROTO_DSTOPTS; 6513 nxthdr_ptr = &dst->ip6d_nxt; 6514 6515 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6516 cp += tipp->ipp_rtdstoptslen; 6517 } 6518 /* 6519 * Routing header next 6520 */ 6521 if (option_exists & IPPF_RTHDR) { 6522 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6523 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6524 6525 *nxthdr_ptr = IPPROTO_ROUTING; 6526 nxthdr_ptr = &rt->ip6r_nxt; 6527 6528 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6529 cp += tipp->ipp_rthdrlen; 6530 } 6531 /* 6532 * Do ultimate destination options 6533 */ 6534 if (option_exists & IPPF_DSTOPTS) { 6535 ip6_dest_t *dest = (ip6_dest_t *)cp; 6536 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6537 6538 *nxthdr_ptr = IPPROTO_DSTOPTS; 6539 nxthdr_ptr = &dest->ip6d_nxt; 6540 6541 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6542 cp += tipp->ipp_dstoptslen; 6543 } 6544 /* 6545 * Now set the last header pointer to the proto passed in 6546 */ 6547 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6548 *nxthdr_ptr = IPPROTO_UDP; 6549 6550 /* Update UDP header */ 6551 udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); 6552 udph->uha_dst_port = sin6->sin6_port; 6553 udph->uha_src_port = udp->udp_port; 6554 6555 /* 6556 * Copy in the destination address 6557 */ 6558 ip6h->ip6_dst = ip6_dst; 6559 6560 ip6h->ip6_vcf = 6561 (IPV6_DEFAULT_VERS_AND_FLOW & 
IPV6_VERS_AND_FLOW_MASK) | 6562 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6563 6564 if (option_exists & IPPF_TCLASS) { 6565 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6566 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6567 tipp->ipp_tclass); 6568 } 6569 rw_exit(&udp->udp_rwlock); 6570 6571 if (option_exists & IPPF_RTHDR) { 6572 ip6_rthdr_t *rth; 6573 6574 /* 6575 * Perform any processing needed for source routing. 6576 * We know that all extension headers will be in the same mblk 6577 * as the IPv6 header. 6578 */ 6579 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6580 if (rth != NULL && rth->ip6r_segleft != 0) { 6581 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6582 /* 6583 * Drop packet - only support Type 0 routing. 6584 * Notify the application as well. 6585 */ 6586 *error = EPROTO; 6587 goto done; 6588 } 6589 6590 /* 6591 * rth->ip6r_len is twice the number of 6592 * addresses in the header. Thus it must be even. 6593 */ 6594 if (rth->ip6r_len & 0x1) { 6595 *error = EPROTO; 6596 goto done; 6597 } 6598 /* 6599 * Shuffle the routing header and ip6_dst 6600 * addresses, and get the checksum difference 6601 * between the first hop (in ip6_dst) and 6602 * the destination (in the last routing hdr entry). 6603 */ 6604 csum = ip_massage_options_v6(ip6h, rth, 6605 us->us_netstack); 6606 /* 6607 * Verify that the first hop isn't a mapped address. 6608 * Routers along the path need to do this verification 6609 * for subsequent hops. 6610 */ 6611 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6612 *error = EADDRNOTAVAIL; 6613 goto done; 6614 } 6615 6616 cp += (rth->ip6r_len + 1)*8; 6617 } 6618 } 6619 6620 /* count up length of UDP packet */ 6621 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6622 if ((mp2 = mp1->b_cont) != NULL) { 6623 do { 6624 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6625 ip_len += (uint32_t)MBLKL(mp2); 6626 } while ((mp2 = mp2->b_cont) != NULL); 6627 } 6628 6629 /* 6630 * If the size of the packet is greater than the maximum allowed by 6631 * ip, return an error. Passing this down could cause panics because 6632 * the size will have wrapped and be inconsistent with the msg size. 6633 */ 6634 if (ip_len > IP_MAXPACKET) { 6635 *error = EMSGSIZE; 6636 goto done; 6637 } 6638 6639 /* Store the UDP length. Subtract length of extension hdrs */ 6640 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 6641 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 6642 6643 /* 6644 * We make it easy for IP to include our pseudo header 6645 * by putting our length in uh_checksum, modified (if 6646 * we have a routing header) by the checksum difference 6647 * between the ultimate destination and first hop addresses. 6648 * Note: UDP over IPv6 must always checksum the packet. 6649 */ 6650 csum += udph->uha_length; 6651 csum = (csum & 0xFFFF) + (csum >> 16); 6652 udph->uha_checksum = (uint16_t)csum; 6653 6654 #ifdef _LITTLE_ENDIAN 6655 ip_len = htons(ip_len); 6656 #endif 6657 ip6h->ip6_plen = ip_len; 6658 if (DB_CRED(mp) != NULL) 6659 mblk_setcred(mp1, DB_CRED(mp)); 6660 6661 if (DB_TYPE(mp) != M_DATA) { 6662 ASSERT(mp != mp1); 6663 freeb(mp); 6664 } 6665 6666 /* mp has been consumed and we'll return success */ 6667 ASSERT(*error == 0); 6668 mp = NULL; 6669 6670 /* We're done. 
Pass the packet to IP */ 6671 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 6672 ip_output_v6(connp, mp1, q, IP_WPUT); 6673 6674 done: 6675 if (sth_wroff != 0) { 6676 (void) proto_set_tx_wroff(RD(q), connp, 6677 udp->udp_max_hdr_len + us->us_wroff_extra); 6678 } 6679 if (hopoptsptr != NULL && !is_ancillary) { 6680 kmem_free(hopoptsptr, hopoptslen); 6681 hopoptsptr = NULL; 6682 } 6683 if (*error != 0) { 6684 ASSERT(mp != NULL); 6685 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6686 } 6687 return (mp); 6688 } 6689 6690 6691 static int 6692 i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6693 { 6694 sin_t *sin = (sin_t *)sa; 6695 sin6_t *sin6 = (sin6_t *)sa; 6696 6697 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6698 6699 if (udp->udp_state != TS_DATA_XFER) 6700 return (ENOTCONN); 6701 6702 switch (udp->udp_family) { 6703 case AF_INET: 6704 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6705 6706 if (*salenp < sizeof (sin_t)) 6707 return (EINVAL); 6708 6709 *salenp = sizeof (sin_t); 6710 *sin = sin_null; 6711 sin->sin_family = AF_INET; 6712 sin->sin_port = udp->udp_dstport; 6713 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 6714 break; 6715 6716 case AF_INET6: 6717 if (*salenp < sizeof (sin6_t)) 6718 return (EINVAL); 6719 6720 *salenp = sizeof (sin6_t); 6721 *sin6 = sin6_null; 6722 sin6->sin6_family = AF_INET6; 6723 sin6->sin6_port = udp->udp_dstport; 6724 sin6->sin6_addr = udp->udp_v6dst; 6725 sin6->sin6_flowinfo = udp->udp_flowinfo; 6726 break; 6727 } 6728 6729 return (0); 6730 } 6731 6732 static int 6733 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6734 { 6735 sin_t *sin = (sin_t *)sa; 6736 sin6_t *sin6 = (sin6_t *)sa; 6737 6738 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6739 6740 switch (udp->udp_family) { 6741 case AF_INET: 6742 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6743 6744 if (*salenp < sizeof (sin_t)) 6745 return (EINVAL); 6746 6747 *salenp = sizeof (sin_t); 6748 *sin = sin_null; 6749 sin->sin_family = AF_INET; 6750 sin->sin_port = udp->udp_port; 6751 6752 /* 6753 * If udp_v6src is unspecified, we might be bound to broadcast 6754 * / multicast. Use udp_bound_v6src as local address instead 6755 * (that could also still be unspecified). 6756 */ 6757 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 6758 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6759 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 6760 } else { 6761 sin->sin_addr.s_addr = 6762 V4_PART_OF_V6(udp->udp_bound_v6src); 6763 } 6764 break; 6765 6766 case AF_INET6: 6767 if (*salenp < sizeof (sin6_t)) 6768 return (EINVAL); 6769 6770 *salenp = sizeof (sin6_t); 6771 *sin6 = sin6_null; 6772 sin6->sin6_family = AF_INET6; 6773 sin6->sin6_port = udp->udp_port; 6774 sin6->sin6_flowinfo = udp->udp_flowinfo; 6775 6776 /* 6777 * If udp_v6src is unspecified, we might be bound to broadcast 6778 * / multicast. Use udp_bound_v6src as local address instead 6779 * (that could also still be unspecified). 6780 */ 6781 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) 6782 sin6->sin6_addr = udp->udp_v6src; 6783 else 6784 sin6->sin6_addr = udp->udp_bound_v6src; 6785 break; 6786 } 6787 6788 return (0); 6789 } 6790 6791 /* 6792 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
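 *
 * The cmdblk_t travels in the first mblk with the address buffer in
 * b_cont; cb_len is used in both directions (buffer size on entry,
 * address length on return) and cb_error carries the result.  A sketch
 * of the contract as it is used below:
 *
 *	cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;
 *	void *data = mp->b_cont->b_rptr;
 *
 *	cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len);
 *	qreply(q, mp);
 *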
6793 */ 6794 static void 6795 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 6796 { 6797 void *data; 6798 mblk_t *datamp = mp->b_cont; 6799 udp_t *udp = Q_TO_UDP(q); 6800 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 6801 6802 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 6803 cmdp->cb_error = EPROTO; 6804 qreply(q, mp); 6805 return; 6806 } 6807 data = datamp->b_rptr; 6808 6809 rw_enter(&udp->udp_rwlock, RW_READER); 6810 switch (cmdp->cb_cmd) { 6811 case TI_GETPEERNAME: 6812 cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len); 6813 break; 6814 case TI_GETMYNAME: 6815 cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len); 6816 break; 6817 default: 6818 cmdp->cb_error = EINVAL; 6819 break; 6820 } 6821 rw_exit(&udp->udp_rwlock); 6822 6823 qreply(q, mp); 6824 } 6825 6826 static void 6827 udp_disable_direct_sockfs(udp_t *udp) 6828 { 6829 udp->udp_issocket = B_FALSE; 6830 if (udp->udp_direct_sockfs) { 6831 /* 6832 * Disable read-side synchronous stream interface and 6833 * drain any queued data. 6834 */ 6835 udp_rcv_drain(udp->udp_connp->conn_rq, udp, B_FALSE); 6836 ASSERT(!udp->udp_direct_sockfs); 6837 UDP_STAT(udp->udp_us, udp_sock_fallback); 6838 } 6839 } 6840 6841 static void 6842 udp_wput_other(queue_t *q, mblk_t *mp) 6843 { 6844 uchar_t *rptr = mp->b_rptr; 6845 struct datab *db; 6846 struct iocblk *iocp; 6847 cred_t *cr; 6848 conn_t *connp = Q_TO_CONN(q); 6849 udp_t *udp = connp->conn_udp; 6850 udp_stack_t *us; 6851 6852 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 6853 "udp_wput_other_start: q %p", q); 6854 6855 us = udp->udp_us; 6856 db = mp->b_datap; 6857 6858 cr = DB_CREDDEF(mp, connp->conn_cred); 6859 6860 switch (db->db_type) { 6861 case M_CMD: 6862 udp_wput_cmdblk(q, mp); 6863 return; 6864 6865 case M_PROTO: 6866 case M_PCPROTO: 6867 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 6868 freemsg(mp); 6869 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6870 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 6871 return; 6872 } 6873 switch (((t_primp_t)rptr)->type) { 6874 case T_ADDR_REQ: 6875 udp_addr_req(q, mp); 6876 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6877 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 6878 return; 6879 case O_T_BIND_REQ: 6880 case T_BIND_REQ: 6881 udp_tpi_bind(q, mp); 6882 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6883 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 6884 return; 6885 case T_CONN_REQ: 6886 udp_tpi_connect(q, mp); 6887 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6888 "udp_wput_other_end: q %p (%S)", q, "connreq"); 6889 return; 6890 case T_CAPABILITY_REQ: 6891 udp_capability_req(q, mp); 6892 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6893 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 6894 return; 6895 case T_INFO_REQ: 6896 udp_info_req(q, mp); 6897 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6898 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 6899 return; 6900 case T_UNITDATA_REQ: 6901 /* 6902 * If a T_UNITDATA_REQ gets here, the address must 6903 * be bad. Valid T_UNITDATA_REQs are handled 6904 * in udp_wput. 
6905 */ 6906 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 6907 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6908 "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); 6909 return; 6910 case T_UNBIND_REQ: 6911 udp_tpi_unbind(q, mp); 6912 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6913 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 6914 return; 6915 case T_SVR4_OPTMGMT_REQ: 6916 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 6917 cr)) { 6918 (void) svr4_optcom_req(q, 6919 mp, cr, &udp_opt_obj, B_TRUE); 6920 } 6921 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6922 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6923 return; 6924 6925 case T_OPTMGMT_REQ: 6926 (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); 6927 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6928 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6929 return; 6930 6931 case T_DISCON_REQ: 6932 udp_tpi_disconnect(q, mp); 6933 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6934 "udp_wput_other_end: q %p (%S)", q, "disconreq"); 6935 return; 6936 6937 /* The following TPI message is not supported by udp. */ 6938 case O_T_CONN_RES: 6939 case T_CONN_RES: 6940 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6941 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6942 "udp_wput_other_end: q %p (%S)", q, 6943 "connres/disconreq"); 6944 return; 6945 6946 /* The following 3 TPI messages are illegal for udp. */ 6947 case T_DATA_REQ: 6948 case T_EXDATA_REQ: 6949 case T_ORDREL_REQ: 6950 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6951 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6952 "udp_wput_other_end: q %p (%S)", q, 6953 "data/exdata/ordrel"); 6954 return; 6955 default: 6956 break; 6957 } 6958 break; 6959 case M_FLUSH: 6960 if (*rptr & FLUSHW) 6961 flushq(q, FLUSHDATA); 6962 break; 6963 case M_IOCTL: 6964 iocp = (struct iocblk *)mp->b_rptr; 6965 switch (iocp->ioc_cmd) { 6966 case TI_GETPEERNAME: 6967 if (udp->udp_state != TS_DATA_XFER) { 6968 /* 6969 * If a default destination address has not 6970 * been associated with the stream, then we 6971 * don't know the peer's name. 6972 */ 6973 iocp->ioc_error = ENOTCONN; 6974 iocp->ioc_count = 0; 6975 mp->b_datap->db_type = M_IOCACK; 6976 qreply(q, mp); 6977 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6978 "udp_wput_other_end: q %p (%S)", q, 6979 "getpeername"); 6980 return; 6981 } 6982 /* FALLTHRU */ 6983 case TI_GETMYNAME: { 6984 /* 6985 * For TI_GETPEERNAME and TI_GETMYNAME, we first 6986 * need to copyin the user's strbuf structure. 6987 * Processing will continue in the M_IOCDATA case 6988 * below. 6989 */ 6990 mi_copyin(q, mp, NULL, 6991 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 6992 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6993 "udp_wput_other_end: q %p (%S)", q, "getmyname"); 6994 return; 6995 } 6996 case ND_SET: 6997 /* nd_getset performs the necessary checking */ 6998 case ND_GET: 6999 if (nd_getset(q, us->us_nd, mp)) { 7000 qreply(q, mp); 7001 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7002 "udp_wput_other_end: q %p (%S)", q, "get"); 7003 return; 7004 } 7005 break; 7006 case _SIOCSOCKFALLBACK: 7007 /* 7008 * Either sockmod is about to be popped and the 7009 * socket would now be treated as a plain stream, 7010 * or a module is about to be pushed so we could 7011 * no longer use read-side synchronous stream. 7012 * Drain any queued data and disable direct sockfs 7013 * interface from now on. 
7014 */ 7015 if (!udp->udp_issocket) { 7016 DB_TYPE(mp) = M_IOCNAK; 7017 iocp->ioc_error = EINVAL; 7018 } else { 7019 udp_disable_direct_sockfs(udp); 7020 7021 DB_TYPE(mp) = M_IOCACK; 7022 iocp->ioc_error = 0; 7023 } 7024 iocp->ioc_count = 0; 7025 iocp->ioc_rval = 0; 7026 qreply(q, mp); 7027 return; 7028 default: 7029 break; 7030 } 7031 break; 7032 case M_IOCDATA: 7033 udp_wput_iocdata(q, mp); 7034 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7035 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7036 return; 7037 default: 7038 /* Unrecognized messages are passed through without change. */ 7039 break; 7040 } 7041 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7042 "udp_wput_other_end: q %p (%S)", q, "end"); 7043 ip_output(connp, mp, q, IP_WPUT); 7044 } 7045 7046 /* 7047 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7048 * messages. 7049 */ 7050 static void 7051 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7052 { 7053 mblk_t *mp1; 7054 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 7055 STRUCT_HANDLE(strbuf, sb); 7056 udp_t *udp = Q_TO_UDP(q); 7057 int error; 7058 uint_t addrlen; 7059 7060 /* Make sure it is one of ours. */ 7061 switch (iocp->ioc_cmd) { 7062 case TI_GETMYNAME: 7063 case TI_GETPEERNAME: 7064 break; 7065 default: 7066 ip_output(udp->udp_connp, mp, q, IP_WPUT); 7067 return; 7068 } 7069 7070 switch (mi_copy_state(q, mp, &mp1)) { 7071 case -1: 7072 return; 7073 case MI_COPY_CASE(MI_COPY_IN, 1): 7074 break; 7075 case MI_COPY_CASE(MI_COPY_OUT, 1): 7076 /* 7077 * The address has been copied out, so now 7078 * copyout the strbuf. 7079 */ 7080 mi_copyout(q, mp); 7081 return; 7082 case MI_COPY_CASE(MI_COPY_OUT, 2): 7083 /* 7084 * The address and strbuf have been copied out. 7085 * We're done, so just acknowledge the original 7086 * M_IOCTL. 7087 */ 7088 mi_copy_done(q, mp, 0); 7089 return; 7090 default: 7091 /* 7092 * Something strange has happened, so acknowledge 7093 * the original M_IOCTL with an EPROTO error. 7094 */ 7095 mi_copy_done(q, mp, EPROTO); 7096 return; 7097 } 7098 7099 /* 7100 * Now we have the strbuf structure for TI_GETMYNAME 7101 * and TI_GETPEERNAME. Next we copyout the requested 7102 * address and then we'll copyout the strbuf. 7103 */ 7104 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 7105 addrlen = udp->udp_family == AF_INET ? 
	    sizeof (sin_t) : sizeof (sin6_t);
	if (STRUCT_FGET(sb, maxlen) < addrlen) {
		mi_copy_done(q, mp, EINVAL);
		return;
	}

	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);

	if (mp1 == NULL)
		return;

	rw_enter(&udp->udp_rwlock, RW_READER);
	switch (iocp->ioc_cmd) {
	case TI_GETMYNAME:
		error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen);
		break;
	case TI_GETPEERNAME:
		error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen);
		break;
	}
	rw_exit(&udp->udp_rwlock);

	if (error != 0) {
		mi_copy_done(q, mp, error);
	} else {
		mp1->b_wptr += addrlen;
		STRUCT_FSET(sb, len, addrlen);

		/* Copy out the address */
		mi_copyout(q, mp);
	}
}

static int
udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
    udpattrs_t *udpattrs)
{
	struct T_unitdata_req *udreqp;
	int is_absreq_failure;
	cred_t *cr;
	conn_t *connp = Q_TO_CONN(q);

	ASSERT(((t_primp_t)mp->b_rptr)->type);

	cr = DB_CREDDEF(mp, connp->conn_cred);

	udreqp = (struct T_unitdata_req *)mp->b_rptr;

	*errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length,
	    udreqp->OPT_offset, cr, &udp_opt_obj,
	    udpattrs, &is_absreq_failure);

	if (*errorp != 0) {
		/*
		 * Note: No special action needed in this
		 * module for "is_absreq_failure"
		 */
		return (-1);	/* failure */
	}
	ASSERT(is_absreq_failure == 0);
	return (0);	/* success */
}

void
udp_ddi_g_init(void)
{
	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
	    udp_opt_obj.odb_opt_arr_cnt);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of udp_stack_t's.
	 */
	netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
}

void
udp_ddi_g_destroy(void)
{
	netstack_unregister(NS_UDP);
}

#define INET_NAME "ip"

/*
 * Initialize the UDP stack instance.
 */
static void *
udp_stack_init(netstackid_t stackid, netstack_t *ns)
{
	udp_stack_t *us;
	udpparam_t *pa;
	int i;
	int error = 0;
	major_t major;

	us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
	us->us_netstack = ns;

	us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
	us->us_epriv_ports[0] = 2049;
	us->us_epriv_ports[1] = 4045;

	/*
	 * The smallest anonymous port in the privileged port range in which
	 * UDP looks for a free port.  Used by the UDP_ANONPRIVBIND option.
	 */
	us->us_min_anonpriv_port = 512;

	us->us_bind_fanout_size = udp_bind_fanout_size;

	/* Round up the variable, which might have been modified in /etc/system */
	if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) {
		/* Not a power of two.
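 * (For example, a value of 1000 set in /etc/system becomes 1024 here.
 * A stand-alone equivalent of the rounding loop below, assuming a
 * 32-bit size, would be:
 *
 *	uint_t size = us->us_bind_fanout_size;
 *	uint_t i;
 *
 *	for (i = 0; i < 31; i++) {
 *		if (size < (1 << i))
 *			break;
 *	}
 *	size = 1 << i;
 *
 * which rounds any value that is not already a power of two up to the
 * next one.)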
Round up to nearest power of two */ 7220 for (i = 0; i < 31; i++) { 7221 if (us->us_bind_fanout_size < (1 << i)) 7222 break; 7223 } 7224 us->us_bind_fanout_size = 1 << i; 7225 } 7226 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 7227 sizeof (udp_fanout_t), KM_SLEEP); 7228 for (i = 0; i < us->us_bind_fanout_size; i++) { 7229 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7230 NULL); 7231 } 7232 7233 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 7234 7235 us->us_param_arr = pa; 7236 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 7237 7238 (void) udp_param_register(&us->us_nd, 7239 us->us_param_arr, A_CNT(udp_param_arr)); 7240 7241 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 7242 us->us_mibkp = udp_kstat_init(stackid); 7243 7244 major = mod_name_to_major(INET_NAME); 7245 error = ldi_ident_from_major(major, &us->us_ldi_ident); 7246 ASSERT(error == 0); 7247 return (us); 7248 } 7249 7250 /* 7251 * Free the UDP stack instance. 7252 */ 7253 static void 7254 udp_stack_fini(netstackid_t stackid, void *arg) 7255 { 7256 udp_stack_t *us = (udp_stack_t *)arg; 7257 int i; 7258 7259 for (i = 0; i < us->us_bind_fanout_size; i++) { 7260 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 7261 } 7262 7263 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 7264 sizeof (udp_fanout_t)); 7265 7266 us->us_bind_fanout = NULL; 7267 7268 nd_free(&us->us_nd); 7269 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 7270 us->us_param_arr = NULL; 7271 7272 udp_kstat_fini(stackid, us->us_mibkp); 7273 us->us_mibkp = NULL; 7274 7275 udp_kstat2_fini(stackid, us->us_kstat); 7276 us->us_kstat = NULL; 7277 bzero(&us->us_statistics, sizeof (us->us_statistics)); 7278 7279 ldi_ident_release(us->us_ldi_ident); 7280 kmem_free(us, sizeof (*us)); 7281 } 7282 7283 static void * 7284 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 7285 { 7286 kstat_t *ksp; 7287 7288 udp_stat_t template = { 7289 { "udp_ip_send", KSTAT_DATA_UINT64 }, 7290 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 7291 { "udp_ire_null", KSTAT_DATA_UINT64 }, 7292 { "udp_drain", KSTAT_DATA_UINT64 }, 7293 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 7294 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 7295 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 7296 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7297 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7298 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7299 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7300 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7301 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7302 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7303 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7304 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7305 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7306 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7307 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7308 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7309 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7310 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 7311 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7312 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7313 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 7314 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7315 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7316 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7317 #ifdef DEBUG 7318 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7319 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7320 #endif 7321 }; 7322 7323 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7324 KSTAT_TYPE_NAMED, sizeof 
(template) / sizeof (kstat_named_t), 7325 KSTAT_FLAG_VIRTUAL, stackid); 7326 7327 if (ksp == NULL) 7328 return (NULL); 7329 7330 bcopy(&template, us_statisticsp, sizeof (template)); 7331 ksp->ks_data = (void *)us_statisticsp; 7332 ksp->ks_private = (void *)(uintptr_t)stackid; 7333 7334 kstat_install(ksp); 7335 return (ksp); 7336 } 7337 7338 static void 7339 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 7340 { 7341 if (ksp != NULL) { 7342 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7343 kstat_delete_netstack(ksp, stackid); 7344 } 7345 } 7346 7347 static void * 7348 udp_kstat_init(netstackid_t stackid) 7349 { 7350 kstat_t *ksp; 7351 7352 udp_named_kstat_t template = { 7353 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 7354 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7355 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 7356 { "entrySize", KSTAT_DATA_INT32, 0 }, 7357 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7358 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7359 }; 7360 7361 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 7362 KSTAT_TYPE_NAMED, 7363 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 7364 7365 if (ksp == NULL || ksp->ks_data == NULL) 7366 return (NULL); 7367 7368 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7369 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7370 7371 bcopy(&template, ksp->ks_data, sizeof (template)); 7372 ksp->ks_update = udp_kstat_update; 7373 ksp->ks_private = (void *)(uintptr_t)stackid; 7374 7375 kstat_install(ksp); 7376 return (ksp); 7377 } 7378 7379 static void 7380 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 7381 { 7382 if (ksp != NULL) { 7383 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7384 kstat_delete_netstack(ksp, stackid); 7385 } 7386 } 7387 7388 static int 7389 udp_kstat_update(kstat_t *kp, int rw) 7390 { 7391 udp_named_kstat_t *udpkp; 7392 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 7393 netstack_t *ns; 7394 udp_stack_t *us; 7395 7396 if ((kp == NULL) || (kp->ks_data == NULL)) 7397 return (EIO); 7398 7399 if (rw == KSTAT_WRITE) 7400 return (EACCES); 7401 7402 ns = netstack_find_by_stackid(stackid); 7403 if (ns == NULL) 7404 return (-1); 7405 us = ns->netstack_udp; 7406 if (us == NULL) { 7407 netstack_rele(ns); 7408 return (-1); 7409 } 7410 udpkp = (udp_named_kstat_t *)kp->ks_data; 7411 7412 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 7413 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 7414 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 7415 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 7416 netstack_rele(ns); 7417 return (0); 7418 } 7419 7420 /* 7421 * Read-side synchronous stream info entry point, called as a 7422 * result of handling certain STREAMS ioctl operations. 7423 */ 7424 static int 7425 udp_rinfop(queue_t *q, infod_t *dp) 7426 { 7427 mblk_t *mp; 7428 uint_t cmd = dp->d_cmd; 7429 int res = 0; 7430 int error = 0; 7431 udp_t *udp = Q_TO_UDP(q); 7432 struct stdata *stp = STREAM(q); 7433 7434 mutex_enter(&udp->udp_drain_lock); 7435 /* If shutdown on read has happened, return nothing */ 7436 mutex_enter(&stp->sd_lock); 7437 if (stp->sd_flag & STREOF) { 7438 mutex_exit(&stp->sd_lock); 7439 goto done; 7440 } 7441 mutex_exit(&stp->sd_lock); 7442 7443 if ((mp = udp->udp_rcv_list_head) == NULL) 7444 goto done; 7445 7446 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7447 7448 if (cmd & INFOD_COUNT) { 7449 /* 7450 * Return the number of messages. 
7451 */ 7452 dp->d_count += udp->udp_rcv_msgcnt; 7453 res |= INFOD_COUNT; 7454 } 7455 if (cmd & INFOD_BYTES) { 7456 /* 7457 * Return size of all data messages. 7458 */ 7459 dp->d_bytes += udp->udp_rcv_cnt; 7460 res |= INFOD_BYTES; 7461 } 7462 if (cmd & INFOD_FIRSTBYTES) { 7463 /* 7464 * Return size of first data message. 7465 */ 7466 dp->d_bytes = msgdsize(mp); 7467 res |= INFOD_FIRSTBYTES; 7468 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7469 } 7470 if (cmd & INFOD_COPYOUT) { 7471 mblk_t *mp1 = mp->b_cont; 7472 int n; 7473 /* 7474 * Return data contents of first message. 7475 */ 7476 ASSERT(DB_TYPE(mp1) == M_DATA); 7477 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7478 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7479 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7480 UIO_READ, dp->d_uiop)) != 0) { 7481 goto done; 7482 } 7483 mp1 = mp1->b_cont; 7484 } 7485 res |= INFOD_COPYOUT; 7486 dp->d_cmd &= ~INFOD_COPYOUT; 7487 } 7488 done: 7489 mutex_exit(&udp->udp_drain_lock); 7490 7491 dp->d_res |= res; 7492 7493 return (error); 7494 } 7495 7496 /* 7497 * Read-side synchronous stream entry point. This is called as a result 7498 * of recv/read operation done at sockfs, and is guaranteed to execute 7499 * outside of the interrupt thread context. It returns a single datagram 7500 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7501 */ 7502 static int 7503 udp_rrw(queue_t *q, struiod_t *dp) 7504 { 7505 mblk_t *mp; 7506 udp_t *udp = Q_TO_UDP(q); 7507 udp_stack_t *us = udp->udp_us; 7508 7509 /* 7510 * Dequeue datagram from the head of the list and return 7511 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7512 * set/cleared depending on whether or not there's data 7513 * remaining in the list. 7514 */ 7515 mutex_enter(&udp->udp_drain_lock); 7516 if (!udp->udp_direct_sockfs) { 7517 mutex_exit(&udp->udp_drain_lock); 7518 UDP_STAT(us, udp_rrw_busy); 7519 return (EBUSY); 7520 } 7521 if ((mp = udp->udp_rcv_list_head) != NULL) { 7522 uint_t size = msgdsize(mp); 7523 7524 /* Last datagram in the list? */ 7525 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 7526 udp->udp_rcv_list_tail = NULL; 7527 mp->b_next = NULL; 7528 7529 udp->udp_rcv_cnt -= size; 7530 udp->udp_rcv_msgcnt--; 7531 UDP_STAT(us, udp_rrw_msgcnt); 7532 7533 /* No longer flow-controlling? */ 7534 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 7535 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 7536 udp->udp_drain_qfull = B_FALSE; 7537 } 7538 if (udp->udp_rcv_list_head == NULL) { 7539 /* 7540 * Either we just dequeued the last datagram or 7541 * we get here from sockfs and have nothing to 7542 * return; in this case clear RSLEEP. 7543 */ 7544 ASSERT(udp->udp_rcv_cnt == 0); 7545 ASSERT(udp->udp_rcv_msgcnt == 0); 7546 ASSERT(udp->udp_rcv_list_tail == NULL); 7547 STR_WAKEUP_CLEAR(STREAM(q)); 7548 } else { 7549 /* 7550 * More data follows; we need udp_rrw() to be 7551 * called in future to pick up the rest. 7552 */ 7553 STR_WAKEUP_SET(STREAM(q)); 7554 } 7555 mutex_exit(&udp->udp_drain_lock); 7556 dp->d_mp = mp; 7557 return (0); 7558 } 7559 7560 /* 7561 * Enqueue a completely-built T_UNITDATA_IND message into the receive 7562 * list; this is typically executed within the interrupt thread context 7563 * and so we do things as quickly as possible. 
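 *
 * The receive list is a simple b_next chain guarded by udp_drain_lock,
 * with byte and message counters driving the flow-control decision.
 * The core of the enqueue below can be summarized as (sketch):
 *
 *	mutex_enter(&udp->udp_drain_lock);
 *	if (udp->udp_rcv_list_head == NULL)
 *		udp->udp_rcv_list_head = mp;
 *	else
 *		udp->udp_rcv_list_tail->b_next = mp;
 *	udp->udp_rcv_list_tail = mp;
 *	udp->udp_rcv_cnt += pkt_len;
 *	udp->udp_rcv_msgcnt++;
 *	if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat ||
 *	    udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat)
 *		udp->udp_drain_qfull = B_TRUE;
 *	mutex_exit(&udp->udp_drain_lock);
 *
 * udp_rrw() undoes this one datagram at a time and clears the RSLEEP
 * wakeup once the list is empty.
 *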
7564 */ 7565 static void 7566 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 7567 { 7568 ASSERT(q == RD(q)); 7569 ASSERT(pkt_len == msgdsize(mp)); 7570 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 7571 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 7572 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 7573 7574 mutex_enter(&udp->udp_drain_lock); 7575 /* 7576 * Wake up and signal the receiving app; it is okay to do this 7577 * before enqueueing the mp because we are holding the drain lock. 7578 * One of the advantages of synchronous stream is the ability for 7579 * us to find out when the application performs a read on the 7580 * socket by way of udp_rrw() entry point being called. We need 7581 * to generate SIGPOLL/SIGIO for each received data in the case 7582 * of asynchronous socket just as in the strrput() case. However, 7583 * we only wake the application up when necessary, i.e. during the 7584 * first enqueue. When udp_rrw() is called, we send up a single 7585 * datagram upstream and call STR_WAKEUP_SET() again when there 7586 * are still data remaining in our receive queue. 7587 */ 7588 STR_WAKEUP_SENDSIG(STREAM(q), udp->udp_rcv_list_head); 7589 if (udp->udp_rcv_list_head == NULL) 7590 udp->udp_rcv_list_head = mp; 7591 else 7592 udp->udp_rcv_list_tail->b_next = mp; 7593 udp->udp_rcv_list_tail = mp; 7594 udp->udp_rcv_cnt += pkt_len; 7595 udp->udp_rcv_msgcnt++; 7596 7597 /* Need to flow-control? */ 7598 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 7599 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 7600 udp->udp_drain_qfull = B_TRUE; 7601 7602 mutex_exit(&udp->udp_drain_lock); 7603 } 7604 7605 /* 7606 * Drain the contents of receive list to the module upstream; we do 7607 * this during close or when we fallback to the slow mode due to 7608 * sockmod being popped or a module being pushed on top of us. 7609 */ 7610 static void 7611 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 7612 { 7613 mblk_t *mp; 7614 udp_stack_t *us = udp->udp_us; 7615 7616 mutex_enter(&udp->udp_drain_lock); 7617 /* 7618 * There is no race with a concurrent udp_input() sending 7619 * up packets using putnext() after we have cleared the 7620 * udp_direct_sockfs flag but before we have completed 7621 * sending up the packets in udp_rcv_list, since we are 7622 * either a writer or we have quiesced the conn. 7623 */ 7624 udp->udp_direct_sockfs = B_FALSE; 7625 mutex_exit(&udp->udp_drain_lock); 7626 7627 if (udp->udp_rcv_list_head != NULL) 7628 UDP_STAT(us, udp_drain); 7629 7630 /* 7631 * Send up everything via putnext(); note here that we 7632 * don't need the udp_drain_lock to protect us since 7633 * nothing can enter udp_rrw() and that we currently 7634 * have exclusive access to this udp. 
7635 */ 7636 while ((mp = udp->udp_rcv_list_head) != NULL) { 7637 udp->udp_rcv_list_head = mp->b_next; 7638 mp->b_next = NULL; 7639 udp->udp_rcv_cnt -= msgdsize(mp); 7640 udp->udp_rcv_msgcnt--; 7641 if (closing) { 7642 freemsg(mp); 7643 } else { 7644 ASSERT(q == RD(q)); 7645 putnext(q, mp); 7646 } 7647 } 7648 ASSERT(udp->udp_rcv_cnt == 0); 7649 ASSERT(udp->udp_rcv_msgcnt == 0); 7650 ASSERT(udp->udp_rcv_list_head == NULL); 7651 udp->udp_rcv_list_tail = NULL; 7652 udp->udp_drain_qfull = B_FALSE; 7653 } 7654 7655 static size_t 7656 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7657 { 7658 udp_stack_t *us = udp->udp_us; 7659 7660 /* We add a bit of extra buffering */ 7661 size += size >> 1; 7662 if (size > us->us_max_buf) 7663 size = us->us_max_buf; 7664 7665 udp->udp_rcv_hiwat = size; 7666 return (size); 7667 } 7668 7669 /* 7670 * For the lower queue so that UDP can be a dummy mux. 7671 * Nobody should be sending 7672 * packets up this stream 7673 */ 7674 static void 7675 udp_lrput(queue_t *q, mblk_t *mp) 7676 { 7677 mblk_t *mp1; 7678 7679 switch (mp->b_datap->db_type) { 7680 case M_FLUSH: 7681 /* Turn around */ 7682 if (*mp->b_rptr & FLUSHW) { 7683 *mp->b_rptr &= ~FLUSHR; 7684 qreply(q, mp); 7685 return; 7686 } 7687 break; 7688 } 7689 /* Could receive messages that passed through ar_rput */ 7690 for (mp1 = mp; mp1; mp1 = mp1->b_cont) 7691 mp1->b_prev = mp1->b_next = NULL; 7692 freemsg(mp); 7693 } 7694 7695 /* 7696 * For the lower queue so that UDP can be a dummy mux. 7697 * Nobody should be sending packets down this stream. 7698 */ 7699 /* ARGSUSED */ 7700 void 7701 udp_lwput(queue_t *q, mblk_t *mp) 7702 { 7703 freemsg(mp); 7704 } 7705 7706 /* 7707 * Below routines for UDP socket module. 7708 */ 7709 7710 static conn_t * 7711 udp_do_open(cred_t *credp, boolean_t isv6, int flags) 7712 { 7713 udp_t *udp; 7714 conn_t *connp; 7715 zoneid_t zoneid; 7716 netstack_t *ns; 7717 udp_stack_t *us; 7718 7719 ns = netstack_find_by_cred(credp); 7720 ASSERT(ns != NULL); 7721 us = ns->netstack_udp; 7722 ASSERT(us != NULL); 7723 7724 /* 7725 * For exclusive stacks we set the zoneid to zero 7726 * to make UDP operate as if in the global zone. 7727 */ 7728 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 7729 zoneid = GLOBAL_ZONEID; 7730 else 7731 zoneid = crgetzoneid(credp); 7732 7733 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 7734 7735 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 7736 if (connp == NULL) { 7737 netstack_rele(ns); 7738 return (NULL); 7739 } 7740 udp = connp->conn_udp; 7741 7742 /* 7743 * ipcl_conn_create did a netstack_hold. Undo the hold that was 7744 * done by netstack_find_by_cred() 7745 */ 7746 netstack_rele(ns); 7747 7748 rw_enter(&udp->udp_rwlock, RW_WRITER); 7749 ASSERT(connp->conn_ulp == IPPROTO_UDP); 7750 ASSERT(connp->conn_udp == udp); 7751 ASSERT(udp->udp_connp == connp); 7752 7753 /* Set the initial state of the stream and the privilege status. 
*/ 7754 udp->udp_state = TS_UNBND; 7755 if (isv6) { 7756 udp->udp_family = AF_INET6; 7757 udp->udp_ipversion = IPV6_VERSION; 7758 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 7759 udp->udp_ttl = us->us_ipv6_hoplimit; 7760 connp->conn_af_isv6 = B_TRUE; 7761 connp->conn_flags |= IPCL_ISV6; 7762 } else { 7763 udp->udp_family = AF_INET; 7764 udp->udp_ipversion = IPV4_VERSION; 7765 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 7766 udp->udp_ttl = us->us_ipv4_ttl; 7767 connp->conn_af_isv6 = B_FALSE; 7768 connp->conn_flags &= ~IPCL_ISV6; 7769 } 7770 7771 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 7772 udp->udp_pending_op = -1; 7773 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 7774 connp->conn_zoneid = zoneid; 7775 7776 udp->udp_open_time = lbolt64; 7777 udp->udp_open_pid = curproc->p_pid; 7778 7779 /* 7780 * If the caller has the process-wide flag set, then default to MAC 7781 * exempt mode. This allows read-down to unlabeled hosts. 7782 */ 7783 if (getpflags(NET_MAC_AWARE, credp) != 0) 7784 connp->conn_mac_exempt = B_TRUE; 7785 7786 connp->conn_ulp_labeled = is_system_labeled(); 7787 7788 udp->udp_us = us; 7789 7790 connp->conn_recv = udp_input; 7791 crhold(credp); 7792 connp->conn_cred = credp; 7793 7794 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 7795 7796 rw_exit(&udp->udp_rwlock); 7797 7798 return (connp); 7799 } 7800 7801 /* ARGSUSED */ 7802 sock_lower_handle_t 7803 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 7804 uint_t *smodep, int *errorp, int flags, cred_t *credp) 7805 { 7806 udp_t *udp = NULL; 7807 udp_stack_t *us; 7808 conn_t *connp; 7809 boolean_t isv6; 7810 7811 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 7812 (proto != 0 && proto != IPPROTO_UDP)) { 7813 *errorp = EPROTONOSUPPORT; 7814 return (NULL); 7815 } 7816 7817 if (family == AF_INET6) 7818 isv6 = B_TRUE; 7819 else 7820 isv6 = B_FALSE; 7821 7822 connp = udp_do_open(credp, isv6, flags); 7823 if (connp == NULL) { 7824 *errorp = ENOMEM; 7825 return (NULL); 7826 } 7827 7828 udp = connp->conn_udp; 7829 ASSERT(udp != NULL); 7830 us = udp->udp_us; 7831 ASSERT(us != NULL); 7832 7833 connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET; 7834 7835 /* Set flow control */ 7836 rw_enter(&udp->udp_rwlock, RW_WRITER); 7837 (void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat); 7838 udp->udp_rcv_disply_hiwat = us->us_recv_hiwat; 7839 udp->udp_rcv_lowat = udp_mod_info.mi_lowat; 7840 udp->udp_xmit_hiwat = us->us_xmit_hiwat; 7841 udp->udp_xmit_lowat = us->us_xmit_lowat; 7842 7843 if (udp->udp_family == AF_INET6) { 7844 /* Build initial header template for transmit */ 7845 if ((*errorp = udp_build_hdrs(udp)) != 0) { 7846 rw_exit(&udp->udp_rwlock); 7847 ipcl_conn_destroy(connp); 7848 return (NULL); 7849 } 7850 } 7851 rw_exit(&udp->udp_rwlock); 7852 7853 connp->conn_flow_cntrld = B_FALSE; 7854 7855 ASSERT(us->us_ldi_ident != NULL); 7856 7857 if ((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) { 7858 ip1dbg(("create of IP helper stream failed\n")); 7859 udp_do_close(connp); 7860 return (NULL); 7861 } 7862 7863 /* Set the send flow control */ 7864 connp->conn_wq->q_hiwat = us->us_xmit_hiwat; 7865 connp->conn_wq->q_lowat = us->us_xmit_lowat; 7866 7867 mutex_enter(&connp->conn_lock); 7868 connp->conn_state_flags &= ~CONN_INCIPIENT; 7869 mutex_exit(&connp->conn_lock); 7870 7871 *errorp = 0; 7872 *smodep = SM_ATOMIC; 7873 *sock_downcalls = &sock_udp_downcalls; 7874 return ((sock_lower_handle_t)connp); 7875 } 7876 7877 /* ARGSUSED */ 7878 void 7879 
udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 7880 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 7881 { 7882 conn_t *connp = (conn_t *)proto_handle; 7883 udp_t *udp = connp->conn_udp; 7884 udp_stack_t *us = udp->udp_us; 7885 struct sock_proto_props sopp; 7886 7887 connp->conn_upcalls = sock_upcalls; 7888 connp->conn_upper_handle = sock_handle; 7889 7890 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | 7891 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 7892 sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 7893 sopp.sopp_maxblk = INFPSZ; 7894 sopp.sopp_rxhiwat = udp->udp_rcv_hiwat; 7895 sopp.sopp_maxaddrlen = sizeof (sin6_t); 7896 sopp.sopp_maxpsz = 7897 (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 7898 UDP_MAXPACKET_IPV6; 7899 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 : 7900 udp_mod_info.mi_minpsz; 7901 7902 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 7903 &sopp); 7904 } 7905 7906 static void 7907 udp_do_close(conn_t *connp) 7908 { 7909 udp_t *udp; 7910 7911 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 7912 udp = connp->conn_udp; 7913 7914 udp_quiesce_conn(connp); 7915 ip_quiesce_conn(connp); 7916 7917 if (!IPCL_IS_NONSTR(connp)) { 7918 /* 7919 * Disable read-side synchronous stream 7920 * interface and drain any queued data. 7921 */ 7922 ASSERT(connp->conn_wq != NULL); 7923 udp_rcv_drain(connp->conn_wq, udp, B_TRUE); 7924 ASSERT(!udp->udp_direct_sockfs); 7925 7926 ASSERT(connp->conn_rq != NULL); 7927 qprocsoff(connp->conn_rq); 7928 } 7929 7930 ASSERT(udp->udp_rcv_cnt == 0); 7931 ASSERT(udp->udp_rcv_msgcnt == 0); 7932 ASSERT(udp->udp_rcv_list_head == NULL); 7933 ASSERT(udp->udp_rcv_list_tail == NULL); 7934 7935 udp_close_free(connp); 7936 7937 /* 7938 * Now we are truly single threaded on this stream, and can 7939 * delete the things hanging off the connp, and finally the connp. 7940 * We removed this connp from the fanout list, it cannot be 7941 * accessed thru the fanouts, and we already waited for the 7942 * conn_ref to drop to 0. We are already in close, so 7943 * there cannot be any other thread from the top. qprocsoff 7944 * has completed, and service has completed or won't run in 7945 * future. 
7946 */ 7947 ASSERT(connp->conn_ref == 1); 7948 if (!IPCL_IS_NONSTR(connp)) { 7949 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 7950 } else { 7951 ip_close_helper_stream(connp); 7952 } 7953 7954 connp->conn_ref--; 7955 ipcl_conn_destroy(connp); 7956 } 7957 7958 /* ARGSUSED */ 7959 int 7960 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 7961 { 7962 conn_t *connp = (conn_t *)proto_handle; 7963 7964 udp_do_close(connp); 7965 return (0); 7966 } 7967 7968 static int 7969 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 7970 boolean_t bind_to_req_port_only) 7971 { 7972 sin_t *sin; 7973 sin6_t *sin6; 7974 sin6_t sin6addr; 7975 in_port_t port; /* Host byte order */ 7976 in_port_t requested_port; /* Host byte order */ 7977 int count; 7978 in6_addr_t v6src; 7979 int loopmax; 7980 udp_fanout_t *udpf; 7981 in_port_t lport; /* Network byte order */ 7982 zoneid_t zoneid; 7983 udp_t *udp; 7984 boolean_t is_inaddr_any; 7985 mlp_type_t addrtype, mlptype; 7986 udp_stack_t *us; 7987 int error = 0; 7988 mblk_t *mp = NULL; 7989 7990 udp = connp->conn_udp; 7991 us = udp->udp_us; 7992 7993 if (udp->udp_state != TS_UNBND) { 7994 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 7995 "udp_bind: bad state, %u", udp->udp_state); 7996 return (-TOUTSTATE); 7997 } 7998 7999 switch (len) { 8000 case 0: 8001 if (udp->udp_family == AF_INET) { 8002 sin = (sin_t *)&sin6addr; 8003 *sin = sin_null; 8004 sin->sin_family = AF_INET; 8005 sin->sin_addr.s_addr = INADDR_ANY; 8006 udp->udp_ipversion = IPV4_VERSION; 8007 } else { 8008 ASSERT(udp->udp_family == AF_INET6); 8009 sin6 = (sin6_t *)&sin6addr; 8010 *sin6 = sin6_null; 8011 sin6->sin6_family = AF_INET6; 8012 V6_SET_ZERO(sin6->sin6_addr); 8013 udp->udp_ipversion = IPV6_VERSION; 8014 } 8015 port = 0; 8016 break; 8017 8018 case sizeof (sin_t): /* Complete IPv4 address */ 8019 sin = (sin_t *)sa; 8020 8021 if (sin == NULL || !OK_32PTR((char *)sin)) 8022 return (EINVAL); 8023 8024 if (udp->udp_family != AF_INET || 8025 sin->sin_family != AF_INET) { 8026 return (EAFNOSUPPORT); 8027 } 8028 port = ntohs(sin->sin_port); 8029 break; 8030 8031 case sizeof (sin6_t): /* complete IPv6 address */ 8032 sin6 = (sin6_t *)sa; 8033 8034 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 8035 return (EINVAL); 8036 8037 if (udp->udp_family != AF_INET6 || 8038 sin6->sin6_family != AF_INET6) { 8039 return (EAFNOSUPPORT); 8040 } 8041 port = ntohs(sin6->sin6_port); 8042 break; 8043 8044 default: /* Invalid request */ 8045 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8046 "udp_bind: bad ADDR_length length %u", len); 8047 return (-TBADADDR); 8048 } 8049 8050 requested_port = port; 8051 8052 if (requested_port == 0 || !bind_to_req_port_only) 8053 bind_to_req_port_only = B_FALSE; 8054 else /* T_BIND_REQ and requested_port != 0 */ 8055 bind_to_req_port_only = B_TRUE; 8056 8057 if (requested_port == 0) { 8058 /* 8059 * If the application passed in zero for the port number, it 8060 * doesn't care which port number we bind to. Get one in the 8061 * valid range. 8062 */ 8063 if (udp->udp_anon_priv_bind) { 8064 port = udp_get_next_priv_port(udp); 8065 } else { 8066 port = udp_update_next_port(udp, 8067 us->us_next_port_to_try, B_TRUE); 8068 } 8069 } else { 8070 /* 8071 * If the port is in the well-known privileged range, 8072 * make sure the caller was privileged. 
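		 * Ports below us_smallest_nonpriv_port (normally 1024) are
		 * privileged, as are any extra ports configured through the
		 * udp_extra_priv_ports list (us_epriv_ports); binding to one
		 * of them requires the caller to pass the
		 * secpolicy_net_privaddr() check below.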
8073 */ 8074 int i; 8075 boolean_t priv = B_FALSE; 8076 8077 if (port < us->us_smallest_nonpriv_port) { 8078 priv = B_TRUE; 8079 } else { 8080 for (i = 0; i < us->us_num_epriv_ports; i++) { 8081 if (port == us->us_epriv_ports[i]) { 8082 priv = B_TRUE; 8083 break; 8084 } 8085 } 8086 } 8087 8088 if (priv) { 8089 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 8090 return (-TACCES); 8091 } 8092 } 8093 8094 if (port == 0) 8095 return (-TNOADDR); 8096 8097 /* 8098 * The state must be TS_UNBND. TPI mandates that users must send 8099 * TPI primitives only 1 at a time and wait for the response before 8100 * sending the next primitive. 8101 */ 8102 rw_enter(&udp->udp_rwlock, RW_WRITER); 8103 if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) { 8104 rw_exit(&udp->udp_rwlock); 8105 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8106 "udp_bind: bad state, %u", udp->udp_state); 8107 return (-TOUTSTATE); 8108 } 8109 /* XXX how to remove the T_BIND_REQ? Should set it before calling */ 8110 udp->udp_pending_op = T_BIND_REQ; 8111 /* 8112 * Copy the source address into our udp structure. This address 8113 * may still be zero; if so, IP will fill in the correct address 8114 * each time an outbound packet is passed to it. Since the udp is 8115 * not yet in the bind hash list, we don't grab the uf_lock to 8116 * change udp_ipversion 8117 */ 8118 if (udp->udp_family == AF_INET) { 8119 ASSERT(sin != NULL); 8120 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8121 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8122 udp->udp_ip_snd_options_len; 8123 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 8124 } else { 8125 ASSERT(sin6 != NULL); 8126 v6src = sin6->sin6_addr; 8127 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 8128 /* 8129 * no need to hold the uf_lock to set the udp_ipversion 8130 * since we are not yet in the fanout list 8131 */ 8132 udp->udp_ipversion = IPV4_VERSION; 8133 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 8134 UDPH_SIZE + udp->udp_ip_snd_options_len; 8135 } else { 8136 udp->udp_ipversion = IPV6_VERSION; 8137 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8138 } 8139 } 8140 8141 /* 8142 * If udp_reuseaddr is not set, then we have to make sure that 8143 * the IP address and port number the application requested 8144 * (or we selected for the application) is not being used by 8145 * another stream. If another stream is already using the 8146 * requested IP address and port, the behavior depends on 8147 * "bind_to_req_port_only". If set the bind fails; otherwise we 8148 * search for any an unused port to bind to the the stream. 8149 * 8150 * As per the BSD semantics, as modified by the Deering multicast 8151 * changes, if udp_reuseaddr is set, then we allow multiple binds 8152 * to the same port independent of the local IP address. 8153 * 8154 * This is slightly different than in SunOS 4.X which did not 8155 * support IP multicast. Note that the change implemented by the 8156 * Deering multicast code effects all binds - not only binding 8157 * to IP multicast addresses. 8158 * 8159 * Note that when binding to port zero we ignore SO_REUSEADDR in 8160 * order to guarantee a unique port. 
8161 */ 8162 8163 count = 0; 8164 if (udp->udp_anon_priv_bind) { 8165 /* 8166 * loopmax = (IPPORT_RESERVED-1) - 8167 * us->us_min_anonpriv_port + 1 8168 */ 8169 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 8170 } else { 8171 loopmax = us->us_largest_anon_port - 8172 us->us_smallest_anon_port + 1; 8173 } 8174 8175 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 8176 zoneid = connp->conn_zoneid; 8177 8178 for (;;) { 8179 udp_t *udp1; 8180 boolean_t found_exclbind = B_FALSE; 8181 8182 /* 8183 * Walk through the list of udp streams bound to 8184 * requested port with the same IP address. 8185 */ 8186 lport = htons(port); 8187 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 8188 us->us_bind_fanout_size)]; 8189 mutex_enter(&udpf->uf_lock); 8190 for (udp1 = udpf->uf_udp; udp1 != NULL; 8191 udp1 = udp1->udp_bind_hash) { 8192 if (lport != udp1->udp_port) 8193 continue; 8194 8195 /* 8196 * On a labeled system, we must treat bindings to ports 8197 * on shared IP addresses by sockets with MAC exemption 8198 * privilege as being in all zones, as there's 8199 * otherwise no way to identify the right receiver. 8200 */ 8201 if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) || 8202 IPCL_ZONE_MATCH(connp, 8203 udp1->udp_connp->conn_zoneid)) && 8204 !connp->conn_mac_exempt && \ 8205 !udp1->udp_connp->conn_mac_exempt) 8206 continue; 8207 8208 /* 8209 * If UDP_EXCLBIND is set for either the bound or 8210 * binding endpoint, the semantics of bind 8211 * is changed according to the following chart. 8212 * 8213 * spec = specified address (v4 or v6) 8214 * unspec = unspecified address (v4 or v6) 8215 * A = specified addresses are different for endpoints 8216 * 8217 * bound bind to allowed? 8218 * ------------------------------------- 8219 * unspec unspec no 8220 * unspec spec no 8221 * spec unspec no 8222 * spec spec yes if A 8223 * 8224 * For labeled systems, SO_MAC_EXEMPT behaves the same 8225 * as UDP_EXCLBIND, except that zoneid is ignored. 8226 */ 8227 if (udp1->udp_exclbind || udp->udp_exclbind || 8228 udp1->udp_connp->conn_mac_exempt || 8229 connp->conn_mac_exempt) { 8230 if (V6_OR_V4_INADDR_ANY( 8231 udp1->udp_bound_v6src) || 8232 is_inaddr_any || 8233 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 8234 &v6src)) { 8235 found_exclbind = B_TRUE; 8236 break; 8237 } 8238 continue; 8239 } 8240 8241 /* 8242 * Check ipversion to allow IPv4 and IPv6 sockets to 8243 * have disjoint port number spaces. 8244 */ 8245 if (udp->udp_ipversion != udp1->udp_ipversion) { 8246 8247 /* 8248 * On the first time through the loop, if the 8249 * the user intentionally specified a 8250 * particular port number, then ignore any 8251 * bindings of the other protocol that may 8252 * conflict. This allows the user to bind IPv6 8253 * alone and get both v4 and v6, or bind both 8254 * both and get each seperately. On subsequent 8255 * times through the loop, we're checking a 8256 * port that we chose (not the user) and thus 8257 * we do not allow casual duplicate bindings. 8258 */ 8259 if (count == 0 && requested_port != 0) 8260 continue; 8261 } 8262 8263 /* 8264 * No difference depending on SO_REUSEADDR. 8265 * 8266 * If existing port is bound to a 8267 * non-wildcard IP address and 8268 * the requesting stream is bound to 8269 * a distinct different IP addresses 8270 * (non-wildcard, also), keep going. 
8271 */ 8272 if (!is_inaddr_any && 8273 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 8274 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 8275 &v6src)) { 8276 continue; 8277 } 8278 break; 8279 } 8280 8281 if (!found_exclbind && 8282 (udp->udp_reuseaddr && requested_port != 0)) { 8283 break; 8284 } 8285 8286 if (udp1 == NULL) { 8287 /* 8288 * No other stream has this IP address 8289 * and port number. We can use it. 8290 */ 8291 break; 8292 } 8293 mutex_exit(&udpf->uf_lock); 8294 if (bind_to_req_port_only) { 8295 /* 8296 * We get here only when requested port 8297 * is bound (and only first of the for() 8298 * loop iteration). 8299 * 8300 * The semantics of this bind request 8301 * require it to fail so we return from 8302 * the routine (and exit the loop). 8303 * 8304 */ 8305 udp->udp_pending_op = -1; 8306 rw_exit(&udp->udp_rwlock); 8307 return (-TADDRBUSY); 8308 } 8309 8310 if (udp->udp_anon_priv_bind) { 8311 port = udp_get_next_priv_port(udp); 8312 } else { 8313 if ((count == 0) && (requested_port != 0)) { 8314 /* 8315 * If the application wants us to find 8316 * a port, get one to start with. Set 8317 * requested_port to 0, so that we will 8318 * update us->us_next_port_to_try below. 8319 */ 8320 port = udp_update_next_port(udp, 8321 us->us_next_port_to_try, B_TRUE); 8322 requested_port = 0; 8323 } else { 8324 port = udp_update_next_port(udp, port + 1, 8325 B_FALSE); 8326 } 8327 } 8328 8329 if (port == 0 || ++count >= loopmax) { 8330 /* 8331 * We've tried every possible port number and 8332 * there are none available, so send an error 8333 * to the user. 8334 */ 8335 udp->udp_pending_op = -1; 8336 rw_exit(&udp->udp_rwlock); 8337 return (-TNOADDR); 8338 } 8339 } 8340 8341 /* 8342 * Copy the source address into our udp structure. This address 8343 * may still be zero; if so, ip will fill in the correct address 8344 * each time an outbound packet is passed to it. 8345 * If we are binding to a broadcast or multicast address then 8346 * udp_post_ip_bind_connect will clear the source address 8347 * when udp_do_bind success. 8348 */ 8349 udp->udp_v6src = udp->udp_bound_v6src = v6src; 8350 udp->udp_port = lport; 8351 /* 8352 * Now reset the the next anonymous port if the application requested 8353 * an anonymous port, or we handed out the next anonymous port. 8354 */ 8355 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 8356 us->us_next_port_to_try = port + 1; 8357 } 8358 8359 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. 
*/ 8360 if (udp->udp_family == AF_INET) { 8361 sin->sin_port = udp->udp_port; 8362 } else { 8363 sin6->sin6_port = udp->udp_port; 8364 /* Rebuild the header template */ 8365 error = udp_build_hdrs(udp); 8366 if (error != 0) { 8367 udp->udp_pending_op = -1; 8368 rw_exit(&udp->udp_rwlock); 8369 mutex_exit(&udpf->uf_lock); 8370 return (error); 8371 } 8372 } 8373 udp->udp_state = TS_IDLE; 8374 udp_bind_hash_insert(udpf, udp); 8375 mutex_exit(&udpf->uf_lock); 8376 rw_exit(&udp->udp_rwlock); 8377 8378 if (cl_inet_bind) { 8379 /* 8380 * Running in cluster mode - register bind information 8381 */ 8382 if (udp->udp_ipversion == IPV4_VERSION) { 8383 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8384 IPPROTO_UDP, AF_INET, 8385 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8386 (in_port_t)udp->udp_port, NULL); 8387 } else { 8388 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8389 IPPROTO_UDP, AF_INET6, 8390 (uint8_t *)&(udp->udp_v6src), 8391 (in_port_t)udp->udp_port, NULL); 8392 } 8393 } 8394 8395 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 8396 if (is_system_labeled() && (!connp->conn_anon_port || 8397 connp->conn_anon_mlp)) { 8398 uint16_t mlpport; 8399 cred_t *cr = connp->conn_cred; 8400 zone_t *zone; 8401 8402 zone = crgetzone(cr); 8403 connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth : 8404 mlptSingle; 8405 addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION, 8406 &v6src, us->us_netstack->netstack_ip); 8407 if (addrtype == mlptSingle) { 8408 rw_enter(&udp->udp_rwlock, RW_WRITER); 8409 udp->udp_pending_op = -1; 8410 rw_exit(&udp->udp_rwlock); 8411 connp->conn_anon_port = B_FALSE; 8412 connp->conn_mlp_type = mlptSingle; 8413 return (-TNOADDR); 8414 } 8415 mlpport = connp->conn_anon_port ? PMAPPORT : port; 8416 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 8417 addrtype); 8418 if (mlptype != mlptSingle && 8419 (connp->conn_mlp_type == mlptSingle || 8420 secpolicy_net_bindmlp(cr) != 0)) { 8421 if (udp->udp_debug) { 8422 (void) strlog(UDP_MOD_ID, 0, 1, 8423 SL_ERROR|SL_TRACE, 8424 "udp_bind: no priv for multilevel port %d", 8425 mlpport); 8426 } 8427 rw_enter(&udp->udp_rwlock, RW_WRITER); 8428 udp->udp_pending_op = -1; 8429 rw_exit(&udp->udp_rwlock); 8430 connp->conn_anon_port = B_FALSE; 8431 connp->conn_mlp_type = mlptSingle; 8432 return (-TACCES); 8433 } 8434 8435 /* 8436 * If we're specifically binding a shared IP address and the 8437 * port is MLP on shared addresses, then check to see if this 8438 * zone actually owns the MLP. Reject if not. 8439 */ 8440 if (mlptype == mlptShared && addrtype == mlptShared) { 8441 /* 8442 * No need to handle exclusive-stack zones since 8443 * ALL_ZONES only applies to the shared stack. 
8444 */ 8445 zoneid_t mlpzone; 8446 8447 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 8448 htons(mlpport)); 8449 if (connp->conn_zoneid != mlpzone) { 8450 if (udp->udp_debug) { 8451 (void) strlog(UDP_MOD_ID, 0, 1, 8452 SL_ERROR|SL_TRACE, 8453 "udp_bind: attempt to bind port " 8454 "%d on shared addr in zone %d " 8455 "(should be %d)", 8456 mlpport, connp->conn_zoneid, 8457 mlpzone); 8458 } 8459 rw_enter(&udp->udp_rwlock, RW_WRITER); 8460 udp->udp_pending_op = -1; 8461 rw_exit(&udp->udp_rwlock); 8462 connp->conn_anon_port = B_FALSE; 8463 connp->conn_mlp_type = mlptSingle; 8464 return (-TACCES); 8465 } 8466 } 8467 if (connp->conn_anon_port) { 8468 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 8469 port, B_TRUE); 8470 if (error != 0) { 8471 if (udp->udp_debug) { 8472 (void) strlog(UDP_MOD_ID, 0, 1, 8473 SL_ERROR|SL_TRACE, 8474 "udp_bind: cannot establish anon " 8475 "MLP for port %d", port); 8476 } 8477 rw_enter(&udp->udp_rwlock, RW_WRITER); 8478 udp->udp_pending_op = -1; 8479 rw_exit(&udp->udp_rwlock); 8480 connp->conn_anon_port = B_FALSE; 8481 connp->conn_mlp_type = mlptSingle; 8482 return (-TACCES); 8483 } 8484 } 8485 connp->conn_mlp_type = mlptype; 8486 } 8487 8488 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8489 /* 8490 * Append a request for an IRE if udp_v6src not 8491 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 8492 */ 8493 mp = allocb(sizeof (ire_t), BPRI_HI); 8494 if (!mp) { 8495 rw_enter(&udp->udp_rwlock, RW_WRITER); 8496 udp->udp_pending_op = -1; 8497 rw_exit(&udp->udp_rwlock); 8498 return (ENOMEM); 8499 } 8500 mp->b_wptr += sizeof (ire_t); 8501 mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8502 } 8503 if (udp->udp_family == AF_INET6) { 8504 ASSERT(udp->udp_connp->conn_af_isv6); 8505 error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP, 8506 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 8507 } else { 8508 ASSERT(!udp->udp_connp->conn_af_isv6); 8509 error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP, 8510 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, 8511 B_TRUE); 8512 } 8513 8514 (void) udp_post_ip_bind_connect(udp, mp, error); 8515 return (error); 8516 } 8517 8518 int 8519 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 8520 socklen_t len, cred_t *cr) 8521 { 8522 int error; 8523 conn_t *connp; 8524 8525 connp = (conn_t *)proto_handle; 8526 8527 if (sa == NULL) 8528 error = udp_do_unbind(connp); 8529 else 8530 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 8531 8532 if (error < 0) { 8533 if (error == -TOUTSTATE) 8534 error = EINVAL; 8535 else 8536 error = proto_tlitosyserr(-error); 8537 } 8538 8539 return (error); 8540 } 8541 8542 static int 8543 udp_implicit_bind(conn_t *connp, cred_t *cr) 8544 { 8545 int error; 8546 8547 error = udp_do_bind(connp, NULL, 0, cr, B_FALSE); 8548 return ((error < 0) ? proto_tlitosyserr(-error) : error); 8549 } 8550 8551 /* 8552 * This routine removes a port number association from a stream. It 8553 * is called by udp_unbind and udp_tpi_unbind. 
8554 */ 8555 static int 8556 udp_do_unbind(conn_t *connp) 8557 { 8558 udp_t *udp = connp->conn_udp; 8559 udp_fanout_t *udpf; 8560 udp_stack_t *us = udp->udp_us; 8561 8562 if (cl_inet_unbind != NULL) { 8563 /* 8564 * Running in cluster mode - register unbind information 8565 */ 8566 if (udp->udp_ipversion == IPV4_VERSION) { 8567 (*cl_inet_unbind)( 8568 connp->conn_netstack->netstack_stackid, 8569 IPPROTO_UDP, AF_INET, 8570 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8571 (in_port_t)udp->udp_port, NULL); 8572 } else { 8573 (*cl_inet_unbind)( 8574 connp->conn_netstack->netstack_stackid, 8575 IPPROTO_UDP, AF_INET6, 8576 (uint8_t *)&(udp->udp_v6src), 8577 (in_port_t)udp->udp_port, NULL); 8578 } 8579 } 8580 8581 rw_enter(&udp->udp_rwlock, RW_WRITER); 8582 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8583 rw_exit(&udp->udp_rwlock); 8584 return (-TOUTSTATE); 8585 } 8586 udp->udp_pending_op = T_UNBIND_REQ; 8587 rw_exit(&udp->udp_rwlock); 8588 8589 /* 8590 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 8591 * and therefore ip_unbind must never return NULL. 8592 */ 8593 ip_unbind(connp); 8594 8595 /* 8596 * Once we're unbound from IP, the pending operation may be cleared 8597 * here. 8598 */ 8599 rw_enter(&udp->udp_rwlock, RW_WRITER); 8600 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8601 us->us_bind_fanout_size)]; 8602 8603 mutex_enter(&udpf->uf_lock); 8604 udp_bind_hash_remove(udp, B_TRUE); 8605 V6_SET_ZERO(udp->udp_v6src); 8606 V6_SET_ZERO(udp->udp_bound_v6src); 8607 udp->udp_port = 0; 8608 mutex_exit(&udpf->uf_lock); 8609 8610 udp->udp_pending_op = -1; 8611 udp->udp_state = TS_UNBND; 8612 if (udp->udp_family == AF_INET6) 8613 (void) udp_build_hdrs(udp); 8614 rw_exit(&udp->udp_rwlock); 8615 8616 return (0); 8617 } 8618 8619 static int 8620 udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error) 8621 { 8622 ire_t *ire; 8623 udp_fanout_t *udpf; 8624 udp_stack_t *us = udp->udp_us; 8625 8626 ASSERT(udp->udp_pending_op != -1); 8627 rw_enter(&udp->udp_rwlock, RW_WRITER); 8628 if (error == 0) { 8629 /* For udp_do_connect() success */ 8630 /* udp_do_bind() success will do nothing in here */ 8631 /* 8632 * If a broadcast/multicast address was bound, set 8633 * the source address to 0. 8634 * This ensures no datagrams with broadcast address 8635 * as source address are emitted (which would violate 8636 * RFC1122 - Hosts requirements) 8637 * 8638 * Note that when connecting the returned IRE is 8639 * for the destination address and we only perform 8640 * the broadcast check for the source address (it 8641 * is OK to connect to a broadcast/multicast address.) 8642 */ 8643 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 8644 ire = (ire_t *)ire_mp->b_rptr; 8645 8646 /* 8647 * Note: we get IRE_BROADCAST for IPv6 to "mark" a 8648 * multicast local address. 8649 */ 8650 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8651 us->us_bind_fanout_size)]; 8652 if (ire->ire_type == IRE_BROADCAST && 8653 udp->udp_state != TS_DATA_XFER) { 8654 ASSERT(udp->udp_pending_op == T_BIND_REQ || 8655 udp->udp_pending_op == O_T_BIND_REQ); 8656 /* 8657 * This was just a local bind to a broadcast 8658 * addr. 
8659 */ 8660 mutex_enter(&udpf->uf_lock); 8661 V6_SET_ZERO(udp->udp_v6src); 8662 mutex_exit(&udpf->uf_lock); 8663 if (udp->udp_family == AF_INET6) 8664 (void) udp_build_hdrs(udp); 8665 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8666 if (udp->udp_family == AF_INET6) 8667 (void) udp_build_hdrs(udp); 8668 } 8669 } 8670 } else { 8671 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8672 us->us_bind_fanout_size)]; 8673 mutex_enter(&udpf->uf_lock); 8674 8675 if (udp->udp_state == TS_DATA_XFER) { 8676 /* Connect failed */ 8677 /* Revert back to the bound source */ 8678 udp->udp_v6src = udp->udp_bound_v6src; 8679 udp->udp_state = TS_IDLE; 8680 } else { 8681 /* For udp_do_bind() failed */ 8682 V6_SET_ZERO(udp->udp_v6src); 8683 V6_SET_ZERO(udp->udp_bound_v6src); 8684 udp->udp_state = TS_UNBND; 8685 udp_bind_hash_remove(udp, B_TRUE); 8686 udp->udp_port = 0; 8687 } 8688 mutex_exit(&udpf->uf_lock); 8689 if (udp->udp_family == AF_INET6) 8690 (void) udp_build_hdrs(udp); 8691 } 8692 udp->udp_pending_op = -1; 8693 rw_exit(&udp->udp_rwlock); 8694 if (ire_mp != NULL) 8695 freeb(ire_mp); 8696 return (error); 8697 } 8698 8699 /* 8700 * It associates a default destination address with the stream. 8701 */ 8702 static int 8703 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len) 8704 { 8705 sin6_t *sin6; 8706 sin_t *sin; 8707 in6_addr_t v6dst; 8708 ipaddr_t v4dst; 8709 uint16_t dstport; 8710 uint32_t flowinfo; 8711 mblk_t *ire_mp; 8712 udp_fanout_t *udpf; 8713 udp_t *udp, *udp1; 8714 ushort_t ipversion; 8715 udp_stack_t *us; 8716 int error; 8717 8718 udp = connp->conn_udp; 8719 us = udp->udp_us; 8720 8721 /* 8722 * Address has been verified by the caller 8723 */ 8724 switch (len) { 8725 default: 8726 /* 8727 * Should never happen 8728 */ 8729 return (EINVAL); 8730 8731 case sizeof (sin_t): 8732 sin = (sin_t *)sa; 8733 v4dst = sin->sin_addr.s_addr; 8734 dstport = sin->sin_port; 8735 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8736 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8737 ipversion = IPV4_VERSION; 8738 break; 8739 8740 case sizeof (sin6_t): 8741 sin6 = (sin6_t *)sa; 8742 v6dst = sin6->sin6_addr; 8743 dstport = sin6->sin6_port; 8744 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 8745 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 8746 ipversion = IPV4_VERSION; 8747 flowinfo = 0; 8748 } else { 8749 ipversion = IPV6_VERSION; 8750 flowinfo = sin6->sin6_flowinfo; 8751 } 8752 break; 8753 } 8754 8755 if (dstport == 0) 8756 return (-TBADADDR); 8757 8758 rw_enter(&udp->udp_rwlock, RW_WRITER); 8759 8760 /* 8761 * This UDP must have bound to a port already before doing a connect. 8762 * TPI mandates that users must send TPI primitives only 1 at a time 8763 * and wait for the response before sending the next primitive. 
8764 */ 8765 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8766 rw_exit(&udp->udp_rwlock); 8767 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8768 "udp_connect: bad state, %u", udp->udp_state); 8769 return (-TOUTSTATE); 8770 } 8771 udp->udp_pending_op = T_CONN_REQ; 8772 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 8773 8774 if (ipversion == IPV4_VERSION) { 8775 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8776 udp->udp_ip_snd_options_len; 8777 } else { 8778 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8779 } 8780 8781 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8782 us->us_bind_fanout_size)]; 8783 8784 mutex_enter(&udpf->uf_lock); 8785 if (udp->udp_state == TS_DATA_XFER) { 8786 /* Already connected - clear out state */ 8787 udp->udp_v6src = udp->udp_bound_v6src; 8788 udp->udp_state = TS_IDLE; 8789 } 8790 8791 /* 8792 * Create a default IP header with no IP options. 8793 */ 8794 udp->udp_dstport = dstport; 8795 udp->udp_ipversion = ipversion; 8796 if (ipversion == IPV4_VERSION) { 8797 /* 8798 * Interpret a zero destination to mean loopback. 8799 * Update the T_CONN_REQ (sin/sin6) since it is used to 8800 * generate the T_CONN_CON. 8801 */ 8802 if (v4dst == INADDR_ANY) { 8803 v4dst = htonl(INADDR_LOOPBACK); 8804 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8805 if (udp->udp_family == AF_INET) { 8806 sin->sin_addr.s_addr = v4dst; 8807 } else { 8808 sin6->sin6_addr = v6dst; 8809 } 8810 } 8811 udp->udp_v6dst = v6dst; 8812 udp->udp_flowinfo = 0; 8813 8814 /* 8815 * If the destination address is multicast and 8816 * an outgoing multicast interface has been set, 8817 * use the address of that interface as our 8818 * source address if no source address has been set. 8819 */ 8820 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 8821 CLASSD(v4dst) && 8822 udp->udp_multicast_if_addr != INADDR_ANY) { 8823 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 8824 &udp->udp_v6src); 8825 } 8826 } else { 8827 ASSERT(udp->udp_ipversion == IPV6_VERSION); 8828 /* 8829 * Interpret a zero destination to mean loopback. 8830 * Update the T_CONN_REQ (sin/sin6) since it is used to 8831 * generate the T_CONN_CON. 8832 */ 8833 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 8834 v6dst = ipv6_loopback; 8835 sin6->sin6_addr = v6dst; 8836 } 8837 udp->udp_v6dst = v6dst; 8838 udp->udp_flowinfo = flowinfo; 8839 /* 8840 * If the destination address is multicast and 8841 * an outgoing multicast interface has been set, 8842 * then the ip bind logic will pick the correct source 8843 * address (i.e. matching the outgoing multicast interface). 
8844 */ 8845 } 8846 8847 /* 8848 * Verify that the src/port/dst/port is unique for all 8849 * connections in TS_DATA_XFER 8850 */ 8851 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 8852 if (udp1->udp_state != TS_DATA_XFER) 8853 continue; 8854 if (udp->udp_port != udp1->udp_port || 8855 udp->udp_ipversion != udp1->udp_ipversion || 8856 dstport != udp1->udp_dstport || 8857 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 8858 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) || 8859 !(IPCL_ZONE_MATCH(udp->udp_connp, 8860 udp1->udp_connp->conn_zoneid) || 8861 IPCL_ZONE_MATCH(udp1->udp_connp, 8862 udp->udp_connp->conn_zoneid))) 8863 continue; 8864 mutex_exit(&udpf->uf_lock); 8865 udp->udp_pending_op = -1; 8866 rw_exit(&udp->udp_rwlock); 8867 return (-TBADADDR); 8868 } 8869 8870 if (cl_inet_connect2 != NULL) { 8871 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error); 8872 if (error != 0) { 8873 mutex_exit(&udpf->uf_lock); 8874 udp->udp_pending_op = -1; 8875 rw_exit(&udp->udp_rwlock); 8876 return (-TBADADDR); 8877 } 8878 } 8879 8880 udp->udp_state = TS_DATA_XFER; 8881 mutex_exit(&udpf->uf_lock); 8882 8883 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 8884 if (ire_mp == NULL) { 8885 mutex_enter(&udpf->uf_lock); 8886 udp->udp_state = TS_IDLE; 8887 udp->udp_pending_op = -1; 8888 mutex_exit(&udpf->uf_lock); 8889 rw_exit(&udp->udp_rwlock); 8890 return (ENOMEM); 8891 } 8892 8893 rw_exit(&udp->udp_rwlock); 8894 8895 ire_mp->b_wptr += sizeof (ire_t); 8896 ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8897 8898 if (udp->udp_family == AF_INET) { 8899 error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP, 8900 &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port, 8901 V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport, 8902 B_TRUE, B_TRUE); 8903 } else { 8904 error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP, 8905 &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst, 8906 &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE); 8907 } 8908 8909 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 8910 } 8911 8912 /* ARGSUSED */ 8913 static int 8914 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 8915 socklen_t len, sock_connid_t *id, cred_t *cr) 8916 { 8917 conn_t *connp = (conn_t *)proto_handle; 8918 udp_t *udp = connp->conn_udp; 8919 int error; 8920 boolean_t did_bind = B_FALSE; 8921 8922 if (sa == NULL) { 8923 /* 8924 * Disconnect 8925 * Make sure we are connected 8926 */ 8927 if (udp->udp_state != TS_DATA_XFER) 8928 return (EINVAL); 8929 8930 error = udp_disconnect(connp); 8931 return (error); 8932 } 8933 8934 error = proto_verify_ip_addr(udp->udp_family, sa, len); 8935 if (error != 0) 8936 goto done; 8937 8938 /* do an implicit bind if necessary */ 8939 if (udp->udp_state == TS_UNBND) { 8940 error = udp_implicit_bind(connp, cr); 8941 /* 8942 * We could be racing with an actual bind, in which case 8943 * we would see EPROTO. We cross our fingers and try 8944 * to connect. 
8945 */ 8946 if (!(error == 0 || error == EPROTO)) 8947 goto done; 8948 did_bind = B_TRUE; 8949 } 8950 /* 8951 * set SO_DGRAM_ERRIND 8952 */ 8953 udp->udp_dgram_errind = B_TRUE; 8954 8955 error = udp_do_connect(connp, sa, len); 8956 8957 if (error != 0 && did_bind) { 8958 int unbind_err; 8959 8960 unbind_err = udp_do_unbind(connp); 8961 ASSERT(unbind_err == 0); 8962 } 8963 8964 if (error == 0) { 8965 *id = 0; 8966 (*connp->conn_upcalls->su_connected) 8967 (connp->conn_upper_handle, 0, NULL, -1); 8968 } else if (error < 0) { 8969 error = proto_tlitosyserr(-error); 8970 } 8971 8972 done: 8973 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 8974 /* 8975 * No need to hold locks to set state 8976 * after connect failure socket state is undefined 8977 * We set the state only to imitate old sockfs behavior 8978 */ 8979 udp->udp_state = TS_IDLE; 8980 } 8981 return (error); 8982 } 8983 8984 /* ARGSUSED */ 8985 int 8986 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 8987 cred_t *cr) 8988 { 8989 conn_t *connp = (conn_t *)proto_handle; 8990 udp_t *udp = connp->conn_udp; 8991 udp_stack_t *us = udp->udp_us; 8992 int error = 0; 8993 8994 ASSERT(DB_TYPE(mp) == M_DATA); 8995 8996 /* 8997 * If the socket is connected and no change in destination 8998 */ 8999 if (msg->msg_namelen == 0) { 9000 error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid); 9001 if (error == EDESTADDRREQ) 9002 return (error); 9003 else 9004 return (udp->udp_dgram_errind ? error : 0); 9005 } 9006 9007 /* 9008 * Do an implicit bind if necessary. 9009 */ 9010 if (udp->udp_state == TS_UNBND) { 9011 error = udp_implicit_bind(connp, cr); 9012 /* 9013 * We could be racing with an actual bind, in which case 9014 * we would see EPROTO. We cross our fingers and try 9015 * to send. 
9016 */ 9017 if (!(error == 0 || error == EPROTO)) { 9018 freemsg(mp); 9019 return (error); 9020 } 9021 } 9022 9023 rw_enter(&udp->udp_rwlock, RW_WRITER); 9024 9025 if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) { 9026 rw_exit(&udp->udp_rwlock); 9027 freemsg(mp); 9028 return (EISCONN); 9029 } 9030 9031 9032 if (udp->udp_delayed_error != 0) { 9033 boolean_t match; 9034 9035 error = udp->udp_delayed_error; 9036 match = B_FALSE; 9037 udp->udp_delayed_error = 0; 9038 switch (udp->udp_family) { 9039 case AF_INET: { 9040 /* Compare just IP address and port */ 9041 sin_t *sin1 = (sin_t *)msg->msg_name; 9042 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 9043 9044 if (msg->msg_namelen == sizeof (sin_t) && 9045 sin1->sin_port == sin2->sin_port && 9046 sin1->sin_addr.s_addr == sin2->sin_addr.s_addr) 9047 match = B_TRUE; 9048 9049 break; 9050 } 9051 case AF_INET6: { 9052 sin6_t *sin1 = (sin6_t *)msg->msg_name; 9053 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 9054 9055 if (msg->msg_namelen == sizeof (sin6_t) && 9056 sin1->sin6_port == sin2->sin6_port && 9057 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 9058 &sin2->sin6_addr)) 9059 match = B_TRUE; 9060 break; 9061 } 9062 default: 9063 ASSERT(0); 9064 } 9065 9066 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 9067 9068 if (match) { 9069 rw_exit(&udp->udp_rwlock); 9070 freemsg(mp); 9071 return (error); 9072 } 9073 } 9074 9075 error = proto_verify_ip_addr(udp->udp_family, 9076 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 9077 rw_exit(&udp->udp_rwlock); 9078 9079 if (error != 0) { 9080 freemsg(mp); 9081 return (error); 9082 } 9083 9084 error = udp_send_not_connected(connp, mp, 9085 (struct sockaddr *)msg->msg_name, msg->msg_namelen, msg, cr, 9086 curproc->p_pid); 9087 if (error != 0) { 9088 UDP_STAT(us, udp_out_err_output); 9089 freemsg(mp); 9090 } 9091 return (udp->udp_dgram_errind ? 
	    error : 0);
}

void
udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
    boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb)
{
	conn_t	*connp = (conn_t *)proto_handle;
	udp_t	*udp;
	struct T_capability_ack tca;
	struct sockaddr_in6 laddr, faddr;
	socklen_t laddrlen, faddrlen;
	short opts;
	struct stroptions *stropt;
	mblk_t *stropt_mp;
	int error;

	udp = connp->conn_udp;

	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);

	/*
	 * Set up the fallback stream that was allocated.
	 */
	connp->conn_dev = (dev_t)RD(q)->q_ptr;
	connp->conn_minor_arena = WR(q)->q_ptr;

	RD(q)->q_ptr = WR(q)->q_ptr = connp;

	WR(q)->q_qinfo = &udp_winit;

	connp->conn_rq = RD(q);
	connp->conn_wq = WR(q);

	/* Notify stream head about options before sending up data */
	stropt_mp->b_datap->db_type = M_SETOPTS;
	stropt_mp->b_wptr += sizeof (*stropt);
	stropt = (struct stroptions *)stropt_mp->b_rptr;
	stropt->so_flags = SO_WROFF | SO_HIWAT;
	stropt->so_wroff =
	    (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra);
	stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
	putnext(RD(q), stropt_mp);

	/*
	 * Free the helper stream.
	 */
	ip_close_helper_stream(connp);

	if (!direct_sockfs)
		udp_disable_direct_sockfs(udp);

	/*
	 * Collect the information needed to sync with the sonode.
	 */
	udp_do_capability_ack(udp, &tca, TC1_INFO);

	laddrlen = faddrlen = sizeof (sin6_t);
	(void) udp_getsockname((sock_lower_handle_t)connp,
	    (struct sockaddr *)&laddr, &laddrlen, NULL);
	error = udp_getpeername((sock_lower_handle_t)connp,
	    (struct sockaddr *)&faddr, &faddrlen, NULL);
	if (error != 0)
		faddrlen = 0;

	opts = 0;
	if (udp->udp_dgram_errind)
		opts |= SO_DGRAM_ERRIND;
	if (udp->udp_dontroute)
		opts |= SO_DONTROUTE;

	/*
	 * Once we grab the drain lock, no data will be sent up
	 * to the socket. So we notify the socket that the endpoint
	 * is quiescent and it's therefore safe to move data from
	 * the socket to the stream head.
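	 * The quiesced_cb upcall hands the endpoint's addresses,
	 * capabilities and option settings to sockfs so it can sync up the
	 * sonode; once it returns, the packets queued on the udp_t below
	 * are pushed up the new stream with putnext().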
9167 */ 9168 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 9169 (struct sockaddr *)&laddr, laddrlen, 9170 (struct sockaddr *)&faddr, faddrlen, opts); 9171 9172 /* 9173 * push up any packets that were queued in udp_t 9174 */ 9175 9176 mutex_enter(&udp->udp_recv_lock); 9177 while (udp->udp_fallback_queue_head != NULL) { 9178 mblk_t *mp; 9179 mp = udp->udp_fallback_queue_head; 9180 udp->udp_fallback_queue_head = mp->b_next; 9181 mutex_exit(&udp->udp_recv_lock); 9182 mp->b_next = NULL; 9183 putnext(RD(q), mp); 9184 mutex_enter(&udp->udp_recv_lock); 9185 } 9186 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 9187 /* 9188 * No longer a streams less socket 9189 */ 9190 connp->conn_flags &= ~IPCL_NONSTR; 9191 mutex_exit(&udp->udp_recv_lock); 9192 9193 ASSERT(connp->conn_ref >= 1); 9194 } 9195 9196 static int 9197 udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9198 { 9199 sin_t *sin = (sin_t *)sa; 9200 sin6_t *sin6 = (sin6_t *)sa; 9201 9202 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9203 ASSERT(udp != NULL); 9204 9205 if (udp->udp_state != TS_DATA_XFER) 9206 return (ENOTCONN); 9207 9208 switch (udp->udp_family) { 9209 case AF_INET: 9210 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9211 9212 if (*salenp < sizeof (sin_t)) 9213 return (EINVAL); 9214 9215 *salenp = sizeof (sin_t); 9216 *sin = sin_null; 9217 sin->sin_family = AF_INET; 9218 sin->sin_port = udp->udp_dstport; 9219 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 9220 break; 9221 case AF_INET6: 9222 if (*salenp < sizeof (sin6_t)) 9223 return (EINVAL); 9224 9225 *salenp = sizeof (sin6_t); 9226 *sin6 = sin6_null; 9227 sin6->sin6_family = AF_INET6; 9228 sin6->sin6_port = udp->udp_dstport; 9229 sin6->sin6_addr = udp->udp_v6dst; 9230 sin6->sin6_flowinfo = udp->udp_flowinfo; 9231 break; 9232 } 9233 9234 return (0); 9235 } 9236 9237 /* ARGSUSED */ 9238 int 9239 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9240 socklen_t *salenp, cred_t *cr) 9241 { 9242 conn_t *connp = (conn_t *)proto_handle; 9243 udp_t *udp = connp->conn_udp; 9244 int error; 9245 9246 ASSERT(udp != NULL); 9247 9248 rw_enter(&udp->udp_rwlock, RW_READER); 9249 9250 error = udp_do_getpeername(udp, sa, salenp); 9251 9252 rw_exit(&udp->udp_rwlock); 9253 9254 return (error); 9255 } 9256 9257 static int 9258 udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9259 { 9260 sin_t *sin = (sin_t *)sa; 9261 sin6_t *sin6 = (sin6_t *)sa; 9262 9263 ASSERT(udp != NULL); 9264 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9265 9266 switch (udp->udp_family) { 9267 case AF_INET: 9268 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9269 9270 if (*salenp < sizeof (sin_t)) 9271 return (EINVAL); 9272 9273 *salenp = sizeof (sin_t); 9274 *sin = sin_null; 9275 sin->sin_family = AF_INET; 9276 if (udp->udp_state == TS_UNBND) { 9277 break; 9278 } 9279 sin->sin_port = udp->udp_port; 9280 9281 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 9282 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9283 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 9284 } else { 9285 /* 9286 * INADDR_ANY 9287 * udp_v6src is not set, we might be bound to 9288 * broadcast/multicast. 
Use udp_bound_v6src as 9289 * local address instead (that could 9290 * also still be INADDR_ANY) 9291 */ 9292 sin->sin_addr.s_addr = 9293 V4_PART_OF_V6(udp->udp_bound_v6src); 9294 } 9295 break; 9296 9297 case AF_INET6: 9298 if (*salenp < sizeof (sin6_t)) 9299 return (EINVAL); 9300 9301 *salenp = sizeof (sin6_t); 9302 *sin6 = sin6_null; 9303 sin6->sin6_family = AF_INET6; 9304 if (udp->udp_state == TS_UNBND) { 9305 break; 9306 } 9307 sin6->sin6_port = udp->udp_port; 9308 9309 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9310 sin6->sin6_addr = udp->udp_v6src; 9311 } else { 9312 /* 9313 * UNSPECIFIED 9314 * udp_v6src is not set, we might be bound to 9315 * broadcast/multicast. Use udp_bound_v6src as 9316 * local address instead (that could 9317 * also still be UNSPECIFIED) 9318 */ 9319 sin6->sin6_addr = udp->udp_bound_v6src; 9320 } 9321 } 9322 return (0); 9323 } 9324 9325 /* ARGSUSED */ 9326 int 9327 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9328 socklen_t *salenp, cred_t *cr) 9329 { 9330 conn_t *connp = (conn_t *)proto_handle; 9331 udp_t *udp = connp->conn_udp; 9332 int error; 9333 9334 ASSERT(udp != NULL); 9335 rw_enter(&udp->udp_rwlock, RW_READER); 9336 9337 error = udp_do_getsockname(udp, sa, salenp); 9338 9339 rw_exit(&udp->udp_rwlock); 9340 9341 return (error); 9342 } 9343 9344 int 9345 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9346 void *optvalp, socklen_t *optlen, cred_t *cr) 9347 { 9348 conn_t *connp = (conn_t *)proto_handle; 9349 udp_t *udp = connp->conn_udp; 9350 int error; 9351 t_uscalar_t max_optbuf_len; 9352 void *optvalp_buf; 9353 int len; 9354 9355 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 9356 udp_opt_obj.odb_opt_des_arr, 9357 udp_opt_obj.odb_opt_arr_cnt, 9358 udp_opt_obj.odb_topmost_tpiprovider, 9359 B_FALSE, B_TRUE, cr); 9360 if (error != 0) { 9361 if (error < 0) 9362 error = proto_tlitosyserr(-error); 9363 return (error); 9364 } 9365 9366 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 9367 rw_enter(&udp->udp_rwlock, RW_READER); 9368 len = udp_opt_get(connp, level, option_name, optvalp_buf); 9369 rw_exit(&udp->udp_rwlock); 9370 9371 if (len < 0) { 9372 /* 9373 * Pass on to IP 9374 */ 9375 kmem_free(optvalp_buf, max_optbuf_len); 9376 return (ip_get_options(connp, level, option_name, 9377 optvalp, optlen, cr)); 9378 } else { 9379 /* 9380 * update optlen and copy option value 9381 */ 9382 t_uscalar_t size = MIN(len, *optlen); 9383 bcopy(optvalp_buf, optvalp, size); 9384 bcopy(&size, optlen, sizeof (size)); 9385 9386 kmem_free(optvalp_buf, max_optbuf_len); 9387 return (0); 9388 } 9389 } 9390 9391 int 9392 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9393 const void *optvalp, socklen_t optlen, cred_t *cr) 9394 { 9395 conn_t *connp = (conn_t *)proto_handle; 9396 udp_t *udp = connp->conn_udp; 9397 int error; 9398 9399 error = proto_opt_check(level, option_name, optlen, NULL, 9400 udp_opt_obj.odb_opt_des_arr, 9401 udp_opt_obj.odb_opt_arr_cnt, 9402 udp_opt_obj.odb_topmost_tpiprovider, 9403 B_TRUE, B_FALSE, cr); 9404 9405 if (error != 0) { 9406 if (error < 0) 9407 error = proto_tlitosyserr(-error); 9408 return (error); 9409 } 9410 9411 rw_enter(&udp->udp_rwlock, RW_WRITER); 9412 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 9413 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 9414 NULL, cr); 9415 rw_exit(&udp->udp_rwlock); 9416 9417 if (error < 0) { 9418 /* 9419 * Pass on to ip 9420 */ 9421 error = 
ip_set_options(connp, level, option_name, optvalp, 9422 optlen, cr); 9423 } 9424 9425 return (error); 9426 } 9427 9428 void 9429 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 9430 { 9431 conn_t *connp = (conn_t *)proto_handle; 9432 udp_t *udp = connp->conn_udp; 9433 9434 mutex_enter(&udp->udp_recv_lock); 9435 connp->conn_flow_cntrld = B_FALSE; 9436 mutex_exit(&udp->udp_recv_lock); 9437 } 9438 9439 /* ARGSUSED */ 9440 int 9441 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 9442 { 9443 conn_t *connp = (conn_t *)proto_handle; 9444 9445 /* shut down the send side */ 9446 if (how != SHUT_RD) 9447 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9448 SOCK_OPCTL_SHUT_SEND, 0); 9449 /* shut down the recv side */ 9450 if (how != SHUT_WR) 9451 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9452 SOCK_OPCTL_SHUT_RECV, 0); 9453 return (0); 9454 } 9455 9456 int 9457 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 9458 int mode, int32_t *rvalp, cred_t *cr) 9459 { 9460 conn_t *connp = (conn_t *)proto_handle; 9461 int error; 9462 9463 switch (cmd) { 9464 case ND_SET: 9465 case ND_GET: 9466 case _SIOCSOCKFALLBACK: 9467 case TI_GETPEERNAME: 9468 case TI_GETMYNAME: 9469 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 9470 cmd)); 9471 error = EINVAL; 9472 break; 9473 default: 9474 /* 9475 * Pass on to IP using helper stream 9476 */ 9477 error = ldi_ioctl( 9478 connp->conn_helper_info->ip_helper_stream_handle, 9479 cmd, arg, mode, cr, rvalp); 9480 break; 9481 } 9482 return (error); 9483 } 9484 9485 /* ARGSUSED */ 9486 int 9487 udp_accept(sock_lower_handle_t lproto_handle, 9488 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 9489 cred_t *cr) 9490 { 9491 return (EOPNOTSUPP); 9492 } 9493 9494 /* ARGSUSED */ 9495 int 9496 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 9497 { 9498 return (EOPNOTSUPP); 9499 } 9500 9501 sock_downcalls_t sock_udp_downcalls = { 9502 udp_activate, /* sd_activate */ 9503 udp_accept, /* sd_accept */ 9504 udp_bind, /* sd_bind */ 9505 udp_listen, /* sd_listen */ 9506 udp_connect, /* sd_connect */ 9507 udp_getpeername, /* sd_getpeername */ 9508 udp_getsockname, /* sd_getsockname */ 9509 udp_getsockopt, /* sd_getsockopt */ 9510 udp_setsockopt, /* sd_setsockopt */ 9511 udp_send, /* sd_send */ 9512 NULL, /* sd_send_uio */ 9513 NULL, /* sd_recv_uio */ 9514 NULL, /* sd_poll */ 9515 udp_shutdown, /* sd_shutdown */ 9516 udp_clr_flowctrl, /* sd_setflowctrl */ 9517 udp_ioctl, /* sd_ioctl */ 9518 udp_close /* sd_close */ 9519 }; 9520
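/*
 * sock_udp_downcalls is handed to sockfs by udp_create() (via the
 * sock_downcalls argument). The entries left NULL above (sd_send_uio,
 * sd_recv_uio and sd_poll) are operations for which UDP supplies no
 * protocol-specific routine and relies on the generic sockfs handling.
 */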