1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/pattr.h> 31 #include <sys/stropts.h> 32 #include <sys/strlog.h> 33 #include <sys/strsun.h> 34 #include <sys/time.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/suntpi.h> 42 #include <sys/xti_inet.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/ucred.h> 46 #include <sys/zone.h> 47 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/sockio.h> 51 #include <sys/vtrace.h> 52 #include <sys/sdt.h> 53 #include <sys/debug.h> 54 #include <sys/isa_defs.h> 55 #include <sys/random.h> 56 #include <netinet/in.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 #include <netinet/udp.h> 60 #include <net/if.h> 61 #include <net/route.h> 62 63 #include <inet/common.h> 64 #include <inet/ip.h> 65 #include <inet/ip_impl.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_ire.h> 68 #include <inet/ip_if.h> 69 #include <inet/ip_multi.h> 70 #include <inet/ip_ndp.h> 71 #include <inet/proto_set.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/optcom.h> 75 #include <inet/snmpcom.h> 76 #include <inet/kstatcom.h> 77 #include <inet/udp_impl.h> 78 #include <inet/ipclassifier.h> 79 #include <inet/ipsec_impl.h> 80 #include <inet/ipp_common.h> 81 #include <sys/squeue_impl.h> 82 #include <inet/ipnet.h> 83 #include <sys/ethernet.h> 84 85 /* 86 * The ipsec_info.h header file is here since it has the definition for the 87 * M_CTL message types used by IP to convey information to the ULP. The 88 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 89 */ 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 93 #include <sys/tsol/label.h> 94 #include <sys/tsol/tnet.h> 95 #include <rpc/pmap_prot.h> 96 97 /* 98 * Synchronization notes: 99 * 100 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 101 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock. 102 * We also use conn_lock when updating things that affect the IP classifier 103 * lookup. 104 * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock. 105 * 106 * The fanout lock uf_lock: 107 * When a UDP endpoint is bound to a local port, it is inserted into 108 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 
109 * The size of the array is controlled by the udp_bind_fanout_size variable. 110 * This variable can be changed in /etc/system if the default value is 111 * not large enough. Each bind hash bucket is protected by a per bucket 112 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 113 * structure and a few other fields in the udp_t. A UDP endpoint is removed 114 * from the bind hash list only when it is being unbound or being closed. 115 * The per bucket lock also protects a UDP endpoint's state changes. 116 * 117 * The udp_rwlock: 118 * This protects most of the other fields in the udp_t. The exact list of 119 * fields which are protected by each of the above locks is documented in 120 * the udp_t structure definition. 121 * 122 * Plumbing notes: 123 * UDP is always a device driver. For compatibility with mibopen() code 124 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 125 * dummy module. 126 * 127 * The above implies that we don't support any intermediate module to 128 * reside in between /dev/ip and udp -- in fact, we never supported such 129 * scenario in the past as the inter-layer communication semantics have 130 * always been private. 131 */ 132 133 /* For /etc/system control */ 134 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 135 136 #define NDD_TOO_QUICK_MSG \ 137 "ndd get info rate too high for non-privileged users, try again " \ 138 "later.\n" 139 #define NDD_OUT_OF_BUF_MSG "<< Out of buffer >>\n" 140 141 /* Option processing attrs */ 142 typedef struct udpattrs_s { 143 union { 144 ip6_pkt_t *udpattr_ipp6; /* For V6 */ 145 ip4_pkt_t *udpattr_ipp4; /* For V4 */ 146 } udpattr_ippu; 147 #define udpattr_ipp6 udpattr_ippu.udpattr_ipp6 148 #define udpattr_ipp4 udpattr_ippu.udpattr_ipp4 149 mblk_t *udpattr_mb; 150 boolean_t udpattr_credset; 151 } udpattrs_t; 152 153 static void udp_addr_req(queue_t *q, mblk_t *mp); 154 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 155 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 156 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 157 static int udp_build_hdrs(udp_t *udp); 158 static void udp_capability_req(queue_t *q, mblk_t *mp); 159 static int udp_tpi_close(queue_t *q, int flags); 160 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 161 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 162 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 163 int sys_error); 164 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 165 t_scalar_t tlierr, int unixerr); 166 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 167 cred_t *cr); 168 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 169 char *value, caddr_t cp, cred_t *cr); 170 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 171 char *value, caddr_t cp, cred_t *cr); 172 static void udp_icmp_error(conn_t *, mblk_t *); 173 static void udp_icmp_error_ipv6(conn_t *, mblk_t *); 174 static void udp_info_req(queue_t *q, mblk_t *mp); 175 static void udp_input(void *, mblk_t *, void *); 176 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 177 t_scalar_t addr_length); 178 static void udp_lrput(queue_t *, mblk_t *); 179 static void udp_lwput(queue_t *, mblk_t *); 180 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 181 cred_t *credp, boolean_t isv6); 182 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 183 cred_t *credp); 184 static int udp_openv6(queue_t *q, dev_t *devp, int 
flag, int sflag, 185 cred_t *credp); 186 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 187 int *errorp, udpattrs_t *udpattrs); 188 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 189 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 190 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 191 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 192 cred_t *cr); 193 static void udp_report_item(mblk_t *mp, udp_t *udp); 194 static int udp_rinfop(queue_t *q, infod_t *dp); 195 static int udp_rrw(queue_t *q, struiod_t *dp); 196 static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 197 cred_t *cr); 198 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, 199 ipha_t *ipha); 200 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 201 t_scalar_t destlen, t_scalar_t err); 202 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 203 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 204 boolean_t random); 205 static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, 206 int *, boolean_t, struct nmsghdr *, cred_t *, pid_t); 207 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 208 int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid); 209 static void udp_wput_other(queue_t *q, mblk_t *mp); 210 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 211 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 212 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 213 214 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 215 static void udp_stack_fini(netstackid_t stackid, void *arg); 216 217 static void *udp_kstat_init(netstackid_t stackid); 218 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 219 static void *udp_kstat2_init(netstackid_t, udp_stat_t *); 220 static void udp_kstat2_fini(netstackid_t, kstat_t *); 221 static int udp_kstat_update(kstat_t *kp, int rw); 222 223 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 224 uint_t pkt_len); 225 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 226 static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t); 227 228 static int udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *, 229 cred_t *, pid_t); 230 231 /* Common routine for TPI and socket module */ 232 static conn_t *udp_do_open(cred_t *, boolean_t, int); 233 static void udp_do_close(conn_t *); 234 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 235 boolean_t); 236 static int udp_do_unbind(conn_t *); 237 static int udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *); 238 static int udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *); 239 240 int udp_getsockname(sock_lower_handle_t, 241 struct sockaddr *, socklen_t *, cred_t *); 242 int udp_getpeername(sock_lower_handle_t, 243 struct sockaddr *, socklen_t *, cred_t *); 244 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t); 245 static int udp_post_ip_bind_connect(udp_t *, mblk_t *, int); 246 247 #define UDP_RECV_HIWATER (56 * 1024) 248 #define UDP_RECV_LOWATER 128 249 #define UDP_XMIT_HIWATER (56 * 1024) 250 #define UDP_XMIT_LOWATER 1024 251 252 /* 253 * The following is defined in tcp.c 254 */ 255 extern int (*cl_inet_connect2)(netstackid_t stack_id, 256 uint8_t protocol, boolean_t is_outgoing, 257 sa_family_t addr_family, 258 uint8_t *laddrp, in_port_t lport, 259 uint8_t *faddrp, in_port_t fport, void *args); 260 
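
/*
 * A minimal sketch of the guarded call pattern for this hook (variable
 * names here are illustrative; see the CL_INET_UDP_CONNECT() description
 * below, which requires the hook to be checked for NULL first):
 *
 *	if (cl_inet_connect2 != NULL) {
 *		CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error);
 *		if (error != 0)
 *			return (error);
 *	}
 */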
261 /* 262 * Checks if the given destination addr/port is allowed out. 263 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 264 * Called for each connect() and for sendto()/sendmsg() to a different 265 * destination. 266 * For connect(), called in udp_connect(). 267 * For sendto()/sendmsg(), called in udp_output_v{4,6}(). 268 * 269 * This macro assumes that the cl_inet_connect2 hook is not NULL. 270 * Please check this before calling this macro. 271 * 272 * void 273 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 274 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 275 */ 276 #define CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) { \ 277 (err) = 0; \ 278 /* \ 279 * Running in cluster mode - check and register active \ 280 * "connection" information \ 281 */ \ 282 if ((udp)->udp_ipversion == IPV4_VERSION) \ 283 (err) = (*cl_inet_connect2)( \ 284 (cp)->conn_netstack->netstack_stackid, \ 285 IPPROTO_UDP, is_outgoing, AF_INET, \ 286 (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]), \ 287 (udp)->udp_port, \ 288 (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]), \ 289 (in_port_t)(fport), NULL); \ 290 else \ 291 (err) = (*cl_inet_connect2)( \ 292 (cp)->conn_netstack->netstack_stackid, \ 293 IPPROTO_UDP, is_outgoing, AF_INET6, \ 294 (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port, \ 295 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 296 } 297 298 static struct module_info udp_mod_info = { 299 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 300 }; 301 302 /* 303 * Entry points for UDP as a device. 304 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 305 */ 306 static struct qinit udp_rinitv4 = { 307 NULL, NULL, udp_openv4, udp_tpi_close, NULL, 308 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 309 }; 310 311 static struct qinit udp_rinitv6 = { 312 NULL, NULL, udp_openv6, udp_tpi_close, NULL, 313 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 314 }; 315 316 static struct qinit udp_winit = { 317 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, 318 &udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE 319 }; 320 321 /* UDP entry point during fallback */ 322 struct qinit udp_fallback_sock_winit = { 323 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 324 }; 325 326 /* 327 * UDP needs to handle I_LINK and I_PLINK since ifconfig 328 * likes to use it as a place to hang the various streams. 329 */ 330 static struct qinit udp_lrinit = { 331 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, 332 &udp_mod_info 333 }; 334 335 static struct qinit udp_lwinit = { 336 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, 337 &udp_mod_info 338 }; 339 340 /* For AF_INET aka /dev/udp */ 341 struct streamtab udpinfov4 = { 342 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 343 }; 344 345 /* For AF_INET6 aka /dev/udp6 */ 346 struct streamtab udpinfov6 = { 347 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 348 }; 349 350 static sin_t sin_null; /* Zero address for quick clears */ 351 static sin6_t sin6_null; /* Zero address for quick clears */ 352 353 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 354 355 /* Default structure copied into T_INFO_ACK messages */ 356 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 357 T_INFO_ACK, 358 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 359 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 360 T_INVALID, /* CDATA_size. udp does not support connect data. 
 */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type. udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state. This is set from udp_state. */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

static struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSU_size. udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type. udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state. This is set from udp_state. */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp. These are loaded into us_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
	/* min		max		value		name */
	{ 0L,		256,		32,		"udp_wroff_extra" },
	{ 1L,		255,		255,		"udp_ipv4_ttl" },
	{ 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
	{ 1024,		(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
	{ 0,		1,		1,		"udp_do_checksum" },
	{ 1024,		UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
	{ 1024,		UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
	{ UDP_XMIT_LOWATER, (1<<30),	UDP_XMIT_HIWATER, "udp_xmit_hiwat"},
	{ 0,		(1<<30),	UDP_XMIT_LOWATER, "udp_xmit_lowat"},
	{ UDP_RECV_LOWATER, (1<<30),	UDP_RECV_HIWATER, "udp_recv_hiwat"},
	{ 65536,	(1<<30),	2*1024*1024,	"udp_max_buf"},
	{ 100,		60000,		1000,		"udp_ndd_get_info_interval"},
};
/* END CSTYLED */

/* Settable in /etc/system */
/* If set to 0, pick ephemeral ports sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL.
 */

void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;
void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;

typedef union T_primitives *t_primp_t;

/*
 * Return the next anonymous port in the privileged port range for
 * bind checking.
 *
 * Trusted Extensions (TX) notes: TX allows the administrator to mark or
 * reserve ports as Multilevel Ports (MLPs). An MLP has a special function
 * on TX systems. Once a port is made an MLP, it is not available as an
 * ordinary port. This creates "holes" in the port name space. It
 * may be necessary to skip the "holes" to find a suitable anon port.
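 *
 * (udp_get_next_priv_port() below handles that: on labeled systems it
 * consults tsol_next_port() before settling on a candidate port.)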
439 */ 440 static in_port_t 441 udp_get_next_priv_port(udp_t *udp) 442 { 443 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 444 in_port_t nextport; 445 boolean_t restart = B_FALSE; 446 udp_stack_t *us = udp->udp_us; 447 448 retry: 449 if (next_priv_port < us->us_min_anonpriv_port || 450 next_priv_port >= IPPORT_RESERVED) { 451 next_priv_port = IPPORT_RESERVED - 1; 452 if (restart) 453 return (0); 454 restart = B_TRUE; 455 } 456 457 if (is_system_labeled() && 458 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 459 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 460 next_priv_port = nextport; 461 goto retry; 462 } 463 464 return (next_priv_port--); 465 } 466 467 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 468 /* ARGSUSED */ 469 static int 470 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 471 { 472 udp_fanout_t *udpf; 473 int i; 474 zoneid_t zoneid; 475 conn_t *connp; 476 udp_t *udp; 477 udp_stack_t *us; 478 479 connp = Q_TO_CONN(q); 480 udp = connp->conn_udp; 481 us = udp->udp_us; 482 483 /* Refer to comments in udp_status_report(). */ 484 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 485 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 486 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 487 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 488 return (0); 489 } 490 } 491 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 492 /* The following may work even if we cannot get a large buf. */ 493 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 494 return (0); 495 } 496 497 (void) mi_mpprintf(mp, 498 "UDP " MI_COL_HDRPAD_STR 499 /* 12345678[89ABCDEF] */ 500 " zone lport src addr dest addr port state"); 501 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 502 503 zoneid = connp->conn_zoneid; 504 505 for (i = 0; i < us->us_bind_fanout_size; i++) { 506 udpf = &us->us_bind_fanout[i]; 507 mutex_enter(&udpf->uf_lock); 508 509 /* Print the hash index. */ 510 udp = udpf->uf_udp; 511 if (zoneid != GLOBAL_ZONEID) { 512 /* skip to first entry in this zone; might be none */ 513 while (udp != NULL && 514 udp->udp_connp->conn_zoneid != zoneid) 515 udp = udp->udp_bind_hash; 516 } 517 if (udp != NULL) { 518 uint_t print_len, buf_len; 519 520 buf_len = mp->b_cont->b_datap->db_lim - 521 mp->b_cont->b_wptr; 522 print_len = snprintf((char *)mp->b_cont->b_wptr, 523 buf_len, "%d\n", i); 524 if (print_len < buf_len) { 525 mp->b_cont->b_wptr += print_len; 526 } else { 527 mp->b_cont->b_wptr += buf_len; 528 } 529 for (; udp != NULL; udp = udp->udp_bind_hash) { 530 if (zoneid == GLOBAL_ZONEID || 531 zoneid == udp->udp_connp->conn_zoneid) 532 udp_report_item(mp->b_cont, udp); 533 } 534 } 535 mutex_exit(&udpf->uf_lock); 536 } 537 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 538 return (0); 539 } 540 541 /* 542 * Hash list removal routine for udp_t structures. 543 */ 544 static void 545 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 546 { 547 udp_t *udpnext; 548 kmutex_t *lockp; 549 udp_stack_t *us = udp->udp_us; 550 551 if (udp->udp_ptpbhn == NULL) 552 return; 553 554 /* 555 * Extract the lock pointer in case there are concurrent 556 * hash_remove's for this instance. 
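 *
 * (udp_ptpbhn is a pointer to whichever pointer currently references this
 * udp_t, either the bucket's uf_udp or the previous entry's udp_bind_hash,
 * so the unlink below reduces to *udp->udp_ptpbhn = udp->udp_bind_hash.)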
	 */
	ASSERT(udp->udp_port != 0);
	if (!caller_holds_lock) {
		lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
		    us->us_bind_fanout_size)].uf_lock;
		ASSERT(lockp != NULL);
		mutex_enter(lockp);
	}
	if (udp->udp_ptpbhn != NULL) {
		udpnext = udp->udp_bind_hash;
		if (udpnext != NULL) {
			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
			udp->udp_bind_hash = NULL;
		}
		*udp->udp_ptpbhn = udpnext;
		udp->udp_ptpbhn = NULL;
	}
	if (!caller_holds_lock) {
		mutex_exit(lockp);
	}
}

static void
udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
{
	udp_t	**udpp;
	udp_t	*udpnext;

	ASSERT(MUTEX_HELD(&uf->uf_lock));
	ASSERT(udp->udp_ptpbhn == NULL);
	udpp = &uf->uf_udp;
	udpnext = udpp[0];
	if (udpnext != NULL) {
		/*
		 * If the new udp is bound to the INADDR_ANY address
		 * and the first one in the list is not bound to
		 * INADDR_ANY, we skip all entries until we find the
		 * first one bound to INADDR_ANY.
		 * This makes sure that applications binding to a
		 * specific address get preference over those binding to
		 * INADDR_ANY.
		 */
		if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) &&
		    !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) {
			while ((udpnext = udpp[0]) != NULL &&
			    !V6_OR_V4_INADDR_ANY(
			    udpnext->udp_bound_v6src)) {
				udpp = &(udpnext->udp_bind_hash);
			}
			if (udpnext != NULL)
				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
		} else {
			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
		}
	}
	udp->udp_bind_hash = udpnext;
	udp->udp_ptpbhn = udpp;
	udpp[0] = udp;
}

/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to udp_wput.
 * It associates a port number and local address with the stream.
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
 * protocol type (IPPROTO_UDP) placed in the message following the address.
 * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
 * (Called as writer.)
 *
 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
 * without setting SO_REUSEADDR. This is needed so that they
 * can be viewed as two independent transport protocols.
 * However, anonymous ports are allocated from the same range to avoid
 * duplicating us->us_next_port_to_try.
 */
static void
udp_tpi_bind(queue_t *q, mblk_t *mp)
{
	sin_t	*sin;
	sin6_t	*sin6;
	mblk_t	*mp1;
	struct T_bind_req *tbr;
	conn_t	*connp;
	udp_t	*udp;
	int	error;
	struct sockaddr	*sa;

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad req, len %u",
		    (uint_t)(mp->b_wptr - mp->b_rptr));
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}
	if (udp->udp_state != TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	/*
	 * Reallocate the message to make sure we have enough room for an
	 * address and the protocol type.
	 */
	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
	if (!mp1) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	mp = mp1;

	/* Reset the message type in preparation for shipping it back. */
	DB_TYPE(mp) = M_PCPROTO;

	tbr = (struct T_bind_req *)mp->b_rptr;
	switch (tbr->ADDR_length) {
	case 0:			/* Request for a generic port */
		tbr->ADDR_offset = sizeof (struct T_bind_req);
		if (udp->udp_family == AF_INET) {
			tbr->ADDR_length = sizeof (sin_t);
			sin = (sin_t *)&tbr[1];
			*sin = sin_null;
			sin->sin_family = AF_INET;
			mp->b_wptr = (uchar_t *)&sin[1];
			sa = (struct sockaddr *)sin;
		} else {
			ASSERT(udp->udp_family == AF_INET6);
			tbr->ADDR_length = sizeof (sin6_t);
			sin6 = (sin6_t *)&tbr[1];
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			mp->b_wptr = (uchar_t *)&sin6[1];
			sa = (struct sockaddr *)sin6;
		}
		break;

	case sizeof (sin_t):	/* Complete IPv4 address */
		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin_t));
		if (sa == NULL || !OK_32PTR((char *)sa)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sa->sa_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		break;

	case sizeof (sin6_t):	/* Complete IPv6 address */
		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin6_t));
		if (sa == NULL || !OK_32PTR((char *)sa)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sa->sa_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		break;

	default:		/* Invalid request */
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad ADDR_length %u", tbr->ADDR_length);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);
	error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
	    tbr->PRIM_type != O_T_BIND_REQ);

	if (error != 0) {
		if (error > 0) {
			udp_err_ack(q, mp, TSYSERR, error);
		} else {
			udp_err_ack(q, mp, -error, 0);
		}
	} else {
		tbr->PRIM_type = T_BIND_ACK;
		qreply(q, mp);
	}
}

/*
 * This routine handles each T_CONN_REQ message passed to udp. It
 * associates a default destination address with the stream.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *	T_BIND_REQ	- specifying local and remote address/port
 *	IRE_DB_REQ_TYPE	- to get an IRE back containing ire_type and src
 *	T_OK_ACK	- for the T_CONN_REQ
 *	T_CONN_CON	- to keep the TPI user happy
 *
 * The connect completes in udp_do_connect.
 * When a T_BIND_ACK is received, information is extracted from the IRE
 * and the two appended messages are sent to the TPI user.
 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
 * convert it to an error ack for the appropriate primitive.
 */
static void
udp_tpi_connect(queue_t *q, mblk_t *mp)
{
	mblk_t	*mp1;
	udp_t	*udp;
	conn_t	*connp = Q_TO_CONN(q);
	int	error;
	socklen_t len;
	struct sockaddr *sa;
	struct T_conn_req *tcr;

	udp = connp->conn_udp;
	tcr = (struct T_conn_req *)mp->b_rptr;

	/* A bit of sanity checking */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}

	if (tcr->OPT_length != 0) {
		udp_err_ack(q, mp, TBADOPT, 0);
		return;
	}

	/*
	 * Determine the packet type based on the type of address passed in;
	 * the request should contain an IPv4 or IPv6 address.
	 * Make sure that the address family matches the family of the
	 * address passed down.
	 */
	len = tcr->DEST_length;
	switch (tcr->DEST_length) {
	default:
		udp_err_ack(q, mp, TBADADDR, 0);
		return;

	case sizeof (sin_t):
		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin_t));
		break;

	case sizeof (sin6_t):
		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin6_t));
		break;
	}

	error = proto_verify_ip_addr(udp->udp_family, sa, len);
	if (error != 0) {
		udp_err_ack(q, mp, TSYSERR, error);
		return;
	}

	/*
	 * We have to send a connection confirmation to
	 * keep TLI happy.
	 */
	if (udp->udp_family == AF_INET) {
		mp1 = mi_tpi_conn_con(NULL, (char *)sa,
		    sizeof (sin_t), NULL, 0);
	} else {
		mp1 = mi_tpi_conn_con(NULL, (char *)sa,
		    sizeof (sin6_t), NULL, 0);
	}
	if (mp1 == NULL) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	/*
	 * ok_ack for T_CONN_REQ
	 */
	mp = mi_tpi_ok_ack_alloc(mp);
	if (mp == NULL) {
		/* Unable to reuse the T_CONN_REQ for the ack. */
		freemsg(mp1);
		udp_err_ack_prim(q, NULL, T_CONN_REQ, TSYSERR, ENOMEM);
		return;
	}

	error = udp_do_connect(connp, sa, len);
	if (error != 0) {
		freeb(mp1);
		if (error < 0)
			udp_err_ack(q, mp, -error, 0);
		else
			udp_err_ack(q, mp, TSYSERR, error);
	} else {
		putnext(connp->conn_rq, mp);
		putnext(connp->conn_rq, mp1);
	}
}

static int
udp_tpi_close(queue_t *q, int flags)
{
	conn_t	*connp;

	if (flags & SO_FALLBACK) {
		/*
		 * The stream is being closed while in fallback; simply
		 * free the resources that were allocated.
		 */
		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
		qprocsoff(q);
		goto done;
	}

	connp = Q_TO_CONN(q);
	udp_do_close(connp);
done:
	q->q_ptr = WR(q)->q_ptr = NULL;
	return (0);
}

/*
 * Called in the close path to quiesce the conn.
 */
void
udp_quiesce_conn(conn_t *connp)
{
	udp_t	*udp = connp->conn_udp;

	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
		/*
		 * Running in cluster mode - register unbind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_unbind)(
			    connp->conn_netstack->netstack_stackid,
			    IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))),
			    (in_port_t)udp->udp_port, NULL);
		} else {
			(*cl_inet_unbind)(
			    connp->conn_netstack->netstack_stackid,
			    IPPROTO_UDP, AF_INET6,
			    (uint8_t *)(&(udp->udp_v6src)),
			    (in_port_t)udp->udp_port, NULL);
		}
	}

	udp_bind_hash_remove(udp, B_FALSE);
}

void
udp_close_free(conn_t *connp)
{
	udp_t *udp = connp->conn_udp;

	/* If there are any options associated with the stream, free them.
*/ 917 if (udp->udp_ip_snd_options != NULL) { 918 mi_free((char *)udp->udp_ip_snd_options); 919 udp->udp_ip_snd_options = NULL; 920 udp->udp_ip_snd_options_len = 0; 921 } 922 923 if (udp->udp_ip_rcv_options != NULL) { 924 mi_free((char *)udp->udp_ip_rcv_options); 925 udp->udp_ip_rcv_options = NULL; 926 udp->udp_ip_rcv_options_len = 0; 927 } 928 929 /* Free memory associated with sticky options */ 930 if (udp->udp_sticky_hdrs_len != 0) { 931 kmem_free(udp->udp_sticky_hdrs, 932 udp->udp_sticky_hdrs_len); 933 udp->udp_sticky_hdrs = NULL; 934 udp->udp_sticky_hdrs_len = 0; 935 } 936 937 ip6_pkt_free(&udp->udp_sticky_ipp); 938 939 /* 940 * Clear any fields which the kmem_cache constructor clears. 941 * Only udp_connp needs to be preserved. 942 * TBD: We should make this more efficient to avoid clearing 943 * everything. 944 */ 945 ASSERT(udp->udp_connp == connp); 946 bzero(udp, sizeof (udp_t)); 947 udp->udp_connp = connp; 948 } 949 950 static int 951 udp_do_disconnect(conn_t *connp) 952 { 953 udp_t *udp; 954 mblk_t *ire_mp; 955 udp_fanout_t *udpf; 956 udp_stack_t *us; 957 int error; 958 959 udp = connp->conn_udp; 960 us = udp->udp_us; 961 rw_enter(&udp->udp_rwlock, RW_WRITER); 962 if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) { 963 rw_exit(&udp->udp_rwlock); 964 return (-TOUTSTATE); 965 } 966 udp->udp_pending_op = T_DISCON_REQ; 967 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 968 us->us_bind_fanout_size)]; 969 mutex_enter(&udpf->uf_lock); 970 udp->udp_v6src = udp->udp_bound_v6src; 971 udp->udp_state = TS_IDLE; 972 mutex_exit(&udpf->uf_lock); 973 974 if (udp->udp_family == AF_INET6) { 975 /* Rebuild the header template */ 976 error = udp_build_hdrs(udp); 977 if (error != 0) { 978 udp->udp_pending_op = -1; 979 rw_exit(&udp->udp_rwlock); 980 return (error); 981 } 982 } 983 984 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 985 if (ire_mp == NULL) { 986 mutex_enter(&udpf->uf_lock); 987 udp->udp_pending_op = -1; 988 mutex_exit(&udpf->uf_lock); 989 rw_exit(&udp->udp_rwlock); 990 return (ENOMEM); 991 } 992 993 rw_exit(&udp->udp_rwlock); 994 995 if (udp->udp_family == AF_INET6) { 996 error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP, 997 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 998 } else { 999 error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP, 1000 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE); 1001 } 1002 1003 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 1004 } 1005 1006 1007 static void 1008 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 1009 { 1010 conn_t *connp = Q_TO_CONN(q); 1011 int error; 1012 1013 /* 1014 * Allocate the largest primitive we need to send back 1015 * T_error_ack is > than T_ok_ack 1016 */ 1017 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 1018 if (mp == NULL) { 1019 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 1020 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 1021 return; 1022 } 1023 1024 error = udp_do_disconnect(connp); 1025 1026 if (error != 0) { 1027 if (error < 0) { 1028 udp_err_ack(q, mp, -error, 0); 1029 } else { 1030 udp_err_ack(q, mp, TSYSERR, error); 1031 } 1032 } else { 1033 mp = mi_tpi_ok_ack_alloc(mp); 1034 ASSERT(mp != NULL); 1035 qreply(q, mp); 1036 } 1037 } 1038 1039 int 1040 udp_disconnect(conn_t *connp) 1041 { 1042 int error; 1043 udp_t *udp = connp->conn_udp; 1044 1045 udp->udp_dgram_errind = B_FALSE; 1046 1047 error = udp_do_disconnect(connp); 1048 1049 if (error < 0) 1050 error = proto_tlitosyserr(-error); 1051 1052 return (error); 1053 } 1054 1055 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 1056 static void 1057 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1058 { 1059 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1060 qreply(q, mp); 1061 } 1062 1063 /* Shorthand to generate and send TPI error acks to our client */ 1064 static void 1065 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 1066 int sys_error) 1067 { 1068 struct T_error_ack *teackp; 1069 1070 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1071 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1072 teackp = (struct T_error_ack *)mp->b_rptr; 1073 teackp->ERROR_prim = primitive; 1074 teackp->TLI_error = t_error; 1075 teackp->UNIX_error = sys_error; 1076 qreply(q, mp); 1077 } 1078 } 1079 1080 /*ARGSUSED*/ 1081 static int 1082 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 1083 { 1084 int i; 1085 udp_t *udp = Q_TO_UDP(q); 1086 udp_stack_t *us = udp->udp_us; 1087 1088 for (i = 0; i < us->us_num_epriv_ports; i++) { 1089 if (us->us_epriv_ports[i] != 0) 1090 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 1091 } 1092 return (0); 1093 } 1094 1095 /* ARGSUSED */ 1096 static int 1097 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1098 cred_t *cr) 1099 { 1100 long new_value; 1101 int i; 1102 udp_t *udp = Q_TO_UDP(q); 1103 udp_stack_t *us = udp->udp_us; 1104 1105 /* 1106 * Fail the request if the new value does not lie within the 1107 * port number limits. 1108 */ 1109 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1110 new_value <= 0 || new_value >= 65536) { 1111 return (EINVAL); 1112 } 1113 1114 /* Check if the value is already in the list */ 1115 for (i = 0; i < us->us_num_epriv_ports; i++) { 1116 if (new_value == us->us_epriv_ports[i]) { 1117 return (EEXIST); 1118 } 1119 } 1120 /* Find an empty slot */ 1121 for (i = 0; i < us->us_num_epriv_ports; i++) { 1122 if (us->us_epriv_ports[i] == 0) 1123 break; 1124 } 1125 if (i == us->us_num_epriv_ports) { 1126 return (EOVERFLOW); 1127 } 1128 1129 /* Set the new value */ 1130 us->us_epriv_ports[i] = (in_port_t)new_value; 1131 return (0); 1132 } 1133 1134 /* ARGSUSED */ 1135 static int 1136 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1137 cred_t *cr) 1138 { 1139 long new_value; 1140 int i; 1141 udp_t *udp = Q_TO_UDP(q); 1142 udp_stack_t *us = udp->udp_us; 1143 1144 /* 1145 * Fail the request if the new value does not lie within the 1146 * port number limits. 
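 *
 * (These _add/_del routines, together with udp_extra_priv_ports_get(),
 * back the extra-privileged-ports list that is normally inspected and
 * updated through ndd(1M); accepted values are port numbers in the
 * range 1-65535.)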
1147 */ 1148 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1149 new_value <= 0 || new_value >= 65536) { 1150 return (EINVAL); 1151 } 1152 1153 /* Check that the value is already in the list */ 1154 for (i = 0; i < us->us_num_epriv_ports; i++) { 1155 if (us->us_epriv_ports[i] == new_value) 1156 break; 1157 } 1158 if (i == us->us_num_epriv_ports) { 1159 return (ESRCH); 1160 } 1161 1162 /* Clear the value */ 1163 us->us_epriv_ports[i] = 0; 1164 return (0); 1165 } 1166 1167 /* At minimum we need 4 bytes of UDP header */ 1168 #define ICMP_MIN_UDP_HDR 4 1169 1170 /* 1171 * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP. 1172 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1173 * Assumes that IP has pulled up everything up to and including the ICMP header. 1174 */ 1175 static void 1176 udp_icmp_error(conn_t *connp, mblk_t *mp) 1177 { 1178 icmph_t *icmph; 1179 ipha_t *ipha; 1180 int iph_hdr_length; 1181 udpha_t *udpha; 1182 sin_t sin; 1183 sin6_t sin6; 1184 mblk_t *mp1; 1185 int error = 0; 1186 udp_t *udp = connp->conn_udp; 1187 1188 mp1 = NULL; 1189 ipha = (ipha_t *)mp->b_rptr; 1190 1191 ASSERT(OK_32PTR(mp->b_rptr)); 1192 1193 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1194 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1195 udp_icmp_error_ipv6(connp, mp); 1196 return; 1197 } 1198 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 1199 1200 /* Skip past the outer IP and ICMP headers */ 1201 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1202 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1203 ipha = (ipha_t *)&icmph[1]; 1204 1205 /* Skip past the inner IP and find the ULP header */ 1206 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1207 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1208 1209 switch (icmph->icmph_type) { 1210 case ICMP_DEST_UNREACHABLE: 1211 switch (icmph->icmph_code) { 1212 case ICMP_FRAGMENTATION_NEEDED: 1213 /* 1214 * IP has already adjusted the path MTU. 1215 */ 1216 break; 1217 case ICMP_PORT_UNREACHABLE: 1218 case ICMP_PROTOCOL_UNREACHABLE: 1219 error = ECONNREFUSED; 1220 break; 1221 default: 1222 /* Transient errors */ 1223 break; 1224 } 1225 break; 1226 default: 1227 /* Transient errors */ 1228 break; 1229 } 1230 if (error == 0) { 1231 freemsg(mp); 1232 return; 1233 } 1234 1235 /* 1236 * Deliver T_UDERROR_IND when the application has asked for it. 1237 * The socket layer enables this automatically when connected. 
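 *
 * A minimal sketch of how an application asks for these indications
 * explicitly (fd is an assumed, not necessarily connected, UDP socket
 * descriptor):
 *
 *	int onoff = 1;
 *	(void) setsockopt(fd, SOL_SOCKET, SO_DGRAM_ERRIND,
 *	    &onoff, sizeof (onoff));
 *
 * after which ICMP errors such as port unreachable are reported back to
 * the sender (as ECONNREFUSED here) instead of being silently dropped.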
1238 */ 1239 if (!udp->udp_dgram_errind) { 1240 freemsg(mp); 1241 return; 1242 } 1243 1244 1245 switch (udp->udp_family) { 1246 case AF_INET: 1247 sin = sin_null; 1248 sin.sin_family = AF_INET; 1249 sin.sin_addr.s_addr = ipha->ipha_dst; 1250 sin.sin_port = udpha->uha_dst_port; 1251 if (IPCL_IS_NONSTR(connp)) { 1252 rw_enter(&udp->udp_rwlock, RW_WRITER); 1253 if (udp->udp_state == TS_DATA_XFER) { 1254 if (sin.sin_port == udp->udp_dstport && 1255 sin.sin_addr.s_addr == 1256 V4_PART_OF_V6(udp->udp_v6dst)) { 1257 1258 rw_exit(&udp->udp_rwlock); 1259 (*connp->conn_upcalls->su_set_error) 1260 (connp->conn_upper_handle, error); 1261 goto done; 1262 } 1263 } else { 1264 udp->udp_delayed_error = error; 1265 *((sin_t *)&udp->udp_delayed_addr) = sin; 1266 } 1267 rw_exit(&udp->udp_rwlock); 1268 } else { 1269 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1270 NULL, 0, error); 1271 } 1272 break; 1273 case AF_INET6: 1274 sin6 = sin6_null; 1275 sin6.sin6_family = AF_INET6; 1276 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1277 sin6.sin6_port = udpha->uha_dst_port; 1278 if (IPCL_IS_NONSTR(connp)) { 1279 rw_enter(&udp->udp_rwlock, RW_WRITER); 1280 if (udp->udp_state == TS_DATA_XFER) { 1281 if (sin6.sin6_port == udp->udp_dstport && 1282 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1283 &udp->udp_v6dst)) { 1284 rw_exit(&udp->udp_rwlock); 1285 (*connp->conn_upcalls->su_set_error) 1286 (connp->conn_upper_handle, error); 1287 goto done; 1288 } 1289 } else { 1290 udp->udp_delayed_error = error; 1291 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1292 } 1293 rw_exit(&udp->udp_rwlock); 1294 } else { 1295 1296 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1297 NULL, 0, error); 1298 } 1299 break; 1300 } 1301 if (mp1 != NULL) 1302 putnext(connp->conn_rq, mp1); 1303 done: 1304 freemsg(mp); 1305 } 1306 1307 /* 1308 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1309 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1310 * Assumes that IP has pulled up all the extension headers as well as the 1311 * ICMPv6 header. 
1312 */ 1313 static void 1314 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1315 { 1316 icmp6_t *icmp6; 1317 ip6_t *ip6h, *outer_ip6h; 1318 uint16_t iph_hdr_length; 1319 uint8_t *nexthdrp; 1320 udpha_t *udpha; 1321 sin6_t sin6; 1322 mblk_t *mp1; 1323 int error = 0; 1324 udp_t *udp = connp->conn_udp; 1325 udp_stack_t *us = udp->udp_us; 1326 1327 outer_ip6h = (ip6_t *)mp->b_rptr; 1328 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1329 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1330 else 1331 iph_hdr_length = IPV6_HDR_LEN; 1332 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1333 ip6h = (ip6_t *)&icmp6[1]; 1334 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1335 freemsg(mp); 1336 return; 1337 } 1338 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1339 1340 switch (icmp6->icmp6_type) { 1341 case ICMP6_DST_UNREACH: 1342 switch (icmp6->icmp6_code) { 1343 case ICMP6_DST_UNREACH_NOPORT: 1344 error = ECONNREFUSED; 1345 break; 1346 case ICMP6_DST_UNREACH_ADMIN: 1347 case ICMP6_DST_UNREACH_NOROUTE: 1348 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1349 case ICMP6_DST_UNREACH_ADDR: 1350 /* Transient errors */ 1351 break; 1352 default: 1353 break; 1354 } 1355 break; 1356 case ICMP6_PACKET_TOO_BIG: { 1357 struct T_unitdata_ind *tudi; 1358 struct T_opthdr *toh; 1359 size_t udi_size; 1360 mblk_t *newmp; 1361 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1362 sizeof (struct ip6_mtuinfo); 1363 sin6_t *sin6; 1364 struct ip6_mtuinfo *mtuinfo; 1365 1366 /* 1367 * If the application has requested to receive path mtu 1368 * information, send up an empty message containing an 1369 * IPV6_PATHMTU ancillary data item. 1370 */ 1371 if (!udp->udp_ipv6_recvpathmtu) 1372 break; 1373 1374 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1375 opt_length; 1376 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1377 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1378 break; 1379 } 1380 1381 /* 1382 * newmp->b_cont is left to NULL on purpose. This is an 1383 * empty message containing only ancillary data. 1384 */ 1385 newmp->b_datap->db_type = M_PROTO; 1386 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1387 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1388 tudi->PRIM_type = T_UNITDATA_IND; 1389 tudi->SRC_length = sizeof (sin6_t); 1390 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1391 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1392 tudi->OPT_length = opt_length; 1393 1394 sin6 = (sin6_t *)&tudi[1]; 1395 bzero(sin6, sizeof (sin6_t)); 1396 sin6->sin6_family = AF_INET6; 1397 sin6->sin6_addr = udp->udp_v6dst; 1398 1399 toh = (struct T_opthdr *)&sin6[1]; 1400 toh->level = IPPROTO_IPV6; 1401 toh->name = IPV6_PATHMTU; 1402 toh->len = opt_length; 1403 toh->status = 0; 1404 1405 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1406 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1407 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1408 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1409 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1410 /* 1411 * We've consumed everything we need from the original 1412 * message. Free it, then send our empty message. 
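 *
 * A sketch of the receiving side, assuming the application has enabled
 * IPV6_RECVPATHMTU and walks the recvmsg(3SOCKET) ancillary data (msg,
 * cmsg, mip and new_mtu are the application's own variables):
 *
 *	struct cmsghdr *cmsg;
 *	struct ip6_mtuinfo *mip;
 *	uint32_t new_mtu;
 *
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
 *	    cmsg = CMSG_NXTHDR(&msg, cmsg)) {
 *		if (cmsg->cmsg_level == IPPROTO_IPV6 &&
 *		    cmsg->cmsg_type == IPV6_PATHMTU) {
 *			mip = (struct ip6_mtuinfo *)CMSG_DATA(cmsg);
 *			new_mtu = mip->ip6m_mtu;
 *		}
 *	}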
1413 */ 1414 freemsg(mp); 1415 if (!IPCL_IS_NONSTR(connp)) { 1416 putnext(connp->conn_rq, newmp); 1417 } else { 1418 (*connp->conn_upcalls->su_recv) 1419 (connp->conn_upper_handle, newmp, 0, 0, &error, 1420 NULL); 1421 } 1422 return; 1423 } 1424 case ICMP6_TIME_EXCEEDED: 1425 /* Transient errors */ 1426 break; 1427 case ICMP6_PARAM_PROB: 1428 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1429 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1430 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1431 (uchar_t *)nexthdrp) { 1432 error = ECONNREFUSED; 1433 break; 1434 } 1435 break; 1436 } 1437 if (error == 0) { 1438 freemsg(mp); 1439 return; 1440 } 1441 1442 /* 1443 * Deliver T_UDERROR_IND when the application has asked for it. 1444 * The socket layer enables this automatically when connected. 1445 */ 1446 if (!udp->udp_dgram_errind) { 1447 freemsg(mp); 1448 return; 1449 } 1450 1451 sin6 = sin6_null; 1452 sin6.sin6_family = AF_INET6; 1453 sin6.sin6_addr = ip6h->ip6_dst; 1454 sin6.sin6_port = udpha->uha_dst_port; 1455 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1456 1457 if (IPCL_IS_NONSTR(connp)) { 1458 rw_enter(&udp->udp_rwlock, RW_WRITER); 1459 if (udp->udp_state == TS_DATA_XFER) { 1460 if (sin6.sin6_port == udp->udp_dstport && 1461 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1462 &udp->udp_v6dst)) { 1463 rw_exit(&udp->udp_rwlock); 1464 (*connp->conn_upcalls->su_set_error) 1465 (connp->conn_upper_handle, error); 1466 goto done; 1467 } 1468 } else { 1469 udp->udp_delayed_error = error; 1470 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1471 } 1472 rw_exit(&udp->udp_rwlock); 1473 } else { 1474 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1475 NULL, 0, error); 1476 if (mp1 != NULL) 1477 putnext(connp->conn_rq, mp1); 1478 } 1479 1480 done: 1481 freemsg(mp); 1482 } 1483 1484 /* 1485 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1486 * The local address is filled in if endpoint is bound. The remote address 1487 * is filled in if remote address has been precified ("connected endpoint") 1488 * (The concept of connected CLTS sockets is alien to published TPI 1489 * but we support it anyway). 1490 */ 1491 static void 1492 udp_addr_req(queue_t *q, mblk_t *mp) 1493 { 1494 sin_t *sin; 1495 sin6_t *sin6; 1496 mblk_t *ackmp; 1497 struct T_addr_ack *taa; 1498 udp_t *udp = Q_TO_UDP(q); 1499 1500 /* Make it large enough for worst case */ 1501 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1502 2 * sizeof (sin6_t), 1); 1503 if (ackmp == NULL) { 1504 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1505 return; 1506 } 1507 taa = (struct T_addr_ack *)ackmp->b_rptr; 1508 1509 bzero(taa, sizeof (struct T_addr_ack)); 1510 ackmp->b_wptr = (uchar_t *)&taa[1]; 1511 1512 taa->PRIM_type = T_ADDR_ACK; 1513 ackmp->b_datap->db_type = M_PCPROTO; 1514 rw_enter(&udp->udp_rwlock, RW_READER); 1515 /* 1516 * Note: Following code assumes 32 bit alignment of basic 1517 * data structures like sin_t and struct T_addr_ack. 
1518 */ 1519 if (udp->udp_state != TS_UNBND) { 1520 /* 1521 * Fill in local address first 1522 */ 1523 taa->LOCADDR_offset = sizeof (*taa); 1524 if (udp->udp_family == AF_INET) { 1525 taa->LOCADDR_length = sizeof (sin_t); 1526 sin = (sin_t *)&taa[1]; 1527 /* Fill zeroes and then initialize non-zero fields */ 1528 *sin = sin_null; 1529 sin->sin_family = AF_INET; 1530 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 1531 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1532 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 1533 sin->sin_addr.s_addr); 1534 } else { 1535 /* 1536 * INADDR_ANY 1537 * udp_v6src is not set, we might be bound to 1538 * broadcast/multicast. Use udp_bound_v6src as 1539 * local address instead (that could 1540 * also still be INADDR_ANY) 1541 */ 1542 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 1543 sin->sin_addr.s_addr); 1544 } 1545 sin->sin_port = udp->udp_port; 1546 ackmp->b_wptr = (uchar_t *)&sin[1]; 1547 if (udp->udp_state == TS_DATA_XFER) { 1548 /* 1549 * connected, fill remote address too 1550 */ 1551 taa->REMADDR_length = sizeof (sin_t); 1552 /* assumed 32-bit alignment */ 1553 taa->REMADDR_offset = taa->LOCADDR_offset + 1554 taa->LOCADDR_length; 1555 1556 sin = (sin_t *)(ackmp->b_rptr + 1557 taa->REMADDR_offset); 1558 /* initialize */ 1559 *sin = sin_null; 1560 sin->sin_family = AF_INET; 1561 sin->sin_addr.s_addr = 1562 V4_PART_OF_V6(udp->udp_v6dst); 1563 sin->sin_port = udp->udp_dstport; 1564 ackmp->b_wptr = (uchar_t *)&sin[1]; 1565 } 1566 } else { 1567 taa->LOCADDR_length = sizeof (sin6_t); 1568 sin6 = (sin6_t *)&taa[1]; 1569 /* Fill zeroes and then initialize non-zero fields */ 1570 *sin6 = sin6_null; 1571 sin6->sin6_family = AF_INET6; 1572 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1573 sin6->sin6_addr = udp->udp_v6src; 1574 } else { 1575 /* 1576 * UNSPECIFIED 1577 * udp_v6src is not set, we might be bound to 1578 * broadcast/multicast. Use udp_bound_v6src as 1579 * local address instead (that could 1580 * also still be UNSPECIFIED) 1581 */ 1582 sin6->sin6_addr = 1583 udp->udp_bound_v6src; 1584 } 1585 sin6->sin6_port = udp->udp_port; 1586 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1587 if (udp->udp_state == TS_DATA_XFER) { 1588 /* 1589 * connected, fill remote address too 1590 */ 1591 taa->REMADDR_length = sizeof (sin6_t); 1592 /* assumed 32-bit alignment */ 1593 taa->REMADDR_offset = taa->LOCADDR_offset + 1594 taa->LOCADDR_length; 1595 1596 sin6 = (sin6_t *)(ackmp->b_rptr + 1597 taa->REMADDR_offset); 1598 /* initialize */ 1599 *sin6 = sin6_null; 1600 sin6->sin6_family = AF_INET6; 1601 sin6->sin6_addr = udp->udp_v6dst; 1602 sin6->sin6_port = udp->udp_dstport; 1603 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1604 } 1605 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1606 } 1607 } 1608 rw_exit(&udp->udp_rwlock); 1609 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1610 qreply(q, ackmp); 1611 } 1612 1613 static void 1614 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1615 { 1616 if (udp->udp_family == AF_INET) { 1617 *tap = udp_g_t_info_ack_ipv4; 1618 } else { 1619 *tap = udp_g_t_info_ack_ipv6; 1620 } 1621 tap->CURRENT_state = udp->udp_state; 1622 tap->OPT_size = udp_max_optsize; 1623 } 1624 1625 static void 1626 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1627 t_uscalar_t cap_bits1) 1628 { 1629 tcap->CAP_bits1 = 0; 1630 1631 if (cap_bits1 & TC1_INFO) { 1632 udp_copy_info(&tcap->INFO_ack, udp); 1633 tcap->CAP_bits1 |= TC1_INFO; 1634 } 1635 } 1636 1637 /* 1638 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1639 * udp_wput. 
 * Much of the T_CAPABILITY_ACK information is copied from
 * udp_g_t_info_ack. The current state of the stream is copied from
 * udp_state.
 */
static void
udp_capability_req(queue_t *q, mblk_t *mp)
{
	t_uscalar_t	cap_bits1;
	struct T_capability_ack	*tcap;
	udp_t	*udp = Q_TO_UDP(q);

	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;

	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
	    mp->b_datap->db_type, T_CAPABILITY_ACK);
	if (!mp)
		return;

	tcap = (struct T_capability_ack *)mp->b_rptr;
	udp_do_capability_ack(udp, tcap, cap_bits1);

	qreply(q, mp);
}

/*
 * This routine responds to T_INFO_REQ messages. It is called by udp_wput.
 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
 * The current state of the stream is copied from udp_state.
 */
static void
udp_info_req(queue_t *q, mblk_t *mp)
{
	udp_t	*udp = Q_TO_UDP(q);

	/* Create a T_INFO_ACK message. */
	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
	    T_INFO_ACK);
	if (!mp)
		return;
	udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
	qreply(q, mp);
}

/*
 * IP recognizes seven kinds of bind requests:
 *
 * - A zero-length address binds only to the protocol number.
 *
 * - A 4-byte address is treated as a request to
 *   validate that the address is a valid local IPv4
 *   address, appropriate for an application to bind to.
 *   IP does the verification, but does not make any note
 *   of the address at this time.
 *
 * - A 16-byte address is treated as a request
 *   to validate a local IPv6 address, as in the 4-byte
 *   address case above.
 *
 * - A 16-byte sockaddr_in to validate the local IPv4 address and also
 *   use it for the inbound fanout of packets.
 *
 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also
 *   use it for the inbound fanout of packets.
 *
 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout
 *   information consisting of local and remote addresses
 *   and ports. In this case, the addresses are both
 *   validated as appropriate for this operation, and, if
 *   so, the information is retained for use in the
 *   inbound fanout.
 *
 * - A 36-byte address (ipa6_conn_t) containing complete IPv6
 *   fanout information, like the 12-byte case above.
 *
 * IP will also fill in the IRE request mblk with information
 * regarding our peer. In all cases, we notify IP of our protocol
 * type by appending a single protocol byte to the bind request.
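 *
 * As an illustration, the 12-byte ipa_conn_t case built below lays the
 * request out as
 *
 *	T_bind_req | ipa_conn_t (local/remote address and ports) | IPPROTO_UDP
 *
 * with an IRE_DB_REQ_TYPE mblk chained on b_cont so that IP can return
 * the IRE information.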
1716 */ 1717 static mblk_t * 1718 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 1719 { 1720 char *cp; 1721 mblk_t *mp; 1722 struct T_bind_req *tbr; 1723 ipa_conn_t *ac; 1724 ipa6_conn_t *ac6; 1725 sin_t *sin; 1726 sin6_t *sin6; 1727 1728 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1729 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 1730 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1731 if (!mp) 1732 return (mp); 1733 mp->b_datap->db_type = M_PROTO; 1734 tbr = (struct T_bind_req *)mp->b_rptr; 1735 tbr->PRIM_type = bind_prim; 1736 tbr->ADDR_offset = sizeof (*tbr); 1737 tbr->CONIND_number = 0; 1738 tbr->ADDR_length = addr_length; 1739 cp = (char *)&tbr[1]; 1740 switch (addr_length) { 1741 case sizeof (ipa_conn_t): 1742 ASSERT(udp->udp_family == AF_INET); 1743 /* Append a request for an IRE */ 1744 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1745 if (!mp->b_cont) { 1746 freemsg(mp); 1747 return (NULL); 1748 } 1749 mp->b_cont->b_wptr += sizeof (ire_t); 1750 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1751 1752 /* cp known to be 32 bit aligned */ 1753 ac = (ipa_conn_t *)cp; 1754 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 1755 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 1756 ac->ac_fport = udp->udp_dstport; 1757 ac->ac_lport = udp->udp_port; 1758 break; 1759 1760 case sizeof (ipa6_conn_t): 1761 ASSERT(udp->udp_family == AF_INET6); 1762 /* Append a request for an IRE */ 1763 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1764 if (!mp->b_cont) { 1765 freemsg(mp); 1766 return (NULL); 1767 } 1768 mp->b_cont->b_wptr += sizeof (ire_t); 1769 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1770 1771 /* cp known to be 32 bit aligned */ 1772 ac6 = (ipa6_conn_t *)cp; 1773 ac6->ac6_laddr = udp->udp_v6src; 1774 ac6->ac6_faddr = udp->udp_v6dst; 1775 ac6->ac6_fport = udp->udp_dstport; 1776 ac6->ac6_lport = udp->udp_port; 1777 break; 1778 1779 case sizeof (sin_t): 1780 ASSERT(udp->udp_family == AF_INET); 1781 /* Append a request for an IRE */ 1782 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1783 if (!mp->b_cont) { 1784 freemsg(mp); 1785 return (NULL); 1786 } 1787 mp->b_cont->b_wptr += sizeof (ire_t); 1788 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1789 1790 sin = (sin_t *)cp; 1791 *sin = sin_null; 1792 sin->sin_family = AF_INET; 1793 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 1794 sin->sin_port = udp->udp_port; 1795 break; 1796 1797 case sizeof (sin6_t): 1798 ASSERT(udp->udp_family == AF_INET6); 1799 /* Append a request for an IRE */ 1800 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1801 if (!mp->b_cont) { 1802 freemsg(mp); 1803 return (NULL); 1804 } 1805 mp->b_cont->b_wptr += sizeof (ire_t); 1806 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1807 1808 sin6 = (sin6_t *)cp; 1809 *sin6 = sin6_null; 1810 sin6->sin6_family = AF_INET6; 1811 sin6->sin6_addr = udp->udp_bound_v6src; 1812 sin6->sin6_port = udp->udp_port; 1813 break; 1814 } 1815 /* Add protocol number to end */ 1816 cp[addr_length] = (char)IPPROTO_UDP; 1817 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1818 return (mp); 1819 } 1820 1821 /* For /dev/udp aka AF_INET open */ 1822 static int 1823 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1824 { 1825 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1826 } 1827 1828 /* For /dev/udp6 aka AF_INET6 open */ 1829 static int 1830 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1831 { 1832 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1833 } 1834 1835 /* 1836 * 
This is the open routine for udp. It allocates a udp_t structure for 1837 * the stream and, on the first open of the module, creates an ND table. 1838 */ 1839 /*ARGSUSED2*/ 1840 static int 1841 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1842 boolean_t isv6) 1843 { 1844 int error; 1845 udp_t *udp; 1846 conn_t *connp; 1847 dev_t conn_dev; 1848 udp_stack_t *us; 1849 vmem_t *minor_arena; 1850 1851 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 1852 1853 /* If the stream is already open, return immediately. */ 1854 if (q->q_ptr != NULL) 1855 return (0); 1856 1857 if (sflag == MODOPEN) 1858 return (EINVAL); 1859 1860 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1861 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1862 minor_arena = ip_minor_arena_la; 1863 } else { 1864 /* 1865 * Either minor numbers in the large arena were exhausted 1866 * or a non socket application is doing the open. 1867 * Try to allocate from the small arena. 1868 */ 1869 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1870 return (EBUSY); 1871 1872 minor_arena = ip_minor_arena_sa; 1873 } 1874 1875 if (flag & SO_FALLBACK) { 1876 /* 1877 * Non streams socket needs a stream to fallback to 1878 */ 1879 RD(q)->q_ptr = (void *)conn_dev; 1880 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1881 WR(q)->q_ptr = (void *)minor_arena; 1882 qprocson(q); 1883 return (0); 1884 } 1885 1886 connp = udp_do_open(credp, isv6, KM_SLEEP); 1887 if (connp == NULL) { 1888 inet_minor_free(minor_arena, conn_dev); 1889 return (ENOMEM); 1890 } 1891 udp = connp->conn_udp; 1892 us = udp->udp_us; 1893 1894 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1895 connp->conn_dev = conn_dev; 1896 connp->conn_minor_arena = minor_arena; 1897 1898 /* 1899 * Initialize the udp_t structure for this stream. 1900 */ 1901 q->q_ptr = connp; 1902 WR(q)->q_ptr = connp; 1903 connp->conn_rq = q; 1904 connp->conn_wq = WR(q); 1905 1906 rw_enter(&udp->udp_rwlock, RW_WRITER); 1907 ASSERT(connp->conn_ulp == IPPROTO_UDP); 1908 ASSERT(connp->conn_udp == udp); 1909 ASSERT(udp->udp_connp == connp); 1910 1911 if (flag & SO_SOCKSTR) { 1912 connp->conn_flags |= IPCL_SOCKET; 1913 udp->udp_issocket = B_TRUE; 1914 udp->udp_direct_sockfs = B_TRUE; 1915 } 1916 1917 q->q_hiwat = us->us_recv_hiwat; 1918 WR(q)->q_hiwat = us->us_xmit_hiwat; 1919 WR(q)->q_lowat = us->us_xmit_lowat; 1920 1921 qprocson(q); 1922 1923 if (udp->udp_family == AF_INET6) { 1924 /* Build initial header template for transmit */ 1925 if ((error = udp_build_hdrs(udp)) != 0) { 1926 rw_exit(&udp->udp_rwlock); 1927 qprocsoff(q); 1928 inet_minor_free(minor_arena, conn_dev); 1929 ipcl_conn_destroy(connp); 1930 return (error); 1931 } 1932 } 1933 rw_exit(&udp->udp_rwlock); 1934 1935 /* Set the Stream head write offset and high watermark. */ 1936 (void) proto_set_tx_wroff(q, connp, 1937 udp->udp_max_hdr_len + us->us_wroff_extra); 1938 /* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? */ 1939 (void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat)); 1940 1941 mutex_enter(&connp->conn_lock); 1942 connp->conn_state_flags &= ~CONN_INCIPIENT; 1943 mutex_exit(&connp->conn_lock); 1944 return (0); 1945 } 1946 1947 /* 1948 * Which UDP options OK to set through T_UNITDATA_REQ... 
1949 */ 1950 /* ARGSUSED */ 1951 static boolean_t 1952 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1953 { 1954 return (B_TRUE); 1955 } 1956 1957 /* 1958 * This routine gets default values of certain options whose default 1959 * values are maintained by protcol specific code 1960 */ 1961 /* ARGSUSED */ 1962 int 1963 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1964 { 1965 udp_t *udp = Q_TO_UDP(q); 1966 udp_stack_t *us = udp->udp_us; 1967 int *i1 = (int *)ptr; 1968 1969 switch (level) { 1970 case IPPROTO_IP: 1971 switch (name) { 1972 case IP_MULTICAST_TTL: 1973 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1974 return (sizeof (uchar_t)); 1975 case IP_MULTICAST_LOOP: 1976 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1977 return (sizeof (uchar_t)); 1978 } 1979 break; 1980 case IPPROTO_IPV6: 1981 switch (name) { 1982 case IPV6_MULTICAST_HOPS: 1983 *i1 = IP_DEFAULT_MULTICAST_TTL; 1984 return (sizeof (int)); 1985 case IPV6_MULTICAST_LOOP: 1986 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1987 return (sizeof (int)); 1988 case IPV6_UNICAST_HOPS: 1989 *i1 = us->us_ipv6_hoplimit; 1990 return (sizeof (int)); 1991 } 1992 break; 1993 } 1994 return (-1); 1995 } 1996 1997 /* 1998 * This routine retrieves the current status of socket options. 1999 * It returns the size of the option retrieved. 2000 */ 2001 static int 2002 udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 2003 { 2004 udp_t *udp = connp->conn_udp; 2005 udp_stack_t *us = udp->udp_us; 2006 int *i1 = (int *)ptr; 2007 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 2008 int len; 2009 2010 ASSERT(RW_READ_HELD(&udp->udp_rwlock)); 2011 switch (level) { 2012 case SOL_SOCKET: 2013 switch (name) { 2014 case SO_DEBUG: 2015 *i1 = udp->udp_debug; 2016 break; /* goto sizeof (int) option return */ 2017 case SO_REUSEADDR: 2018 *i1 = udp->udp_reuseaddr; 2019 break; /* goto sizeof (int) option return */ 2020 case SO_TYPE: 2021 *i1 = SOCK_DGRAM; 2022 break; /* goto sizeof (int) option return */ 2023 2024 /* 2025 * The following three items are available here, 2026 * but are only meaningful to IP. 2027 */ 2028 case SO_DONTROUTE: 2029 *i1 = udp->udp_dontroute; 2030 break; /* goto sizeof (int) option return */ 2031 case SO_USELOOPBACK: 2032 *i1 = udp->udp_useloopback; 2033 break; /* goto sizeof (int) option return */ 2034 case SO_BROADCAST: 2035 *i1 = udp->udp_broadcast; 2036 break; /* goto sizeof (int) option return */ 2037 2038 case SO_SNDBUF: 2039 *i1 = udp->udp_xmit_hiwat; 2040 break; /* goto sizeof (int) option return */ 2041 case SO_RCVBUF: 2042 *i1 = udp->udp_rcv_disply_hiwat; 2043 break; /* goto sizeof (int) option return */ 2044 case SO_DGRAM_ERRIND: 2045 *i1 = udp->udp_dgram_errind; 2046 break; /* goto sizeof (int) option return */ 2047 case SO_RECVUCRED: 2048 *i1 = udp->udp_recvucred; 2049 break; /* goto sizeof (int) option return */ 2050 case SO_TIMESTAMP: 2051 *i1 = udp->udp_timestamp; 2052 break; /* goto sizeof (int) option return */ 2053 case SO_ANON_MLP: 2054 *i1 = connp->conn_anon_mlp; 2055 break; /* goto sizeof (int) option return */ 2056 case SO_MAC_EXEMPT: 2057 *i1 = connp->conn_mac_exempt; 2058 break; /* goto sizeof (int) option return */ 2059 case SO_ALLZONES: 2060 *i1 = connp->conn_allzones; 2061 break; /* goto sizeof (int) option return */ 2062 case SO_EXCLBIND: 2063 *i1 = udp->udp_exclbind ? 
SO_EXCLBIND : 0; 2064 break; 2065 case SO_PROTOTYPE: 2066 *i1 = IPPROTO_UDP; 2067 break; 2068 case SO_DOMAIN: 2069 *i1 = udp->udp_family; 2070 break; 2071 default: 2072 return (-1); 2073 } 2074 break; 2075 case IPPROTO_IP: 2076 if (udp->udp_family != AF_INET) 2077 return (-1); 2078 switch (name) { 2079 case IP_OPTIONS: 2080 case T_IP_OPTIONS: 2081 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 2082 if (len > 0) { 2083 bcopy(udp->udp_ip_rcv_options + 2084 udp->udp_label_len, ptr, len); 2085 } 2086 return (len); 2087 case IP_TOS: 2088 case T_IP_TOS: 2089 *i1 = (int)udp->udp_type_of_service; 2090 break; /* goto sizeof (int) option return */ 2091 case IP_TTL: 2092 *i1 = (int)udp->udp_ttl; 2093 break; /* goto sizeof (int) option return */ 2094 case IP_DHCPINIT_IF: 2095 return (-EINVAL); 2096 case IP_NEXTHOP: 2097 case IP_RECVPKTINFO: 2098 /* 2099 * This also handles IP_PKTINFO. 2100 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2101 * Differentiation is based on the size of the argument 2102 * passed in. 2103 * This option is handled in IP which will return an 2104 * error for IP_PKTINFO as it's not supported as a 2105 * sticky option. 2106 */ 2107 return (-EINVAL); 2108 case IP_MULTICAST_IF: 2109 /* 0 address if not set */ 2110 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 2111 return (sizeof (ipaddr_t)); 2112 case IP_MULTICAST_TTL: 2113 *(uchar_t *)ptr = udp->udp_multicast_ttl; 2114 return (sizeof (uchar_t)); 2115 case IP_MULTICAST_LOOP: 2116 *ptr = connp->conn_multicast_loop; 2117 return (sizeof (uint8_t)); 2118 case IP_RECVOPTS: 2119 *i1 = udp->udp_recvopts; 2120 break; /* goto sizeof (int) option return */ 2121 case IP_RECVDSTADDR: 2122 *i1 = udp->udp_recvdstaddr; 2123 break; /* goto sizeof (int) option return */ 2124 case IP_RECVIF: 2125 *i1 = udp->udp_recvif; 2126 break; /* goto sizeof (int) option return */ 2127 case IP_RECVSLLA: 2128 *i1 = udp->udp_recvslla; 2129 break; /* goto sizeof (int) option return */ 2130 case IP_RECVTTL: 2131 *i1 = udp->udp_recvttl; 2132 break; /* goto sizeof (int) option return */ 2133 case IP_ADD_MEMBERSHIP: 2134 case IP_DROP_MEMBERSHIP: 2135 case IP_BLOCK_SOURCE: 2136 case IP_UNBLOCK_SOURCE: 2137 case IP_ADD_SOURCE_MEMBERSHIP: 2138 case IP_DROP_SOURCE_MEMBERSHIP: 2139 case MCAST_JOIN_GROUP: 2140 case MCAST_LEAVE_GROUP: 2141 case MCAST_BLOCK_SOURCE: 2142 case MCAST_UNBLOCK_SOURCE: 2143 case MCAST_JOIN_SOURCE_GROUP: 2144 case MCAST_LEAVE_SOURCE_GROUP: 2145 /* cannot "get" the value for these */ 2146 return (-1); 2147 case IP_BOUND_IF: 2148 /* Zero if not set */ 2149 *i1 = udp->udp_bound_if; 2150 break; /* goto sizeof (int) option return */ 2151 case IP_UNSPEC_SRC: 2152 *i1 = udp->udp_unspec_source; 2153 break; /* goto sizeof (int) option return */ 2154 case IP_BROADCAST_TTL: 2155 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 2156 return (sizeof (uchar_t)); 2157 default: 2158 return (-1); 2159 } 2160 break; 2161 case IPPROTO_IPV6: 2162 if (udp->udp_family != AF_INET6) 2163 return (-1); 2164 switch (name) { 2165 case IPV6_UNICAST_HOPS: 2166 *i1 = (unsigned int)udp->udp_ttl; 2167 break; /* goto sizeof (int) option return */ 2168 case IPV6_MULTICAST_IF: 2169 /* 0 index if not set */ 2170 *i1 = udp->udp_multicast_if_index; 2171 break; /* goto sizeof (int) option return */ 2172 case IPV6_MULTICAST_HOPS: 2173 *i1 = udp->udp_multicast_ttl; 2174 break; /* goto sizeof (int) option return */ 2175 case IPV6_MULTICAST_LOOP: 2176 *i1 = connp->conn_multicast_loop; 2177 break; /* goto sizeof (int) option return */ 2178 case IPV6_JOIN_GROUP: 2179 case 
IPV6_LEAVE_GROUP: 2180 case MCAST_JOIN_GROUP: 2181 case MCAST_LEAVE_GROUP: 2182 case MCAST_BLOCK_SOURCE: 2183 case MCAST_UNBLOCK_SOURCE: 2184 case MCAST_JOIN_SOURCE_GROUP: 2185 case MCAST_LEAVE_SOURCE_GROUP: 2186 /* cannot "get" the value for these */ 2187 return (-1); 2188 case IPV6_BOUND_IF: 2189 /* Zero if not set */ 2190 *i1 = udp->udp_bound_if; 2191 break; /* goto sizeof (int) option return */ 2192 case IPV6_UNSPEC_SRC: 2193 *i1 = udp->udp_unspec_source; 2194 break; /* goto sizeof (int) option return */ 2195 case IPV6_RECVPKTINFO: 2196 *i1 = udp->udp_ip_recvpktinfo; 2197 break; /* goto sizeof (int) option return */ 2198 case IPV6_RECVTCLASS: 2199 *i1 = udp->udp_ipv6_recvtclass; 2200 break; /* goto sizeof (int) option return */ 2201 case IPV6_RECVPATHMTU: 2202 *i1 = udp->udp_ipv6_recvpathmtu; 2203 break; /* goto sizeof (int) option return */ 2204 case IPV6_RECVHOPLIMIT: 2205 *i1 = udp->udp_ipv6_recvhoplimit; 2206 break; /* goto sizeof (int) option return */ 2207 case IPV6_RECVHOPOPTS: 2208 *i1 = udp->udp_ipv6_recvhopopts; 2209 break; /* goto sizeof (int) option return */ 2210 case IPV6_RECVDSTOPTS: 2211 *i1 = udp->udp_ipv6_recvdstopts; 2212 break; /* goto sizeof (int) option return */ 2213 case _OLD_IPV6_RECVDSTOPTS: 2214 *i1 = udp->udp_old_ipv6_recvdstopts; 2215 break; /* goto sizeof (int) option return */ 2216 case IPV6_RECVRTHDRDSTOPTS: 2217 *i1 = udp->udp_ipv6_recvrthdrdstopts; 2218 break; /* goto sizeof (int) option return */ 2219 case IPV6_RECVRTHDR: 2220 *i1 = udp->udp_ipv6_recvrthdr; 2221 break; /* goto sizeof (int) option return */ 2222 case IPV6_PKTINFO: { 2223 /* XXX assumes that caller has room for max size! */ 2224 struct in6_pktinfo *pkti; 2225 2226 pkti = (struct in6_pktinfo *)ptr; 2227 if (ipp->ipp_fields & IPPF_IFINDEX) 2228 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2229 else 2230 pkti->ipi6_ifindex = 0; 2231 if (ipp->ipp_fields & IPPF_ADDR) 2232 pkti->ipi6_addr = ipp->ipp_addr; 2233 else 2234 pkti->ipi6_addr = ipv6_all_zeros; 2235 return (sizeof (struct in6_pktinfo)); 2236 } 2237 case IPV6_TCLASS: 2238 if (ipp->ipp_fields & IPPF_TCLASS) 2239 *i1 = ipp->ipp_tclass; 2240 else 2241 *i1 = IPV6_FLOW_TCLASS( 2242 IPV6_DEFAULT_VERS_AND_FLOW); 2243 break; /* goto sizeof (int) option return */ 2244 case IPV6_NEXTHOP: { 2245 sin6_t *sin6 = (sin6_t *)ptr; 2246 2247 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2248 return (0); 2249 *sin6 = sin6_null; 2250 sin6->sin6_family = AF_INET6; 2251 sin6->sin6_addr = ipp->ipp_nexthop; 2252 return (sizeof (sin6_t)); 2253 } 2254 case IPV6_HOPOPTS: 2255 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2256 return (0); 2257 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 2258 return (0); 2259 /* 2260 * The cipso/label option is added by kernel. 2261 * User is not usually aware of this option. 2262 * We copy out the hbh opt after the label option. 
2263 */ 2264 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 2265 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2266 if (udp->udp_label_len_v6 > 0) { 2267 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2268 ptr[1] = (ipp->ipp_hopoptslen - 2269 udp->udp_label_len_v6 + 7) / 8 - 1; 2270 } 2271 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2272 case IPV6_RTHDRDSTOPTS: 2273 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2274 return (0); 2275 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2276 return (ipp->ipp_rtdstoptslen); 2277 case IPV6_RTHDR: 2278 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2279 return (0); 2280 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2281 return (ipp->ipp_rthdrlen); 2282 case IPV6_DSTOPTS: 2283 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 2284 return (0); 2285 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2286 return (ipp->ipp_dstoptslen); 2287 case IPV6_PATHMTU: 2288 return (ip_fill_mtuinfo(&udp->udp_v6dst, 2289 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 2290 us->us_netstack)); 2291 default: 2292 return (-1); 2293 } 2294 break; 2295 case IPPROTO_UDP: 2296 switch (name) { 2297 case UDP_ANONPRIVBIND: 2298 *i1 = udp->udp_anon_priv_bind; 2299 break; 2300 case UDP_EXCLBIND: 2301 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 2302 break; 2303 case UDP_RCVHDR: 2304 *i1 = udp->udp_rcvhdr ? 1 : 0; 2305 break; 2306 case UDP_NAT_T_ENDPOINT: 2307 *i1 = udp->udp_nat_t_endpoint; 2308 break; 2309 default: 2310 return (-1); 2311 } 2312 break; 2313 default: 2314 return (-1); 2315 } 2316 return (sizeof (int)); 2317 } 2318 2319 int 2320 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2321 { 2322 udp_t *udp; 2323 int err; 2324 2325 udp = Q_TO_UDP(q); 2326 2327 rw_enter(&udp->udp_rwlock, RW_READER); 2328 err = udp_opt_get(Q_TO_CONN(q), level, name, ptr); 2329 rw_exit(&udp->udp_rwlock); 2330 return (err); 2331 } 2332 2333 /* 2334 * This routine sets socket options. 2335 */ 2336 /* ARGSUSED */ 2337 static int 2338 udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2339 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2340 void *thisdg_attrs, boolean_t checkonly) 2341 { 2342 udpattrs_t *attrs = thisdg_attrs; 2343 int *i1 = (int *)invalp; 2344 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2345 udp_t *udp = connp->conn_udp; 2346 udp_stack_t *us = udp->udp_us; 2347 int error; 2348 uint_t newlen; 2349 size_t sth_wroff; 2350 2351 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 2352 /* 2353 * For fixed length options, no sanity check 2354 * of passed in length is done. It is assumed *_optcom_req() 2355 * routines do the right thing. 2356 */ 2357 switch (level) { 2358 case SOL_SOCKET: 2359 switch (name) { 2360 case SO_REUSEADDR: 2361 if (!checkonly) { 2362 udp->udp_reuseaddr = onoff; 2363 PASS_OPT_TO_IP(connp); 2364 } 2365 break; 2366 case SO_DEBUG: 2367 if (!checkonly) 2368 udp->udp_debug = onoff; 2369 break; 2370 /* 2371 * The following three items are available here, 2372 * but are only meaningful to IP. 
2373 */ 2374 case SO_DONTROUTE: 2375 if (!checkonly) { 2376 udp->udp_dontroute = onoff; 2377 PASS_OPT_TO_IP(connp); 2378 } 2379 break; 2380 case SO_USELOOPBACK: 2381 if (!checkonly) { 2382 udp->udp_useloopback = onoff; 2383 PASS_OPT_TO_IP(connp); 2384 } 2385 break; 2386 case SO_BROADCAST: 2387 if (!checkonly) { 2388 udp->udp_broadcast = onoff; 2389 PASS_OPT_TO_IP(connp); 2390 } 2391 break; 2392 2393 case SO_SNDBUF: 2394 if (*i1 > us->us_max_buf) { 2395 *outlenp = 0; 2396 return (ENOBUFS); 2397 } 2398 if (!checkonly) { 2399 udp->udp_xmit_hiwat = *i1; 2400 connp->conn_wq->q_hiwat = *i1; 2401 } 2402 break; 2403 case SO_RCVBUF: 2404 if (*i1 > us->us_max_buf) { 2405 *outlenp = 0; 2406 return (ENOBUFS); 2407 } 2408 if (!checkonly) { 2409 int size; 2410 2411 udp->udp_rcv_disply_hiwat = *i1; 2412 size = udp_set_rcv_hiwat(udp, *i1); 2413 rw_exit(&udp->udp_rwlock); 2414 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2415 size); 2416 rw_enter(&udp->udp_rwlock, RW_WRITER); 2417 } 2418 break; 2419 case SO_DGRAM_ERRIND: 2420 if (!checkonly) 2421 udp->udp_dgram_errind = onoff; 2422 break; 2423 case SO_RECVUCRED: 2424 if (!checkonly) 2425 udp->udp_recvucred = onoff; 2426 break; 2427 case SO_ALLZONES: 2428 /* 2429 * "soft" error (negative) 2430 * option not handled at this level 2431 * Do not modify *outlenp. 2432 */ 2433 return (-EINVAL); 2434 case SO_TIMESTAMP: 2435 if (!checkonly) 2436 udp->udp_timestamp = onoff; 2437 break; 2438 case SO_ANON_MLP: 2439 if (!checkonly) { 2440 connp->conn_anon_mlp = onoff; 2441 PASS_OPT_TO_IP(connp); 2442 } 2443 break; 2444 case SO_MAC_EXEMPT: 2445 if (secpolicy_net_mac_aware(cr) != 0 || 2446 udp->udp_state != TS_UNBND) 2447 return (EACCES); 2448 if (!checkonly) { 2449 connp->conn_mac_exempt = onoff; 2450 PASS_OPT_TO_IP(connp); 2451 } 2452 break; 2453 case SCM_UCRED: { 2454 struct ucred_s *ucr; 2455 cred_t *cr, *newcr; 2456 ts_label_t *tsl; 2457 2458 /* 2459 * Only sockets that have proper privileges and are 2460 * bound to MLPs will have any other value here, so 2461 * this implicitly tests for privilege to set label. 2462 */ 2463 if (connp->conn_mlp_type == mlptSingle) 2464 break; 2465 ucr = (struct ucred_s *)invalp; 2466 if (inlen != ucredsize || 2467 ucr->uc_labeloff < sizeof (*ucr) || 2468 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 2469 return (EINVAL); 2470 if (!checkonly) { 2471 mblk_t *mb; 2472 2473 if (attrs == NULL || 2474 (mb = attrs->udpattr_mb) == NULL) 2475 return (EINVAL); 2476 if ((cr = DB_CRED(mb)) == NULL) 2477 cr = udp->udp_connp->conn_cred; 2478 ASSERT(cr != NULL); 2479 if ((tsl = crgetlabel(cr)) == NULL) 2480 return (EINVAL); 2481 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 2482 tsl->tsl_doi, KM_NOSLEEP); 2483 if (newcr == NULL) 2484 return (ENOSR); 2485 mblk_setcred(mb, newcr); 2486 attrs->udpattr_credset = B_TRUE; 2487 crfree(newcr); 2488 } 2489 break; 2490 } 2491 case SO_EXCLBIND: 2492 if (!checkonly) 2493 udp->udp_exclbind = onoff; 2494 break; 2495 default: 2496 *outlenp = 0; 2497 return (EINVAL); 2498 } 2499 break; 2500 case IPPROTO_IP: 2501 if (udp->udp_family != AF_INET) { 2502 *outlenp = 0; 2503 return (ENOPROTOOPT); 2504 } 2505 switch (name) { 2506 case IP_OPTIONS: 2507 case T_IP_OPTIONS: 2508 /* Save options for use by IP. */ 2509 newlen = inlen + udp->udp_label_len; 2510 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 2511 *outlenp = 0; 2512 return (EINVAL); 2513 } 2514 if (checkonly) 2515 break; 2516 2517 /* 2518 * Update the stored options taking into account 2519 * any CIPSO option which we should not overwrite. 
2520 */ 2521 if (!tsol_option_set(&udp->udp_ip_snd_options, 2522 &udp->udp_ip_snd_options_len, 2523 udp->udp_label_len, invalp, inlen)) { 2524 *outlenp = 0; 2525 return (ENOMEM); 2526 } 2527 2528 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2529 UDPH_SIZE + udp->udp_ip_snd_options_len; 2530 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 2531 rw_exit(&udp->udp_rwlock); 2532 (void) proto_set_tx_wroff(connp->conn_rq, connp, 2533 sth_wroff); 2534 rw_enter(&udp->udp_rwlock, RW_WRITER); 2535 break; 2536 2537 case IP_TTL: 2538 if (!checkonly) { 2539 udp->udp_ttl = (uchar_t)*i1; 2540 } 2541 break; 2542 case IP_TOS: 2543 case T_IP_TOS: 2544 if (!checkonly) { 2545 udp->udp_type_of_service = (uchar_t)*i1; 2546 } 2547 break; 2548 case IP_MULTICAST_IF: { 2549 /* 2550 * TODO should check OPTMGMT reply and undo this if 2551 * there is an error. 2552 */ 2553 struct in_addr *inap = (struct in_addr *)invalp; 2554 if (!checkonly) { 2555 udp->udp_multicast_if_addr = 2556 inap->s_addr; 2557 PASS_OPT_TO_IP(connp); 2558 } 2559 break; 2560 } 2561 case IP_MULTICAST_TTL: 2562 if (!checkonly) 2563 udp->udp_multicast_ttl = *invalp; 2564 break; 2565 case IP_MULTICAST_LOOP: 2566 if (!checkonly) { 2567 connp->conn_multicast_loop = *invalp; 2568 PASS_OPT_TO_IP(connp); 2569 } 2570 break; 2571 case IP_RECVOPTS: 2572 if (!checkonly) 2573 udp->udp_recvopts = onoff; 2574 break; 2575 case IP_RECVDSTADDR: 2576 if (!checkonly) 2577 udp->udp_recvdstaddr = onoff; 2578 break; 2579 case IP_RECVIF: 2580 if (!checkonly) { 2581 udp->udp_recvif = onoff; 2582 PASS_OPT_TO_IP(connp); 2583 } 2584 break; 2585 case IP_RECVSLLA: 2586 if (!checkonly) { 2587 udp->udp_recvslla = onoff; 2588 PASS_OPT_TO_IP(connp); 2589 } 2590 break; 2591 case IP_RECVTTL: 2592 if (!checkonly) 2593 udp->udp_recvttl = onoff; 2594 break; 2595 case IP_PKTINFO: { 2596 /* 2597 * This also handles IP_RECVPKTINFO. 2598 * IP_PKTINFO and IP_RECVPKTINFO have same value. 2599 * Differentiation is based on the size of the 2600 * argument passed in. 2601 */ 2602 struct in_pktinfo *pktinfop; 2603 ip4_pkt_t *attr_pktinfop; 2604 2605 if (checkonly) 2606 break; 2607 2608 if (inlen == sizeof (int)) { 2609 /* 2610 * This is IP_RECVPKTINFO option. 2611 * Keep a local copy of whether this option is 2612 * set or not and pass it down to IP for 2613 * processing. 2614 */ 2615 2616 udp->udp_ip_recvpktinfo = onoff; 2617 return (-EINVAL); 2618 } 2619 2620 if (attrs == NULL || 2621 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 2622 /* 2623 * sticky option or no buffer to return 2624 * the results. 
2625 */ 2626 return (EINVAL); 2627 } 2628 2629 if (inlen != sizeof (struct in_pktinfo)) 2630 return (EINVAL); 2631 2632 pktinfop = (struct in_pktinfo *)invalp; 2633 2634 /* 2635 * At least one of the values should be specified 2636 */ 2637 if (pktinfop->ipi_ifindex == 0 && 2638 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2639 return (EINVAL); 2640 } 2641 2642 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2643 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2644 2645 break; 2646 } 2647 case IP_ADD_MEMBERSHIP: 2648 case IP_DROP_MEMBERSHIP: 2649 case IP_BLOCK_SOURCE: 2650 case IP_UNBLOCK_SOURCE: 2651 case IP_ADD_SOURCE_MEMBERSHIP: 2652 case IP_DROP_SOURCE_MEMBERSHIP: 2653 case MCAST_JOIN_GROUP: 2654 case MCAST_LEAVE_GROUP: 2655 case MCAST_BLOCK_SOURCE: 2656 case MCAST_UNBLOCK_SOURCE: 2657 case MCAST_JOIN_SOURCE_GROUP: 2658 case MCAST_LEAVE_SOURCE_GROUP: 2659 case IP_SEC_OPT: 2660 case IP_NEXTHOP: 2661 case IP_DHCPINIT_IF: 2662 /* 2663 * "soft" error (negative) 2664 * option not handled at this level 2665 * Do not modify *outlenp. 2666 */ 2667 return (-EINVAL); 2668 case IP_BOUND_IF: 2669 if (!checkonly) { 2670 udp->udp_bound_if = *i1; 2671 PASS_OPT_TO_IP(connp); 2672 } 2673 break; 2674 case IP_UNSPEC_SRC: 2675 if (!checkonly) { 2676 udp->udp_unspec_source = onoff; 2677 PASS_OPT_TO_IP(connp); 2678 } 2679 break; 2680 case IP_BROADCAST_TTL: 2681 if (!checkonly) 2682 connp->conn_broadcast_ttl = *invalp; 2683 break; 2684 default: 2685 *outlenp = 0; 2686 return (EINVAL); 2687 } 2688 break; 2689 case IPPROTO_IPV6: { 2690 ip6_pkt_t *ipp; 2691 boolean_t sticky; 2692 2693 if (udp->udp_family != AF_INET6) { 2694 *outlenp = 0; 2695 return (ENOPROTOOPT); 2696 } 2697 /* 2698 * Deal with both sticky options and ancillary data 2699 */ 2700 sticky = B_FALSE; 2701 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 2702 NULL) { 2703 /* sticky options, or none */ 2704 ipp = &udp->udp_sticky_ipp; 2705 sticky = B_TRUE; 2706 } 2707 2708 switch (name) { 2709 case IPV6_MULTICAST_IF: 2710 if (!checkonly) { 2711 udp->udp_multicast_if_index = *i1; 2712 PASS_OPT_TO_IP(connp); 2713 } 2714 break; 2715 case IPV6_UNICAST_HOPS: 2716 /* -1 means use default */ 2717 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2718 *outlenp = 0; 2719 return (EINVAL); 2720 } 2721 if (!checkonly) { 2722 if (*i1 == -1) { 2723 udp->udp_ttl = ipp->ipp_unicast_hops = 2724 us->us_ipv6_hoplimit; 2725 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2726 /* Pass modified value to IP. */ 2727 *i1 = udp->udp_ttl; 2728 } else { 2729 udp->udp_ttl = ipp->ipp_unicast_hops = 2730 (uint8_t)*i1; 2731 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2732 } 2733 /* Rebuild the header template */ 2734 error = udp_build_hdrs(udp); 2735 if (error != 0) { 2736 *outlenp = 0; 2737 return (error); 2738 } 2739 } 2740 break; 2741 case IPV6_MULTICAST_HOPS: 2742 /* -1 means use default */ 2743 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2744 *outlenp = 0; 2745 return (EINVAL); 2746 } 2747 if (!checkonly) { 2748 if (*i1 == -1) { 2749 udp->udp_multicast_ttl = 2750 ipp->ipp_multicast_hops = 2751 IP_DEFAULT_MULTICAST_TTL; 2752 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2753 /* Pass modified value to IP. 
*/ 2754 *i1 = udp->udp_multicast_ttl; 2755 } else { 2756 udp->udp_multicast_ttl = 2757 ipp->ipp_multicast_hops = 2758 (uint8_t)*i1; 2759 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2760 } 2761 } 2762 break; 2763 case IPV6_MULTICAST_LOOP: 2764 if (*i1 != 0 && *i1 != 1) { 2765 *outlenp = 0; 2766 return (EINVAL); 2767 } 2768 if (!checkonly) { 2769 connp->conn_multicast_loop = *i1; 2770 PASS_OPT_TO_IP(connp); 2771 } 2772 break; 2773 case IPV6_JOIN_GROUP: 2774 case IPV6_LEAVE_GROUP: 2775 case MCAST_JOIN_GROUP: 2776 case MCAST_LEAVE_GROUP: 2777 case MCAST_BLOCK_SOURCE: 2778 case MCAST_UNBLOCK_SOURCE: 2779 case MCAST_JOIN_SOURCE_GROUP: 2780 case MCAST_LEAVE_SOURCE_GROUP: 2781 /* 2782 * "soft" error (negative) 2783 * option not handled at this level 2784 * Note: Do not modify *outlenp 2785 */ 2786 return (-EINVAL); 2787 case IPV6_BOUND_IF: 2788 if (!checkonly) { 2789 udp->udp_bound_if = *i1; 2790 PASS_OPT_TO_IP(connp); 2791 } 2792 break; 2793 case IPV6_UNSPEC_SRC: 2794 if (!checkonly) { 2795 udp->udp_unspec_source = onoff; 2796 PASS_OPT_TO_IP(connp); 2797 } 2798 break; 2799 /* 2800 * Set boolean switches for ancillary data delivery 2801 */ 2802 case IPV6_RECVPKTINFO: 2803 if (!checkonly) { 2804 udp->udp_ip_recvpktinfo = onoff; 2805 PASS_OPT_TO_IP(connp); 2806 } 2807 break; 2808 case IPV6_RECVTCLASS: 2809 if (!checkonly) { 2810 udp->udp_ipv6_recvtclass = onoff; 2811 PASS_OPT_TO_IP(connp); 2812 } 2813 break; 2814 case IPV6_RECVPATHMTU: 2815 if (!checkonly) { 2816 udp->udp_ipv6_recvpathmtu = onoff; 2817 PASS_OPT_TO_IP(connp); 2818 } 2819 break; 2820 case IPV6_RECVHOPLIMIT: 2821 if (!checkonly) { 2822 udp->udp_ipv6_recvhoplimit = onoff; 2823 PASS_OPT_TO_IP(connp); 2824 } 2825 break; 2826 case IPV6_RECVHOPOPTS: 2827 if (!checkonly) { 2828 udp->udp_ipv6_recvhopopts = onoff; 2829 PASS_OPT_TO_IP(connp); 2830 } 2831 break; 2832 case IPV6_RECVDSTOPTS: 2833 if (!checkonly) { 2834 udp->udp_ipv6_recvdstopts = onoff; 2835 PASS_OPT_TO_IP(connp); 2836 } 2837 break; 2838 case _OLD_IPV6_RECVDSTOPTS: 2839 if (!checkonly) 2840 udp->udp_old_ipv6_recvdstopts = onoff; 2841 break; 2842 case IPV6_RECVRTHDRDSTOPTS: 2843 if (!checkonly) { 2844 udp->udp_ipv6_recvrthdrdstopts = onoff; 2845 PASS_OPT_TO_IP(connp); 2846 } 2847 break; 2848 case IPV6_RECVRTHDR: 2849 if (!checkonly) { 2850 udp->udp_ipv6_recvrthdr = onoff; 2851 PASS_OPT_TO_IP(connp); 2852 } 2853 break; 2854 /* 2855 * Set sticky options or ancillary data. 2856 * If sticky options, (re)build any extension headers 2857 * that might be needed as a result. 2858 */ 2859 case IPV6_PKTINFO: 2860 /* 2861 * The source address and ifindex are verified 2862 * in ip_opt_set(). For ancillary data the 2863 * source address is checked in ip_wput_v6. 
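			 *
			 * A hedged, user-level sketch of how this option is
			 * typically supplied (the interface name is just an
			 * example): as a sticky option,
			 *
			 *	struct in6_pktinfo pkti;
			 *
			 *	(void) memset(&pkti, 0, sizeof (pkti));
			 *	pkti.ipi6_ifindex = if_nametoindex("bge0");
			 *	setsockopt(fd, IPPROTO_IPV6, IPV6_PKTINFO,
			 *	    &pkti, sizeof (pkti));
			 *
			 * or as IPV6_PKTINFO ancillary data on an individual
			 * sendmsg(3SOCKET).  A zero-length set clears the
			 * sticky value, which is why inlen == 0 is accepted
			 * below.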
2864 */ 2865 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2866 return (EINVAL); 2867 if (checkonly) 2868 break; 2869 2870 if (inlen == 0) { 2871 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2872 ipp->ipp_sticky_ignored |= 2873 (IPPF_IFINDEX|IPPF_ADDR); 2874 } else { 2875 struct in6_pktinfo *pkti; 2876 2877 pkti = (struct in6_pktinfo *)invalp; 2878 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2879 ipp->ipp_addr = pkti->ipi6_addr; 2880 if (ipp->ipp_ifindex != 0) 2881 ipp->ipp_fields |= IPPF_IFINDEX; 2882 else 2883 ipp->ipp_fields &= ~IPPF_IFINDEX; 2884 if (!IN6_IS_ADDR_UNSPECIFIED( 2885 &ipp->ipp_addr)) 2886 ipp->ipp_fields |= IPPF_ADDR; 2887 else 2888 ipp->ipp_fields &= ~IPPF_ADDR; 2889 } 2890 if (sticky) { 2891 error = udp_build_hdrs(udp); 2892 if (error != 0) 2893 return (error); 2894 PASS_OPT_TO_IP(connp); 2895 } 2896 break; 2897 case IPV6_HOPLIMIT: 2898 if (sticky) 2899 return (EINVAL); 2900 if (inlen != 0 && inlen != sizeof (int)) 2901 return (EINVAL); 2902 if (checkonly) 2903 break; 2904 2905 if (inlen == 0) { 2906 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2907 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2908 } else { 2909 if (*i1 > 255 || *i1 < -1) 2910 return (EINVAL); 2911 if (*i1 == -1) 2912 ipp->ipp_hoplimit = 2913 us->us_ipv6_hoplimit; 2914 else 2915 ipp->ipp_hoplimit = *i1; 2916 ipp->ipp_fields |= IPPF_HOPLIMIT; 2917 } 2918 break; 2919 case IPV6_TCLASS: 2920 if (inlen != 0 && inlen != sizeof (int)) 2921 return (EINVAL); 2922 if (checkonly) 2923 break; 2924 2925 if (inlen == 0) { 2926 ipp->ipp_fields &= ~IPPF_TCLASS; 2927 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2928 } else { 2929 if (*i1 > 255 || *i1 < -1) 2930 return (EINVAL); 2931 if (*i1 == -1) 2932 ipp->ipp_tclass = 0; 2933 else 2934 ipp->ipp_tclass = *i1; 2935 ipp->ipp_fields |= IPPF_TCLASS; 2936 } 2937 if (sticky) { 2938 error = udp_build_hdrs(udp); 2939 if (error != 0) 2940 return (error); 2941 } 2942 break; 2943 case IPV6_NEXTHOP: 2944 /* 2945 * IP will verify that the nexthop is reachable 2946 * and fail for sticky options. 2947 */ 2948 if (inlen != 0 && inlen != sizeof (sin6_t)) 2949 return (EINVAL); 2950 if (checkonly) 2951 break; 2952 2953 if (inlen == 0) { 2954 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2955 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2956 } else { 2957 sin6_t *sin6 = (sin6_t *)invalp; 2958 2959 if (sin6->sin6_family != AF_INET6) { 2960 return (EAFNOSUPPORT); 2961 } 2962 if (IN6_IS_ADDR_V4MAPPED( 2963 &sin6->sin6_addr)) 2964 return (EADDRNOTAVAIL); 2965 ipp->ipp_nexthop = sin6->sin6_addr; 2966 if (!IN6_IS_ADDR_UNSPECIFIED( 2967 &ipp->ipp_nexthop)) 2968 ipp->ipp_fields |= IPPF_NEXTHOP; 2969 else 2970 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2971 } 2972 if (sticky) { 2973 error = udp_build_hdrs(udp); 2974 if (error != 0) 2975 return (error); 2976 PASS_OPT_TO_IP(connp); 2977 } 2978 break; 2979 case IPV6_HOPOPTS: { 2980 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2981 /* 2982 * Sanity checks - minimum size, size a multiple of 2983 * eight bytes, and matching size passed in. 2984 */ 2985 if (inlen != 0 && 2986 inlen != (8 * (hopts->ip6h_len + 1))) 2987 return (EINVAL); 2988 2989 if (checkonly) 2990 break; 2991 2992 error = optcom_pkt_set(invalp, inlen, sticky, 2993 (uchar_t **)&ipp->ipp_hopopts, 2994 &ipp->ipp_hopoptslen, 2995 sticky ? 
udp->udp_label_len_v6 : 0); 2996 if (error != 0) 2997 return (error); 2998 if (ipp->ipp_hopoptslen == 0) { 2999 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3000 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3001 } else { 3002 ipp->ipp_fields |= IPPF_HOPOPTS; 3003 } 3004 if (sticky) { 3005 error = udp_build_hdrs(udp); 3006 if (error != 0) 3007 return (error); 3008 } 3009 break; 3010 } 3011 case IPV6_RTHDRDSTOPTS: { 3012 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3013 3014 /* 3015 * Sanity checks - minimum size, size a multiple of 3016 * eight bytes, and matching size passed in. 3017 */ 3018 if (inlen != 0 && 3019 inlen != (8 * (dopts->ip6d_len + 1))) 3020 return (EINVAL); 3021 3022 if (checkonly) 3023 break; 3024 3025 if (inlen == 0) { 3026 if (sticky && 3027 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 3028 kmem_free(ipp->ipp_rtdstopts, 3029 ipp->ipp_rtdstoptslen); 3030 ipp->ipp_rtdstopts = NULL; 3031 ipp->ipp_rtdstoptslen = 0; 3032 } 3033 3034 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 3035 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 3036 } else { 3037 error = optcom_pkt_set(invalp, inlen, sticky, 3038 (uchar_t **)&ipp->ipp_rtdstopts, 3039 &ipp->ipp_rtdstoptslen, 0); 3040 if (error != 0) 3041 return (error); 3042 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3043 } 3044 if (sticky) { 3045 error = udp_build_hdrs(udp); 3046 if (error != 0) 3047 return (error); 3048 } 3049 break; 3050 } 3051 case IPV6_DSTOPTS: { 3052 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3053 3054 /* 3055 * Sanity checks - minimum size, size a multiple of 3056 * eight bytes, and matching size passed in. 3057 */ 3058 if (inlen != 0 && 3059 inlen != (8 * (dopts->ip6d_len + 1))) 3060 return (EINVAL); 3061 3062 if (checkonly) 3063 break; 3064 3065 if (inlen == 0) { 3066 if (sticky && 3067 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3068 kmem_free(ipp->ipp_dstopts, 3069 ipp->ipp_dstoptslen); 3070 ipp->ipp_dstopts = NULL; 3071 ipp->ipp_dstoptslen = 0; 3072 } 3073 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3074 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3075 } else { 3076 error = optcom_pkt_set(invalp, inlen, sticky, 3077 (uchar_t **)&ipp->ipp_dstopts, 3078 &ipp->ipp_dstoptslen, 0); 3079 if (error != 0) 3080 return (error); 3081 ipp->ipp_fields |= IPPF_DSTOPTS; 3082 } 3083 if (sticky) { 3084 error = udp_build_hdrs(udp); 3085 if (error != 0) 3086 return (error); 3087 } 3088 break; 3089 } 3090 case IPV6_RTHDR: { 3091 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3092 3093 /* 3094 * Sanity checks - minimum size, size a multiple of 3095 * eight bytes, and matching size passed in. 
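			 *
			 * Worked example: a routing header whose ip6r_len
			 * field is 2 occupies 8 * (2 + 1) = 24 bytes, so the
			 * caller must pass inlen == 24, or inlen == 0 to
			 * clear the option.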
3096 */ 3097 if (inlen != 0 && 3098 inlen != (8 * (rt->ip6r_len + 1))) 3099 return (EINVAL); 3100 3101 if (checkonly) 3102 break; 3103 3104 if (inlen == 0) { 3105 if (sticky && 3106 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3107 kmem_free(ipp->ipp_rthdr, 3108 ipp->ipp_rthdrlen); 3109 ipp->ipp_rthdr = NULL; 3110 ipp->ipp_rthdrlen = 0; 3111 } 3112 ipp->ipp_fields &= ~IPPF_RTHDR; 3113 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3114 } else { 3115 error = optcom_pkt_set(invalp, inlen, sticky, 3116 (uchar_t **)&ipp->ipp_rthdr, 3117 &ipp->ipp_rthdrlen, 0); 3118 if (error != 0) 3119 return (error); 3120 ipp->ipp_fields |= IPPF_RTHDR; 3121 } 3122 if (sticky) { 3123 error = udp_build_hdrs(udp); 3124 if (error != 0) 3125 return (error); 3126 } 3127 break; 3128 } 3129 3130 case IPV6_DONTFRAG: 3131 if (checkonly) 3132 break; 3133 3134 if (onoff) { 3135 ipp->ipp_fields |= IPPF_DONTFRAG; 3136 } else { 3137 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3138 } 3139 break; 3140 3141 case IPV6_USE_MIN_MTU: 3142 if (inlen != sizeof (int)) 3143 return (EINVAL); 3144 3145 if (*i1 < -1 || *i1 > 1) 3146 return (EINVAL); 3147 3148 if (checkonly) 3149 break; 3150 3151 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3152 ipp->ipp_use_min_mtu = *i1; 3153 break; 3154 3155 case IPV6_SEC_OPT: 3156 case IPV6_SRC_PREFERENCES: 3157 case IPV6_V6ONLY: 3158 /* Handled at the IP level */ 3159 return (-EINVAL); 3160 default: 3161 *outlenp = 0; 3162 return (EINVAL); 3163 } 3164 break; 3165 } /* end IPPROTO_IPV6 */ 3166 case IPPROTO_UDP: 3167 switch (name) { 3168 case UDP_ANONPRIVBIND: 3169 if ((error = secpolicy_net_privaddr(cr, 0, 3170 IPPROTO_UDP)) != 0) { 3171 *outlenp = 0; 3172 return (error); 3173 } 3174 if (!checkonly) { 3175 udp->udp_anon_priv_bind = onoff; 3176 } 3177 break; 3178 case UDP_EXCLBIND: 3179 if (!checkonly) 3180 udp->udp_exclbind = onoff; 3181 break; 3182 case UDP_RCVHDR: 3183 if (!checkonly) 3184 udp->udp_rcvhdr = onoff; 3185 break; 3186 case UDP_NAT_T_ENDPOINT: 3187 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 3188 *outlenp = 0; 3189 return (error); 3190 } 3191 3192 /* 3193 * Use udp_family instead so we can avoid ambiguitites 3194 * with AF_INET6 sockets that may switch from IPv4 3195 * to IPv6. 3196 */ 3197 if (udp->udp_family != AF_INET) { 3198 *outlenp = 0; 3199 return (EAFNOSUPPORT); 3200 } 3201 3202 if (!checkonly) { 3203 int size; 3204 3205 udp->udp_nat_t_endpoint = onoff; 3206 3207 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3208 UDPH_SIZE + udp->udp_ip_snd_options_len; 3209 3210 /* Also, adjust wroff */ 3211 if (onoff) { 3212 udp->udp_max_hdr_len += 3213 sizeof (uint32_t); 3214 } 3215 size = udp->udp_max_hdr_len + 3216 us->us_wroff_extra; 3217 (void) proto_set_tx_wroff(connp->conn_rq, connp, 3218 size); 3219 } 3220 break; 3221 default: 3222 *outlenp = 0; 3223 return (EINVAL); 3224 } 3225 break; 3226 default: 3227 *outlenp = 0; 3228 return (EINVAL); 3229 } 3230 /* 3231 * Common case of OK return with outval same as inval. 
3232 */ 3233 if (invalp != outvalp) { 3234 /* don't trust bcopy for identical src/dst */ 3235 (void) bcopy(invalp, outvalp, inlen); 3236 } 3237 *outlenp = inlen; 3238 return (0); 3239 } 3240 3241 int 3242 udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3243 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3244 void *thisdg_attrs, cred_t *cr) 3245 { 3246 int error; 3247 boolean_t checkonly; 3248 3249 error = 0; 3250 switch (optset_context) { 3251 case SETFN_OPTCOM_CHECKONLY: 3252 checkonly = B_TRUE; 3253 /* 3254 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3255 * inlen != 0 implies value supplied and 3256 * we have to "pretend" to set it. 3257 * inlen == 0 implies that there is no 3258 * value part in T_CHECK request and just validation 3259 * done elsewhere should be enough, we just return here. 3260 */ 3261 if (inlen == 0) { 3262 *outlenp = 0; 3263 goto done; 3264 } 3265 break; 3266 case SETFN_OPTCOM_NEGOTIATE: 3267 checkonly = B_FALSE; 3268 break; 3269 case SETFN_UD_NEGOTIATE: 3270 case SETFN_CONN_NEGOTIATE: 3271 checkonly = B_FALSE; 3272 /* 3273 * Negotiating local and "association-related" options 3274 * through T_UNITDATA_REQ. 3275 * 3276 * Following routine can filter out ones we do not 3277 * want to be "set" this way. 3278 */ 3279 if (!udp_opt_allow_udr_set(level, name)) { 3280 *outlenp = 0; 3281 error = EINVAL; 3282 goto done; 3283 } 3284 break; 3285 default: 3286 /* 3287 * We should never get here 3288 */ 3289 *outlenp = 0; 3290 error = EINVAL; 3291 goto done; 3292 } 3293 3294 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3295 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3296 3297 error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3298 outvalp, cr, thisdg_attrs, checkonly); 3299 done: 3300 return (error); 3301 } 3302 3303 /* ARGSUSED */ 3304 int 3305 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3306 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3307 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3308 { 3309 conn_t *connp = Q_TO_CONN(q); 3310 int error; 3311 udp_t *udp = connp->conn_udp; 3312 3313 rw_enter(&udp->udp_rwlock, RW_WRITER); 3314 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 3315 outlenp, outvalp, thisdg_attrs, cr); 3316 rw_exit(&udp->udp_rwlock); 3317 return (error); 3318 } 3319 3320 /* 3321 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3322 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3323 * headers, and the udp header. 3324 * Returns failure if can't allocate memory. 
3325 */ 3326 static int 3327 udp_build_hdrs(udp_t *udp) 3328 { 3329 udp_stack_t *us = udp->udp_us; 3330 uchar_t *hdrs; 3331 uint_t hdrs_len; 3332 ip6_t *ip6h; 3333 ip6i_t *ip6i; 3334 udpha_t *udpha; 3335 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3336 size_t sth_wroff; 3337 conn_t *connp = udp->udp_connp; 3338 3339 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 3340 ASSERT(connp != NULL); 3341 3342 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3343 ASSERT(hdrs_len != 0); 3344 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3345 /* Need to reallocate */ 3346 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3347 if (hdrs == NULL) 3348 return (ENOMEM); 3349 3350 if (udp->udp_sticky_hdrs_len != 0) { 3351 kmem_free(udp->udp_sticky_hdrs, 3352 udp->udp_sticky_hdrs_len); 3353 } 3354 udp->udp_sticky_hdrs = hdrs; 3355 udp->udp_sticky_hdrs_len = hdrs_len; 3356 } 3357 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3358 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3359 3360 /* Set header fields not in ipp */ 3361 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3362 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3363 ip6h = (ip6_t *)&ip6i[1]; 3364 } else { 3365 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3366 } 3367 3368 if (!(ipp->ipp_fields & IPPF_ADDR)) 3369 ip6h->ip6_src = udp->udp_v6src; 3370 3371 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3372 udpha->uha_src_port = udp->udp_port; 3373 3374 /* Try to get everything in a single mblk */ 3375 if (hdrs_len > udp->udp_max_hdr_len) { 3376 udp->udp_max_hdr_len = hdrs_len; 3377 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3378 rw_exit(&udp->udp_rwlock); 3379 (void) proto_set_tx_wroff(udp->udp_connp->conn_rq, 3380 udp->udp_connp, sth_wroff); 3381 rw_enter(&udp->udp_rwlock, RW_WRITER); 3382 } 3383 return (0); 3384 } 3385 3386 /* 3387 * This routine retrieves the value of an ND variable in a udpparam_t 3388 * structure. It is called through nd_getset when a user reads the 3389 * variable. 3390 */ 3391 /* ARGSUSED */ 3392 static int 3393 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3394 { 3395 udpparam_t *udppa = (udpparam_t *)cp; 3396 3397 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 3398 return (0); 3399 } 3400 3401 /* 3402 * Walk through the param array specified registering each element with the 3403 * named dispatch (ND) handler. 3404 */ 3405 static boolean_t 3406 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3407 { 3408 for (; cnt-- > 0; udppa++) { 3409 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3410 if (!nd_load(ndp, udppa->udp_param_name, 3411 udp_param_get, udp_param_set, 3412 (caddr_t)udppa)) { 3413 nd_free(ndp); 3414 return (B_FALSE); 3415 } 3416 } 3417 } 3418 if (!nd_load(ndp, "udp_extra_priv_ports", 3419 udp_extra_priv_ports_get, NULL, NULL)) { 3420 nd_free(ndp); 3421 return (B_FALSE); 3422 } 3423 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3424 NULL, udp_extra_priv_ports_add, NULL)) { 3425 nd_free(ndp); 3426 return (B_FALSE); 3427 } 3428 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3429 NULL, udp_extra_priv_ports_del, NULL)) { 3430 nd_free(ndp); 3431 return (B_FALSE); 3432 } 3433 if (!nd_load(ndp, "udp_status", udp_status_report, NULL, 3434 NULL)) { 3435 nd_free(ndp); 3436 return (B_FALSE); 3437 } 3438 if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL, 3439 NULL)) { 3440 nd_free(ndp); 3441 return (B_FALSE); 3442 } 3443 return (B_TRUE); 3444 } 3445 3446 /* This routine sets an ND variable in a udpparam_t structure. 
*/
/* ARGSUSED */
static int
udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
{
	long new_value;
	udpparam_t *udppa = (udpparam_t *)cp;

	/*
	 * Fail the request if the new value does not lie within the
	 * required bounds.
	 */
	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
	    new_value < udppa->udp_param_min ||
	    new_value > udppa->udp_param_max) {
		return (EINVAL);
	}

	/* Set the new value */
	udppa->udp_param_value = new_value;
	return (0);
}

/*
 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with
 * T_opthdr) and return the number of bytes copied.  'dbuf' may be NULL to
 * just count the length needed for allocation.  If 'dbuf' is non-NULL,
 * then it's assumed to be allocated to be large enough.
 *
 * Returns zero if trimming of the security option causes all options to go
 * away.
 */
static size_t
copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf)
{
	struct T_opthdr *toh;
	size_t hol = ipp->ipp_hopoptslen;
	ip6_hbh_t *dstopt = NULL;
	const ip6_hbh_t *srcopt = ipp->ipp_hopopts;
	size_t tlen, olen, plen;
	boolean_t deleting;
	const struct ip6_opt *sopt, *lastpad;
	struct ip6_opt *dopt;

	if ((toh = (struct T_opthdr *)dbuf) != NULL) {
		toh->level = IPPROTO_IPV6;
		toh->name = IPV6_HOPOPTS;
		toh->status = 0;
		dstopt = (ip6_hbh_t *)(toh + 1);
	}

	/*
	 * If labeling is enabled, then skip the label option
	 * but get other options if there are any.
	 */
	if (is_system_labeled()) {
		dopt = NULL;
		if (dstopt != NULL) {
			/* will fill in ip6h_len later */
			dstopt->ip6h_nxt = srcopt->ip6h_nxt;
			dopt = (struct ip6_opt *)(dstopt + 1);
		}
		sopt = (const struct ip6_opt *)(srcopt + 1);
		hol -= sizeof (*srcopt);
		tlen = sizeof (*dstopt);
		lastpad = NULL;
		deleting = B_FALSE;
		/*
		 * This loop finds the first (lastpad pointer) of any number of
		 * pads that precede the security option, then treats the
		 * security option as though it were a pad, and then finds the
		 * next non-pad option (or end of list).
		 *
		 * It then treats the entire block as one big pad.  To preserve
		 * alignment of any options that follow, or just the end of the
		 * list, it computes a minimal new padding size that keeps the
		 * same alignment for the next option.
		 *
		 * If it encounters just a sequence of pads with no security
		 * option, those are copied as-is rather than collapsed.
		 *
		 * Note that to handle the end of list case, the code makes one
		 * loop with 'hol' set to zero.
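		 *
		 * Worked example (illustrative): if the source options are
		 *
		 *	PadN (4 bytes) | label option (8 bytes) | next option
		 *
		 * then 12 bytes are deleted and 12 & 7 == 4, so a single
		 * 4-byte PadN (type, len = 2, two zero bytes) is emitted in
		 * their place before the next option is copied, preserving
		 * that option's alignment within the 8-octet units of the
		 * header.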
3529 */ 3530 for (;;) { 3531 if (hol > 0) { 3532 if (sopt->ip6o_type == IP6OPT_PAD1) { 3533 if (lastpad == NULL) 3534 lastpad = sopt; 3535 sopt = (const struct ip6_opt *) 3536 &sopt->ip6o_len; 3537 hol--; 3538 continue; 3539 } 3540 olen = sopt->ip6o_len + sizeof (*sopt); 3541 if (olen > hol) 3542 olen = hol; 3543 if (sopt->ip6o_type == IP6OPT_PADN || 3544 sopt->ip6o_type == ip6opt_ls) { 3545 if (sopt->ip6o_type == ip6opt_ls) 3546 deleting = B_TRUE; 3547 if (lastpad == NULL) 3548 lastpad = sopt; 3549 sopt = (const struct ip6_opt *) 3550 ((const char *)sopt + olen); 3551 hol -= olen; 3552 continue; 3553 } 3554 } else { 3555 /* if nothing was copied at all, then delete */ 3556 if (tlen == sizeof (*dstopt)) 3557 return (0); 3558 /* last pass; pick up any trailing padding */ 3559 olen = 0; 3560 } 3561 if (deleting) { 3562 /* 3563 * compute aligning effect of deleted material 3564 * to reproduce with pad. 3565 */ 3566 plen = ((const char *)sopt - 3567 (const char *)lastpad) & 7; 3568 tlen += plen; 3569 if (dopt != NULL) { 3570 if (plen == 1) { 3571 dopt->ip6o_type = IP6OPT_PAD1; 3572 } else if (plen > 1) { 3573 plen -= sizeof (*dopt); 3574 dopt->ip6o_type = IP6OPT_PADN; 3575 dopt->ip6o_len = plen; 3576 if (plen > 0) 3577 bzero(dopt + 1, plen); 3578 } 3579 dopt = (struct ip6_opt *) 3580 ((char *)dopt + plen); 3581 } 3582 deleting = B_FALSE; 3583 lastpad = NULL; 3584 } 3585 /* if there's uncopied padding, then copy that now */ 3586 if (lastpad != NULL) { 3587 olen += (const char *)sopt - 3588 (const char *)lastpad; 3589 sopt = lastpad; 3590 lastpad = NULL; 3591 } 3592 if (dopt != NULL && olen > 0) { 3593 bcopy(sopt, dopt, olen); 3594 dopt = (struct ip6_opt *)((char *)dopt + olen); 3595 } 3596 if (hol == 0) 3597 break; 3598 tlen += olen; 3599 sopt = (const struct ip6_opt *) 3600 ((const char *)sopt + olen); 3601 hol -= olen; 3602 } 3603 /* go back and patch up the length value, rounded upward */ 3604 if (dstopt != NULL) 3605 dstopt->ip6h_len = (tlen - 1) >> 3; 3606 } else { 3607 tlen = hol; 3608 if (dstopt != NULL) 3609 bcopy(srcopt, dstopt, hol); 3610 } 3611 3612 tlen += sizeof (*toh); 3613 if (toh != NULL) 3614 toh->len = tlen; 3615 3616 return (tlen); 3617 } 3618 3619 /* 3620 * Update udp_rcv_opt_len from the packet. 3621 * Called when options received, and when no options received but 3622 * udp_ip_recv_opt_len has previously recorded options. 
3623 */ 3624 static void 3625 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) 3626 { 3627 /* Save the options if any */ 3628 if (opt_len > 0) { 3629 if (opt_len > udp->udp_ip_rcv_options_len) { 3630 /* Need to allocate larger buffer */ 3631 if (udp->udp_ip_rcv_options_len != 0) 3632 mi_free((char *)udp->udp_ip_rcv_options); 3633 udp->udp_ip_rcv_options_len = 0; 3634 udp->udp_ip_rcv_options = 3635 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 3636 if (udp->udp_ip_rcv_options != NULL) 3637 udp->udp_ip_rcv_options_len = opt_len; 3638 } 3639 if (udp->udp_ip_rcv_options_len != 0) { 3640 bcopy(opt, udp->udp_ip_rcv_options, opt_len); 3641 /* Adjust length if we are resusing the space */ 3642 udp->udp_ip_rcv_options_len = opt_len; 3643 } 3644 } else if (udp->udp_ip_rcv_options_len != 0) { 3645 /* Clear out previously recorded options */ 3646 mi_free((char *)udp->udp_ip_rcv_options); 3647 udp->udp_ip_rcv_options = NULL; 3648 udp->udp_ip_rcv_options_len = 0; 3649 } 3650 } 3651 3652 static void 3653 udp_queue_fallback(udp_t *udp, mblk_t *mp) 3654 { 3655 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 3656 if (IPCL_IS_NONSTR(udp->udp_connp)) { 3657 /* 3658 * fallback has started but messages have not been moved yet 3659 */ 3660 if (udp->udp_fallback_queue_head == NULL) { 3661 ASSERT(udp->udp_fallback_queue_tail == NULL); 3662 udp->udp_fallback_queue_head = mp; 3663 udp->udp_fallback_queue_tail = mp; 3664 } else { 3665 ASSERT(udp->udp_fallback_queue_tail != NULL); 3666 udp->udp_fallback_queue_tail->b_next = mp; 3667 udp->udp_fallback_queue_tail = mp; 3668 } 3669 mutex_exit(&udp->udp_recv_lock); 3670 } else { 3671 /* 3672 * no more fallbacks possible, ok to drop lock. 3673 */ 3674 mutex_exit(&udp->udp_recv_lock); 3675 putnext(udp->udp_connp->conn_rq, mp); 3676 } 3677 } 3678 3679 /* ARGSUSED2 */ 3680 static void 3681 udp_input(void *arg1, mblk_t *mp, void *arg2) 3682 { 3683 conn_t *connp = (conn_t *)arg1; 3684 struct T_unitdata_ind *tudi; 3685 uchar_t *rptr; /* Pointer to IP header */ 3686 int hdr_length; /* Length of IP+UDP headers */ 3687 int opt_len; 3688 int udi_size; /* Size of T_unitdata_ind */ 3689 int mp_len; 3690 udp_t *udp; 3691 udpha_t *udpha; 3692 int ipversion; 3693 ip6_pkt_t ipp; 3694 ip6_t *ip6h; 3695 ip6i_t *ip6i; 3696 mblk_t *mp1; 3697 mblk_t *options_mp = NULL; 3698 ip_pktinfo_t *pinfo = NULL; 3699 cred_t *cr = NULL; 3700 pid_t cpid; 3701 uint32_t udp_ip_rcv_options_len; 3702 udp_bits_t udp_bits; 3703 cred_t *rcr = connp->conn_cred; 3704 udp_stack_t *us; 3705 3706 ASSERT(connp->conn_flags & IPCL_UDPCONN); 3707 3708 udp = connp->conn_udp; 3709 us = udp->udp_us; 3710 rptr = mp->b_rptr; 3711 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3712 ASSERT(OK_32PTR(rptr)); 3713 3714 /* 3715 * IP should have prepended the options data in an M_CTL 3716 * Check M_CTL "type" to make sure are not here bcos of 3717 * a valid ICMP message 3718 */ 3719 if (DB_TYPE(mp) == M_CTL) { 3720 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3721 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3722 IN_PKTINFO) { 3723 /* 3724 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3725 * has been prepended to the packet by IP. We need to 3726 * extract the mblk and adjust the rptr 3727 */ 3728 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3729 options_mp = mp; 3730 mp = mp->b_cont; 3731 rptr = mp->b_rptr; 3732 UDP_STAT(us, udp_in_pktinfo); 3733 } else { 3734 /* 3735 * ICMP messages. 3736 */ 3737 udp_icmp_error(connp, mp); 3738 return; 3739 } 3740 } 3741 3742 mp_len = msgdsize(mp); 3743 /* 3744 * This is the inbound data path. 
3745 * First, we check to make sure the IP version number is correct, 3746 * and then pull the IP and UDP headers into the first mblk. 3747 */ 3748 3749 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 3750 ipp.ipp_fields = 0; 3751 3752 ipversion = IPH_HDR_VERSION(rptr); 3753 3754 rw_enter(&udp->udp_rwlock, RW_READER); 3755 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 3756 udp_bits = udp->udp_bits; 3757 rw_exit(&udp->udp_rwlock); 3758 3759 switch (ipversion) { 3760 case IPV4_VERSION: 3761 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 3762 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 3763 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 3764 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 3765 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 3766 udp->udp_family == AF_INET) { 3767 /* 3768 * Record/update udp_ip_rcv_options with the lock 3769 * held. Not needed for AF_INET6 sockets 3770 * since they don't support a getsockopt of IP_OPTIONS. 3771 */ 3772 rw_enter(&udp->udp_rwlock, RW_WRITER); 3773 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 3774 opt_len); 3775 rw_exit(&udp->udp_rwlock); 3776 } 3777 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. */ 3778 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 3779 udp->udp_ip_recvpktinfo) { 3780 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 3781 ipp.ipp_fields |= IPPF_IFINDEX; 3782 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 3783 } 3784 } 3785 break; 3786 case IPV6_VERSION: 3787 /* 3788 * IPv6 packets can only be received by applications 3789 * that are prepared to receive IPv6 addresses. 3790 * The IP fanout must ensure this. 3791 */ 3792 ASSERT(udp->udp_family == AF_INET6); 3793 3794 ip6h = (ip6_t *)rptr; 3795 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 3796 3797 if (ip6h->ip6_nxt != IPPROTO_UDP) { 3798 uint8_t nexthdrp; 3799 /* Look for ifindex information */ 3800 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3801 ip6i = (ip6i_t *)ip6h; 3802 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 3803 goto tossit; 3804 3805 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3806 ASSERT(ip6i->ip6i_ifindex != 0); 3807 ipp.ipp_fields |= IPPF_IFINDEX; 3808 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3809 } 3810 rptr = (uchar_t *)&ip6i[1]; 3811 mp->b_rptr = rptr; 3812 if (rptr == mp->b_wptr) { 3813 mp1 = mp->b_cont; 3814 freeb(mp); 3815 mp = mp1; 3816 rptr = mp->b_rptr; 3817 } 3818 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 3819 goto tossit; 3820 ip6h = (ip6_t *)rptr; 3821 mp_len = msgdsize(mp); 3822 } 3823 /* 3824 * Find any potentially interesting extension headers 3825 * as well as the length of the IPv6 + extension 3826 * headers. 3827 */ 3828 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 3829 UDPH_SIZE; 3830 ASSERT(nexthdrp == IPPROTO_UDP); 3831 } else { 3832 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 3833 ip6i = NULL; 3834 } 3835 break; 3836 default: 3837 ASSERT(0); 3838 } 3839 3840 /* 3841 * IP inspected the UDP header thus all of it must be in the mblk. 3842 * UDP length check is performed for IPv6 packets and IPv4 packets 3843 * to check if the size of the packet as specified 3844 * by the header is the same as the physical size of the packet. 3845 * FIXME? Didn't IP already check this? 3846 */ 3847 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 3848 if ((MBLKL(mp) < hdr_length) || 3849 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 3850 goto tossit; 3851 } 3852 3853 3854 /* Walk past the headers unless IP_RECVHDR was set. 
*/ 3855 if (!udp_bits.udpb_rcvhdr) { 3856 mp->b_rptr = rptr + hdr_length; 3857 mp_len -= hdr_length; 3858 } 3859 3860 /* 3861 * This is the inbound data path. Packets are passed upstream as 3862 * T_UNITDATA_IND messages with full IP headers still attached. 3863 */ 3864 if (udp->udp_family == AF_INET) { 3865 sin_t *sin; 3866 3867 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 3868 3869 /* 3870 * Normally only send up the source address. 3871 * If IP_RECVDSTADDR is set we include the destination IP 3872 * address as an option. With IP_RECVOPTS we include all 3873 * the IP options. 3874 */ 3875 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3876 if (udp_bits.udpb_recvdstaddr) { 3877 udi_size += sizeof (struct T_opthdr) + 3878 sizeof (struct in_addr); 3879 UDP_STAT(us, udp_in_recvdstaddr); 3880 } 3881 3882 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 3883 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3884 udi_size += sizeof (struct T_opthdr) + 3885 sizeof (struct in_pktinfo); 3886 UDP_STAT(us, udp_ip_rcvpktinfo); 3887 } 3888 3889 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 3890 udi_size += sizeof (struct T_opthdr) + opt_len; 3891 UDP_STAT(us, udp_in_recvopts); 3892 } 3893 3894 /* 3895 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 3896 * space accordingly 3897 */ 3898 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 3899 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3900 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 3901 UDP_STAT(us, udp_in_recvif); 3902 } 3903 3904 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 3905 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 3906 udi_size += sizeof (struct T_opthdr) + 3907 sizeof (struct sockaddr_dl); 3908 UDP_STAT(us, udp_in_recvslla); 3909 } 3910 3911 if ((udp_bits.udpb_recvucred) && 3912 (cr = DB_CRED(mp)) != NULL) { 3913 udi_size += sizeof (struct T_opthdr) + ucredsize; 3914 cpid = DB_CPID(mp); 3915 UDP_STAT(us, udp_in_recvucred); 3916 } 3917 3918 /* 3919 * If SO_TIMESTAMP is set allocate the appropriate sized 3920 * buffer. Since gethrestime() expects a pointer aligned 3921 * argument, we allocate space necessary for extra 3922 * alignment (even though it might not be used). 3923 */ 3924 if (udp_bits.udpb_timestamp) { 3925 udi_size += sizeof (struct T_opthdr) + 3926 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3927 UDP_STAT(us, udp_in_timestamp); 3928 } 3929 3930 /* 3931 * If IP_RECVTTL is set allocate the appropriate sized buffer 3932 */ 3933 if (udp_bits.udpb_recvttl) { 3934 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 3935 UDP_STAT(us, udp_in_recvttl); 3936 } 3937 3938 /* Allocate a message block for the T_UNITDATA_IND structure. 
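		 *
		 * Sketch of the message assembled below (sizes as
		 * accumulated in udi_size above):
		 *
		 *	struct T_unitdata_ind
		 *	sin_t			source address and port
		 *	T_opthdr entries	IP_RECVDSTADDR, IP_RECVOPTS,
		 *				IP_PKTINFO, IP_RECVSLLA,
		 *				IP_RECVIF, SCM_UCRED,
		 *				SCM_TIMESTAMP and, kept last
		 *				(see the alignment caution
		 *				below), IP_RECVTTL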
*/ 3939 mp1 = allocb(udi_size, BPRI_MED); 3940 if (mp1 == NULL) { 3941 freemsg(mp); 3942 if (options_mp != NULL) 3943 freeb(options_mp); 3944 BUMP_MIB(&us->us_udp_mib, udpInErrors); 3945 return; 3946 } 3947 mp1->b_cont = mp; 3948 mp = mp1; 3949 mp->b_datap->db_type = M_PROTO; 3950 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3951 mp->b_wptr = (uchar_t *)tudi + udi_size; 3952 tudi->PRIM_type = T_UNITDATA_IND; 3953 tudi->SRC_length = sizeof (sin_t); 3954 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3955 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3956 sizeof (sin_t); 3957 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3958 tudi->OPT_length = udi_size; 3959 sin = (sin_t *)&tudi[1]; 3960 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 3961 sin->sin_port = udpha->uha_src_port; 3962 sin->sin_family = udp->udp_family; 3963 *(uint32_t *)&sin->sin_zero[0] = 0; 3964 *(uint32_t *)&sin->sin_zero[4] = 0; 3965 3966 /* 3967 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 3968 * IP_RECVTTL has been set. 3969 */ 3970 if (udi_size != 0) { 3971 /* 3972 * Copy in destination address before options to avoid 3973 * any padding issues. 3974 */ 3975 char *dstopt; 3976 3977 dstopt = (char *)&sin[1]; 3978 if (udp_bits.udpb_recvdstaddr) { 3979 struct T_opthdr *toh; 3980 ipaddr_t *dstptr; 3981 3982 toh = (struct T_opthdr *)dstopt; 3983 toh->level = IPPROTO_IP; 3984 toh->name = IP_RECVDSTADDR; 3985 toh->len = sizeof (struct T_opthdr) + 3986 sizeof (ipaddr_t); 3987 toh->status = 0; 3988 dstopt += sizeof (struct T_opthdr); 3989 dstptr = (ipaddr_t *)dstopt; 3990 *dstptr = ((ipha_t *)rptr)->ipha_dst; 3991 dstopt += sizeof (ipaddr_t); 3992 udi_size -= toh->len; 3993 } 3994 3995 if (udp_bits.udpb_recvopts && opt_len > 0) { 3996 struct T_opthdr *toh; 3997 3998 toh = (struct T_opthdr *)dstopt; 3999 toh->level = IPPROTO_IP; 4000 toh->name = IP_RECVOPTS; 4001 toh->len = sizeof (struct T_opthdr) + opt_len; 4002 toh->status = 0; 4003 dstopt += sizeof (struct T_opthdr); 4004 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 4005 opt_len); 4006 dstopt += opt_len; 4007 udi_size -= toh->len; 4008 } 4009 4010 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 4011 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4012 struct T_opthdr *toh; 4013 struct in_pktinfo *pktinfop; 4014 4015 toh = (struct T_opthdr *)dstopt; 4016 toh->level = IPPROTO_IP; 4017 toh->name = IP_PKTINFO; 4018 toh->len = sizeof (struct T_opthdr) + 4019 sizeof (*pktinfop); 4020 toh->status = 0; 4021 dstopt += sizeof (struct T_opthdr); 4022 pktinfop = (struct in_pktinfo *)dstopt; 4023 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 4024 pktinfop->ipi_spec_dst = 4025 pinfo->ip_pkt_match_addr; 4026 pktinfop->ipi_addr.s_addr = 4027 ((ipha_t *)rptr)->ipha_dst; 4028 4029 dstopt += sizeof (struct in_pktinfo); 4030 udi_size -= toh->len; 4031 } 4032 4033 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4034 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4035 4036 struct T_opthdr *toh; 4037 struct sockaddr_dl *dstptr; 4038 4039 toh = (struct T_opthdr *)dstopt; 4040 toh->level = IPPROTO_IP; 4041 toh->name = IP_RECVSLLA; 4042 toh->len = sizeof (struct T_opthdr) + 4043 sizeof (struct sockaddr_dl); 4044 toh->status = 0; 4045 dstopt += sizeof (struct T_opthdr); 4046 dstptr = (struct sockaddr_dl *)dstopt; 4047 bcopy(&pinfo->ip_pkt_slla, dstptr, 4048 sizeof (struct sockaddr_dl)); 4049 dstopt += sizeof (struct sockaddr_dl); 4050 udi_size -= toh->len; 4051 } 4052 4053 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4054 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 

				struct T_opthdr *toh;
				uint_t *dstptr;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IP;
				toh->name = IP_RECVIF;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (uint_t);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				dstptr = (uint_t *)dstopt;
				*dstptr = pinfo->ip_pkt_ifindex;
				dstopt += sizeof (uint_t);
				udi_size -= toh->len;
			}

			if (cr != NULL) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = SOL_SOCKET;
				toh->name = SCM_UCRED;
				toh->len = sizeof (struct T_opthdr) + ucredsize;
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				(void) cred2ucred(cr, cpid, dstopt, rcr);
				dstopt += ucredsize;
				udi_size -= toh->len;
			}

			if (udp_bits.udpb_timestamp) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = SOL_SOCKET;
				toh->name = SCM_TIMESTAMP;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				/* Align for gethrestime() */
				dstopt = (char *)P2ROUNDUP((intptr_t)dstopt,
				    sizeof (intptr_t));
				gethrestime((timestruc_t *)dstopt);
				dstopt = (char *)toh + toh->len;
				udi_size -= toh->len;
			}

			/*
			 * CAUTION:
			 * Due to alignment issues, processing of the
			 * IP_RECVTTL option must always be done last.
			 * Adding any option processing after this will
			 * cause an alignment panic.
			 */
			if (udp_bits.udpb_recvttl) {
				struct T_opthdr *toh;
				uint8_t *dstptr;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IP;
				toh->name = IP_RECVTTL;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (uint8_t);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				dstptr = (uint8_t *)dstopt;
				*dstptr = ((ipha_t *)rptr)->ipha_ttl;
				dstopt += sizeof (uint8_t);
				udi_size -= toh->len;
			}

			/* Consumed all of allocated space */
			ASSERT(udi_size == 0);
		}
	} else {
		sin6_t *sin6;

		/*
		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
		 *
		 * Normally we only send up the address. If receiving of any
		 * optional receive side information is enabled, we also send
		 * that up as options.
4141 */ 4142 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4143 4144 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4145 IPPF_RTHDR|IPPF_IFINDEX)) { 4146 if ((udp_bits.udpb_ipv6_recvhopopts) && 4147 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4148 size_t hlen; 4149 4150 UDP_STAT(us, udp_in_recvhopopts); 4151 hlen = copy_hop_opts(&ipp, NULL); 4152 if (hlen == 0) 4153 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4154 udi_size += hlen; 4155 } 4156 if (((udp_bits.udpb_ipv6_recvdstopts) || 4157 udp_bits.udpb_old_ipv6_recvdstopts) && 4158 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4159 udi_size += sizeof (struct T_opthdr) + 4160 ipp.ipp_dstoptslen; 4161 UDP_STAT(us, udp_in_recvdstopts); 4162 } 4163 if ((((udp_bits.udpb_ipv6_recvdstopts) && 4164 udp_bits.udpb_ipv6_recvrthdr && 4165 (ipp.ipp_fields & IPPF_RTHDR)) || 4166 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 4167 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4168 udi_size += sizeof (struct T_opthdr) + 4169 ipp.ipp_rtdstoptslen; 4170 UDP_STAT(us, udp_in_recvrtdstopts); 4171 } 4172 if ((udp_bits.udpb_ipv6_recvrthdr) && 4173 (ipp.ipp_fields & IPPF_RTHDR)) { 4174 udi_size += sizeof (struct T_opthdr) + 4175 ipp.ipp_rthdrlen; 4176 UDP_STAT(us, udp_in_recvrthdr); 4177 } 4178 if ((udp_bits.udpb_ip_recvpktinfo) && 4179 (ipp.ipp_fields & IPPF_IFINDEX)) { 4180 udi_size += sizeof (struct T_opthdr) + 4181 sizeof (struct in6_pktinfo); 4182 UDP_STAT(us, udp_in_recvpktinfo); 4183 } 4184 4185 } 4186 if ((udp_bits.udpb_recvucred) && 4187 (cr = DB_CRED(mp)) != NULL) { 4188 udi_size += sizeof (struct T_opthdr) + ucredsize; 4189 cpid = DB_CPID(mp); 4190 UDP_STAT(us, udp_in_recvucred); 4191 } 4192 4193 /* 4194 * If SO_TIMESTAMP is set allocate the appropriate sized 4195 * buffer. Since gethrestime() expects a pointer aligned 4196 * argument, we allocate space necessary for extra 4197 * alignment (even though it might not be used). 
4198 */ 4199 if (udp_bits.udpb_timestamp) { 4200 udi_size += sizeof (struct T_opthdr) + 4201 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4202 UDP_STAT(us, udp_in_timestamp); 4203 } 4204 4205 if (udp_bits.udpb_ipv6_recvhoplimit) { 4206 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4207 UDP_STAT(us, udp_in_recvhoplimit); 4208 } 4209 4210 if (udp_bits.udpb_ipv6_recvtclass) { 4211 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4212 UDP_STAT(us, udp_in_recvtclass); 4213 } 4214 4215 mp1 = allocb(udi_size, BPRI_MED); 4216 if (mp1 == NULL) { 4217 freemsg(mp); 4218 if (options_mp != NULL) 4219 freeb(options_mp); 4220 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4221 return; 4222 } 4223 mp1->b_cont = mp; 4224 mp = mp1; 4225 mp->b_datap->db_type = M_PROTO; 4226 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4227 mp->b_wptr = (uchar_t *)tudi + udi_size; 4228 tudi->PRIM_type = T_UNITDATA_IND; 4229 tudi->SRC_length = sizeof (sin6_t); 4230 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4231 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4232 sizeof (sin6_t); 4233 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4234 tudi->OPT_length = udi_size; 4235 sin6 = (sin6_t *)&tudi[1]; 4236 if (ipversion == IPV4_VERSION) { 4237 in6_addr_t v6dst; 4238 4239 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4240 &sin6->sin6_addr); 4241 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4242 &v6dst); 4243 sin6->sin6_flowinfo = 0; 4244 sin6->sin6_scope_id = 0; 4245 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4246 connp->conn_zoneid, us->us_netstack); 4247 } else { 4248 sin6->sin6_addr = ip6h->ip6_src; 4249 /* No sin6_flowinfo per API */ 4250 sin6->sin6_flowinfo = 0; 4251 /* For link-scope source pass up scope id */ 4252 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4253 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4254 sin6->sin6_scope_id = ipp.ipp_ifindex; 4255 else 4256 sin6->sin6_scope_id = 0; 4257 sin6->__sin6_src_id = ip_srcid_find_addr( 4258 &ip6h->ip6_dst, connp->conn_zoneid, 4259 us->us_netstack); 4260 } 4261 sin6->sin6_port = udpha->uha_src_port; 4262 sin6->sin6_family = udp->udp_family; 4263 4264 if (udi_size != 0) { 4265 uchar_t *dstopt; 4266 4267 dstopt = (uchar_t *)&sin6[1]; 4268 if ((udp_bits.udpb_ip_recvpktinfo) && 4269 (ipp.ipp_fields & IPPF_IFINDEX)) { 4270 struct T_opthdr *toh; 4271 struct in6_pktinfo *pkti; 4272 4273 toh = (struct T_opthdr *)dstopt; 4274 toh->level = IPPROTO_IPV6; 4275 toh->name = IPV6_PKTINFO; 4276 toh->len = sizeof (struct T_opthdr) + 4277 sizeof (*pkti); 4278 toh->status = 0; 4279 dstopt += sizeof (struct T_opthdr); 4280 pkti = (struct in6_pktinfo *)dstopt; 4281 if (ipversion == IPV6_VERSION) 4282 pkti->ipi6_addr = ip6h->ip6_dst; 4283 else 4284 IN6_IPADDR_TO_V4MAPPED( 4285 ((ipha_t *)rptr)->ipha_dst, 4286 &pkti->ipi6_addr); 4287 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4288 dstopt += sizeof (*pkti); 4289 udi_size -= toh->len; 4290 } 4291 if (udp_bits.udpb_ipv6_recvhoplimit) { 4292 struct T_opthdr *toh; 4293 4294 toh = (struct T_opthdr *)dstopt; 4295 toh->level = IPPROTO_IPV6; 4296 toh->name = IPV6_HOPLIMIT; 4297 toh->len = sizeof (struct T_opthdr) + 4298 sizeof (uint_t); 4299 toh->status = 0; 4300 dstopt += sizeof (struct T_opthdr); 4301 if (ipversion == IPV6_VERSION) 4302 *(uint_t *)dstopt = ip6h->ip6_hops; 4303 else 4304 *(uint_t *)dstopt = 4305 ((ipha_t *)rptr)->ipha_ttl; 4306 dstopt += sizeof (uint_t); 4307 udi_size -= toh->len; 4308 } 4309 if (udp_bits.udpb_ipv6_recvtclass) { 4310 struct T_opthdr *toh; 4311 4312 toh = (struct T_opthdr *)dstopt; 4313 
toh->level = IPPROTO_IPV6; 4314 toh->name = IPV6_TCLASS; 4315 toh->len = sizeof (struct T_opthdr) + 4316 sizeof (uint_t); 4317 toh->status = 0; 4318 dstopt += sizeof (struct T_opthdr); 4319 if (ipversion == IPV6_VERSION) { 4320 *(uint_t *)dstopt = 4321 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4322 } else { 4323 ipha_t *ipha = (ipha_t *)rptr; 4324 *(uint_t *)dstopt = 4325 ipha->ipha_type_of_service; 4326 } 4327 dstopt += sizeof (uint_t); 4328 udi_size -= toh->len; 4329 } 4330 if ((udp_bits.udpb_ipv6_recvhopopts) && 4331 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4332 size_t hlen; 4333 4334 hlen = copy_hop_opts(&ipp, dstopt); 4335 dstopt += hlen; 4336 udi_size -= hlen; 4337 } 4338 if ((udp_bits.udpb_ipv6_recvdstopts) && 4339 (udp_bits.udpb_ipv6_recvrthdr) && 4340 (ipp.ipp_fields & IPPF_RTHDR) && 4341 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4342 struct T_opthdr *toh; 4343 4344 toh = (struct T_opthdr *)dstopt; 4345 toh->level = IPPROTO_IPV6; 4346 toh->name = IPV6_DSTOPTS; 4347 toh->len = sizeof (struct T_opthdr) + 4348 ipp.ipp_rtdstoptslen; 4349 toh->status = 0; 4350 dstopt += sizeof (struct T_opthdr); 4351 bcopy(ipp.ipp_rtdstopts, dstopt, 4352 ipp.ipp_rtdstoptslen); 4353 dstopt += ipp.ipp_rtdstoptslen; 4354 udi_size -= toh->len; 4355 } 4356 if ((udp_bits.udpb_ipv6_recvrthdr) && 4357 (ipp.ipp_fields & IPPF_RTHDR)) { 4358 struct T_opthdr *toh; 4359 4360 toh = (struct T_opthdr *)dstopt; 4361 toh->level = IPPROTO_IPV6; 4362 toh->name = IPV6_RTHDR; 4363 toh->len = sizeof (struct T_opthdr) + 4364 ipp.ipp_rthdrlen; 4365 toh->status = 0; 4366 dstopt += sizeof (struct T_opthdr); 4367 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4368 dstopt += ipp.ipp_rthdrlen; 4369 udi_size -= toh->len; 4370 } 4371 if ((udp_bits.udpb_ipv6_recvdstopts) && 4372 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4373 struct T_opthdr *toh; 4374 4375 toh = (struct T_opthdr *)dstopt; 4376 toh->level = IPPROTO_IPV6; 4377 toh->name = IPV6_DSTOPTS; 4378 toh->len = sizeof (struct T_opthdr) + 4379 ipp.ipp_dstoptslen; 4380 toh->status = 0; 4381 dstopt += sizeof (struct T_opthdr); 4382 bcopy(ipp.ipp_dstopts, dstopt, 4383 ipp.ipp_dstoptslen); 4384 dstopt += ipp.ipp_dstoptslen; 4385 udi_size -= toh->len; 4386 } 4387 if (cr != NULL) { 4388 struct T_opthdr *toh; 4389 4390 toh = (struct T_opthdr *)dstopt; 4391 toh->level = SOL_SOCKET; 4392 toh->name = SCM_UCRED; 4393 toh->len = sizeof (struct T_opthdr) + ucredsize; 4394 toh->status = 0; 4395 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4396 dstopt += toh->len; 4397 udi_size -= toh->len; 4398 } 4399 if (udp_bits.udpb_timestamp) { 4400 struct T_opthdr *toh; 4401 4402 toh = (struct T_opthdr *)dstopt; 4403 toh->level = SOL_SOCKET; 4404 toh->name = SCM_TIMESTAMP; 4405 toh->len = sizeof (struct T_opthdr) + 4406 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4407 toh->status = 0; 4408 dstopt += sizeof (struct T_opthdr); 4409 /* Align for gethrestime() */ 4410 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4411 sizeof (intptr_t)); 4412 gethrestime((timestruc_t *)dstopt); 4413 dstopt = (uchar_t *)toh + toh->len; 4414 udi_size -= toh->len; 4415 } 4416 4417 /* Consumed all of allocated space */ 4418 ASSERT(udi_size == 0); 4419 } 4420 #undef sin6 4421 /* No IP_RECVDSTADDR for IPv6. 
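	 * For AF_INET6 sockets the destination address is instead delivered
	 * via the IPV6_PKTINFO option built above, which carries both the
	 * destination address and the receiving interface index.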
*/ 4422 } 4423 4424 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4425 if (options_mp != NULL) 4426 freeb(options_mp); 4427 4428 if (IPCL_IS_NONSTR(connp)) { 4429 int error; 4430 4431 if ((*connp->conn_upcalls->su_recv) 4432 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 4433 NULL) < 0) { 4434 mutex_enter(&udp->udp_recv_lock); 4435 if (error == ENOSPC) { 4436 /* 4437 * let's confirm while holding the lock 4438 */ 4439 if ((*connp->conn_upcalls->su_recv) 4440 (connp->conn_upper_handle, NULL, 0, 0, 4441 &error, NULL) < 0) { 4442 if (error == ENOSPC) { 4443 connp->conn_flow_cntrld = 4444 B_TRUE; 4445 } else { 4446 ASSERT(error == EOPNOTSUPP); 4447 } 4448 } 4449 mutex_exit(&udp->udp_recv_lock); 4450 } else { 4451 ASSERT(error == EOPNOTSUPP); 4452 udp_queue_fallback(udp, mp); 4453 } 4454 } 4455 } else { 4456 putnext(connp->conn_rq, mp); 4457 } 4458 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 4459 return; 4460 4461 tossit: 4462 freemsg(mp); 4463 if (options_mp != NULL) 4464 freeb(options_mp); 4465 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4466 } 4467 4468 /* 4469 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 4470 * information that can be changing beneath us. 4471 */ 4472 mblk_t * 4473 udp_snmp_get(queue_t *q, mblk_t *mpctl) 4474 { 4475 mblk_t *mpdata; 4476 mblk_t *mp_conn_ctl; 4477 mblk_t *mp_attr_ctl; 4478 mblk_t *mp6_conn_ctl; 4479 mblk_t *mp6_attr_ctl; 4480 mblk_t *mp_conn_tail; 4481 mblk_t *mp_attr_tail; 4482 mblk_t *mp6_conn_tail; 4483 mblk_t *mp6_attr_tail; 4484 struct opthdr *optp; 4485 mib2_udpEntry_t ude; 4486 mib2_udp6Entry_t ude6; 4487 mib2_transportMLPEntry_t mlp; 4488 int state; 4489 zoneid_t zoneid; 4490 int i; 4491 connf_t *connfp; 4492 conn_t *connp = Q_TO_CONN(q); 4493 int v4_conn_idx; 4494 int v6_conn_idx; 4495 boolean_t needattr; 4496 udp_t *udp; 4497 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4498 udp_stack_t *us = connp->conn_netstack->netstack_udp; 4499 mblk_t *mp2ctl; 4500 4501 /* 4502 * make a copy of the original message 4503 */ 4504 mp2ctl = copymsg(mpctl); 4505 4506 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 4507 if (mpctl == NULL || 4508 (mpdata = mpctl->b_cont) == NULL || 4509 (mp_conn_ctl = copymsg(mpctl)) == NULL || 4510 (mp_attr_ctl = copymsg(mpctl)) == NULL || 4511 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 4512 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 4513 freemsg(mp_conn_ctl); 4514 freemsg(mp_attr_ctl); 4515 freemsg(mp6_conn_ctl); 4516 freemsg(mpctl); 4517 freemsg(mp2ctl); 4518 return (0); 4519 } 4520 4521 zoneid = connp->conn_zoneid; 4522 4523 /* fixed length structure for IPv4 and IPv6 counters */ 4524 SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 4525 SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 4526 /* synchronize 64- and 32-bit counters */ 4527 SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams); 4528 SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams); 4529 4530 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4531 optp->level = MIB2_UDP; 4532 optp->name = 0; 4533 (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib, 4534 sizeof (us->us_udp_mib)); 4535 optp->len = msgdsize(mpdata); 4536 qreply(q, mpctl); 4537 4538 mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 4539 v4_conn_idx = v6_conn_idx = 0; 4540 4541 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4542 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4543 connp = NULL; 4544 4545 while ((connp = ipcl_get_next_conn(connfp, connp, 4546 IPCL_UDPCONN))) 
{ 4547 udp = connp->conn_udp; 4548 if (zoneid != connp->conn_zoneid) 4549 continue; 4550 4551 /* 4552 * Note that the port numbers are sent in 4553 * host byte order 4554 */ 4555 4556 if (udp->udp_state == TS_UNBND) 4557 state = MIB2_UDP_unbound; 4558 else if (udp->udp_state == TS_IDLE) 4559 state = MIB2_UDP_idle; 4560 else if (udp->udp_state == TS_DATA_XFER) 4561 state = MIB2_UDP_connected; 4562 else 4563 state = MIB2_UDP_unknown; 4564 4565 needattr = B_FALSE; 4566 bzero(&mlp, sizeof (mlp)); 4567 if (connp->conn_mlp_type != mlptSingle) { 4568 if (connp->conn_mlp_type == mlptShared || 4569 connp->conn_mlp_type == mlptBoth) 4570 mlp.tme_flags |= MIB2_TMEF_SHARED; 4571 if (connp->conn_mlp_type == mlptPrivate || 4572 connp->conn_mlp_type == mlptBoth) 4573 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 4574 needattr = B_TRUE; 4575 } 4576 4577 /* 4578 * Create an IPv4 table entry for IPv4 entries and also 4579 * any IPv6 entries which are bound to in6addr_any 4580 * (i.e. anything a IPv4 peer could connect/send to). 4581 */ 4582 if (udp->udp_ipversion == IPV4_VERSION || 4583 (udp->udp_state <= TS_IDLE && 4584 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 4585 ude.udpEntryInfo.ue_state = state; 4586 /* 4587 * If in6addr_any this will set it to 4588 * INADDR_ANY 4589 */ 4590 ude.udpLocalAddress = 4591 V4_PART_OF_V6(udp->udp_v6src); 4592 ude.udpLocalPort = ntohs(udp->udp_port); 4593 if (udp->udp_state == TS_DATA_XFER) { 4594 /* 4595 * Can potentially get here for 4596 * v6 socket if another process 4597 * (say, ping) has just done a 4598 * sendto(), changing the state 4599 * from the TS_IDLE above to 4600 * TS_DATA_XFER by the time we hit 4601 * this part of the code. 4602 */ 4603 ude.udpEntryInfo.ue_RemoteAddress = 4604 V4_PART_OF_V6(udp->udp_v6dst); 4605 ude.udpEntryInfo.ue_RemotePort = 4606 ntohs(udp->udp_dstport); 4607 } else { 4608 ude.udpEntryInfo.ue_RemoteAddress = 0; 4609 ude.udpEntryInfo.ue_RemotePort = 0; 4610 } 4611 4612 /* 4613 * We make the assumption that all udp_t 4614 * structs will be created within an address 4615 * region no larger than 32-bits. 4616 */ 4617 ude.udpInstance = (uint32_t)(uintptr_t)udp; 4618 ude.udpCreationProcess = 4619 (udp->udp_open_pid < 0) ? 4620 MIB2_UNKNOWN_PROCESS : 4621 udp->udp_open_pid; 4622 ude.udpCreationTime = udp->udp_open_time; 4623 4624 (void) snmp_append_data2(mp_conn_ctl->b_cont, 4625 &mp_conn_tail, (char *)&ude, sizeof (ude)); 4626 mlp.tme_connidx = v4_conn_idx++; 4627 if (needattr) 4628 (void) snmp_append_data2( 4629 mp_attr_ctl->b_cont, &mp_attr_tail, 4630 (char *)&mlp, sizeof (mlp)); 4631 } 4632 if (udp->udp_ipversion == IPV6_VERSION) { 4633 ude6.udp6EntryInfo.ue_state = state; 4634 ude6.udp6LocalAddress = udp->udp_v6src; 4635 ude6.udp6LocalPort = ntohs(udp->udp_port); 4636 ude6.udp6IfIndex = udp->udp_bound_if; 4637 if (udp->udp_state == TS_DATA_XFER) { 4638 ude6.udp6EntryInfo.ue_RemoteAddress = 4639 udp->udp_v6dst; 4640 ude6.udp6EntryInfo.ue_RemotePort = 4641 ntohs(udp->udp_dstport); 4642 } else { 4643 ude6.udp6EntryInfo.ue_RemoteAddress = 4644 sin6_null.sin6_addr; 4645 ude6.udp6EntryInfo.ue_RemotePort = 0; 4646 } 4647 /* 4648 * We make the assumption that all udp_t 4649 * structs will be created within an address 4650 * region no larger than 32-bits. 4651 */ 4652 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 4653 ude6.udp6CreationProcess = 4654 (udp->udp_open_pid < 0) ? 
				    MIB2_UNKNOWN_PROCESS :
				    udp->udp_open_pid;
				ude6.udp6CreationTime = udp->udp_open_time;

				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
				    &mp6_conn_tail, (char *)&ude6,
				    sizeof (ude6));
				mlp.tme_connidx = v6_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp6_attr_ctl->b_cont,
					    &mp6_attr_tail, (char *)&mlp,
					    sizeof (mlp));
			}
		}
	}

	/* IPv4 UDP endpoints */
	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = MIB2_UDP_ENTRY;
	optp->len = msgdsize(mp_conn_ctl->b_cont);
	qreply(q, mp_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp_attr_ctl);
	else
		qreply(q, mp_attr_ctl);

	/* IPv6 UDP endpoints */
	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = MIB2_UDP6_ENTRY;
	optp->len = msgdsize(mp6_conn_ctl->b_cont);
	qreply(q, mp6_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp6_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp6_attr_ctl);
	else
		qreply(q, mp6_attr_ctl);

	return (mp2ctl);
}

/*
 * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
 * NOTE: Per MIB-II, UDP has no writable data.
 * TODO: If this ever actually tries to set anything, it needs to be
 * updated to do the appropriate locking.
4718 */ 4719 /* ARGSUSED */ 4720 int 4721 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4722 uchar_t *ptr, int len) 4723 { 4724 switch (level) { 4725 case MIB2_UDP: 4726 return (0); 4727 default: 4728 return (1); 4729 } 4730 } 4731 4732 static void 4733 udp_report_item(mblk_t *mp, udp_t *udp) 4734 { 4735 char *state; 4736 char addrbuf1[INET6_ADDRSTRLEN]; 4737 char addrbuf2[INET6_ADDRSTRLEN]; 4738 uint_t print_len, buf_len; 4739 4740 buf_len = mp->b_datap->db_lim - mp->b_wptr; 4741 ASSERT(buf_len >= 0); 4742 if (buf_len == 0) 4743 return; 4744 4745 if (udp->udp_state == TS_UNBND) 4746 state = "UNBOUND"; 4747 else if (udp->udp_state == TS_IDLE) 4748 state = "IDLE"; 4749 else if (udp->udp_state == TS_DATA_XFER) 4750 state = "CONNECTED"; 4751 else 4752 state = "UnkState"; 4753 print_len = snprintf((char *)mp->b_wptr, buf_len, 4754 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 4755 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 4756 inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)), 4757 inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)), 4758 ntohs(udp->udp_dstport), state); 4759 if (print_len < buf_len) { 4760 mp->b_wptr += print_len; 4761 } else { 4762 mp->b_wptr += buf_len; 4763 } 4764 } 4765 4766 /* Report for ndd "udp_status" */ 4767 /* ARGSUSED */ 4768 static int 4769 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4770 { 4771 zoneid_t zoneid; 4772 connf_t *connfp; 4773 conn_t *connp = Q_TO_CONN(q); 4774 udp_t *udp = connp->conn_udp; 4775 int i; 4776 udp_stack_t *us = udp->udp_us; 4777 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4778 4779 /* 4780 * Because of the ndd constraint, at most we can have 64K buffer 4781 * to put in all UDP info. So to be more efficient, just 4782 * allocate a 64K buffer here, assuming we need that large buffer. 4783 * This may be a problem as any user can read udp_status. Therefore 4784 * we limit the rate of doing this using us_ndd_get_info_interval. 4785 * This should be OK as normal users should not do this too often. 4786 */ 4787 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 4788 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 4789 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 4790 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 4791 return (0); 4792 } 4793 } 4794 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 4795 /* The following may work even if we cannot get a large buf. */ 4796 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 4797 return (0); 4798 } 4799 (void) mi_mpprintf(mp, 4800 "UDP " MI_COL_HDRPAD_STR 4801 /* 12345678[89ABCDEF] */ 4802 " zone lport src addr dest addr port state"); 4803 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 4804 4805 zoneid = connp->conn_zoneid; 4806 4807 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4808 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4809 connp = NULL; 4810 4811 while ((connp = ipcl_get_next_conn(connfp, connp, 4812 IPCL_UDPCONN))) { 4813 udp = connp->conn_udp; 4814 if (zoneid != GLOBAL_ZONEID && 4815 zoneid != connp->conn_zoneid) 4816 continue; 4817 4818 udp_report_item(mp->b_cont, udp); 4819 } 4820 } 4821 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 4822 return (0); 4823 } 4824 4825 /* 4826 * This routine creates a T_UDERROR_IND message and passes it upstream. 4827 * The address and options are copied from the T_UNITDATA_REQ message 4828 * passed in mp. This message is freed. 
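 * If the offsets and lengths in the T_UNITDATA_REQ do not lie within the
 * message, no T_UDERROR_IND is generated and the message is simply freed.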
4829 */ 4830 static void 4831 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 4832 t_scalar_t err) 4833 { 4834 struct T_unitdata_req *tudr; 4835 mblk_t *mp1; 4836 uchar_t *optaddr; 4837 t_scalar_t optlen; 4838 4839 if (DB_TYPE(mp) == M_DATA) { 4840 ASSERT(destaddr != NULL && destlen != 0); 4841 optaddr = NULL; 4842 optlen = 0; 4843 } else { 4844 if ((mp->b_wptr < mp->b_rptr) || 4845 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 4846 goto done; 4847 } 4848 tudr = (struct T_unitdata_req *)mp->b_rptr; 4849 destaddr = mp->b_rptr + tudr->DEST_offset; 4850 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 4851 destaddr + tudr->DEST_length < mp->b_rptr || 4852 destaddr + tudr->DEST_length > mp->b_wptr) { 4853 goto done; 4854 } 4855 optaddr = mp->b_rptr + tudr->OPT_offset; 4856 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 4857 optaddr + tudr->OPT_length < mp->b_rptr || 4858 optaddr + tudr->OPT_length > mp->b_wptr) { 4859 goto done; 4860 } 4861 destlen = tudr->DEST_length; 4862 optlen = tudr->OPT_length; 4863 } 4864 4865 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 4866 (char *)optaddr, optlen, err); 4867 if (mp1 != NULL) 4868 qreply(q, mp1); 4869 4870 done: 4871 freemsg(mp); 4872 } 4873 4874 /* 4875 * This routine removes a port number association from a stream. It 4876 * is called by udp_wput to handle T_UNBIND_REQ messages. 4877 */ 4878 static void 4879 udp_tpi_unbind(queue_t *q, mblk_t *mp) 4880 { 4881 conn_t *connp = Q_TO_CONN(q); 4882 int error; 4883 4884 error = udp_do_unbind(connp); 4885 if (error) { 4886 if (error < 0) 4887 udp_err_ack(q, mp, -error, 0); 4888 else 4889 udp_err_ack(q, mp, TSYSERR, error); 4890 return; 4891 } 4892 4893 mp = mi_tpi_ok_ack_alloc(mp); 4894 ASSERT(mp != NULL); 4895 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4896 qreply(q, mp); 4897 } 4898 4899 /* 4900 * Don't let port fall into the privileged range. 4901 * Since the extra privileged ports can be arbitrary we also 4902 * ensure that we exclude those from consideration. 4903 * us->us_epriv_ports is not sorted thus we loop over it until 4904 * there are no changes. 4905 */ 4906 static in_port_t 4907 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 4908 { 4909 int i; 4910 in_port_t nextport; 4911 boolean_t restart = B_FALSE; 4912 udp_stack_t *us = udp->udp_us; 4913 4914 if (random && udp_random_anon_port != 0) { 4915 (void) random_get_pseudo_bytes((uint8_t *)&port, 4916 sizeof (in_port_t)); 4917 /* 4918 * Unless changed by a sys admin, the smallest anon port 4919 * is 32768 and the largest anon port is 65535. It is 4920 * very likely (50%) for the random port to be smaller 4921 * than the smallest anon port. When that happens, 4922 * add port % (anon port range) to the smallest anon 4923 * port to get the random port. It should fall into the 4924 * valid anon port range. 
4925 */ 4926 if (port < us->us_smallest_anon_port) { 4927 port = us->us_smallest_anon_port + 4928 port % (us->us_largest_anon_port - 4929 us->us_smallest_anon_port); 4930 } 4931 } 4932 4933 retry: 4934 if (port < us->us_smallest_anon_port) 4935 port = us->us_smallest_anon_port; 4936 4937 if (port > us->us_largest_anon_port) { 4938 port = us->us_smallest_anon_port; 4939 if (restart) 4940 return (0); 4941 restart = B_TRUE; 4942 } 4943 4944 if (port < us->us_smallest_nonpriv_port) 4945 port = us->us_smallest_nonpriv_port; 4946 4947 for (i = 0; i < us->us_num_epriv_ports; i++) { 4948 if (port == us->us_epriv_ports[i]) { 4949 port++; 4950 /* 4951 * Make sure that the port is in the 4952 * valid range. 4953 */ 4954 goto retry; 4955 } 4956 } 4957 4958 if (is_system_labeled() && 4959 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 4960 port, IPPROTO_UDP, B_TRUE)) != 0) { 4961 port = nextport; 4962 goto retry; 4963 } 4964 4965 return (port); 4966 } 4967 4968 static int 4969 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst, 4970 boolean_t *update_lastdst) 4971 { 4972 int err; 4973 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4974 udp_t *udp = Q_TO_UDP(wq); 4975 udp_stack_t *us = udp->udp_us; 4976 4977 err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst, 4978 opt_storage, udp->udp_connp->conn_mac_exempt, 4979 us->us_netstack->netstack_ip); 4980 if (err == 0) { 4981 err = tsol_update_options(&udp->udp_ip_snd_options, 4982 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 4983 opt_storage); 4984 } 4985 if (err != 0) { 4986 DTRACE_PROBE4( 4987 tx__ip__log__info__updatelabel__udp, 4988 char *, "queue(1) failed to update options(2) on mp(3)", 4989 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4990 } else { 4991 *update_lastdst = B_TRUE; 4992 } 4993 return (err); 4994 } 4995 4996 static mblk_t * 4997 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 4998 uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg, 4999 cred_t *cr, pid_t pid) 5000 { 5001 udp_t *udp = connp->conn_udp; 5002 mblk_t *mp1 = mp; 5003 mblk_t *mp2; 5004 ipha_t *ipha; 5005 int ip_hdr_length; 5006 uint32_t ip_len; 5007 udpha_t *udpha; 5008 boolean_t lock_held = B_FALSE; 5009 in_port_t uha_src_port; 5010 udpattrs_t attrs; 5011 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 5012 uint32_t ip_snd_opt_len = 0; 5013 ip4_pkt_t pktinfo; 5014 ip4_pkt_t *pktinfop = &pktinfo; 5015 ip_opt_info_t optinfo; 5016 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5017 udp_stack_t *us = udp->udp_us; 5018 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 5019 queue_t *q = connp->conn_wq; 5020 ire_t *ire; 5021 in6_addr_t v6dst; 5022 boolean_t update_lastdst = B_FALSE; 5023 5024 *error = 0; 5025 pktinfop->ip4_ill_index = 0; 5026 pktinfop->ip4_addr = INADDR_ANY; 5027 optinfo.ip_opt_flags = 0; 5028 optinfo.ip_opt_ill_index = 0; 5029 5030 if (v4dst == INADDR_ANY) 5031 v4dst = htonl(INADDR_LOOPBACK); 5032 5033 /* 5034 * If options passed in, feed it for verification and handling 5035 */ 5036 attrs.udpattr_credset = B_FALSE; 5037 if (IPCL_IS_NONSTR(connp)) { 5038 if (msg->msg_controllen != 0) { 5039 attrs.udpattr_ipp4 = pktinfop; 5040 attrs.udpattr_mb = mp; 5041 5042 rw_enter(&udp->udp_rwlock, RW_WRITER); 5043 *error = process_auxiliary_options(connp, 5044 msg->msg_control, msg->msg_controllen, 5045 &attrs, &udp_opt_obj, udp_opt_set); 5046 rw_exit(&udp->udp_rwlock); 5047 if (*error) 5048 goto done; 5049 } 5050 } else { 5051 if (DB_TYPE(mp) != M_DATA) { 5052 mp1 = mp->b_cont; 5053 if 
(((struct T_unitdata_req *) 5054 mp->b_rptr)->OPT_length != 0) { 5055 attrs.udpattr_ipp4 = pktinfop; 5056 attrs.udpattr_mb = mp; 5057 if (udp_unitdata_opt_process(q, mp, error, 5058 &attrs) < 0) 5059 goto done; 5060 /* 5061 * Note: success in processing options. 5062 * mp option buffer represented by 5063 * OPT_length/offset now potentially modified 5064 * and contain option setting results 5065 */ 5066 ASSERT(*error == 0); 5067 } 5068 } 5069 } 5070 5071 /* mp1 points to the M_DATA mblk carrying the packet */ 5072 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 5073 5074 /* 5075 * Determine whether we need to mark the mblk with the user's 5076 * credentials. 5077 */ 5078 ire = connp->conn_ire_cache; 5079 if (is_system_labeled() || CLASSD(v4dst) || (ire == NULL) || 5080 (ire->ire_addr != v4dst) || 5081 (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) { 5082 if (cr != NULL && DB_CRED(mp) == NULL) 5083 msg_setcredpid(mp, cr, pid); 5084 } 5085 5086 rw_enter(&udp->udp_rwlock, RW_READER); 5087 lock_held = B_TRUE; 5088 5089 /* 5090 * Cluster and TSOL note: 5091 * udp.udp_v6lastdst is shared by Cluster and TSOL 5092 * udp.udp_lastdstport is used by Cluster 5093 * 5094 * Both Cluster and TSOL need to update the dest addr and/or port. 5095 * Updating is done after both Cluster and TSOL checks, protected 5096 * by conn_lock. 5097 */ 5098 mutex_enter(&connp->conn_lock); 5099 5100 if (cl_inet_connect2 != NULL && 5101 (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5102 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5103 udp->udp_lastdstport != port)) { 5104 mutex_exit(&connp->conn_lock); 5105 *error = 0; 5106 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 5107 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error); 5108 if (*error != 0) { 5109 *error = EHOSTUNREACH; 5110 goto done; 5111 } 5112 update_lastdst = B_TRUE; 5113 mutex_enter(&connp->conn_lock); 5114 } 5115 5116 /* 5117 * Check if our saved options are valid; update if not. 5118 * TSOL Note: Since we are not in WRITER mode, UDP packets 5119 * to different destination may require different labels, 5120 * or worse, UDP packets to same IP address may require 5121 * different labels due to use of shared all-zones address. 5122 * We use conn_lock to ensure that lastdst, ip_snd_options, 5123 * and ip_snd_options_len are consistent for the current 5124 * destination and are updated atomically. 
5125 */ 5126 if (is_system_labeled()) { 5127 /* Using UDP MLP requires SCM_UCRED from user */ 5128 if (connp->conn_mlp_type != mlptSingle && 5129 !attrs.udpattr_credset) { 5130 mutex_exit(&connp->conn_lock); 5131 DTRACE_PROBE4( 5132 tx__ip__log__info__output__udp, 5133 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 5134 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 5135 *error = ECONNREFUSED; 5136 goto done; 5137 } 5138 /* 5139 * update label option for this UDP socket if 5140 * - the destination has changed, or 5141 * - the UDP socket is MLP 5142 */ 5143 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5144 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5145 connp->conn_mlp_type != mlptSingle) && 5146 (*error = udp_update_label(q, mp, v4dst, &update_lastdst)) 5147 != 0) { 5148 mutex_exit(&connp->conn_lock); 5149 goto done; 5150 } 5151 } 5152 if (update_lastdst) { 5153 IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst); 5154 udp->udp_lastdstport = port; 5155 } 5156 if (udp->udp_ip_snd_options_len > 0) { 5157 ip_snd_opt_len = udp->udp_ip_snd_options_len; 5158 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 5159 } 5160 mutex_exit(&connp->conn_lock); 5161 5162 /* Add an IP header */ 5163 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + 5164 (insert_spi ? sizeof (uint32_t) : 0); 5165 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5166 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5167 !OK_32PTR(ipha)) { 5168 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 5169 if (mp2 == NULL) { 5170 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5171 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5172 *error = ENOMEM; 5173 goto done; 5174 } 5175 mp2->b_wptr = DB_LIM(mp2); 5176 mp2->b_cont = mp1; 5177 mp1 = mp2; 5178 if (DB_TYPE(mp) != M_DATA) 5179 mp->b_cont = mp1; 5180 else 5181 mp = mp1; 5182 5183 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5184 } 5185 ip_hdr_length -= (UDPH_SIZE + (insert_spi ? sizeof (uint32_t) : 0)); 5186 #ifdef _BIG_ENDIAN 5187 /* Set version, header length, and tos */ 5188 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5189 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5190 udp->udp_type_of_service); 5191 /* Set ttl and protocol */ 5192 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5193 #else 5194 /* Set version, header length, and tos */ 5195 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5196 ((udp->udp_type_of_service << 8) | 5197 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5198 /* Set ttl and protocol */ 5199 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5200 #endif 5201 if (pktinfop->ip4_addr != INADDR_ANY) { 5202 ipha->ipha_src = pktinfop->ip4_addr; 5203 optinfo.ip_opt_flags = IP_VERIFY_SRC; 5204 } else { 5205 /* 5206 * Copy our address into the packet. If this is zero, 5207 * first look at __sin6_src_id for a hint. If we leave the 5208 * source as INADDR_ANY then ip will fill in the real source 5209 * address. 
5210 */ 5211 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5212 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5213 in6_addr_t v6src; 5214 5215 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 5216 us->us_netstack); 5217 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5218 } 5219 } 5220 uha_src_port = udp->udp_port; 5221 if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) { 5222 rw_exit(&udp->udp_rwlock); 5223 lock_held = B_FALSE; 5224 } 5225 5226 if (pktinfop->ip4_ill_index != 0) { 5227 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 5228 } 5229 5230 ipha->ipha_fragment_offset_and_flags = 0; 5231 ipha->ipha_ident = 0; 5232 5233 mp1->b_rptr = (uchar_t *)ipha; 5234 5235 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5236 (uintptr_t)UINT_MAX); 5237 5238 /* Determine length of packet */ 5239 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5240 if ((mp2 = mp1->b_cont) != NULL) { 5241 do { 5242 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5243 ip_len += (uint32_t)MBLKL(mp2); 5244 } while ((mp2 = mp2->b_cont) != NULL); 5245 } 5246 /* 5247 * If the size of the packet is greater than the maximum allowed by 5248 * ip, return an error. Passing this down could cause panics because 5249 * the size will have wrapped and be inconsistent with the msg size. 5250 */ 5251 if (ip_len > IP_MAXPACKET) { 5252 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5253 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5254 *error = EMSGSIZE; 5255 goto done; 5256 } 5257 ipha->ipha_length = htons((uint16_t)ip_len); 5258 ip_len -= ip_hdr_length; 5259 ip_len = htons((uint16_t)ip_len); 5260 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5261 5262 /* Insert all-0s SPI now. */ 5263 if (insert_spi) 5264 *((uint32_t *)(udpha + 1)) = 0; 5265 5266 /* 5267 * Copy in the destination address 5268 */ 5269 ipha->ipha_dst = v4dst; 5270 5271 /* 5272 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 5273 */ 5274 if (CLASSD(v4dst)) 5275 ipha->ipha_ttl = udp->udp_multicast_ttl; 5276 5277 udpha->uha_dst_port = port; 5278 udpha->uha_src_port = uha_src_port; 5279 5280 if (ip_snd_opt_len > 0) { 5281 uint32_t cksum; 5282 5283 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 5284 lock_held = B_FALSE; 5285 rw_exit(&udp->udp_rwlock); 5286 /* 5287 * Massage source route putting first source route in ipha_dst. 5288 * Ignore the destination in T_unitdata_req. 5289 * Create a checksum adjustment for a source route, if any. 5290 */ 5291 cksum = ip_massage_options(ipha, us->us_netstack); 5292 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5293 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5294 (ipha->ipha_dst & 0xFFFF); 5295 if ((int)cksum < 0) 5296 cksum--; 5297 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5298 /* 5299 * IP does the checksum if uha_checksum is non-zero, 5300 * We make it easy for IP to include our pseudo header 5301 * by putting our length in uha_checksum. 5302 */ 5303 cksum += ip_len; 5304 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5305 /* There might be a carry. */ 5306 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5307 #ifdef _LITTLE_ENDIAN 5308 if (us->us_do_checksum) 5309 ip_len = (cksum << 16) | ip_len; 5310 #else 5311 if (us->us_do_checksum) 5312 ip_len = (ip_len << 16) | cksum; 5313 else 5314 ip_len <<= 16; 5315 #endif 5316 } else { 5317 /* 5318 * IP does the checksum if uha_checksum is non-zero, 5319 * We make it easy for IP to include our pseudo header 5320 * by putting our length in uha_checksum. 
5321 */ 5322 if (us->us_do_checksum) 5323 ip_len |= (ip_len << 16); 5324 #ifndef _LITTLE_ENDIAN 5325 else 5326 ip_len <<= 16; 5327 #endif 5328 } 5329 ASSERT(!lock_held); 5330 /* Set UDP length and checksum */ 5331 *((uint32_t *)&udpha->uha_length) = ip_len; 5332 if (DB_CRED(mp) != NULL) 5333 mblk_setcred(mp1, DB_CRED(mp)); 5334 5335 if (DB_TYPE(mp) != M_DATA) { 5336 ASSERT(mp != mp1); 5337 freeb(mp); 5338 } 5339 5340 /* mp has been consumed and we'll return success */ 5341 ASSERT(*error == 0); 5342 mp = NULL; 5343 5344 /* We're done. Pass the packet to ip. */ 5345 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 5346 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5347 "udp_wput_end: q %p (%S)", q, "end"); 5348 5349 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 5350 CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || 5351 connp->conn_dontroute || 5352 connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || 5353 optinfo.ip_opt_ill_index != 0 || 5354 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 5355 IPP_ENABLED(IPP_LOCAL_OUT, ipst) || 5356 ipst->ips_ip_g_mrouter != NULL) { 5357 UDP_STAT(us, udp_ip_send); 5358 ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, 5359 &optinfo); 5360 } else { 5361 udp_send_data(udp, connp->conn_wq, mp1, ipha); 5362 } 5363 5364 done: 5365 if (lock_held) 5366 rw_exit(&udp->udp_rwlock); 5367 if (*error != 0) { 5368 ASSERT(mp != NULL); 5369 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5370 } 5371 return (mp); 5372 } 5373 5374 static void 5375 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 5376 { 5377 conn_t *connp = udp->udp_connp; 5378 ipaddr_t src, dst; 5379 ire_t *ire; 5380 ipif_t *ipif = NULL; 5381 mblk_t *ire_fp_mp; 5382 boolean_t retry_caching; 5383 udp_stack_t *us = udp->udp_us; 5384 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5385 5386 dst = ipha->ipha_dst; 5387 src = ipha->ipha_src; 5388 ASSERT(ipha->ipha_ident == 0); 5389 5390 if (CLASSD(dst)) { 5391 int err; 5392 5393 ipif = conn_get_held_ipif(connp, 5394 &connp->conn_multicast_ipif, &err); 5395 5396 if (ipif == NULL || ipif->ipif_isv6 || 5397 (ipif->ipif_ill->ill_phyint->phyint_flags & 5398 PHYI_LOOPBACK)) { 5399 if (ipif != NULL) 5400 ipif_refrele(ipif); 5401 UDP_STAT(us, udp_ip_send); 5402 ip_output(connp, mp, q, IP_WPUT); 5403 return; 5404 } 5405 } 5406 5407 retry_caching = B_FALSE; 5408 mutex_enter(&connp->conn_lock); 5409 ire = connp->conn_ire_cache; 5410 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 5411 5412 if (ire == NULL || ire->ire_addr != dst || 5413 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 5414 retry_caching = B_TRUE; 5415 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 5416 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 5417 5418 ASSERT(ipif != NULL); 5419 if (!IS_ON_SAME_LAN(stq_ill, ipif->ipif_ill)) 5420 retry_caching = B_TRUE; 5421 } 5422 5423 if (!retry_caching) { 5424 ASSERT(ire != NULL); 5425 IRE_REFHOLD(ire); 5426 mutex_exit(&connp->conn_lock); 5427 } else { 5428 boolean_t cached = B_FALSE; 5429 5430 connp->conn_ire_cache = NULL; 5431 mutex_exit(&connp->conn_lock); 5432 5433 /* Release the old ire */ 5434 if (ire != NULL) { 5435 IRE_REFRELE_NOTR(ire); 5436 ire = NULL; 5437 } 5438 5439 if (CLASSD(dst)) { 5440 ASSERT(ipif != NULL); 5441 ire = ire_ctable_lookup(dst, 0, 0, ipif, 5442 connp->conn_zoneid, MBLK_GETLABEL(mp), 5443 MATCH_IRE_ILL, ipst); 5444 } else { 5445 ASSERT(ipif == NULL); 5446 ire = ire_cache_lookup(dst, connp->conn_zoneid, 5447 MBLK_GETLABEL(mp), ipst); 5448 } 5449 5450 if (ire == NULL) { 5451 if (ipif != NULL) 5452 
ipif_refrele(ipif); 5453 UDP_STAT(us, udp_ire_null); 5454 ip_output(connp, mp, q, IP_WPUT); 5455 return; 5456 } 5457 IRE_REFHOLD_NOTR(ire); 5458 5459 mutex_enter(&connp->conn_lock); 5460 if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL && 5461 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 5462 irb_t *irb = ire->ire_bucket; 5463 5464 /* 5465 * IRE's created for non-connection oriented transports 5466 * are normally initialized with IRE_MARK_TEMPORARY set 5467 * in the ire_marks. These IRE's are preferentially 5468 * reaped when the hash chain length in the cache 5469 * bucket exceeds the maximum value specified in 5470 * ip[6]_ire_max_bucket_cnt. This can severely affect 5471 * UDP performance if IRE cache entries that we need 5472 * to reuse are continually removed. To remedy this, 5473 * when we cache the IRE in the conn_t, we remove the 5474 * IRE_MARK_TEMPORARY bit from the ire_marks if it was 5475 * set. 5476 */ 5477 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5478 rw_enter(&irb->irb_lock, RW_WRITER); 5479 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5480 ire->ire_marks &= ~IRE_MARK_TEMPORARY; 5481 irb->irb_tmp_ire_cnt--; 5482 } 5483 rw_exit(&irb->irb_lock); 5484 } 5485 connp->conn_ire_cache = ire; 5486 cached = B_TRUE; 5487 } 5488 mutex_exit(&connp->conn_lock); 5489 5490 /* 5491 * We can continue to use the ire but since it was not 5492 * cached, we should drop the extra reference. 5493 */ 5494 if (!cached) 5495 IRE_REFRELE_NOTR(ire); 5496 } 5497 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 5498 ASSERT(!CLASSD(dst) || ipif != NULL); 5499 5500 /* 5501 * Check if we can take the fast-path. 5502 * Note that "incomplete" ire's (where the link-layer for next hop 5503 * is not resolved, or where the fast-path header in nce_fp_mp is not 5504 * available yet) are sent down the legacy (slow) path 5505 */ 5506 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 5507 (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || 5508 (ire->ire_max_frag < ntohs(ipha->ipha_length)) || 5509 ((ire->ire_nce == NULL) || 5510 ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) || 5511 connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) { 5512 if (ipif != NULL) 5513 ipif_refrele(ipif); 5514 UDP_STAT(us, udp_ip_ire_send); 5515 IRE_REFRELE(ire); 5516 ip_output(connp, mp, q, IP_WPUT); 5517 return; 5518 } 5519 5520 if (src == INADDR_ANY && !connp->conn_unspec_src) { 5521 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 5522 ipha->ipha_src = ipif->ipif_src_addr; 5523 else 5524 ipha->ipha_src = ire->ire_src_addr; 5525 } 5526 5527 if (ipif != NULL) 5528 ipif_refrele(ipif); 5529 5530 udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid); 5531 } 5532 5533 static void 5534 udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) 5535 { 5536 ipaddr_t src, dst; 5537 ill_t *ill; 5538 mblk_t *ire_fp_mp; 5539 uint_t ire_fp_mp_len; 5540 uint16_t *up; 5541 uint32_t cksum, hcksum_txflags; 5542 queue_t *dev_q; 5543 udp_t *udp = connp->conn_udp; 5544 ipha_t *ipha = (ipha_t *)mp->b_rptr; 5545 udp_stack_t *us = udp->udp_us; 5546 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5547 boolean_t ll_multicast = B_FALSE; 5548 5549 dev_q = ire->ire_stq->q_next; 5550 ASSERT(dev_q != NULL); 5551 5552 ill = ire_to_ill(ire); 5553 ASSERT(ill != NULL); 5554 5555 /* is queue flow controlled? 
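	 * i.e. data is already queued on this write queue, the conn is
	 * draining, or the driver's queue below us is flow-blocked.  In
	 * that case the datagram is either queued on the write queue
	 * (when ip_output_queue is set) or dropped.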
*/ 5556 if (q->q_first != NULL || connp->conn_draining || 5557 DEV_Q_FLOW_BLOCKED(dev_q)) { 5558 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 5559 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 5560 5561 if (ipst->ips_ip_output_queue) 5562 (void) putq(connp->conn_wq, mp); 5563 else 5564 freemsg(mp); 5565 ire_refrele(ire); 5566 return; 5567 } 5568 5569 ire_fp_mp = ire->ire_nce->nce_fp_mp; 5570 ire_fp_mp_len = MBLKL(ire_fp_mp); 5571 ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len); 5572 5573 dst = ipha->ipha_dst; 5574 src = ipha->ipha_src; 5575 5576 5577 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 5578 5579 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 5580 #ifndef _BIG_ENDIAN 5581 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 5582 #endif 5583 5584 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 5585 ASSERT(ill->ill_hcksum_capab != NULL); 5586 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 5587 } else { 5588 hcksum_txflags = 0; 5589 } 5590 5591 /* pseudo-header checksum (do it in parts for IP header checksum) */ 5592 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 5593 5594 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 5595 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 5596 if (*up != 0) { 5597 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 5598 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 5599 ntohs(ipha->ipha_length), cksum); 5600 5601 /* Software checksum? */ 5602 if (DB_CKSUMFLAGS(mp) == 0) { 5603 UDP_STAT(us, udp_out_sw_cksum); 5604 UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, 5605 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 5606 } 5607 } 5608 5609 if (!CLASSD(dst)) { 5610 ipha->ipha_fragment_offset_and_flags |= 5611 (uint32_t)htons(ire->ire_frag_flag); 5612 } 5613 5614 /* Calculate IP header checksum if hardware isn't capable */ 5615 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 5616 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 5617 ((uint16_t *)ipha)[4]); 5618 } 5619 5620 if (CLASSD(dst)) { 5621 if (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL) { 5622 ip_multicast_loopback(q, ill, mp, 5623 connp->conn_multicast_loop ? 
0 : 5624 IP_FF_NO_MCAST_LOOP, zoneid); 5625 } 5626 5627 /* If multicast TTL is 0 then we are done */ 5628 if (ipha->ipha_ttl == 0) { 5629 freemsg(mp); 5630 ire_refrele(ire); 5631 return; 5632 } 5633 ll_multicast = B_TRUE; 5634 } 5635 5636 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 5637 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 5638 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 5639 5640 UPDATE_OB_PKT_COUNT(ire); 5641 ire->ire_last_used_time = lbolt; 5642 5643 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); 5644 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 5645 ntohs(ipha->ipha_length)); 5646 5647 DTRACE_PROBE4(ip4__physical__out__start, 5648 ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); 5649 FW_HOOKS(ipst->ips_ip4_physical_out_event, 5650 ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp, 5651 ll_multicast, ipst); 5652 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 5653 if (ipst->ips_ipobs_enabled && mp != NULL) { 5654 zoneid_t szone; 5655 5656 szone = ip_get_zoneid_v4(ipha->ipha_src, mp, 5657 ipst, ALL_ZONES); 5658 ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, 5659 ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst); 5660 } 5661 5662 if (mp != NULL) { 5663 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 5664 void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill, 5665 ipha_t *, ipha, ip6_t *, NULL, int, 0); 5666 5667 if (ILL_DIRECT_CAPABLE(ill)) { 5668 ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct; 5669 5670 (void) idd->idd_tx_df(idd->idd_tx_dh, mp, 5671 (uintptr_t)connp, 0); 5672 } else { 5673 putnext(ire->ire_stq, mp); 5674 } 5675 } 5676 IRE_REFRELE(ire); 5677 } 5678 5679 static boolean_t 5680 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst, 5681 boolean_t *update_lastdst) 5682 { 5683 udp_t *udp = Q_TO_UDP(wq); 5684 int err; 5685 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 5686 udp_stack_t *us = udp->udp_us; 5687 5688 err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred), 5689 dst, opt_storage, udp->udp_connp->conn_mac_exempt, 5690 us->us_netstack->netstack_ip); 5691 if (err == 0) { 5692 err = tsol_update_sticky(&udp->udp_sticky_ipp, 5693 &udp->udp_label_len_v6, opt_storage); 5694 } 5695 if (err != 0) { 5696 DTRACE_PROBE4( 5697 tx__ip__log__drop__updatelabel__udp6, 5698 char *, "queue(1) failed to update options(2) on mp(3)", 5699 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5700 } else { 5701 *update_lastdst = B_TRUE; 5702 } 5703 return (err); 5704 } 5705 5706 static int 5707 udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr, 5708 pid_t pid) 5709 { 5710 udp_t *udp = connp->conn_udp; 5711 udp_stack_t *us = udp->udp_us; 5712 ipaddr_t v4dst; 5713 in_port_t dstport; 5714 boolean_t mapped_addr; 5715 struct sockaddr_storage ss; 5716 sin_t *sin; 5717 sin6_t *sin6; 5718 struct sockaddr *addr; 5719 socklen_t addrlen; 5720 int error; 5721 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5722 5723 /* M_DATA for connected socket */ 5724 5725 ASSERT(udp->udp_issocket || IPCL_IS_NONSTR(connp)); 5726 UDP_DBGSTAT(us, udp_data_conn); 5727 5728 mutex_enter(&connp->conn_lock); 5729 if (udp->udp_state != TS_DATA_XFER) { 5730 mutex_exit(&connp->conn_lock); 5731 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5732 UDP_STAT(us, udp_out_err_notconn); 5733 freemsg(mp); 5734 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5735 "udp_wput_end: connp %p (%S)", connp, 5736 "not-connected; address required"); 5737 return (EDESTADDRREQ); 5738 } 5739 5740 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 5741 if (mapped_addr) 5742 
IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 5743 5744 /* Initialize addr and addrlen as if they're passed in */ 5745 if (udp->udp_family == AF_INET) { 5746 sin = (sin_t *)&ss; 5747 sin->sin_family = AF_INET; 5748 dstport = sin->sin_port = udp->udp_dstport; 5749 ASSERT(mapped_addr); 5750 sin->sin_addr.s_addr = v4dst; 5751 addr = (struct sockaddr *)sin; 5752 addrlen = sizeof (*sin); 5753 } else { 5754 sin6 = (sin6_t *)&ss; 5755 sin6->sin6_family = AF_INET6; 5756 dstport = sin6->sin6_port = udp->udp_dstport; 5757 sin6->sin6_flowinfo = udp->udp_flowinfo; 5758 sin6->sin6_addr = udp->udp_v6dst; 5759 sin6->sin6_scope_id = 0; 5760 sin6->__sin6_src_id = 0; 5761 addr = (struct sockaddr *)sin6; 5762 addrlen = sizeof (*sin6); 5763 } 5764 mutex_exit(&connp->conn_lock); 5765 5766 if (mapped_addr) { 5767 /* 5768 * Handle both AF_INET and AF_INET6; the latter 5769 * for IPV4 mapped destination addresses. Note 5770 * here that both addr and addrlen point to the 5771 * corresponding struct depending on the address 5772 * family of the socket. 5773 */ 5774 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 5775 insert_spi, msg, cr, pid); 5776 } else { 5777 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid); 5778 } 5779 if (error == 0) { 5780 ASSERT(mp == NULL); 5781 return (0); 5782 } 5783 5784 UDP_STAT(us, udp_out_err_output); 5785 ASSERT(mp != NULL); 5786 if (IPCL_IS_NONSTR(connp)) { 5787 freemsg(mp); 5788 return (error); 5789 } else { 5790 /* mp is freed by the following routine */ 5791 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, 5792 (t_scalar_t)addrlen, (t_scalar_t)error); 5793 return (0); 5794 } 5795 } 5796 5797 /* ARGSUSED */ 5798 static int 5799 udp_send_not_connected(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 5800 socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid) 5801 { 5802 5803 udp_t *udp = connp->conn_udp; 5804 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5805 int error = 0; 5806 sin6_t *sin6; 5807 sin_t *sin; 5808 uint_t srcid; 5809 uint16_t port; 5810 ipaddr_t v4dst; 5811 5812 5813 ASSERT(addr != NULL); 5814 5815 switch (udp->udp_family) { 5816 case AF_INET6: 5817 sin6 = (sin6_t *)addr; 5818 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 5819 /* 5820 * Destination is a non-IPv4-compatible IPv6 address. 5821 * Send out an IPv6 format packet. 5822 */ 5823 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, 5824 pid); 5825 if (error != 0) 5826 goto ud_error; 5827 5828 return (0); 5829 } 5830 /* 5831 * If the local address is not zero or a mapped address 5832 * return an error. It would be possible to send an IPv4 5833 * packet but the response would never make it back to the 5834 * application since it is bound to a non-mapped address. 
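		 * For example, a socket explicitly bound to a native IPv6
		 * address cannot usefully send to an IPv4-mapped
		 * destination, so EADDRNOTAVAIL is returned below.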
5835 */ 5836 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 5837 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 5838 error = EADDRNOTAVAIL; 5839 goto ud_error; 5840 } 5841 /* Send IPv4 packet without modifying udp_ipversion */ 5842 /* Extract port and ipaddr */ 5843 port = sin6->sin6_port; 5844 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 5845 srcid = sin6->__sin6_src_id; 5846 break; 5847 5848 case AF_INET: 5849 sin = (sin_t *)addr; 5850 /* Extract port and ipaddr */ 5851 port = sin->sin_port; 5852 v4dst = sin->sin_addr.s_addr; 5853 srcid = 0; 5854 break; 5855 } 5856 5857 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi, 5858 msg, cr, pid); 5859 5860 if (error == 0) { 5861 ASSERT(mp == NULL); 5862 return (0); 5863 } 5864 5865 ud_error: 5866 ASSERT(mp != NULL); 5867 5868 return (error); 5869 } 5870 5871 /* 5872 * This routine handles all messages passed downstream. It either 5873 * consumes the message or passes it downstream; it never queues a 5874 * a message. 5875 * 5876 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 5877 * is valid when we are directly beneath the stream head, and thus sockfs 5878 * is able to bypass STREAMS and directly call us, passing along the sockaddr 5879 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 5880 * connected endpoints. 5881 */ 5882 void 5883 udp_wput(queue_t *q, mblk_t *mp) 5884 { 5885 conn_t *connp = Q_TO_CONN(q); 5886 udp_t *udp = connp->conn_udp; 5887 int error = 0; 5888 struct sockaddr *addr; 5889 socklen_t addrlen; 5890 udp_stack_t *us = udp->udp_us; 5891 5892 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 5893 "udp_wput_start: queue %p mp %p", q, mp); 5894 5895 /* 5896 * We directly handle several cases here: T_UNITDATA_REQ message 5897 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 5898 * socket. 5899 */ 5900 switch (DB_TYPE(mp)) { 5901 case M_DATA: 5902 /* 5903 * Quick check for error cases. Checks will be done again 5904 * under the lock later on 5905 */ 5906 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 5907 /* Not connected; address is required */ 5908 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5909 UDP_STAT(us, udp_out_err_notconn); 5910 freemsg(mp); 5911 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5912 "udp_wput_end: connp %p (%S)", connp, 5913 "not-connected; address required"); 5914 return; 5915 } 5916 (void) udp_send_connected(connp, mp, NULL, NULL, -1); 5917 return; 5918 5919 case M_PROTO: 5920 case M_PCPROTO: { 5921 struct T_unitdata_req *tudr; 5922 5923 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 5924 tudr = (struct T_unitdata_req *)mp->b_rptr; 5925 5926 /* Handle valid T_UNITDATA_REQ here */ 5927 if (MBLKL(mp) >= sizeof (*tudr) && 5928 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 5929 if (mp->b_cont == NULL) { 5930 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5931 "udp_wput_end: q %p (%S)", q, "badaddr"); 5932 error = EPROTO; 5933 goto ud_error; 5934 } 5935 5936 if (!MBLKIN(mp, 0, tudr->DEST_offset + 5937 tudr->DEST_length)) { 5938 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5939 "udp_wput_end: q %p (%S)", q, "badaddr"); 5940 error = EADDRNOTAVAIL; 5941 goto ud_error; 5942 } 5943 /* 5944 * If a port has not been bound to the stream, fail. 5945 * This is not a problem when sockfs is directly 5946 * above us, because it will ensure that the socket 5947 * is first bound before allowing data to be sent. 
5948 */ 5949 if (udp->udp_state == TS_UNBND) { 5950 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5951 "udp_wput_end: q %p (%S)", q, "outstate"); 5952 error = EPROTO; 5953 goto ud_error; 5954 } 5955 addr = (struct sockaddr *) 5956 &mp->b_rptr[tudr->DEST_offset]; 5957 addrlen = tudr->DEST_length; 5958 if (tudr->OPT_length != 0) 5959 UDP_STAT(us, udp_out_opt); 5960 break; 5961 } 5962 /* FALLTHRU */ 5963 } 5964 default: 5965 udp_wput_other(q, mp); 5966 return; 5967 } 5968 ASSERT(addr != NULL); 5969 5970 error = udp_send_not_connected(connp, mp, addr, addrlen, NULL, NULL, 5971 -1); 5972 if (error != 0) { 5973 ud_error: 5974 UDP_STAT(us, udp_out_err_output); 5975 ASSERT(mp != NULL); 5976 /* mp is freed by the following routine */ 5977 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 5978 (t_scalar_t)error); 5979 } 5980 } 5981 5982 /* ARGSUSED */ 5983 static void 5984 udp_wput_fallback(queue_t *wq, mblk_t *mp) 5985 { 5986 #ifdef DEBUG 5987 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 5988 #endif 5989 freemsg(mp); 5990 } 5991 5992 5993 /* 5994 * udp_output_v6(): 5995 * Assumes that udp_wput did some sanity checking on the destination 5996 * address. 5997 */ 5998 static mblk_t * 5999 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, 6000 struct nmsghdr *msg, cred_t *cr, pid_t pid) 6001 { 6002 ip6_t *ip6h; 6003 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6004 mblk_t *mp1 = mp; 6005 mblk_t *mp2; 6006 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6007 size_t ip_len; 6008 udpha_t *udph; 6009 udp_t *udp = connp->conn_udp; 6010 udp_stack_t *us = udp->udp_us; 6011 queue_t *q = connp->conn_wq; 6012 ip6_pkt_t ipp_s; /* For ancillary data options */ 6013 ip6_pkt_t *ipp = &ipp_s; 6014 ip6_pkt_t *tipp; /* temporary ipp */ 6015 uint32_t csum = 0; 6016 uint_t ignore = 0; 6017 uint_t option_exists = 0, is_sticky = 0; 6018 uint8_t *cp; 6019 uint8_t *nxthdr_ptr; 6020 in6_addr_t ip6_dst; 6021 in_port_t port; 6022 udpattrs_t attrs; 6023 boolean_t opt_present; 6024 ip6_hbh_t *hopoptsptr = NULL; 6025 uint_t hopoptslen = 0; 6026 boolean_t is_ancillary = B_FALSE; 6027 size_t sth_wroff = 0; 6028 ire_t *ire; 6029 boolean_t update_lastdst = B_FALSE; 6030 6031 *error = 0; 6032 6033 /* 6034 * If the local address is a mapped address return 6035 * an error. 6036 * It would be possible to send an IPv6 packet but the 6037 * response would never make it back to the application 6038 * since it is bound to a mapped address. 
6039 */ 6040 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6041 *error = EADDRNOTAVAIL; 6042 goto done; 6043 } 6044 6045 ipp->ipp_fields = 0; 6046 ipp->ipp_sticky_ignored = 0; 6047 6048 /* 6049 * If TPI options passed in, feed it for verification and handling 6050 */ 6051 attrs.udpattr_credset = B_FALSE; 6052 opt_present = B_FALSE; 6053 if (IPCL_IS_NONSTR(connp)) { 6054 if (msg->msg_controllen != 0) { 6055 attrs.udpattr_ipp6 = ipp; 6056 attrs.udpattr_mb = mp; 6057 6058 rw_enter(&udp->udp_rwlock, RW_WRITER); 6059 *error = process_auxiliary_options(connp, 6060 msg->msg_control, msg->msg_controllen, 6061 &attrs, &udp_opt_obj, udp_opt_set); 6062 rw_exit(&udp->udp_rwlock); 6063 if (*error) 6064 goto done; 6065 ASSERT(*error == 0); 6066 opt_present = B_TRUE; 6067 } 6068 } else { 6069 if (DB_TYPE(mp) != M_DATA) { 6070 mp1 = mp->b_cont; 6071 if (((struct T_unitdata_req *) 6072 mp->b_rptr)->OPT_length != 0) { 6073 attrs.udpattr_ipp6 = ipp; 6074 attrs.udpattr_mb = mp; 6075 if (udp_unitdata_opt_process(q, mp, error, 6076 &attrs) < 0) { 6077 goto done; 6078 } 6079 ASSERT(*error == 0); 6080 opt_present = B_TRUE; 6081 } 6082 } 6083 } 6084 6085 /* 6086 * Determine whether we need to mark the mblk with the user's 6087 * credentials. 6088 */ 6089 ire = connp->conn_ire_cache; 6090 if (is_system_labeled() || IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || 6091 (ire == NULL) || 6092 (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) || 6093 (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) { 6094 if (cr != NULL && DB_CRED(mp) == NULL) 6095 msg_setcredpid(mp, cr, pid); 6096 } 6097 6098 rw_enter(&udp->udp_rwlock, RW_READER); 6099 ignore = ipp->ipp_sticky_ignored; 6100 6101 /* mp1 points to the M_DATA mblk carrying the packet */ 6102 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6103 6104 if (sin6->sin6_scope_id != 0 && 6105 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6106 /* 6107 * IPPF_SCOPE_ID is special. It's neither a sticky 6108 * option nor ancillary data. It needs to be 6109 * explicitly set in options_exists. 6110 */ 6111 option_exists |= IPPF_SCOPE_ID; 6112 } 6113 6114 /* 6115 * Compute the destination address 6116 */ 6117 ip6_dst = sin6->sin6_addr; 6118 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6119 ip6_dst = ipv6_loopback; 6120 6121 port = sin6->sin6_port; 6122 6123 /* 6124 * Cluster and TSOL notes, Cluster check: 6125 * see comments in udp_output_v4(). 6126 */ 6127 mutex_enter(&connp->conn_lock); 6128 6129 if (cl_inet_connect2 != NULL && 6130 (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) || 6131 port != udp->udp_lastdstport)) { 6132 mutex_exit(&connp->conn_lock); 6133 *error = 0; 6134 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error); 6135 if (*error != 0) { 6136 *error = EHOSTUNREACH; 6137 rw_exit(&udp->udp_rwlock); 6138 goto done; 6139 } 6140 update_lastdst = B_TRUE; 6141 mutex_enter(&connp->conn_lock); 6142 } 6143 6144 /* 6145 * If we're not going to the same destination as last time, then 6146 * recompute the label required. This is done in a separate routine to 6147 * avoid blowing up our stack here. 6148 * 6149 * TSOL Note: Since we are not in WRITER mode, UDP packets 6150 * to different destination may require different labels, 6151 * or worse, UDP packets to same IP address may require 6152 * different labels due to use of shared all-zones address. 6153 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6154 * and sticky ipp_hopoptslen are consistent for the current 6155 * destination and are updated atomically. 
6156 */ 6157 if (is_system_labeled()) { 6158 /* Using UDP MLP requires SCM_UCRED from user */ 6159 if (connp->conn_mlp_type != mlptSingle && 6160 !attrs.udpattr_credset) { 6161 DTRACE_PROBE4( 6162 tx__ip__log__info__output__udp6, 6163 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6164 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6165 *error = ECONNREFUSED; 6166 rw_exit(&udp->udp_rwlock); 6167 mutex_exit(&connp->conn_lock); 6168 goto done; 6169 } 6170 /* 6171 * update label option for this UDP socket if 6172 * - the destination has changed, or 6173 * - the UDP socket is MLP 6174 */ 6175 if ((opt_present || 6176 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6177 connp->conn_mlp_type != mlptSingle) && 6178 (*error = udp_update_label_v6(q, mp, &ip6_dst, 6179 &update_lastdst)) != 0) { 6180 rw_exit(&udp->udp_rwlock); 6181 mutex_exit(&connp->conn_lock); 6182 goto done; 6183 } 6184 } 6185 6186 if (update_lastdst) { 6187 udp->udp_v6lastdst = ip6_dst; 6188 udp->udp_lastdstport = port; 6189 } 6190 6191 /* 6192 * If there's a security label here, then we ignore any options the 6193 * user may try to set. We keep the peer's label as a hidden sticky 6194 * option. We make a private copy of this label before releasing the 6195 * lock so that label is kept consistent with the destination addr. 6196 */ 6197 if (udp->udp_label_len_v6 > 0) { 6198 ignore &= ~IPPF_HOPOPTS; 6199 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6200 } 6201 6202 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6203 /* No sticky options nor ancillary data. */ 6204 mutex_exit(&connp->conn_lock); 6205 goto no_options; 6206 } 6207 6208 /* 6209 * Go through the options figuring out where each is going to 6210 * come from and build two masks. The first mask indicates if 6211 * the option exists at all. The second mask indicates if the 6212 * option is sticky or ancillary. 
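 * (These are option_exists and is_sticky, respectively.)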
6213 */ 6214 if (!(ignore & IPPF_HOPOPTS)) { 6215 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6216 option_exists |= IPPF_HOPOPTS; 6217 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6218 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6219 option_exists |= IPPF_HOPOPTS; 6220 is_sticky |= IPPF_HOPOPTS; 6221 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6222 hopoptsptr = kmem_alloc( 6223 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6224 if (hopoptsptr == NULL) { 6225 *error = ENOMEM; 6226 mutex_exit(&connp->conn_lock); 6227 goto done; 6228 } 6229 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6230 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6231 hopoptslen); 6232 udp_ip_hdr_len += hopoptslen; 6233 } 6234 } 6235 mutex_exit(&connp->conn_lock); 6236 6237 if (!(ignore & IPPF_RTHDR)) { 6238 if (ipp->ipp_fields & IPPF_RTHDR) { 6239 option_exists |= IPPF_RTHDR; 6240 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6241 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6242 option_exists |= IPPF_RTHDR; 6243 is_sticky |= IPPF_RTHDR; 6244 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6245 } 6246 } 6247 6248 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6249 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6250 option_exists |= IPPF_RTDSTOPTS; 6251 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6252 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6253 option_exists |= IPPF_RTDSTOPTS; 6254 is_sticky |= IPPF_RTDSTOPTS; 6255 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6256 } 6257 } 6258 6259 if (!(ignore & IPPF_DSTOPTS)) { 6260 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6261 option_exists |= IPPF_DSTOPTS; 6262 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6263 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6264 option_exists |= IPPF_DSTOPTS; 6265 is_sticky |= IPPF_DSTOPTS; 6266 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6267 } 6268 } 6269 6270 if (!(ignore & IPPF_IFINDEX)) { 6271 if (ipp->ipp_fields & IPPF_IFINDEX) { 6272 option_exists |= IPPF_IFINDEX; 6273 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6274 option_exists |= IPPF_IFINDEX; 6275 is_sticky |= IPPF_IFINDEX; 6276 } 6277 } 6278 6279 if (!(ignore & IPPF_ADDR)) { 6280 if (ipp->ipp_fields & IPPF_ADDR) { 6281 option_exists |= IPPF_ADDR; 6282 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6283 option_exists |= IPPF_ADDR; 6284 is_sticky |= IPPF_ADDR; 6285 } 6286 } 6287 6288 if (!(ignore & IPPF_DONTFRAG)) { 6289 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6290 option_exists |= IPPF_DONTFRAG; 6291 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6292 option_exists |= IPPF_DONTFRAG; 6293 is_sticky |= IPPF_DONTFRAG; 6294 } 6295 } 6296 6297 if (!(ignore & IPPF_USE_MIN_MTU)) { 6298 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6299 option_exists |= IPPF_USE_MIN_MTU; 6300 } else if (udp->udp_sticky_ipp.ipp_fields & 6301 IPPF_USE_MIN_MTU) { 6302 option_exists |= IPPF_USE_MIN_MTU; 6303 is_sticky |= IPPF_USE_MIN_MTU; 6304 } 6305 } 6306 6307 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6308 option_exists |= IPPF_HOPLIMIT; 6309 /* IPV6_HOPLIMIT can never be sticky */ 6310 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6311 6312 if (!(ignore & IPPF_UNICAST_HOPS) && 6313 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6314 option_exists |= IPPF_UNICAST_HOPS; 6315 is_sticky |= IPPF_UNICAST_HOPS; 6316 } 6317 6318 if (!(ignore & IPPF_MULTICAST_HOPS) && 6319 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6320 option_exists |= 
IPPF_MULTICAST_HOPS; 6321 is_sticky |= IPPF_MULTICAST_HOPS; 6322 } 6323 6324 if (!(ignore & IPPF_TCLASS)) { 6325 if (ipp->ipp_fields & IPPF_TCLASS) { 6326 option_exists |= IPPF_TCLASS; 6327 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6328 option_exists |= IPPF_TCLASS; 6329 is_sticky |= IPPF_TCLASS; 6330 } 6331 } 6332 6333 if (!(ignore & IPPF_NEXTHOP) && 6334 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 6335 option_exists |= IPPF_NEXTHOP; 6336 is_sticky |= IPPF_NEXTHOP; 6337 } 6338 6339 no_options: 6340 6341 /* 6342 * If any options carried in the ip6i_t were specified, we 6343 * need to account for the ip6i_t in the data we'll be sending 6344 * down. 6345 */ 6346 if (option_exists & IPPF_HAS_IP6I) 6347 udp_ip_hdr_len += sizeof (ip6i_t); 6348 6349 /* check/fix buffer config, setup pointers into it */ 6350 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6351 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6352 !OK_32PTR(ip6h)) { 6353 6354 /* Try to get everything in a single mblk next time */ 6355 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6356 udp->udp_max_hdr_len = udp_ip_hdr_len; 6357 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 6358 } 6359 6360 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 6361 if (mp2 == NULL) { 6362 *error = ENOMEM; 6363 rw_exit(&udp->udp_rwlock); 6364 goto done; 6365 } 6366 mp2->b_wptr = DB_LIM(mp2); 6367 mp2->b_cont = mp1; 6368 mp1 = mp2; 6369 if (DB_TYPE(mp) != M_DATA) 6370 mp->b_cont = mp1; 6371 else 6372 mp = mp1; 6373 6374 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6375 } 6376 mp1->b_rptr = (unsigned char *)ip6h; 6377 ip6i = (ip6i_t *)ip6h; 6378 6379 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp) 6380 if (option_exists & IPPF_HAS_IP6I) { 6381 ip6h = (ip6_t *)&ip6i[1]; 6382 ip6i->ip6i_flags = 0; 6383 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6384 6385 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6386 if (option_exists & IPPF_SCOPE_ID) { 6387 ip6i->ip6i_flags |= IP6I_IFINDEX; 6388 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6389 } else if (option_exists & IPPF_IFINDEX) { 6390 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6391 ASSERT(tipp->ipp_ifindex != 0); 6392 ip6i->ip6i_flags |= IP6I_IFINDEX; 6393 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6394 } 6395 6396 if (option_exists & IPPF_ADDR) { 6397 /* 6398 * Enable per-packet source address verification if 6399 * IPV6_PKTINFO specified the source address. 6400 * ip6_src is set in the transport's _wput function. 6401 */ 6402 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6403 } 6404 6405 if (option_exists & IPPF_DONTFRAG) { 6406 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6407 } 6408 6409 if (option_exists & IPPF_USE_MIN_MTU) { 6410 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6411 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6412 } 6413 6414 if (option_exists & IPPF_NEXTHOP) { 6415 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6416 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6417 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6418 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6419 } 6420 6421 /* 6422 * tell IP this is an ip6i_t private header 6423 */ 6424 ip6i->ip6i_nxt = IPPROTO_RAW; 6425 } 6426 6427 /* Initialize IPv6 header */ 6428 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6429 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6430 6431 /* Set the hoplimit of the outgoing packet. */ 6432 if (option_exists & IPPF_HOPLIMIT) { 6433 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
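 * The sticky unicast/multicast hop limits below are used only
 * when no ancillary hop limit is present.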
*/ 6434 ip6h->ip6_hops = ipp->ipp_hoplimit; 6435 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6436 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6437 ip6h->ip6_hops = udp->udp_multicast_ttl; 6438 if (option_exists & IPPF_MULTICAST_HOPS) 6439 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6440 } else { 6441 ip6h->ip6_hops = udp->udp_ttl; 6442 if (option_exists & IPPF_UNICAST_HOPS) 6443 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6444 } 6445 6446 if (option_exists & IPPF_ADDR) { 6447 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6448 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6449 ip6h->ip6_src = tipp->ipp_addr; 6450 } else { 6451 /* 6452 * The source address was not set using IPV6_PKTINFO. 6453 * First look at the bound source. 6454 * If unspecified fallback to __sin6_src_id. 6455 */ 6456 ip6h->ip6_src = udp->udp_v6src; 6457 if (sin6->__sin6_src_id != 0 && 6458 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6459 ip_srcid_find_id(sin6->__sin6_src_id, 6460 &ip6h->ip6_src, connp->conn_zoneid, 6461 us->us_netstack); 6462 } 6463 } 6464 6465 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6466 cp = (uint8_t *)&ip6h[1]; 6467 6468 /* 6469 * Here's where we have to start stringing together 6470 * any extension headers in the right order: 6471 * Hop-by-hop, destination, routing, and final destination opts. 6472 */ 6473 if (option_exists & IPPF_HOPOPTS) { 6474 /* Hop-by-hop options */ 6475 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6476 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6477 if (hopoptslen == 0) { 6478 hopoptsptr = tipp->ipp_hopopts; 6479 hopoptslen = tipp->ipp_hopoptslen; 6480 is_ancillary = B_TRUE; 6481 } 6482 6483 *nxthdr_ptr = IPPROTO_HOPOPTS; 6484 nxthdr_ptr = &hbh->ip6h_nxt; 6485 6486 bcopy(hopoptsptr, cp, hopoptslen); 6487 cp += hopoptslen; 6488 6489 if (hopoptsptr != NULL && !is_ancillary) { 6490 kmem_free(hopoptsptr, hopoptslen); 6491 hopoptsptr = NULL; 6492 hopoptslen = 0; 6493 } 6494 } 6495 /* 6496 * En-route destination options 6497 * Only do them if there's a routing header as well 6498 */ 6499 if (option_exists & IPPF_RTDSTOPTS) { 6500 ip6_dest_t *dst = (ip6_dest_t *)cp; 6501 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6502 6503 *nxthdr_ptr = IPPROTO_DSTOPTS; 6504 nxthdr_ptr = &dst->ip6d_nxt; 6505 6506 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6507 cp += tipp->ipp_rtdstoptslen; 6508 } 6509 /* 6510 * Routing header next 6511 */ 6512 if (option_exists & IPPF_RTHDR) { 6513 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6514 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6515 6516 *nxthdr_ptr = IPPROTO_ROUTING; 6517 nxthdr_ptr = &rt->ip6r_nxt; 6518 6519 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6520 cp += tipp->ipp_rthdrlen; 6521 } 6522 /* 6523 * Do ultimate destination options 6524 */ 6525 if (option_exists & IPPF_DSTOPTS) { 6526 ip6_dest_t *dest = (ip6_dest_t *)cp; 6527 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6528 6529 *nxthdr_ptr = IPPROTO_DSTOPTS; 6530 nxthdr_ptr = &dest->ip6d_nxt; 6531 6532 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6533 cp += tipp->ipp_dstoptslen; 6534 } 6535 /* 6536 * Now set the last header pointer to the proto passed in 6537 */ 6538 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6539 *nxthdr_ptr = IPPROTO_UDP; 6540 6541 /* Update UDP header */ 6542 udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); 6543 udph->uha_dst_port = sin6->sin6_port; 6544 udph->uha_src_port = udp->udp_port; 6545 6546 /* 6547 * Copy in the destination address 6548 */ 6549 ip6h->ip6_dst = ip6_dst; 6550 6551 ip6h->ip6_vcf = 6552 (IPV6_DEFAULT_VERS_AND_FLOW & 
IPV6_VERS_AND_FLOW_MASK) | 6553 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6554 6555 if (option_exists & IPPF_TCLASS) { 6556 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6557 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6558 tipp->ipp_tclass); 6559 } 6560 rw_exit(&udp->udp_rwlock); 6561 6562 if (option_exists & IPPF_RTHDR) { 6563 ip6_rthdr_t *rth; 6564 6565 /* 6566 * Perform any processing needed for source routing. 6567 * We know that all extension headers will be in the same mblk 6568 * as the IPv6 header. 6569 */ 6570 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6571 if (rth != NULL && rth->ip6r_segleft != 0) { 6572 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6573 /* 6574 * Drop packet - only support Type 0 routing. 6575 * Notify the application as well. 6576 */ 6577 *error = EPROTO; 6578 goto done; 6579 } 6580 6581 /* 6582 * rth->ip6r_len is twice the number of 6583 * addresses in the header. Thus it must be even. 6584 */ 6585 if (rth->ip6r_len & 0x1) { 6586 *error = EPROTO; 6587 goto done; 6588 } 6589 /* 6590 * Shuffle the routing header and ip6_dst 6591 * addresses, and get the checksum difference 6592 * between the first hop (in ip6_dst) and 6593 * the destination (in the last routing hdr entry). 6594 */ 6595 csum = ip_massage_options_v6(ip6h, rth, 6596 us->us_netstack); 6597 /* 6598 * Verify that the first hop isn't a mapped address. 6599 * Routers along the path need to do this verification 6600 * for subsequent hops. 6601 */ 6602 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6603 *error = EADDRNOTAVAIL; 6604 goto done; 6605 } 6606 6607 cp += (rth->ip6r_len + 1)*8; 6608 } 6609 } 6610 6611 /* count up length of UDP packet */ 6612 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6613 if ((mp2 = mp1->b_cont) != NULL) { 6614 do { 6615 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6616 ip_len += (uint32_t)MBLKL(mp2); 6617 } while ((mp2 = mp2->b_cont) != NULL); 6618 } 6619 6620 /* 6621 * If the size of the packet is greater than the maximum allowed by 6622 * ip, return an error. Passing this down could cause panics because 6623 * the size will have wrapped and be inconsistent with the msg size. 6624 */ 6625 if (ip_len > IP_MAXPACKET) { 6626 *error = EMSGSIZE; 6627 goto done; 6628 } 6629 6630 /* Store the UDP length. Subtract length of extension hdrs */ 6631 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 6632 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 6633 6634 /* 6635 * We make it easy for IP to include our pseudo header 6636 * by putting our length in uh_checksum, modified (if 6637 * we have a routing header) by the checksum difference 6638 * between the ultimate destination and first hop addresses. 6639 * Note: UDP over IPv6 must always checksum the packet. 6640 */ 6641 csum += udph->uha_length; 6642 csum = (csum & 0xFFFF) + (csum >> 16); 6643 udph->uha_checksum = (uint16_t)csum; 6644 6645 #ifdef _LITTLE_ENDIAN 6646 ip_len = htons(ip_len); 6647 #endif 6648 ip6h->ip6_plen = ip_len; 6649 if (DB_CRED(mp) != NULL) 6650 mblk_setcred(mp1, DB_CRED(mp)); 6651 6652 if (DB_TYPE(mp) != M_DATA) { 6653 ASSERT(mp != mp1); 6654 freeb(mp); 6655 } 6656 6657 /* mp has been consumed and we'll return success */ 6658 ASSERT(*error == 0); 6659 mp = NULL; 6660 6661 /* We're done. 
Pass the packet to IP */ 6662 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 6663 ip_output_v6(connp, mp1, q, IP_WPUT); 6664 6665 done: 6666 if (sth_wroff != 0) { 6667 (void) proto_set_tx_wroff(RD(q), connp, 6668 udp->udp_max_hdr_len + us->us_wroff_extra); 6669 } 6670 if (hopoptsptr != NULL && !is_ancillary) { 6671 kmem_free(hopoptsptr, hopoptslen); 6672 hopoptsptr = NULL; 6673 } 6674 if (*error != 0) { 6675 ASSERT(mp != NULL); 6676 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6677 } 6678 return (mp); 6679 } 6680 6681 6682 static int 6683 i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6684 { 6685 sin_t *sin = (sin_t *)sa; 6686 sin6_t *sin6 = (sin6_t *)sa; 6687 6688 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6689 6690 if (udp->udp_state != TS_DATA_XFER) 6691 return (ENOTCONN); 6692 6693 switch (udp->udp_family) { 6694 case AF_INET: 6695 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6696 6697 if (*salenp < sizeof (sin_t)) 6698 return (EINVAL); 6699 6700 *salenp = sizeof (sin_t); 6701 *sin = sin_null; 6702 sin->sin_family = AF_INET; 6703 sin->sin_port = udp->udp_dstport; 6704 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 6705 break; 6706 6707 case AF_INET6: 6708 if (*salenp < sizeof (sin6_t)) 6709 return (EINVAL); 6710 6711 *salenp = sizeof (sin6_t); 6712 *sin6 = sin6_null; 6713 sin6->sin6_family = AF_INET6; 6714 sin6->sin6_port = udp->udp_dstport; 6715 sin6->sin6_addr = udp->udp_v6dst; 6716 sin6->sin6_flowinfo = udp->udp_flowinfo; 6717 break; 6718 } 6719 6720 return (0); 6721 } 6722 6723 static int 6724 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6725 { 6726 sin_t *sin = (sin_t *)sa; 6727 sin6_t *sin6 = (sin6_t *)sa; 6728 6729 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6730 6731 switch (udp->udp_family) { 6732 case AF_INET: 6733 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6734 6735 if (*salenp < sizeof (sin_t)) 6736 return (EINVAL); 6737 6738 *salenp = sizeof (sin_t); 6739 *sin = sin_null; 6740 sin->sin_family = AF_INET; 6741 sin->sin_port = udp->udp_port; 6742 6743 /* 6744 * If udp_v6src is unspecified, we might be bound to broadcast 6745 * / multicast. Use udp_bound_v6src as local address instead 6746 * (that could also still be unspecified). 6747 */ 6748 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 6749 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6750 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 6751 } else { 6752 sin->sin_addr.s_addr = 6753 V4_PART_OF_V6(udp->udp_bound_v6src); 6754 } 6755 break; 6756 6757 case AF_INET6: 6758 if (*salenp < sizeof (sin6_t)) 6759 return (EINVAL); 6760 6761 *salenp = sizeof (sin6_t); 6762 *sin6 = sin6_null; 6763 sin6->sin6_family = AF_INET6; 6764 sin6->sin6_port = udp->udp_port; 6765 sin6->sin6_flowinfo = udp->udp_flowinfo; 6766 6767 /* 6768 * If udp_v6src is unspecified, we might be bound to broadcast 6769 * / multicast. Use udp_bound_v6src as local address instead 6770 * (that could also still be unspecified). 6771 */ 6772 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) 6773 sin6->sin6_addr = udp->udp_v6src; 6774 else 6775 sin6->sin6_addr = udp->udp_bound_v6src; 6776 break; 6777 } 6778 6779 return (0); 6780 } 6781 6782 /* 6783 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
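 * These arrive as M_CMD blocks and currently cover TI_GETPEERNAME
 * and TI_GETMYNAME.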
6784 */ 6785 static void 6786 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 6787 { 6788 void *data; 6789 mblk_t *datamp = mp->b_cont; 6790 udp_t *udp = Q_TO_UDP(q); 6791 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 6792 6793 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 6794 cmdp->cb_error = EPROTO; 6795 qreply(q, mp); 6796 return; 6797 } 6798 data = datamp->b_rptr; 6799 6800 rw_enter(&udp->udp_rwlock, RW_READER); 6801 switch (cmdp->cb_cmd) { 6802 case TI_GETPEERNAME: 6803 cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len); 6804 break; 6805 case TI_GETMYNAME: 6806 cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len); 6807 break; 6808 default: 6809 cmdp->cb_error = EINVAL; 6810 break; 6811 } 6812 rw_exit(&udp->udp_rwlock); 6813 6814 qreply(q, mp); 6815 } 6816 6817 static void 6818 udp_disable_direct_sockfs(udp_t *udp) 6819 { 6820 udp->udp_issocket = B_FALSE; 6821 if (udp->udp_direct_sockfs) { 6822 /* 6823 * Disable read-side synchronous stream interface and 6824 * drain any queued data. 6825 */ 6826 udp_rcv_drain(udp->udp_connp->conn_rq, udp, B_FALSE); 6827 ASSERT(!udp->udp_direct_sockfs); 6828 UDP_STAT(udp->udp_us, udp_sock_fallback); 6829 } 6830 } 6831 6832 static void 6833 udp_wput_other(queue_t *q, mblk_t *mp) 6834 { 6835 uchar_t *rptr = mp->b_rptr; 6836 struct datab *db; 6837 struct iocblk *iocp; 6838 cred_t *cr; 6839 conn_t *connp = Q_TO_CONN(q); 6840 udp_t *udp = connp->conn_udp; 6841 udp_stack_t *us; 6842 6843 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 6844 "udp_wput_other_start: q %p", q); 6845 6846 us = udp->udp_us; 6847 db = mp->b_datap; 6848 6849 cr = DB_CREDDEF(mp, connp->conn_cred); 6850 6851 switch (db->db_type) { 6852 case M_CMD: 6853 udp_wput_cmdblk(q, mp); 6854 return; 6855 6856 case M_PROTO: 6857 case M_PCPROTO: 6858 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 6859 freemsg(mp); 6860 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6861 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 6862 return; 6863 } 6864 switch (((t_primp_t)rptr)->type) { 6865 case T_ADDR_REQ: 6866 udp_addr_req(q, mp); 6867 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6868 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 6869 return; 6870 case O_T_BIND_REQ: 6871 case T_BIND_REQ: 6872 udp_tpi_bind(q, mp); 6873 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6874 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 6875 return; 6876 case T_CONN_REQ: 6877 udp_tpi_connect(q, mp); 6878 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6879 "udp_wput_other_end: q %p (%S)", q, "connreq"); 6880 return; 6881 case T_CAPABILITY_REQ: 6882 udp_capability_req(q, mp); 6883 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6884 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 6885 return; 6886 case T_INFO_REQ: 6887 udp_info_req(q, mp); 6888 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6889 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 6890 return; 6891 case T_UNITDATA_REQ: 6892 /* 6893 * If a T_UNITDATA_REQ gets here, the address must 6894 * be bad. Valid T_UNITDATA_REQs are handled 6895 * in udp_wput. 
6896 */ 6897 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 6898 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6899 "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); 6900 return; 6901 case T_UNBIND_REQ: 6902 udp_tpi_unbind(q, mp); 6903 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6904 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 6905 return; 6906 case T_SVR4_OPTMGMT_REQ: 6907 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 6908 cr)) { 6909 (void) svr4_optcom_req(q, 6910 mp, cr, &udp_opt_obj, B_TRUE); 6911 } 6912 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6913 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6914 return; 6915 6916 case T_OPTMGMT_REQ: 6917 (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); 6918 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6919 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6920 return; 6921 6922 case T_DISCON_REQ: 6923 udp_tpi_disconnect(q, mp); 6924 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6925 "udp_wput_other_end: q %p (%S)", q, "disconreq"); 6926 return; 6927 6928 /* The following TPI message is not supported by udp. */ 6929 case O_T_CONN_RES: 6930 case T_CONN_RES: 6931 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6932 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6933 "udp_wput_other_end: q %p (%S)", q, 6934 "connres/disconreq"); 6935 return; 6936 6937 /* The following 3 TPI messages are illegal for udp. */ 6938 case T_DATA_REQ: 6939 case T_EXDATA_REQ: 6940 case T_ORDREL_REQ: 6941 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6942 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6943 "udp_wput_other_end: q %p (%S)", q, 6944 "data/exdata/ordrel"); 6945 return; 6946 default: 6947 break; 6948 } 6949 break; 6950 case M_FLUSH: 6951 if (*rptr & FLUSHW) 6952 flushq(q, FLUSHDATA); 6953 break; 6954 case M_IOCTL: 6955 iocp = (struct iocblk *)mp->b_rptr; 6956 switch (iocp->ioc_cmd) { 6957 case TI_GETPEERNAME: 6958 if (udp->udp_state != TS_DATA_XFER) { 6959 /* 6960 * If a default destination address has not 6961 * been associated with the stream, then we 6962 * don't know the peer's name. 6963 */ 6964 iocp->ioc_error = ENOTCONN; 6965 iocp->ioc_count = 0; 6966 mp->b_datap->db_type = M_IOCACK; 6967 qreply(q, mp); 6968 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6969 "udp_wput_other_end: q %p (%S)", q, 6970 "getpeername"); 6971 return; 6972 } 6973 /* FALLTHRU */ 6974 case TI_GETMYNAME: { 6975 /* 6976 * For TI_GETPEERNAME and TI_GETMYNAME, we first 6977 * need to copyin the user's strbuf structure. 6978 * Processing will continue in the M_IOCDATA case 6979 * below. 6980 */ 6981 mi_copyin(q, mp, NULL, 6982 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 6983 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6984 "udp_wput_other_end: q %p (%S)", q, "getmyname"); 6985 return; 6986 } 6987 case ND_SET: 6988 /* nd_getset performs the necessary checking */ 6989 case ND_GET: 6990 if (nd_getset(q, us->us_nd, mp)) { 6991 qreply(q, mp); 6992 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6993 "udp_wput_other_end: q %p (%S)", q, "get"); 6994 return; 6995 } 6996 break; 6997 case _SIOCSOCKFALLBACK: 6998 /* 6999 * Either sockmod is about to be popped and the 7000 * socket would now be treated as a plain stream, 7001 * or a module is about to be pushed so we could 7002 * no longer use read-side synchronous stream. 7003 * Drain any queued data and disable direct sockfs 7004 * interface from now on. 
7005 */ 7006 if (!udp->udp_issocket) { 7007 DB_TYPE(mp) = M_IOCNAK; 7008 iocp->ioc_error = EINVAL; 7009 } else { 7010 udp_disable_direct_sockfs(udp); 7011 7012 DB_TYPE(mp) = M_IOCACK; 7013 iocp->ioc_error = 0; 7014 } 7015 iocp->ioc_count = 0; 7016 iocp->ioc_rval = 0; 7017 qreply(q, mp); 7018 return; 7019 default: 7020 break; 7021 } 7022 break; 7023 case M_IOCDATA: 7024 udp_wput_iocdata(q, mp); 7025 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7026 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7027 return; 7028 default: 7029 /* Unrecognized messages are passed through without change. */ 7030 break; 7031 } 7032 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7033 "udp_wput_other_end: q %p (%S)", q, "end"); 7034 ip_output(connp, mp, q, IP_WPUT); 7035 } 7036 7037 /* 7038 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7039 * messages. 7040 */ 7041 static void 7042 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7043 { 7044 mblk_t *mp1; 7045 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 7046 STRUCT_HANDLE(strbuf, sb); 7047 udp_t *udp = Q_TO_UDP(q); 7048 int error; 7049 uint_t addrlen; 7050 7051 /* Make sure it is one of ours. */ 7052 switch (iocp->ioc_cmd) { 7053 case TI_GETMYNAME: 7054 case TI_GETPEERNAME: 7055 break; 7056 default: 7057 ip_output(udp->udp_connp, mp, q, IP_WPUT); 7058 return; 7059 } 7060 7061 switch (mi_copy_state(q, mp, &mp1)) { 7062 case -1: 7063 return; 7064 case MI_COPY_CASE(MI_COPY_IN, 1): 7065 break; 7066 case MI_COPY_CASE(MI_COPY_OUT, 1): 7067 /* 7068 * The address has been copied out, so now 7069 * copyout the strbuf. 7070 */ 7071 mi_copyout(q, mp); 7072 return; 7073 case MI_COPY_CASE(MI_COPY_OUT, 2): 7074 /* 7075 * The address and strbuf have been copied out. 7076 * We're done, so just acknowledge the original 7077 * M_IOCTL. 7078 */ 7079 mi_copy_done(q, mp, 0); 7080 return; 7081 default: 7082 /* 7083 * Something strange has happened, so acknowledge 7084 * the original M_IOCTL with an EPROTO error. 7085 */ 7086 mi_copy_done(q, mp, EPROTO); 7087 return; 7088 } 7089 7090 /* 7091 * Now we have the strbuf structure for TI_GETMYNAME 7092 * and TI_GETPEERNAME. Next we copyout the requested 7093 * address and then we'll copyout the strbuf. 7094 */ 7095 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 7096 addrlen = udp->udp_family == AF_INET ? 
sizeof (sin_t) : sizeof (sin6_t); 7097 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7098 mi_copy_done(q, mp, EINVAL); 7099 return; 7100 } 7101 7102 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7103 7104 if (mp1 == NULL) 7105 return; 7106 7107 rw_enter(&udp->udp_rwlock, RW_READER); 7108 switch (iocp->ioc_cmd) { 7109 case TI_GETMYNAME: 7110 error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen); 7111 break; 7112 case TI_GETPEERNAME: 7113 error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen); 7114 break; 7115 } 7116 rw_exit(&udp->udp_rwlock); 7117 7118 if (error != 0) { 7119 mi_copy_done(q, mp, error); 7120 } else { 7121 mp1->b_wptr += addrlen; 7122 STRUCT_FSET(sb, len, addrlen); 7123 7124 /* Copy out the address */ 7125 mi_copyout(q, mp); 7126 } 7127 } 7128 7129 static int 7130 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7131 udpattrs_t *udpattrs) 7132 { 7133 struct T_unitdata_req *udreqp; 7134 int is_absreq_failure; 7135 cred_t *cr; 7136 conn_t *connp = Q_TO_CONN(q); 7137 7138 ASSERT(((t_primp_t)mp->b_rptr)->type); 7139 7140 cr = DB_CREDDEF(mp, connp->conn_cred); 7141 7142 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7143 7144 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 7145 udreqp->OPT_offset, cr, &udp_opt_obj, 7146 udpattrs, &is_absreq_failure); 7147 7148 if (*errorp != 0) { 7149 /* 7150 * Note: No special action needed in this 7151 * module for "is_absreq_failure" 7152 */ 7153 return (-1); /* failure */ 7154 } 7155 ASSERT(is_absreq_failure == 0); 7156 return (0); /* success */ 7157 } 7158 7159 void 7160 udp_ddi_g_init(void) 7161 { 7162 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7163 udp_opt_obj.odb_opt_arr_cnt); 7164 7165 /* 7166 * We want to be informed each time a stack is created or 7167 * destroyed in the kernel, so we can maintain the 7168 * set of udp_stack_t's. 7169 */ 7170 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 7171 } 7172 7173 void 7174 udp_ddi_g_destroy(void) 7175 { 7176 netstack_unregister(NS_UDP); 7177 } 7178 7179 #define INET_NAME "ip" 7180 7181 /* 7182 * Initialize the UDP stack instance. 7183 */ 7184 static void * 7185 udp_stack_init(netstackid_t stackid, netstack_t *ns) 7186 { 7187 udp_stack_t *us; 7188 udpparam_t *pa; 7189 int i; 7190 int error = 0; 7191 major_t major; 7192 7193 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 7194 us->us_netstack = ns; 7195 7196 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 7197 us->us_epriv_ports[0] = 2049; 7198 us->us_epriv_ports[1] = 4045; 7199 7200 /* 7201 * The smallest anonymous port in the priviledged port range which UDP 7202 * looks for free port. Use in the option UDP_ANONPRIVBIND. 7203 */ 7204 us->us_min_anonpriv_port = 512; 7205 7206 us->us_bind_fanout_size = udp_bind_fanout_size; 7207 7208 /* Roundup variable that might have been modified in /etc/system */ 7209 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 7210 /* Not a power of two. 
Round up to nearest power of two */ 7211 for (i = 0; i < 31; i++) { 7212 if (us->us_bind_fanout_size < (1 << i)) 7213 break; 7214 } 7215 us->us_bind_fanout_size = 1 << i; 7216 } 7217 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 7218 sizeof (udp_fanout_t), KM_SLEEP); 7219 for (i = 0; i < us->us_bind_fanout_size; i++) { 7220 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7221 NULL); 7222 } 7223 7224 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 7225 7226 us->us_param_arr = pa; 7227 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 7228 7229 (void) udp_param_register(&us->us_nd, 7230 us->us_param_arr, A_CNT(udp_param_arr)); 7231 7232 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 7233 us->us_mibkp = udp_kstat_init(stackid); 7234 7235 major = mod_name_to_major(INET_NAME); 7236 error = ldi_ident_from_major(major, &us->us_ldi_ident); 7237 ASSERT(error == 0); 7238 return (us); 7239 } 7240 7241 /* 7242 * Free the UDP stack instance. 7243 */ 7244 static void 7245 udp_stack_fini(netstackid_t stackid, void *arg) 7246 { 7247 udp_stack_t *us = (udp_stack_t *)arg; 7248 int i; 7249 7250 for (i = 0; i < us->us_bind_fanout_size; i++) { 7251 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 7252 } 7253 7254 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 7255 sizeof (udp_fanout_t)); 7256 7257 us->us_bind_fanout = NULL; 7258 7259 nd_free(&us->us_nd); 7260 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 7261 us->us_param_arr = NULL; 7262 7263 udp_kstat_fini(stackid, us->us_mibkp); 7264 us->us_mibkp = NULL; 7265 7266 udp_kstat2_fini(stackid, us->us_kstat); 7267 us->us_kstat = NULL; 7268 bzero(&us->us_statistics, sizeof (us->us_statistics)); 7269 7270 ldi_ident_release(us->us_ldi_ident); 7271 kmem_free(us, sizeof (*us)); 7272 } 7273 7274 static void * 7275 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 7276 { 7277 kstat_t *ksp; 7278 7279 udp_stat_t template = { 7280 { "udp_ip_send", KSTAT_DATA_UINT64 }, 7281 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 7282 { "udp_ire_null", KSTAT_DATA_UINT64 }, 7283 { "udp_drain", KSTAT_DATA_UINT64 }, 7284 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 7285 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 7286 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 7287 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7288 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7289 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7290 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7291 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7292 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7293 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7294 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7295 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7296 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7297 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7298 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7299 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7300 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7301 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 7302 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7303 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7304 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 7305 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7306 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7307 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7308 #ifdef DEBUG 7309 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7310 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7311 #endif 7312 }; 7313 7314 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7315 KSTAT_TYPE_NAMED, sizeof 
(template) / sizeof (kstat_named_t), 7316 KSTAT_FLAG_VIRTUAL, stackid); 7317 7318 if (ksp == NULL) 7319 return (NULL); 7320 7321 bcopy(&template, us_statisticsp, sizeof (template)); 7322 ksp->ks_data = (void *)us_statisticsp; 7323 ksp->ks_private = (void *)(uintptr_t)stackid; 7324 7325 kstat_install(ksp); 7326 return (ksp); 7327 } 7328 7329 static void 7330 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 7331 { 7332 if (ksp != NULL) { 7333 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7334 kstat_delete_netstack(ksp, stackid); 7335 } 7336 } 7337 7338 static void * 7339 udp_kstat_init(netstackid_t stackid) 7340 { 7341 kstat_t *ksp; 7342 7343 udp_named_kstat_t template = { 7344 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 7345 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7346 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 7347 { "entrySize", KSTAT_DATA_INT32, 0 }, 7348 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7349 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7350 }; 7351 7352 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 7353 KSTAT_TYPE_NAMED, 7354 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 7355 7356 if (ksp == NULL || ksp->ks_data == NULL) 7357 return (NULL); 7358 7359 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7360 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7361 7362 bcopy(&template, ksp->ks_data, sizeof (template)); 7363 ksp->ks_update = udp_kstat_update; 7364 ksp->ks_private = (void *)(uintptr_t)stackid; 7365 7366 kstat_install(ksp); 7367 return (ksp); 7368 } 7369 7370 static void 7371 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 7372 { 7373 if (ksp != NULL) { 7374 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7375 kstat_delete_netstack(ksp, stackid); 7376 } 7377 } 7378 7379 static int 7380 udp_kstat_update(kstat_t *kp, int rw) 7381 { 7382 udp_named_kstat_t *udpkp; 7383 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 7384 netstack_t *ns; 7385 udp_stack_t *us; 7386 7387 if ((kp == NULL) || (kp->ks_data == NULL)) 7388 return (EIO); 7389 7390 if (rw == KSTAT_WRITE) 7391 return (EACCES); 7392 7393 ns = netstack_find_by_stackid(stackid); 7394 if (ns == NULL) 7395 return (-1); 7396 us = ns->netstack_udp; 7397 if (us == NULL) { 7398 netstack_rele(ns); 7399 return (-1); 7400 } 7401 udpkp = (udp_named_kstat_t *)kp->ks_data; 7402 7403 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 7404 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 7405 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 7406 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 7407 netstack_rele(ns); 7408 return (0); 7409 } 7410 7411 /* 7412 * Read-side synchronous stream info entry point, called as a 7413 * result of handling certain STREAMS ioctl operations. 7414 */ 7415 static int 7416 udp_rinfop(queue_t *q, infod_t *dp) 7417 { 7418 mblk_t *mp; 7419 uint_t cmd = dp->d_cmd; 7420 int res = 0; 7421 int error = 0; 7422 udp_t *udp = Q_TO_UDP(q); 7423 struct stdata *stp = STREAM(q); 7424 7425 mutex_enter(&udp->udp_drain_lock); 7426 /* If shutdown on read has happened, return nothing */ 7427 mutex_enter(&stp->sd_lock); 7428 if (stp->sd_flag & STREOF) { 7429 mutex_exit(&stp->sd_lock); 7430 goto done; 7431 } 7432 mutex_exit(&stp->sd_lock); 7433 7434 if ((mp = udp->udp_rcv_list_head) == NULL) 7435 goto done; 7436 7437 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7438 7439 if (cmd & INFOD_COUNT) { 7440 /* 7441 * Return the number of messages. 
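 * Each datagram queued on the receive list counts as one message
 * (udp_rcv_msgcnt).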
7442 */ 7443 dp->d_count += udp->udp_rcv_msgcnt; 7444 res |= INFOD_COUNT; 7445 } 7446 if (cmd & INFOD_BYTES) { 7447 /* 7448 * Return size of all data messages. 7449 */ 7450 dp->d_bytes += udp->udp_rcv_cnt; 7451 res |= INFOD_BYTES; 7452 } 7453 if (cmd & INFOD_FIRSTBYTES) { 7454 /* 7455 * Return size of first data message. 7456 */ 7457 dp->d_bytes = msgdsize(mp); 7458 res |= INFOD_FIRSTBYTES; 7459 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7460 } 7461 if (cmd & INFOD_COPYOUT) { 7462 mblk_t *mp1 = mp->b_cont; 7463 int n; 7464 /* 7465 * Return data contents of first message. 7466 */ 7467 ASSERT(DB_TYPE(mp1) == M_DATA); 7468 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7469 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7470 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7471 UIO_READ, dp->d_uiop)) != 0) { 7472 goto done; 7473 } 7474 mp1 = mp1->b_cont; 7475 } 7476 res |= INFOD_COPYOUT; 7477 dp->d_cmd &= ~INFOD_COPYOUT; 7478 } 7479 done: 7480 mutex_exit(&udp->udp_drain_lock); 7481 7482 dp->d_res |= res; 7483 7484 return (error); 7485 } 7486 7487 /* 7488 * Read-side synchronous stream entry point. This is called as a result 7489 * of recv/read operation done at sockfs, and is guaranteed to execute 7490 * outside of the interrupt thread context. It returns a single datagram 7491 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7492 */ 7493 static int 7494 udp_rrw(queue_t *q, struiod_t *dp) 7495 { 7496 mblk_t *mp; 7497 udp_t *udp = Q_TO_UDP(q); 7498 udp_stack_t *us = udp->udp_us; 7499 7500 /* 7501 * Dequeue datagram from the head of the list and return 7502 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7503 * set/cleared depending on whether or not there's data 7504 * remaining in the list. 7505 */ 7506 mutex_enter(&udp->udp_drain_lock); 7507 if (!udp->udp_direct_sockfs) { 7508 mutex_exit(&udp->udp_drain_lock); 7509 UDP_STAT(us, udp_rrw_busy); 7510 return (EBUSY); 7511 } 7512 if ((mp = udp->udp_rcv_list_head) != NULL) { 7513 uint_t size = msgdsize(mp); 7514 7515 /* Last datagram in the list? */ 7516 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 7517 udp->udp_rcv_list_tail = NULL; 7518 mp->b_next = NULL; 7519 7520 udp->udp_rcv_cnt -= size; 7521 udp->udp_rcv_msgcnt--; 7522 UDP_STAT(us, udp_rrw_msgcnt); 7523 7524 /* No longer flow-controlling? */ 7525 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 7526 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 7527 udp->udp_drain_qfull = B_FALSE; 7528 } 7529 if (udp->udp_rcv_list_head == NULL) { 7530 /* 7531 * Either we just dequeued the last datagram or 7532 * we get here from sockfs and have nothing to 7533 * return; in this case clear RSLEEP. 7534 */ 7535 ASSERT(udp->udp_rcv_cnt == 0); 7536 ASSERT(udp->udp_rcv_msgcnt == 0); 7537 ASSERT(udp->udp_rcv_list_tail == NULL); 7538 STR_WAKEUP_CLEAR(STREAM(q)); 7539 } else { 7540 /* 7541 * More data follows; we need udp_rrw() to be 7542 * called in future to pick up the rest. 7543 */ 7544 STR_WAKEUP_SET(STREAM(q)); 7545 } 7546 mutex_exit(&udp->udp_drain_lock); 7547 dp->d_mp = mp; 7548 return (0); 7549 } 7550 7551 /* 7552 * Enqueue a completely-built T_UNITDATA_IND message into the receive 7553 * list; this is typically executed within the interrupt thread context 7554 * and so we do things as quickly as possible. 
7555 */ 7556 static void 7557 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 7558 { 7559 ASSERT(q == RD(q)); 7560 ASSERT(pkt_len == msgdsize(mp)); 7561 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 7562 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 7563 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 7564 7565 mutex_enter(&udp->udp_drain_lock); 7566 /* 7567 * Wake up and signal the receiving app; it is okay to do this 7568 * before enqueueing the mp because we are holding the drain lock. 7569 * One of the advantages of synchronous stream is the ability for 7570 * us to find out when the application performs a read on the 7571 * socket by way of udp_rrw() entry point being called. We need 7572 * to generate SIGPOLL/SIGIO for each received data in the case 7573 * of asynchronous socket just as in the strrput() case. However, 7574 * we only wake the application up when necessary, i.e. during the 7575 * first enqueue. When udp_rrw() is called, we send up a single 7576 * datagram upstream and call STR_WAKEUP_SET() again when there 7577 * are still data remaining in our receive queue. 7578 */ 7579 STR_WAKEUP_SENDSIG(STREAM(q), udp->udp_rcv_list_head); 7580 if (udp->udp_rcv_list_head == NULL) 7581 udp->udp_rcv_list_head = mp; 7582 else 7583 udp->udp_rcv_list_tail->b_next = mp; 7584 udp->udp_rcv_list_tail = mp; 7585 udp->udp_rcv_cnt += pkt_len; 7586 udp->udp_rcv_msgcnt++; 7587 7588 /* Need to flow-control? */ 7589 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 7590 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 7591 udp->udp_drain_qfull = B_TRUE; 7592 7593 mutex_exit(&udp->udp_drain_lock); 7594 } 7595 7596 /* 7597 * Drain the contents of receive list to the module upstream; we do 7598 * this during close or when we fallback to the slow mode due to 7599 * sockmod being popped or a module being pushed on top of us. 7600 */ 7601 static void 7602 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 7603 { 7604 mblk_t *mp; 7605 udp_stack_t *us = udp->udp_us; 7606 7607 mutex_enter(&udp->udp_drain_lock); 7608 /* 7609 * There is no race with a concurrent udp_input() sending 7610 * up packets using putnext() after we have cleared the 7611 * udp_direct_sockfs flag but before we have completed 7612 * sending up the packets in udp_rcv_list, since we are 7613 * either a writer or we have quiesced the conn. 7614 */ 7615 udp->udp_direct_sockfs = B_FALSE; 7616 mutex_exit(&udp->udp_drain_lock); 7617 7618 if (udp->udp_rcv_list_head != NULL) 7619 UDP_STAT(us, udp_drain); 7620 7621 /* 7622 * Send up everything via putnext(); note here that we 7623 * don't need the udp_drain_lock to protect us since 7624 * nothing can enter udp_rrw() and that we currently 7625 * have exclusive access to this udp. 
7626 */ 7627 while ((mp = udp->udp_rcv_list_head) != NULL) { 7628 udp->udp_rcv_list_head = mp->b_next; 7629 mp->b_next = NULL; 7630 udp->udp_rcv_cnt -= msgdsize(mp); 7631 udp->udp_rcv_msgcnt--; 7632 if (closing) { 7633 freemsg(mp); 7634 } else { 7635 ASSERT(q == RD(q)); 7636 putnext(q, mp); 7637 } 7638 } 7639 ASSERT(udp->udp_rcv_cnt == 0); 7640 ASSERT(udp->udp_rcv_msgcnt == 0); 7641 ASSERT(udp->udp_rcv_list_head == NULL); 7642 udp->udp_rcv_list_tail = NULL; 7643 udp->udp_drain_qfull = B_FALSE; 7644 } 7645 7646 static size_t 7647 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7648 { 7649 udp_stack_t *us = udp->udp_us; 7650 7651 /* We add a bit of extra buffering */ 7652 size += size >> 1; 7653 if (size > us->us_max_buf) 7654 size = us->us_max_buf; 7655 7656 udp->udp_rcv_hiwat = size; 7657 return (size); 7658 } 7659 7660 /* 7661 * For the lower queue so that UDP can be a dummy mux. 7662 * Nobody should be sending 7663 * packets up this stream 7664 */ 7665 static void 7666 udp_lrput(queue_t *q, mblk_t *mp) 7667 { 7668 mblk_t *mp1; 7669 7670 switch (mp->b_datap->db_type) { 7671 case M_FLUSH: 7672 /* Turn around */ 7673 if (*mp->b_rptr & FLUSHW) { 7674 *mp->b_rptr &= ~FLUSHR; 7675 qreply(q, mp); 7676 return; 7677 } 7678 break; 7679 } 7680 /* Could receive messages that passed through ar_rput */ 7681 for (mp1 = mp; mp1; mp1 = mp1->b_cont) 7682 mp1->b_prev = mp1->b_next = NULL; 7683 freemsg(mp); 7684 } 7685 7686 /* 7687 * For the lower queue so that UDP can be a dummy mux. 7688 * Nobody should be sending packets down this stream. 7689 */ 7690 /* ARGSUSED */ 7691 void 7692 udp_lwput(queue_t *q, mblk_t *mp) 7693 { 7694 freemsg(mp); 7695 } 7696 7697 /* 7698 * Below routines for UDP socket module. 7699 */ 7700 7701 static conn_t * 7702 udp_do_open(cred_t *credp, boolean_t isv6, int flags) 7703 { 7704 udp_t *udp; 7705 conn_t *connp; 7706 zoneid_t zoneid; 7707 netstack_t *ns; 7708 udp_stack_t *us; 7709 7710 ns = netstack_find_by_cred(credp); 7711 ASSERT(ns != NULL); 7712 us = ns->netstack_udp; 7713 ASSERT(us != NULL); 7714 7715 /* 7716 * For exclusive stacks we set the zoneid to zero 7717 * to make UDP operate as if in the global zone. 7718 */ 7719 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 7720 zoneid = GLOBAL_ZONEID; 7721 else 7722 zoneid = crgetzoneid(credp); 7723 7724 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 7725 7726 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 7727 if (connp == NULL) { 7728 netstack_rele(ns); 7729 return (NULL); 7730 } 7731 udp = connp->conn_udp; 7732 7733 /* 7734 * ipcl_conn_create did a netstack_hold. Undo the hold that was 7735 * done by netstack_find_by_cred() 7736 */ 7737 netstack_rele(ns); 7738 7739 rw_enter(&udp->udp_rwlock, RW_WRITER); 7740 ASSERT(connp->conn_ulp == IPPROTO_UDP); 7741 ASSERT(connp->conn_udp == udp); 7742 ASSERT(udp->udp_connp == connp); 7743 7744 /* Set the initial state of the stream and the privilege status. 
*/ 7745 udp->udp_state = TS_UNBND; 7746 if (isv6) { 7747 udp->udp_family = AF_INET6; 7748 udp->udp_ipversion = IPV6_VERSION; 7749 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 7750 udp->udp_ttl = us->us_ipv6_hoplimit; 7751 connp->conn_af_isv6 = B_TRUE; 7752 connp->conn_flags |= IPCL_ISV6; 7753 } else { 7754 udp->udp_family = AF_INET; 7755 udp->udp_ipversion = IPV4_VERSION; 7756 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 7757 udp->udp_ttl = us->us_ipv4_ttl; 7758 connp->conn_af_isv6 = B_FALSE; 7759 connp->conn_flags &= ~IPCL_ISV6; 7760 } 7761 7762 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 7763 udp->udp_pending_op = -1; 7764 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 7765 connp->conn_zoneid = zoneid; 7766 7767 udp->udp_open_time = lbolt64; 7768 udp->udp_open_pid = curproc->p_pid; 7769 7770 /* 7771 * If the caller has the process-wide flag set, then default to MAC 7772 * exempt mode. This allows read-down to unlabeled hosts. 7773 */ 7774 if (getpflags(NET_MAC_AWARE, credp) != 0) 7775 connp->conn_mac_exempt = B_TRUE; 7776 7777 connp->conn_ulp_labeled = is_system_labeled(); 7778 7779 udp->udp_us = us; 7780 7781 connp->conn_recv = udp_input; 7782 crhold(credp); 7783 connp->conn_cred = credp; 7784 7785 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 7786 7787 rw_exit(&udp->udp_rwlock); 7788 7789 return (connp); 7790 } 7791 7792 /* ARGSUSED */ 7793 sock_lower_handle_t 7794 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 7795 uint_t *smodep, int *errorp, int flags, cred_t *credp) 7796 { 7797 udp_t *udp = NULL; 7798 udp_stack_t *us; 7799 conn_t *connp; 7800 boolean_t isv6; 7801 7802 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 7803 (proto != 0 && proto != IPPROTO_UDP)) { 7804 *errorp = EPROTONOSUPPORT; 7805 return (NULL); 7806 } 7807 7808 if (family == AF_INET6) 7809 isv6 = B_TRUE; 7810 else 7811 isv6 = B_FALSE; 7812 7813 connp = udp_do_open(credp, isv6, flags); 7814 if (connp == NULL) { 7815 *errorp = ENOMEM; 7816 return (NULL); 7817 } 7818 7819 udp = connp->conn_udp; 7820 ASSERT(udp != NULL); 7821 us = udp->udp_us; 7822 ASSERT(us != NULL); 7823 7824 connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET; 7825 7826 /* Set flow control */ 7827 rw_enter(&udp->udp_rwlock, RW_WRITER); 7828 (void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat); 7829 udp->udp_rcv_disply_hiwat = us->us_recv_hiwat; 7830 udp->udp_rcv_lowat = udp_mod_info.mi_lowat; 7831 udp->udp_xmit_hiwat = us->us_xmit_hiwat; 7832 udp->udp_xmit_lowat = us->us_xmit_lowat; 7833 7834 if (udp->udp_family == AF_INET6) { 7835 /* Build initial header template for transmit */ 7836 if ((*errorp = udp_build_hdrs(udp)) != 0) { 7837 rw_exit(&udp->udp_rwlock); 7838 ipcl_conn_destroy(connp); 7839 return (NULL); 7840 } 7841 } 7842 rw_exit(&udp->udp_rwlock); 7843 7844 connp->conn_flow_cntrld = B_FALSE; 7845 7846 ASSERT(us->us_ldi_ident != NULL); 7847 7848 if ((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) { 7849 ip1dbg(("udp_create: create of IP helper stream failed\n")); 7850 udp_do_close(connp); 7851 return (NULL); 7852 } 7853 7854 /* Set the send flow control */ 7855 connp->conn_wq->q_hiwat = us->us_xmit_hiwat; 7856 connp->conn_wq->q_lowat = us->us_xmit_lowat; 7857 7858 mutex_enter(&connp->conn_lock); 7859 connp->conn_state_flags &= ~CONN_INCIPIENT; 7860 mutex_exit(&connp->conn_lock); 7861 7862 *errorp = 0; 7863 *smodep = SM_ATOMIC; 7864 *sock_downcalls = &sock_udp_downcalls; 7865 return ((sock_lower_handle_t)connp); 7866 } 7867 7868 /* ARGSUSED */ 
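/*
 * Socket activation downcall from sockfs: record the upper layer's
 * handle and upcall vector on the conn_t, then push the initial
 * protocol properties (write offset, receive high-water mark and
 * maximum message sizes) upstream via su_set_proto_props().
 */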
7869 void 7870 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 7871 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 7872 { 7873 conn_t *connp = (conn_t *)proto_handle; 7874 udp_t *udp = connp->conn_udp; 7875 udp_stack_t *us = udp->udp_us; 7876 struct sock_proto_props sopp; 7877 7878 connp->conn_upcalls = sock_upcalls; 7879 connp->conn_upper_handle = sock_handle; 7880 7881 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | 7882 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 7883 sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 7884 sopp.sopp_maxblk = INFPSZ; 7885 sopp.sopp_rxhiwat = udp->udp_rcv_hiwat; 7886 sopp.sopp_maxaddrlen = sizeof (sin6_t); 7887 sopp.sopp_maxpsz = 7888 (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 7889 UDP_MAXPACKET_IPV6; 7890 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 : 7891 udp_mod_info.mi_minpsz; 7892 7893 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 7894 &sopp); 7895 } 7896 7897 static void 7898 udp_do_close(conn_t *connp) 7899 { 7900 udp_t *udp; 7901 7902 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 7903 udp = connp->conn_udp; 7904 7905 udp_quiesce_conn(connp); 7906 ip_quiesce_conn(connp); 7907 7908 if (!IPCL_IS_NONSTR(connp)) { 7909 /* 7910 * Disable read-side synchronous stream 7911 * interface and drain any queued data. 7912 */ 7913 ASSERT(connp->conn_wq != NULL); 7914 udp_rcv_drain(connp->conn_wq, udp, B_TRUE); 7915 ASSERT(!udp->udp_direct_sockfs); 7916 7917 ASSERT(connp->conn_rq != NULL); 7918 qprocsoff(connp->conn_rq); 7919 } 7920 7921 ASSERT(udp->udp_rcv_cnt == 0); 7922 ASSERT(udp->udp_rcv_msgcnt == 0); 7923 ASSERT(udp->udp_rcv_list_head == NULL); 7924 ASSERT(udp->udp_rcv_list_tail == NULL); 7925 7926 udp_close_free(connp); 7927 7928 /* 7929 * Now we are truly single threaded on this stream, and can 7930 * delete the things hanging off the connp, and finally the connp. 7931 * We removed this connp from the fanout list, it cannot be 7932 * accessed thru the fanouts, and we already waited for the 7933 * conn_ref to drop to 0. We are already in close, so 7934 * there cannot be any other thread from the top. qprocsoff 7935 * has completed, and service has completed or won't run in 7936 * future. 
7937 */ 7938 ASSERT(connp->conn_ref == 1); 7939 if (!IPCL_IS_NONSTR(connp)) { 7940 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 7941 } else { 7942 ip_free_helper_stream(connp); 7943 } 7944 7945 connp->conn_ref--; 7946 ipcl_conn_destroy(connp); 7947 } 7948 7949 /* ARGSUSED */ 7950 int 7951 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 7952 { 7953 conn_t *connp = (conn_t *)proto_handle; 7954 7955 udp_do_close(connp); 7956 return (0); 7957 } 7958 7959 static int 7960 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 7961 boolean_t bind_to_req_port_only) 7962 { 7963 sin_t *sin; 7964 sin6_t *sin6; 7965 sin6_t sin6addr; 7966 in_port_t port; /* Host byte order */ 7967 in_port_t requested_port; /* Host byte order */ 7968 int count; 7969 in6_addr_t v6src; 7970 int loopmax; 7971 udp_fanout_t *udpf; 7972 in_port_t lport; /* Network byte order */ 7973 zoneid_t zoneid; 7974 udp_t *udp; 7975 boolean_t is_inaddr_any; 7976 mlp_type_t addrtype, mlptype; 7977 udp_stack_t *us; 7978 int error = 0; 7979 mblk_t *mp = NULL; 7980 7981 udp = connp->conn_udp; 7982 us = udp->udp_us; 7983 7984 if (udp->udp_state != TS_UNBND) { 7985 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 7986 "udp_bind: bad state, %u", udp->udp_state); 7987 return (-TOUTSTATE); 7988 } 7989 7990 switch (len) { 7991 case 0: 7992 if (udp->udp_family == AF_INET) { 7993 sin = (sin_t *)&sin6addr; 7994 *sin = sin_null; 7995 sin->sin_family = AF_INET; 7996 sin->sin_addr.s_addr = INADDR_ANY; 7997 udp->udp_ipversion = IPV4_VERSION; 7998 } else { 7999 ASSERT(udp->udp_family == AF_INET6); 8000 sin6 = (sin6_t *)&sin6addr; 8001 *sin6 = sin6_null; 8002 sin6->sin6_family = AF_INET6; 8003 V6_SET_ZERO(sin6->sin6_addr); 8004 udp->udp_ipversion = IPV6_VERSION; 8005 } 8006 port = 0; 8007 break; 8008 8009 case sizeof (sin_t): /* Complete IPv4 address */ 8010 sin = (sin_t *)sa; 8011 8012 if (sin == NULL || !OK_32PTR((char *)sin)) 8013 return (EINVAL); 8014 8015 if (udp->udp_family != AF_INET || 8016 sin->sin_family != AF_INET) { 8017 return (EAFNOSUPPORT); 8018 } 8019 port = ntohs(sin->sin_port); 8020 break; 8021 8022 case sizeof (sin6_t): /* complete IPv6 address */ 8023 sin6 = (sin6_t *)sa; 8024 8025 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 8026 return (EINVAL); 8027 8028 if (udp->udp_family != AF_INET6 || 8029 sin6->sin6_family != AF_INET6) { 8030 return (EAFNOSUPPORT); 8031 } 8032 port = ntohs(sin6->sin6_port); 8033 break; 8034 8035 default: /* Invalid request */ 8036 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8037 "udp_bind: bad ADDR_length length %u", len); 8038 return (-TBADADDR); 8039 } 8040 8041 requested_port = port; 8042 8043 if (requested_port == 0 || !bind_to_req_port_only) 8044 bind_to_req_port_only = B_FALSE; 8045 else /* T_BIND_REQ and requested_port != 0 */ 8046 bind_to_req_port_only = B_TRUE; 8047 8048 if (requested_port == 0) { 8049 /* 8050 * If the application passed in zero for the port number, it 8051 * doesn't care which port number we bind to. Get one in the 8052 * valid range. 8053 */ 8054 if (udp->udp_anon_priv_bind) { 8055 port = udp_get_next_priv_port(udp); 8056 } else { 8057 port = udp_update_next_port(udp, 8058 us->us_next_port_to_try, B_TRUE); 8059 } 8060 } else { 8061 /* 8062 * If the port is in the well-known privileged range, 8063 * make sure the caller was privileged. 
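		 *
		 * For example (illustrative values, assuming the default
		 * tunables): with us_smallest_nonpriv_port at its usual
		 * default of 1024, an unprivileged bind to port 512 fails
		 * the secpolicy_net_privaddr() check below and is rejected
		 * with TACCES (EACCES at the socket level), while a
		 * suitably privileged process is allowed through.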
		 */
		int i;
		boolean_t priv = B_FALSE;

		if (port < us->us_smallest_nonpriv_port) {
			priv = B_TRUE;
		} else {
			for (i = 0; i < us->us_num_epriv_ports; i++) {
				if (port == us->us_epriv_ports[i]) {
					priv = B_TRUE;
					break;
				}
			}
		}

		if (priv) {
			if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
				return (-TACCES);
		}
	}

	if (port == 0)
		return (-TNOADDR);

	/*
	 * The state must be TS_UNBND. TPI mandates that users must send
	 * TPI primitives only 1 at a time and wait for the response before
	 * sending the next primitive.
	 */
	rw_enter(&udp->udp_rwlock, RW_WRITER);
	if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) {
		rw_exit(&udp->udp_rwlock);
		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad state, %u", udp->udp_state);
		return (-TOUTSTATE);
	}
	/* XXX how to remove the T_BIND_REQ? Should set it before calling */
	udp->udp_pending_op = T_BIND_REQ;
	/*
	 * Copy the source address into our udp structure. This address
	 * may still be zero; if so, IP will fill in the correct address
	 * each time an outbound packet is passed to it. Since the udp is
	 * not yet in the bind hash list, we don't grab the uf_lock to
	 * change udp_ipversion.
	 */
	if (udp->udp_family == AF_INET) {
		ASSERT(sin != NULL);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
	} else {
		ASSERT(sin6 != NULL);
		v6src = sin6->sin6_addr;
		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
			/*
			 * no need to hold the uf_lock to set the udp_ipversion
			 * since we are not yet in the fanout list
			 */
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
		}
	}

	/*
	 * If udp_reuseaddr is not set, then we have to make sure that
	 * the IP address and port number the application requested
	 * (or we selected for the application) is not being used by
	 * another stream. If another stream is already using the
	 * requested IP address and port, the behavior depends on
	 * "bind_to_req_port_only". If set, the bind fails; otherwise we
	 * search for an unused port to bind to the stream.
	 *
	 * As per the BSD semantics, as modified by the Deering multicast
	 * changes, if udp_reuseaddr is set, then we allow multiple binds
	 * to the same port independent of the local IP address.
	 *
	 * This is slightly different from SunOS 4.X, which did not
	 * support IP multicast. Note that the change implemented by the
	 * Deering multicast code affects all binds - not only binding
	 * to IP multicast addresses.
	 *
	 * Note that when binding to port zero we ignore SO_REUSEADDR in
	 * order to guarantee a unique port.
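	 *
	 * A concrete illustration of the above (hypothetical application
	 * level sequence; the port number is arbitrary):
	 *
	 *	s1 = socket(AF_INET, SOCK_DGRAM, 0);
	 *	s2 = socket(AF_INET, SOCK_DGRAM, 0);
	 *	bind(s1, <INADDR_ANY, 5000>)	succeeds
	 *	bind(s2, <INADDR_ANY, 5000>)	fails with TADDRBUSY
	 *					(EADDRINUSE at the socket level)
	 *
	 * whereas with SO_REUSEADDR set the second bind to port 5000 is
	 * also allowed, independent of the local IP address, subject to
	 * the UDP_EXCLBIND/SO_MAC_EXEMPT checks in the loop below.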
	 */

	count = 0;
	if (udp->udp_anon_priv_bind) {
		/*
		 * loopmax = (IPPORT_RESERVED-1) -
		 *	us->us_min_anonpriv_port + 1
		 */
		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
	} else {
		loopmax = us->us_largest_anon_port -
		    us->us_smallest_anon_port + 1;
	}

	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
	zoneid = connp->conn_zoneid;

	for (;;) {
		udp_t		*udp1;
		boolean_t	found_exclbind = B_FALSE;

		/*
		 * Walk through the list of udp streams bound to
		 * the requested port with the same IP address.
		 */
		lport = htons(port);
		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
		    us->us_bind_fanout_size)];
		mutex_enter(&udpf->uf_lock);
		for (udp1 = udpf->uf_udp; udp1 != NULL;
		    udp1 = udp1->udp_bind_hash) {
			if (lport != udp1->udp_port)
				continue;

			/*
			 * On a labeled system, we must treat bindings to ports
			 * on shared IP addresses by sockets with MAC exemption
			 * privilege as being in all zones, as there's
			 * otherwise no way to identify the right receiver.
			 */
			if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) ||
			    IPCL_ZONE_MATCH(connp,
			    udp1->udp_connp->conn_zoneid)) &&
			    !connp->conn_mac_exempt &&
			    !udp1->udp_connp->conn_mac_exempt)
				continue;

			/*
			 * If UDP_EXCLBIND is set for either the bound or
			 * binding endpoint, the semantics of bind
			 * are changed according to the following chart.
			 *
			 * spec = specified address (v4 or v6)
			 * unspec = unspecified address (v4 or v6)
			 * A = specified addresses are different for endpoints
			 *
			 * bound	bind to		allowed?
			 * -------------------------------------
			 * unspec	unspec		no
			 * unspec	spec		no
			 * spec		unspec		no
			 * spec		spec		yes if A
			 *
			 * For labeled systems, SO_MAC_EXEMPT behaves the same
			 * as UDP_EXCLBIND, except that zoneid is ignored.
			 */
			if (udp1->udp_exclbind || udp->udp_exclbind ||
			    udp1->udp_connp->conn_mac_exempt ||
			    connp->conn_mac_exempt) {
				if (V6_OR_V4_INADDR_ANY(
				    udp1->udp_bound_v6src) ||
				    is_inaddr_any ||
				    IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
				    &v6src)) {
					found_exclbind = B_TRUE;
					break;
				}
				continue;
			}

			/*
			 * Check ipversion to allow IPv4 and IPv6 sockets to
			 * have disjoint port number spaces.
			 */
			if (udp->udp_ipversion != udp1->udp_ipversion) {

				/*
				 * On the first time through the loop, if the
				 * user intentionally specified a
				 * particular port number, then ignore any
				 * bindings of the other protocol that may
				 * conflict. This allows the user to bind IPv6
				 * alone and get both v4 and v6, or bind both
				 * and get each separately. On subsequent
				 * times through the loop, we're checking a
				 * port that we chose (not the user) and thus
				 * we do not allow casual duplicate bindings.
				 */
				if (count == 0 && requested_port != 0)
					continue;
			}

			/*
			 * This check applies regardless of SO_REUSEADDR.
			 *
			 * If the existing port is bound to a
			 * non-wildcard IP address and
			 * the requesting stream is bound to
			 * a distinct, different IP address
			 * (also non-wildcard), keep going.
			 */
			if (!is_inaddr_any &&
			    !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
			    !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
			    &v6src)) {
				continue;
			}
			break;
		}

		if (!found_exclbind &&
		    (udp->udp_reuseaddr && requested_port != 0)) {
			break;
		}

		if (udp1 == NULL) {
			/*
			 * No other stream has this IP address
			 * and port number. We can use it.
			 */
			break;
		}
		mutex_exit(&udpf->uf_lock);
		if (bind_to_req_port_only) {
			/*
			 * We get here only when the requested port
			 * is bound (and only on the first iteration
			 * of the for() loop).
			 *
			 * The semantics of this bind request
			 * require it to fail so we return from
			 * the routine (and exit the loop).
			 */
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			return (-TADDRBUSY);
		}

		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			if ((count == 0) && (requested_port != 0)) {
				/*
				 * If the application wants us to find
				 * a port, get one to start with. Set
				 * requested_port to 0, so that we will
				 * update us->us_next_port_to_try below.
				 */
				port = udp_update_next_port(udp,
				    us->us_next_port_to_try, B_TRUE);
				requested_port = 0;
			} else {
				port = udp_update_next_port(udp, port + 1,
				    B_FALSE);
			}
		}

		if (port == 0 || ++count >= loopmax) {
			/*
			 * We've tried every possible port number and
			 * there are none available, so send an error
			 * to the user.
			 */
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			return (-TNOADDR);
		}
	}

	/*
	 * Copy the source address into our udp structure. This address
	 * may still be zero; if so, ip will fill in the correct address
	 * each time an outbound packet is passed to it.
	 * If we are binding to a broadcast or multicast address then
	 * udp_post_ip_bind_connect will clear the source address
	 * when udp_do_bind succeeds.
	 */
	udp->udp_v6src = udp->udp_bound_v6src = v6src;
	udp->udp_port = lport;
	/*
	 * Now reset the next anonymous port if the application requested
	 * an anonymous port, or we handed out the next anonymous port.
	 */
	if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
		us->us_next_port_to_try = port + 1;
	}

	/* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip.
*/ 8351 if (udp->udp_family == AF_INET) { 8352 sin->sin_port = udp->udp_port; 8353 } else { 8354 sin6->sin6_port = udp->udp_port; 8355 /* Rebuild the header template */ 8356 error = udp_build_hdrs(udp); 8357 if (error != 0) { 8358 udp->udp_pending_op = -1; 8359 rw_exit(&udp->udp_rwlock); 8360 mutex_exit(&udpf->uf_lock); 8361 return (error); 8362 } 8363 } 8364 udp->udp_state = TS_IDLE; 8365 udp_bind_hash_insert(udpf, udp); 8366 mutex_exit(&udpf->uf_lock); 8367 rw_exit(&udp->udp_rwlock); 8368 8369 if (cl_inet_bind) { 8370 /* 8371 * Running in cluster mode - register bind information 8372 */ 8373 if (udp->udp_ipversion == IPV4_VERSION) { 8374 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8375 IPPROTO_UDP, AF_INET, 8376 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8377 (in_port_t)udp->udp_port, NULL); 8378 } else { 8379 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8380 IPPROTO_UDP, AF_INET6, 8381 (uint8_t *)&(udp->udp_v6src), 8382 (in_port_t)udp->udp_port, NULL); 8383 } 8384 } 8385 8386 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 8387 if (is_system_labeled() && (!connp->conn_anon_port || 8388 connp->conn_anon_mlp)) { 8389 uint16_t mlpport; 8390 cred_t *cr = connp->conn_cred; 8391 zone_t *zone; 8392 8393 zone = crgetzone(cr); 8394 connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth : 8395 mlptSingle; 8396 addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION, 8397 &v6src, us->us_netstack->netstack_ip); 8398 if (addrtype == mlptSingle) { 8399 rw_enter(&udp->udp_rwlock, RW_WRITER); 8400 udp->udp_pending_op = -1; 8401 rw_exit(&udp->udp_rwlock); 8402 connp->conn_anon_port = B_FALSE; 8403 connp->conn_mlp_type = mlptSingle; 8404 return (-TNOADDR); 8405 } 8406 mlpport = connp->conn_anon_port ? PMAPPORT : port; 8407 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 8408 addrtype); 8409 if (mlptype != mlptSingle && 8410 (connp->conn_mlp_type == mlptSingle || 8411 secpolicy_net_bindmlp(cr) != 0)) { 8412 if (udp->udp_debug) { 8413 (void) strlog(UDP_MOD_ID, 0, 1, 8414 SL_ERROR|SL_TRACE, 8415 "udp_bind: no priv for multilevel port %d", 8416 mlpport); 8417 } 8418 rw_enter(&udp->udp_rwlock, RW_WRITER); 8419 udp->udp_pending_op = -1; 8420 rw_exit(&udp->udp_rwlock); 8421 connp->conn_anon_port = B_FALSE; 8422 connp->conn_mlp_type = mlptSingle; 8423 return (-TACCES); 8424 } 8425 8426 /* 8427 * If we're specifically binding a shared IP address and the 8428 * port is MLP on shared addresses, then check to see if this 8429 * zone actually owns the MLP. Reject if not. 8430 */ 8431 if (mlptype == mlptShared && addrtype == mlptShared) { 8432 /* 8433 * No need to handle exclusive-stack zones since 8434 * ALL_ZONES only applies to the shared stack. 
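			 *
			 * (Illustrative example, assuming a Trusted
			 * Extensions configuration: if the shared-address
			 * MLP for this udp port is owned by the global
			 * zone, a bind from any other zone is rejected
			 * just below with TACCES, and the attempt is
			 * logged when udp_debug is set.)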
8435 */ 8436 zoneid_t mlpzone; 8437 8438 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 8439 htons(mlpport)); 8440 if (connp->conn_zoneid != mlpzone) { 8441 if (udp->udp_debug) { 8442 (void) strlog(UDP_MOD_ID, 0, 1, 8443 SL_ERROR|SL_TRACE, 8444 "udp_bind: attempt to bind port " 8445 "%d on shared addr in zone %d " 8446 "(should be %d)", 8447 mlpport, connp->conn_zoneid, 8448 mlpzone); 8449 } 8450 rw_enter(&udp->udp_rwlock, RW_WRITER); 8451 udp->udp_pending_op = -1; 8452 rw_exit(&udp->udp_rwlock); 8453 connp->conn_anon_port = B_FALSE; 8454 connp->conn_mlp_type = mlptSingle; 8455 return (-TACCES); 8456 } 8457 } 8458 if (connp->conn_anon_port) { 8459 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 8460 port, B_TRUE); 8461 if (error != 0) { 8462 if (udp->udp_debug) { 8463 (void) strlog(UDP_MOD_ID, 0, 1, 8464 SL_ERROR|SL_TRACE, 8465 "udp_bind: cannot establish anon " 8466 "MLP for port %d", port); 8467 } 8468 rw_enter(&udp->udp_rwlock, RW_WRITER); 8469 udp->udp_pending_op = -1; 8470 rw_exit(&udp->udp_rwlock); 8471 connp->conn_anon_port = B_FALSE; 8472 connp->conn_mlp_type = mlptSingle; 8473 return (-TACCES); 8474 } 8475 } 8476 connp->conn_mlp_type = mlptype; 8477 } 8478 8479 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8480 /* 8481 * Append a request for an IRE if udp_v6src not 8482 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 8483 */ 8484 mp = allocb(sizeof (ire_t), BPRI_HI); 8485 if (!mp) { 8486 rw_enter(&udp->udp_rwlock, RW_WRITER); 8487 udp->udp_pending_op = -1; 8488 rw_exit(&udp->udp_rwlock); 8489 return (ENOMEM); 8490 } 8491 mp->b_wptr += sizeof (ire_t); 8492 mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8493 } 8494 if (udp->udp_family == AF_INET6) { 8495 ASSERT(udp->udp_connp->conn_af_isv6); 8496 error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP, 8497 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 8498 } else { 8499 ASSERT(!udp->udp_connp->conn_af_isv6); 8500 error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP, 8501 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, 8502 B_TRUE); 8503 } 8504 8505 (void) udp_post_ip_bind_connect(udp, mp, error); 8506 return (error); 8507 } 8508 8509 int 8510 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 8511 socklen_t len, cred_t *cr) 8512 { 8513 int error; 8514 conn_t *connp; 8515 8516 connp = (conn_t *)proto_handle; 8517 8518 if (sa == NULL) 8519 error = udp_do_unbind(connp); 8520 else 8521 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 8522 8523 if (error < 0) { 8524 if (error == -TOUTSTATE) 8525 error = EINVAL; 8526 else 8527 error = proto_tlitosyserr(-error); 8528 } 8529 8530 return (error); 8531 } 8532 8533 static int 8534 udp_implicit_bind(conn_t *connp, cred_t *cr) 8535 { 8536 int error; 8537 8538 error = udp_do_bind(connp, NULL, 0, cr, B_FALSE); 8539 return ((error < 0) ? proto_tlitosyserr(-error) : error); 8540 } 8541 8542 /* 8543 * This routine removes a port number association from a stream. It 8544 * is called by udp_unbind and udp_tpi_unbind. 
8545 */ 8546 static int 8547 udp_do_unbind(conn_t *connp) 8548 { 8549 udp_t *udp = connp->conn_udp; 8550 udp_fanout_t *udpf; 8551 udp_stack_t *us = udp->udp_us; 8552 8553 if (cl_inet_unbind != NULL) { 8554 /* 8555 * Running in cluster mode - register unbind information 8556 */ 8557 if (udp->udp_ipversion == IPV4_VERSION) { 8558 (*cl_inet_unbind)( 8559 connp->conn_netstack->netstack_stackid, 8560 IPPROTO_UDP, AF_INET, 8561 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8562 (in_port_t)udp->udp_port, NULL); 8563 } else { 8564 (*cl_inet_unbind)( 8565 connp->conn_netstack->netstack_stackid, 8566 IPPROTO_UDP, AF_INET6, 8567 (uint8_t *)&(udp->udp_v6src), 8568 (in_port_t)udp->udp_port, NULL); 8569 } 8570 } 8571 8572 rw_enter(&udp->udp_rwlock, RW_WRITER); 8573 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8574 rw_exit(&udp->udp_rwlock); 8575 return (-TOUTSTATE); 8576 } 8577 udp->udp_pending_op = T_UNBIND_REQ; 8578 rw_exit(&udp->udp_rwlock); 8579 8580 /* 8581 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 8582 * and therefore ip_unbind must never return NULL. 8583 */ 8584 ip_unbind(connp); 8585 8586 /* 8587 * Once we're unbound from IP, the pending operation may be cleared 8588 * here. 8589 */ 8590 rw_enter(&udp->udp_rwlock, RW_WRITER); 8591 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8592 us->us_bind_fanout_size)]; 8593 8594 mutex_enter(&udpf->uf_lock); 8595 udp_bind_hash_remove(udp, B_TRUE); 8596 V6_SET_ZERO(udp->udp_v6src); 8597 V6_SET_ZERO(udp->udp_bound_v6src); 8598 udp->udp_port = 0; 8599 mutex_exit(&udpf->uf_lock); 8600 8601 udp->udp_pending_op = -1; 8602 udp->udp_state = TS_UNBND; 8603 if (udp->udp_family == AF_INET6) 8604 (void) udp_build_hdrs(udp); 8605 rw_exit(&udp->udp_rwlock); 8606 8607 return (0); 8608 } 8609 8610 static int 8611 udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error) 8612 { 8613 ire_t *ire; 8614 udp_fanout_t *udpf; 8615 udp_stack_t *us = udp->udp_us; 8616 8617 ASSERT(udp->udp_pending_op != -1); 8618 rw_enter(&udp->udp_rwlock, RW_WRITER); 8619 if (error == 0) { 8620 /* For udp_do_connect() success */ 8621 /* udp_do_bind() success will do nothing in here */ 8622 /* 8623 * If a broadcast/multicast address was bound, set 8624 * the source address to 0. 8625 * This ensures no datagrams with broadcast address 8626 * as source address are emitted (which would violate 8627 * RFC1122 - Hosts requirements) 8628 * 8629 * Note that when connecting the returned IRE is 8630 * for the destination address and we only perform 8631 * the broadcast check for the source address (it 8632 * is OK to connect to a broadcast/multicast address.) 8633 */ 8634 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 8635 ire = (ire_t *)ire_mp->b_rptr; 8636 8637 /* 8638 * Note: we get IRE_BROADCAST for IPv6 to "mark" a 8639 * multicast local address. 8640 */ 8641 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8642 us->us_bind_fanout_size)]; 8643 if (ire->ire_type == IRE_BROADCAST && 8644 udp->udp_state != TS_DATA_XFER) { 8645 ASSERT(udp->udp_pending_op == T_BIND_REQ || 8646 udp->udp_pending_op == O_T_BIND_REQ); 8647 /* 8648 * This was just a local bind to a broadcast 8649 * addr. 
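				 *
				 * For example (illustrative address
				 * only): an endpoint bound to the
				 * subnet broadcast 192.168.1.255 has
				 * udp_v6src cleared here, so datagrams
				 * it sends later carry a unicast source
				 * chosen by IP rather than the broadcast
				 * address, per the RFC 1122 requirement
				 * noted above.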
8650 */ 8651 mutex_enter(&udpf->uf_lock); 8652 V6_SET_ZERO(udp->udp_v6src); 8653 mutex_exit(&udpf->uf_lock); 8654 if (udp->udp_family == AF_INET6) 8655 (void) udp_build_hdrs(udp); 8656 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8657 if (udp->udp_family == AF_INET6) 8658 (void) udp_build_hdrs(udp); 8659 } 8660 } 8661 } else { 8662 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8663 us->us_bind_fanout_size)]; 8664 mutex_enter(&udpf->uf_lock); 8665 8666 if (udp->udp_state == TS_DATA_XFER) { 8667 /* Connect failed */ 8668 /* Revert back to the bound source */ 8669 udp->udp_v6src = udp->udp_bound_v6src; 8670 udp->udp_state = TS_IDLE; 8671 } else { 8672 /* For udp_do_bind() failed */ 8673 V6_SET_ZERO(udp->udp_v6src); 8674 V6_SET_ZERO(udp->udp_bound_v6src); 8675 udp->udp_state = TS_UNBND; 8676 udp_bind_hash_remove(udp, B_TRUE); 8677 udp->udp_port = 0; 8678 } 8679 mutex_exit(&udpf->uf_lock); 8680 if (udp->udp_family == AF_INET6) 8681 (void) udp_build_hdrs(udp); 8682 } 8683 udp->udp_pending_op = -1; 8684 rw_exit(&udp->udp_rwlock); 8685 if (ire_mp != NULL) 8686 freeb(ire_mp); 8687 return (error); 8688 } 8689 8690 /* 8691 * It associates a default destination address with the stream. 8692 */ 8693 static int 8694 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len) 8695 { 8696 sin6_t *sin6; 8697 sin_t *sin; 8698 in6_addr_t v6dst; 8699 ipaddr_t v4dst; 8700 uint16_t dstport; 8701 uint32_t flowinfo; 8702 mblk_t *ire_mp; 8703 udp_fanout_t *udpf; 8704 udp_t *udp, *udp1; 8705 ushort_t ipversion; 8706 udp_stack_t *us; 8707 int error; 8708 8709 udp = connp->conn_udp; 8710 us = udp->udp_us; 8711 8712 /* 8713 * Address has been verified by the caller 8714 */ 8715 switch (len) { 8716 default: 8717 /* 8718 * Should never happen 8719 */ 8720 return (EINVAL); 8721 8722 case sizeof (sin_t): 8723 sin = (sin_t *)sa; 8724 v4dst = sin->sin_addr.s_addr; 8725 dstport = sin->sin_port; 8726 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8727 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8728 ipversion = IPV4_VERSION; 8729 break; 8730 8731 case sizeof (sin6_t): 8732 sin6 = (sin6_t *)sa; 8733 v6dst = sin6->sin6_addr; 8734 dstport = sin6->sin6_port; 8735 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 8736 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 8737 ipversion = IPV4_VERSION; 8738 flowinfo = 0; 8739 } else { 8740 ipversion = IPV6_VERSION; 8741 flowinfo = sin6->sin6_flowinfo; 8742 } 8743 break; 8744 } 8745 8746 if (dstport == 0) 8747 return (-TBADADDR); 8748 8749 rw_enter(&udp->udp_rwlock, RW_WRITER); 8750 8751 /* 8752 * This UDP must have bound to a port already before doing a connect. 8753 * TPI mandates that users must send TPI primitives only 1 at a time 8754 * and wait for the response before sending the next primitive. 
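	 *
	 * (For the non-STREAMS socket path that prior bind is normally the
	 * implicit one: udp_connect() below calls udp_implicit_bind() when
	 * the endpoint is still in TS_UNBND before it reaches this routine.)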
8755 */ 8756 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8757 rw_exit(&udp->udp_rwlock); 8758 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8759 "udp_connect: bad state, %u", udp->udp_state); 8760 return (-TOUTSTATE); 8761 } 8762 udp->udp_pending_op = T_CONN_REQ; 8763 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 8764 8765 if (ipversion == IPV4_VERSION) { 8766 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8767 udp->udp_ip_snd_options_len; 8768 } else { 8769 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8770 } 8771 8772 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8773 us->us_bind_fanout_size)]; 8774 8775 mutex_enter(&udpf->uf_lock); 8776 if (udp->udp_state == TS_DATA_XFER) { 8777 /* Already connected - clear out state */ 8778 udp->udp_v6src = udp->udp_bound_v6src; 8779 udp->udp_state = TS_IDLE; 8780 } 8781 8782 /* 8783 * Create a default IP header with no IP options. 8784 */ 8785 udp->udp_dstport = dstport; 8786 udp->udp_ipversion = ipversion; 8787 if (ipversion == IPV4_VERSION) { 8788 /* 8789 * Interpret a zero destination to mean loopback. 8790 * Update the T_CONN_REQ (sin/sin6) since it is used to 8791 * generate the T_CONN_CON. 8792 */ 8793 if (v4dst == INADDR_ANY) { 8794 v4dst = htonl(INADDR_LOOPBACK); 8795 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8796 if (udp->udp_family == AF_INET) { 8797 sin->sin_addr.s_addr = v4dst; 8798 } else { 8799 sin6->sin6_addr = v6dst; 8800 } 8801 } 8802 udp->udp_v6dst = v6dst; 8803 udp->udp_flowinfo = 0; 8804 8805 /* 8806 * If the destination address is multicast and 8807 * an outgoing multicast interface has been set, 8808 * use the address of that interface as our 8809 * source address if no source address has been set. 8810 */ 8811 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 8812 CLASSD(v4dst) && 8813 udp->udp_multicast_if_addr != INADDR_ANY) { 8814 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 8815 &udp->udp_v6src); 8816 } 8817 } else { 8818 ASSERT(udp->udp_ipversion == IPV6_VERSION); 8819 /* 8820 * Interpret a zero destination to mean loopback. 8821 * Update the T_CONN_REQ (sin/sin6) since it is used to 8822 * generate the T_CONN_CON. 8823 */ 8824 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 8825 v6dst = ipv6_loopback; 8826 sin6->sin6_addr = v6dst; 8827 } 8828 udp->udp_v6dst = v6dst; 8829 udp->udp_flowinfo = flowinfo; 8830 /* 8831 * If the destination address is multicast and 8832 * an outgoing multicast interface has been set, 8833 * then the ip bind logic will pick the correct source 8834 * address (i.e. matching the outgoing multicast interface). 
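		 *
		 * (Illustrative: if the application has selected an
		 * outgoing interface, e.g. via the IPV6_MULTICAST_IF
		 * option, a connect() to a multicast destination such as
		 * ff05::1 leaves udp_v6src unspecified here and IP later
		 * fills in an address belonging to that interface.)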
8835 */ 8836 } 8837 8838 /* 8839 * Verify that the src/port/dst/port is unique for all 8840 * connections in TS_DATA_XFER 8841 */ 8842 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 8843 if (udp1->udp_state != TS_DATA_XFER) 8844 continue; 8845 if (udp->udp_port != udp1->udp_port || 8846 udp->udp_ipversion != udp1->udp_ipversion || 8847 dstport != udp1->udp_dstport || 8848 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 8849 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) || 8850 !(IPCL_ZONE_MATCH(udp->udp_connp, 8851 udp1->udp_connp->conn_zoneid) || 8852 IPCL_ZONE_MATCH(udp1->udp_connp, 8853 udp->udp_connp->conn_zoneid))) 8854 continue; 8855 mutex_exit(&udpf->uf_lock); 8856 udp->udp_pending_op = -1; 8857 rw_exit(&udp->udp_rwlock); 8858 return (-TBADADDR); 8859 } 8860 8861 if (cl_inet_connect2 != NULL) { 8862 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error); 8863 if (error != 0) { 8864 mutex_exit(&udpf->uf_lock); 8865 udp->udp_pending_op = -1; 8866 rw_exit(&udp->udp_rwlock); 8867 return (-TBADADDR); 8868 } 8869 } 8870 8871 udp->udp_state = TS_DATA_XFER; 8872 mutex_exit(&udpf->uf_lock); 8873 8874 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 8875 if (ire_mp == NULL) { 8876 mutex_enter(&udpf->uf_lock); 8877 udp->udp_state = TS_IDLE; 8878 udp->udp_pending_op = -1; 8879 mutex_exit(&udpf->uf_lock); 8880 rw_exit(&udp->udp_rwlock); 8881 return (ENOMEM); 8882 } 8883 8884 rw_exit(&udp->udp_rwlock); 8885 8886 ire_mp->b_wptr += sizeof (ire_t); 8887 ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8888 8889 if (udp->udp_family == AF_INET) { 8890 error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP, 8891 &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port, 8892 V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport, 8893 B_TRUE, B_TRUE); 8894 } else { 8895 error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP, 8896 &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst, 8897 &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE); 8898 } 8899 8900 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 8901 } 8902 8903 /* ARGSUSED */ 8904 static int 8905 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 8906 socklen_t len, sock_connid_t *id, cred_t *cr) 8907 { 8908 conn_t *connp = (conn_t *)proto_handle; 8909 udp_t *udp = connp->conn_udp; 8910 int error; 8911 boolean_t did_bind = B_FALSE; 8912 8913 if (sa == NULL) { 8914 /* 8915 * Disconnect 8916 * Make sure we are connected 8917 */ 8918 if (udp->udp_state != TS_DATA_XFER) 8919 return (EINVAL); 8920 8921 error = udp_disconnect(connp); 8922 return (error); 8923 } 8924 8925 error = proto_verify_ip_addr(udp->udp_family, sa, len); 8926 if (error != 0) 8927 goto done; 8928 8929 /* do an implicit bind if necessary */ 8930 if (udp->udp_state == TS_UNBND) { 8931 error = udp_implicit_bind(connp, cr); 8932 /* 8933 * We could be racing with an actual bind, in which case 8934 * we would see EPROTO. We cross our fingers and try 8935 * to connect. 
8936 */ 8937 if (!(error == 0 || error == EPROTO)) 8938 goto done; 8939 did_bind = B_TRUE; 8940 } 8941 /* 8942 * set SO_DGRAM_ERRIND 8943 */ 8944 udp->udp_dgram_errind = B_TRUE; 8945 8946 error = udp_do_connect(connp, sa, len); 8947 8948 if (error != 0 && did_bind) { 8949 int unbind_err; 8950 8951 unbind_err = udp_do_unbind(connp); 8952 ASSERT(unbind_err == 0); 8953 } 8954 8955 if (error == 0) { 8956 *id = 0; 8957 (*connp->conn_upcalls->su_connected) 8958 (connp->conn_upper_handle, 0, NULL, -1); 8959 } else if (error < 0) { 8960 error = proto_tlitosyserr(-error); 8961 } 8962 8963 done: 8964 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 8965 /* 8966 * No need to hold locks to set state 8967 * after connect failure socket state is undefined 8968 * We set the state only to imitate old sockfs behavior 8969 */ 8970 udp->udp_state = TS_IDLE; 8971 } 8972 return (error); 8973 } 8974 8975 /* ARGSUSED */ 8976 int 8977 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 8978 cred_t *cr) 8979 { 8980 conn_t *connp = (conn_t *)proto_handle; 8981 udp_t *udp = connp->conn_udp; 8982 udp_stack_t *us = udp->udp_us; 8983 int error = 0; 8984 8985 ASSERT(DB_TYPE(mp) == M_DATA); 8986 8987 /* 8988 * If the socket is connected and no change in destination 8989 */ 8990 if (msg->msg_namelen == 0) { 8991 error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid); 8992 if (error == EDESTADDRREQ) 8993 return (error); 8994 else 8995 return (udp->udp_dgram_errind ? error : 0); 8996 } 8997 8998 /* 8999 * Do an implicit bind if necessary. 9000 */ 9001 if (udp->udp_state == TS_UNBND) { 9002 error = udp_implicit_bind(connp, cr); 9003 /* 9004 * We could be racing with an actual bind, in which case 9005 * we would see EPROTO. We cross our fingers and try 9006 * to send. 
9007 */ 9008 if (!(error == 0 || error == EPROTO)) { 9009 freemsg(mp); 9010 return (error); 9011 } 9012 } 9013 9014 rw_enter(&udp->udp_rwlock, RW_WRITER); 9015 9016 if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) { 9017 rw_exit(&udp->udp_rwlock); 9018 freemsg(mp); 9019 return (EISCONN); 9020 } 9021 9022 9023 if (udp->udp_delayed_error != 0) { 9024 boolean_t match; 9025 9026 error = udp->udp_delayed_error; 9027 match = B_FALSE; 9028 udp->udp_delayed_error = 0; 9029 switch (udp->udp_family) { 9030 case AF_INET: { 9031 /* Compare just IP address and port */ 9032 sin_t *sin1 = (sin_t *)msg->msg_name; 9033 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 9034 9035 if (msg->msg_namelen == sizeof (sin_t) && 9036 sin1->sin_port == sin2->sin_port && 9037 sin1->sin_addr.s_addr == sin2->sin_addr.s_addr) 9038 match = B_TRUE; 9039 9040 break; 9041 } 9042 case AF_INET6: { 9043 sin6_t *sin1 = (sin6_t *)msg->msg_name; 9044 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 9045 9046 if (msg->msg_namelen == sizeof (sin6_t) && 9047 sin1->sin6_port == sin2->sin6_port && 9048 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 9049 &sin2->sin6_addr)) 9050 match = B_TRUE; 9051 break; 9052 } 9053 default: 9054 ASSERT(0); 9055 } 9056 9057 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 9058 9059 if (match) { 9060 rw_exit(&udp->udp_rwlock); 9061 freemsg(mp); 9062 return (error); 9063 } 9064 } 9065 9066 error = proto_verify_ip_addr(udp->udp_family, 9067 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 9068 rw_exit(&udp->udp_rwlock); 9069 9070 if (error != 0) { 9071 freemsg(mp); 9072 return (error); 9073 } 9074 9075 error = udp_send_not_connected(connp, mp, 9076 (struct sockaddr *)msg->msg_name, msg->msg_namelen, msg, cr, 9077 curproc->p_pid); 9078 if (error != 0) { 9079 UDP_STAT(us, udp_out_err_output); 9080 freemsg(mp); 9081 } 9082 return (udp->udp_dgram_errind ? 
	    error : 0);
}

void
udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
    boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb)
{
	conn_t	*connp = (conn_t *)proto_handle;
	udp_t	*udp;
	struct T_capability_ack tca;
	struct sockaddr_in6 laddr, faddr;
	socklen_t laddrlen, faddrlen;
	short opts;
	struct stroptions *stropt;
	mblk_t *stropt_mp;
	int error;

	udp = connp->conn_udp;

	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);

	/*
	 * Set up the fallback stream that was allocated.
	 */
	connp->conn_dev = (dev_t)RD(q)->q_ptr;
	connp->conn_minor_arena = WR(q)->q_ptr;

	RD(q)->q_ptr = WR(q)->q_ptr = connp;

	WR(q)->q_qinfo = &udp_winit;

	connp->conn_rq = RD(q);
	connp->conn_wq = WR(q);

	/* Notify stream head about options before sending up data */
	stropt_mp->b_datap->db_type = M_SETOPTS;
	stropt_mp->b_wptr += sizeof (*stropt);
	stropt = (struct stroptions *)stropt_mp->b_rptr;
	stropt->so_flags = SO_WROFF | SO_HIWAT;
	stropt->so_wroff =
	    (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra);
	stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
	putnext(RD(q), stropt_mp);

	/*
	 * Free the helper stream.
	 */
	ip_free_helper_stream(connp);

	if (!direct_sockfs)
		udp_disable_direct_sockfs(udp);

	/*
	 * Collect the information needed to sync with the sonode.
	 */
	udp_do_capability_ack(udp, &tca, TC1_INFO);

	laddrlen = faddrlen = sizeof (sin6_t);
	(void) udp_getsockname((sock_lower_handle_t)connp,
	    (struct sockaddr *)&laddr, &laddrlen, NULL);
	error = udp_getpeername((sock_lower_handle_t)connp,
	    (struct sockaddr *)&faddr, &faddrlen, NULL);
	if (error != 0)
		faddrlen = 0;

	opts = 0;
	if (udp->udp_dgram_errind)
		opts |= SO_DGRAM_ERRIND;
	if (udp->udp_dontroute)
		opts |= SO_DONTROUTE;

	/*
	 * Once we grab the drain lock, no data will be sent up
	 * to the socket. So we notify the socket that the endpoint
	 * is quiescent and it is therefore safe to move data from
	 * the socket to the stream head.
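	 *
	 * (Illustrative note: this path is taken when sockfs has to convert
	 * a datagram socket into a full STREAM, for example when a STREAMS
	 * module is pushed on it.  Anything that arrives while the switch
	 * is in progress is parked on udp_fallback_queue_head and pushed
	 * upstream with putnext() once the callback below returns.)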
9158 */ 9159 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 9160 (struct sockaddr *)&laddr, laddrlen, 9161 (struct sockaddr *)&faddr, faddrlen, opts); 9162 9163 /* 9164 * push up any packets that were queued in udp_t 9165 */ 9166 9167 mutex_enter(&udp->udp_recv_lock); 9168 while (udp->udp_fallback_queue_head != NULL) { 9169 mblk_t *mp; 9170 mp = udp->udp_fallback_queue_head; 9171 udp->udp_fallback_queue_head = mp->b_next; 9172 mutex_exit(&udp->udp_recv_lock); 9173 mp->b_next = NULL; 9174 putnext(RD(q), mp); 9175 mutex_enter(&udp->udp_recv_lock); 9176 } 9177 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 9178 /* 9179 * No longer a streams less socket 9180 */ 9181 connp->conn_flags &= ~IPCL_NONSTR; 9182 mutex_exit(&udp->udp_recv_lock); 9183 9184 ASSERT(connp->conn_ref >= 1); 9185 } 9186 9187 static int 9188 udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9189 { 9190 sin_t *sin = (sin_t *)sa; 9191 sin6_t *sin6 = (sin6_t *)sa; 9192 9193 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9194 ASSERT(udp != NULL); 9195 9196 if (udp->udp_state != TS_DATA_XFER) 9197 return (ENOTCONN); 9198 9199 switch (udp->udp_family) { 9200 case AF_INET: 9201 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9202 9203 if (*salenp < sizeof (sin_t)) 9204 return (EINVAL); 9205 9206 *salenp = sizeof (sin_t); 9207 *sin = sin_null; 9208 sin->sin_family = AF_INET; 9209 sin->sin_port = udp->udp_dstport; 9210 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 9211 break; 9212 case AF_INET6: 9213 if (*salenp < sizeof (sin6_t)) 9214 return (EINVAL); 9215 9216 *salenp = sizeof (sin6_t); 9217 *sin6 = sin6_null; 9218 sin6->sin6_family = AF_INET6; 9219 sin6->sin6_port = udp->udp_dstport; 9220 sin6->sin6_addr = udp->udp_v6dst; 9221 sin6->sin6_flowinfo = udp->udp_flowinfo; 9222 break; 9223 } 9224 9225 return (0); 9226 } 9227 9228 /* ARGSUSED */ 9229 int 9230 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9231 socklen_t *salenp, cred_t *cr) 9232 { 9233 conn_t *connp = (conn_t *)proto_handle; 9234 udp_t *udp = connp->conn_udp; 9235 int error; 9236 9237 ASSERT(udp != NULL); 9238 9239 rw_enter(&udp->udp_rwlock, RW_READER); 9240 9241 error = udp_do_getpeername(udp, sa, salenp); 9242 9243 rw_exit(&udp->udp_rwlock); 9244 9245 return (error); 9246 } 9247 9248 static int 9249 udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9250 { 9251 sin_t *sin = (sin_t *)sa; 9252 sin6_t *sin6 = (sin6_t *)sa; 9253 9254 ASSERT(udp != NULL); 9255 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9256 9257 switch (udp->udp_family) { 9258 case AF_INET: 9259 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9260 9261 if (*salenp < sizeof (sin_t)) 9262 return (EINVAL); 9263 9264 *salenp = sizeof (sin_t); 9265 *sin = sin_null; 9266 sin->sin_family = AF_INET; 9267 if (udp->udp_state == TS_UNBND) { 9268 break; 9269 } 9270 sin->sin_port = udp->udp_port; 9271 9272 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 9273 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9274 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 9275 } else { 9276 /* 9277 * INADDR_ANY 9278 * udp_v6src is not set, we might be bound to 9279 * broadcast/multicast. 
Use udp_bound_v6src as 9280 * local address instead (that could 9281 * also still be INADDR_ANY) 9282 */ 9283 sin->sin_addr.s_addr = 9284 V4_PART_OF_V6(udp->udp_bound_v6src); 9285 } 9286 break; 9287 9288 case AF_INET6: 9289 if (*salenp < sizeof (sin6_t)) 9290 return (EINVAL); 9291 9292 *salenp = sizeof (sin6_t); 9293 *sin6 = sin6_null; 9294 sin6->sin6_family = AF_INET6; 9295 if (udp->udp_state == TS_UNBND) { 9296 break; 9297 } 9298 sin6->sin6_port = udp->udp_port; 9299 9300 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9301 sin6->sin6_addr = udp->udp_v6src; 9302 } else { 9303 /* 9304 * UNSPECIFIED 9305 * udp_v6src is not set, we might be bound to 9306 * broadcast/multicast. Use udp_bound_v6src as 9307 * local address instead (that could 9308 * also still be UNSPECIFIED) 9309 */ 9310 sin6->sin6_addr = udp->udp_bound_v6src; 9311 } 9312 } 9313 return (0); 9314 } 9315 9316 /* ARGSUSED */ 9317 int 9318 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9319 socklen_t *salenp, cred_t *cr) 9320 { 9321 conn_t *connp = (conn_t *)proto_handle; 9322 udp_t *udp = connp->conn_udp; 9323 int error; 9324 9325 ASSERT(udp != NULL); 9326 rw_enter(&udp->udp_rwlock, RW_READER); 9327 9328 error = udp_do_getsockname(udp, sa, salenp); 9329 9330 rw_exit(&udp->udp_rwlock); 9331 9332 return (error); 9333 } 9334 9335 int 9336 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9337 void *optvalp, socklen_t *optlen, cred_t *cr) 9338 { 9339 conn_t *connp = (conn_t *)proto_handle; 9340 udp_t *udp = connp->conn_udp; 9341 int error; 9342 t_uscalar_t max_optbuf_len; 9343 void *optvalp_buf; 9344 int len; 9345 9346 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 9347 udp_opt_obj.odb_opt_des_arr, 9348 udp_opt_obj.odb_opt_arr_cnt, 9349 udp_opt_obj.odb_topmost_tpiprovider, 9350 B_FALSE, B_TRUE, cr); 9351 if (error != 0) { 9352 if (error < 0) 9353 error = proto_tlitosyserr(-error); 9354 return (error); 9355 } 9356 9357 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 9358 rw_enter(&udp->udp_rwlock, RW_READER); 9359 len = udp_opt_get(connp, level, option_name, optvalp_buf); 9360 rw_exit(&udp->udp_rwlock); 9361 9362 if (len < 0) { 9363 /* 9364 * Pass on to IP 9365 */ 9366 kmem_free(optvalp_buf, max_optbuf_len); 9367 return (ip_get_options(connp, level, option_name, 9368 optvalp, optlen, cr)); 9369 } else { 9370 /* 9371 * update optlen and copy option value 9372 */ 9373 t_uscalar_t size = MIN(len, *optlen); 9374 bcopy(optvalp_buf, optvalp, size); 9375 bcopy(&size, optlen, sizeof (size)); 9376 9377 kmem_free(optvalp_buf, max_optbuf_len); 9378 return (0); 9379 } 9380 } 9381 9382 int 9383 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9384 const void *optvalp, socklen_t optlen, cred_t *cr) 9385 { 9386 conn_t *connp = (conn_t *)proto_handle; 9387 udp_t *udp = connp->conn_udp; 9388 int error; 9389 9390 error = proto_opt_check(level, option_name, optlen, NULL, 9391 udp_opt_obj.odb_opt_des_arr, 9392 udp_opt_obj.odb_opt_arr_cnt, 9393 udp_opt_obj.odb_topmost_tpiprovider, 9394 B_TRUE, B_FALSE, cr); 9395 9396 if (error != 0) { 9397 if (error < 0) 9398 error = proto_tlitosyserr(-error); 9399 return (error); 9400 } 9401 9402 rw_enter(&udp->udp_rwlock, RW_WRITER); 9403 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 9404 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 9405 NULL, cr); 9406 rw_exit(&udp->udp_rwlock); 9407 9408 if (error < 0) { 9409 /* 9410 * Pass on to ip 9411 */ 9412 error = 
ip_set_options(connp, level, option_name, optvalp, 9413 optlen, cr); 9414 } 9415 9416 return (error); 9417 } 9418 9419 void 9420 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 9421 { 9422 conn_t *connp = (conn_t *)proto_handle; 9423 udp_t *udp = connp->conn_udp; 9424 9425 mutex_enter(&udp->udp_recv_lock); 9426 connp->conn_flow_cntrld = B_FALSE; 9427 mutex_exit(&udp->udp_recv_lock); 9428 } 9429 9430 /* ARGSUSED */ 9431 int 9432 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 9433 { 9434 conn_t *connp = (conn_t *)proto_handle; 9435 9436 /* shut down the send side */ 9437 if (how != SHUT_RD) 9438 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9439 SOCK_OPCTL_SHUT_SEND, 0); 9440 /* shut down the recv side */ 9441 if (how != SHUT_WR) 9442 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9443 SOCK_OPCTL_SHUT_RECV, 0); 9444 return (0); 9445 } 9446 9447 int 9448 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 9449 int mode, int32_t *rvalp, cred_t *cr) 9450 { 9451 conn_t *connp = (conn_t *)proto_handle; 9452 int error; 9453 9454 switch (cmd) { 9455 case ND_SET: 9456 case ND_GET: 9457 case _SIOCSOCKFALLBACK: 9458 case TI_GETPEERNAME: 9459 case TI_GETMYNAME: 9460 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 9461 cmd)); 9462 error = EINVAL; 9463 break; 9464 default: 9465 /* 9466 * Pass on to IP using helper stream 9467 */ 9468 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 9469 cmd, arg, mode, cr, rvalp); 9470 break; 9471 } 9472 return (error); 9473 } 9474 9475 /* ARGSUSED */ 9476 int 9477 udp_accept(sock_lower_handle_t lproto_handle, 9478 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 9479 cred_t *cr) 9480 { 9481 return (EOPNOTSUPP); 9482 } 9483 9484 /* ARGSUSED */ 9485 int 9486 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 9487 { 9488 return (EOPNOTSUPP); 9489 } 9490 9491 sock_downcalls_t sock_udp_downcalls = { 9492 udp_activate, /* sd_activate */ 9493 udp_accept, /* sd_accept */ 9494 udp_bind, /* sd_bind */ 9495 udp_listen, /* sd_listen */ 9496 udp_connect, /* sd_connect */ 9497 udp_getpeername, /* sd_getpeername */ 9498 udp_getsockname, /* sd_getsockname */ 9499 udp_getsockopt, /* sd_getsockopt */ 9500 udp_setsockopt, /* sd_setsockopt */ 9501 udp_send, /* sd_send */ 9502 NULL, /* sd_send_uio */ 9503 NULL, /* sd_recv_uio */ 9504 NULL, /* sd_poll */ 9505 udp_shutdown, /* sd_shutdown */ 9506 udp_clr_flowctrl, /* sd_setflowctrl */ 9507 udp_ioctl, /* sd_ioctl */ 9508 udp_close /* sd_close */ 9509 }; 9510
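/*
 * Illustrative only: a rough sketch, not code from this file, of how the
 * non-STREAMS socket layer dispatches through sock_udp_downcalls above.
 * The left-hand entry points are named loosely after the corresponding
 * socket calls.
 *
 *	bind(3SOCKET)		-> sd_bind	-> udp_bind()
 *	connect(3SOCKET)	-> sd_connect	-> udp_connect()
 *	sendmsg(3SOCKET)	-> sd_send	-> udp_send()
 *	close(2)		-> sd_close	-> udp_close()
 *
 * UDP supplies no sd_send_uio, sd_recv_uio or sd_poll entries, so those
 * slots in the table are NULL.
 */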