1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/pattr.h> 31 #include <sys/stropts.h> 32 #include <sys/strlog.h> 33 #include <sys/strsun.h> 34 #include <sys/time.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/suntpi.h> 42 #include <sys/xti_inet.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/ucred.h> 46 #include <sys/zone.h> 47 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/sockio.h> 51 #include <sys/vtrace.h> 52 #include <sys/sdt.h> 53 #include <sys/debug.h> 54 #include <sys/isa_defs.h> 55 #include <sys/random.h> 56 #include <netinet/in.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 #include <netinet/udp.h> 60 #include <net/if.h> 61 #include <net/route.h> 62 63 #include <inet/common.h> 64 #include <inet/ip.h> 65 #include <inet/ip_impl.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_ire.h> 68 #include <inet/ip_if.h> 69 #include <inet/ip_multi.h> 70 #include <inet/ip_ndp.h> 71 #include <inet/proto_set.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/optcom.h> 75 #include <inet/snmpcom.h> 76 #include <inet/kstatcom.h> 77 #include <inet/udp_impl.h> 78 #include <inet/ipclassifier.h> 79 #include <inet/ipsec_impl.h> 80 #include <inet/ipp_common.h> 81 #include <sys/squeue_impl.h> 82 #include <inet/ipnet.h> 83 #include <sys/ethernet.h> 84 85 /* 86 * The ipsec_info.h header file is here since it has the definition for the 87 * M_CTL message types used by IP to convey information to the ULP. The 88 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 
89 */ 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 93 #include <sys/tsol/label.h> 94 #include <sys/tsol/tnet.h> 95 #include <rpc/pmap_prot.h> 96 97 /* 98 * Synchronization notes: 99 * 100 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 101 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock. 102 * We also use conn_lock when updating things that affect the IP classifier 103 * lookup. 104 * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock. 105 * 106 * The fanout lock uf_lock: 107 * When a UDP endpoint is bound to a local port, it is inserted into 108 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 109 * The size of the array is controlled by the udp_bind_fanout_size variable. 110 * This variable can be changed in /etc/system if the default value is 111 * not large enough. Each bind hash bucket is protected by a per bucket 112 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 113 * structure and a few other fields in the udp_t. A UDP endpoint is removed 114 * from the bind hash list only when it is being unbound or being closed. 115 * The per bucket lock also protects a UDP endpoint's state changes. 116 * 117 * The udp_rwlock: 118 * This protects most of the other fields in the udp_t. The exact list of 119 * fields which are protected by each of the above locks is documented in 120 * the udp_t structure definition. 121 * 122 * Plumbing notes: 123 * UDP is always a device driver. For compatibility with mibopen() code 124 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 125 * dummy module. 126 * 127 * The above implies that we don't support any intermediate module to 128 * reside in between /dev/ip and udp -- in fact, we never supported such 129 * scenario in the past as the inter-layer communication semantics have 130 * always been private. 
131 */ 132 133 /* For /etc/system control */ 134 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 135 136 #define NDD_TOO_QUICK_MSG \ 137 "ndd get info rate too high for non-privileged users, try again " \ 138 "later.\n" 139 #define NDD_OUT_OF_BUF_MSG "<< Out of buffer >>\n" 140 141 /* Option processing attrs */ 142 typedef struct udpattrs_s { 143 union { 144 ip6_pkt_t *udpattr_ipp6; /* For V6 */ 145 ip4_pkt_t *udpattr_ipp4; /* For V4 */ 146 } udpattr_ippu; 147 #define udpattr_ipp6 udpattr_ippu.udpattr_ipp6 148 #define udpattr_ipp4 udpattr_ippu.udpattr_ipp4 149 mblk_t *udpattr_mb; 150 boolean_t udpattr_credset; 151 } udpattrs_t; 152 153 static void udp_addr_req(queue_t *q, mblk_t *mp); 154 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 155 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 156 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 157 static int udp_build_hdrs(udp_t *udp); 158 static void udp_capability_req(queue_t *q, mblk_t *mp); 159 static int udp_tpi_close(queue_t *q, int flags); 160 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 161 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 162 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 163 int sys_error); 164 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 165 t_scalar_t tlierr, int unixerr); 166 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 167 cred_t *cr); 168 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 169 char *value, caddr_t cp, cred_t *cr); 170 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 171 char *value, caddr_t cp, cred_t *cr); 172 static void udp_icmp_error(conn_t *, mblk_t *); 173 static void udp_icmp_error_ipv6(conn_t *, mblk_t *); 174 static void udp_info_req(queue_t *q, mblk_t *mp); 175 static void udp_input(void *, mblk_t *, void *); 176 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 177 t_scalar_t addr_length); 178 
static void udp_lrput(queue_t *, mblk_t *); 179 static void udp_lwput(queue_t *, mblk_t *); 180 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 181 cred_t *credp, boolean_t isv6); 182 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 183 cred_t *credp); 184 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 185 cred_t *credp); 186 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 187 int *errorp, udpattrs_t *udpattrs); 188 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 189 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 190 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 191 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 192 cred_t *cr); 193 static void udp_report_item(mblk_t *mp, udp_t *udp); 194 static int udp_rinfop(queue_t *q, infod_t *dp); 195 static int udp_rrw(queue_t *q, struiod_t *dp); 196 static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 197 cred_t *cr); 198 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, 199 ipha_t *ipha); 200 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 201 t_scalar_t destlen, t_scalar_t err); 202 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 203 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 204 boolean_t random); 205 static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, 206 int *, boolean_t, struct nmsghdr *, cred_t *, pid_t); 207 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 208 int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid); 209 static void udp_wput_other(queue_t *q, mblk_t *mp); 210 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 211 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 212 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 213 214 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 215 
static void udp_stack_fini(netstackid_t stackid, void *arg); 216 217 static void *udp_kstat_init(netstackid_t stackid); 218 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 219 static void *udp_kstat2_init(netstackid_t, udp_stat_t *); 220 static void udp_kstat2_fini(netstackid_t, kstat_t *); 221 static int udp_kstat_update(kstat_t *kp, int rw); 222 223 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 224 uint_t pkt_len); 225 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 226 static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t); 227 228 static int udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *, 229 cred_t *, pid_t); 230 231 /* Common routine for TPI and socket module */ 232 static conn_t *udp_do_open(cred_t *, boolean_t, int); 233 static void udp_do_close(conn_t *); 234 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 235 boolean_t); 236 static int udp_do_unbind(conn_t *); 237 static int udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *); 238 static int udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *); 239 240 int udp_getsockname(sock_lower_handle_t, 241 struct sockaddr *, socklen_t *, cred_t *); 242 int udp_getpeername(sock_lower_handle_t, 243 struct sockaddr *, socklen_t *, cred_t *); 244 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 245 cred_t *cr); 246 static int udp_post_ip_bind_connect(udp_t *, mblk_t *, int); 247 248 #define UDP_RECV_HIWATER (56 * 1024) 249 #define UDP_RECV_LOWATER 128 250 #define UDP_XMIT_HIWATER (56 * 1024) 251 #define UDP_XMIT_LOWATER 1024 252 253 /* 254 * The following is defined in tcp.c 255 */ 256 extern int (*cl_inet_connect2)(netstackid_t stack_id, 257 uint8_t protocol, boolean_t is_outgoing, 258 sa_family_t addr_family, 259 uint8_t *laddrp, in_port_t lport, 260 uint8_t *faddrp, in_port_t fport, void *args); 261 262 /* 263 * Checks if the given destination addr/port is allowed out. 
/*
 * Checks if the given destination addr/port is allowed out.
 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
 * Called for each connect() and for sendto()/sendmsg() to a different
 * destination.
 * For connect(), called in udp_connect().
 * For sendto()/sendmsg(), called in udp_output_v{4,6}().
 *
 * This macro assumes that the cl_inet_connect2 hook is not NULL.
 * Please check this before calling this macro.
 *
 * The macro expands to a brace-enclosed block that always assigns `err':
 * it is first cleared and then set to whatever the hook returns.  The
 * IPv4 case passes the last 32-bit word of the IPv6-format local and
 * foreign addresses; the IPv6 case passes both addresses whole.
 *
 * void
 * CL_INET_UDP_CONNECT(conn_t *cp, udp_t *udp, boolean_t is_outgoing,
 *	in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
 */
#define	CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) {	\
	(err) = 0;							\
	/*								\
	 * Running in cluster mode - check and register active		\
	 * "connection" information					\
	 */								\
	if ((udp)->udp_ipversion == IPV4_VERSION)			\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, is_outgoing, AF_INET,			\
		    (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]),	\
		    (udp)->udp_port,					\
		    (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]),		\
		    (in_port_t)(fport), NULL);				\
	else								\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, is_outgoing, AF_INET6,			\
		    (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port,	\
		    (uint8_t *)(faddrp), (in_port_t)(fport), NULL);	\
}

/*
 * Module information referenced by every qinit below.  The last two
 * initializers are the receive high and low water marks.
 */
static struct module_info udp_mod_info =  {
	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};

/*
 * Entry points for UDP as a device.
 * We have separate open functions for the /dev/udp and /dev/udp6 devices.
 * The two read-side tables differ only in that open routine.
 */
static struct qinit udp_rinitv4 = {
	NULL, NULL, udp_openv4, udp_tpi_close, NULL,
	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

static struct qinit udp_rinitv6 = {
	NULL, NULL, udp_openv6, udp_tpi_close, NULL,
	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

/* Write side shared by the IPv4 and IPv6 devices. */
static struct qinit udp_winit = {
	(pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL,
	&udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE
};

/* UDP entry point during fallback */
struct qinit udp_fallback_sock_winit = {
	(pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
};

/*
 * UDP needs to handle I_LINK and I_PLINK since ifconfig
 * likes to use it as a place to hang the various streams.
 */
static struct qinit udp_lrinit = {
	(pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL,
	&udp_mod_info
};

static struct qinit udp_lwinit = {
	(pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL,
	&udp_mod_info
};

/* For AF_INET aka /dev/udp */
struct streamtab udpinfov4 = {
	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
};

/* For AF_INET6 aka /dev/udp6 */
struct streamtab udpinfov6 = {
	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
};

static sin_t	sin_null;	/* Zero address for quick clears */
static sin6_t	sin6_null;	/* Zero address for quick clears */

/* Largest IPv4 UDP payload: the IP maximum less the UDP and IP headers. */
#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* Largest IPv6 UDP payload: the IP maximum less the UDP and IPv6 headers. */
#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

/* IPv6 counterpart of udp_g_t_info_ack_ipv4. */
static struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t), /* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp.  These are loaded into us_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 * Each row is { minimum, maximum, default value, ndd name }.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
 /*min		max		value		name */
 { 0L,		256,		32,		"udp_wroff_extra" },
 { 1L,		255,		255,		"udp_ipv4_ttl" },
 { 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
 { 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
 { 0,		1,		1,		"udp_do_checksum" },
 { 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
 { 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER,	"udp_xmit_hiwat"},
 { 0,		     (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"},
 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER,	"udp_recv_hiwat"},
 { 65536,	(1<<30),	2*1024*1024,	"udp_max_buf"},
 { 100,		60000,		1000,		"udp_ndd_get_info_interval"},
};
/* END CSTYLED */

/* Settable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL
 */

void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;
void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;

typedef union T_primitives *t_primp_t;

/*
 * Return the next anonymous port in the privileged port range for
 * bind checking.
 *
 * Trusted Extension (TX) notes: TX allows administrator to mark or
 * reserve ports as Multilevel ports (MLP). MLP has special function
 * on TX systems. Once a port is made MLP, it's not available as
 * ordinary port. This creates "holes" in the port name space. It
 * may be necessary to skip the "holes" to find a suitable anon port.
440 */ 441 static in_port_t 442 udp_get_next_priv_port(udp_t *udp) 443 { 444 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 445 in_port_t nextport; 446 boolean_t restart = B_FALSE; 447 udp_stack_t *us = udp->udp_us; 448 449 retry: 450 if (next_priv_port < us->us_min_anonpriv_port || 451 next_priv_port >= IPPORT_RESERVED) { 452 next_priv_port = IPPORT_RESERVED - 1; 453 if (restart) 454 return (0); 455 restart = B_TRUE; 456 } 457 458 if (is_system_labeled() && 459 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 460 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 461 next_priv_port = nextport; 462 goto retry; 463 } 464 465 return (next_priv_port--); 466 } 467 468 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 469 /* ARGSUSED */ 470 static int 471 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 472 { 473 udp_fanout_t *udpf; 474 int i; 475 zoneid_t zoneid; 476 conn_t *connp; 477 udp_t *udp; 478 udp_stack_t *us; 479 480 connp = Q_TO_CONN(q); 481 udp = connp->conn_udp; 482 us = udp->udp_us; 483 484 /* Refer to comments in udp_status_report(). */ 485 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 486 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 487 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 488 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 489 return (0); 490 } 491 } 492 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 493 /* The following may work even if we cannot get a large buf. */ 494 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 495 return (0); 496 } 497 498 (void) mi_mpprintf(mp, 499 "UDP " MI_COL_HDRPAD_STR 500 /* 12345678[89ABCDEF] */ 501 " zone lport src addr dest addr port state"); 502 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 503 504 zoneid = connp->conn_zoneid; 505 506 for (i = 0; i < us->us_bind_fanout_size; i++) { 507 udpf = &us->us_bind_fanout[i]; 508 mutex_enter(&udpf->uf_lock); 509 510 /* Print the hash index. 
*/ 511 udp = udpf->uf_udp; 512 if (zoneid != GLOBAL_ZONEID) { 513 /* skip to first entry in this zone; might be none */ 514 while (udp != NULL && 515 udp->udp_connp->conn_zoneid != zoneid) 516 udp = udp->udp_bind_hash; 517 } 518 if (udp != NULL) { 519 uint_t print_len, buf_len; 520 521 buf_len = mp->b_cont->b_datap->db_lim - 522 mp->b_cont->b_wptr; 523 print_len = snprintf((char *)mp->b_cont->b_wptr, 524 buf_len, "%d\n", i); 525 if (print_len < buf_len) { 526 mp->b_cont->b_wptr += print_len; 527 } else { 528 mp->b_cont->b_wptr += buf_len; 529 } 530 for (; udp != NULL; udp = udp->udp_bind_hash) { 531 if (zoneid == GLOBAL_ZONEID || 532 zoneid == udp->udp_connp->conn_zoneid) 533 udp_report_item(mp->b_cont, udp); 534 } 535 } 536 mutex_exit(&udpf->uf_lock); 537 } 538 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 539 return (0); 540 } 541 542 /* 543 * Hash list removal routine for udp_t structures. 544 */ 545 static void 546 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 547 { 548 udp_t *udpnext; 549 kmutex_t *lockp; 550 udp_stack_t *us = udp->udp_us; 551 552 if (udp->udp_ptpbhn == NULL) 553 return; 554 555 /* 556 * Extract the lock pointer in case there are concurrent 557 * hash_remove's for this instance. 
558 */ 559 ASSERT(udp->udp_port != 0); 560 if (!caller_holds_lock) { 561 lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 562 us->us_bind_fanout_size)].uf_lock; 563 ASSERT(lockp != NULL); 564 mutex_enter(lockp); 565 } 566 if (udp->udp_ptpbhn != NULL) { 567 udpnext = udp->udp_bind_hash; 568 if (udpnext != NULL) { 569 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 570 udp->udp_bind_hash = NULL; 571 } 572 *udp->udp_ptpbhn = udpnext; 573 udp->udp_ptpbhn = NULL; 574 } 575 if (!caller_holds_lock) { 576 mutex_exit(lockp); 577 } 578 } 579 580 static void 581 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 582 { 583 udp_t **udpp; 584 udp_t *udpnext; 585 586 ASSERT(MUTEX_HELD(&uf->uf_lock)); 587 ASSERT(udp->udp_ptpbhn == NULL); 588 udpp = &uf->uf_udp; 589 udpnext = udpp[0]; 590 if (udpnext != NULL) { 591 /* 592 * If the new udp bound to the INADDR_ANY address 593 * and the first one in the list is not bound to 594 * INADDR_ANY we skip all entries until we find the 595 * first one bound to INADDR_ANY. 596 * This makes sure that applications binding to a 597 * specific address get preference over those binding to 598 * INADDR_ANY. 599 */ 600 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 601 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 602 while ((udpnext = udpp[0]) != NULL && 603 !V6_OR_V4_INADDR_ANY( 604 udpnext->udp_bound_v6src)) { 605 udpp = &(udpnext->udp_bind_hash); 606 } 607 if (udpnext != NULL) 608 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 609 } else { 610 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 611 } 612 } 613 udp->udp_bind_hash = udpnext; 614 udp->udp_ptpbhn = udpp; 615 udpp[0] = udp; 616 } 617 618 /* 619 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 620 * passed to udp_wput. 621 * It associates a port number and local address with the stream. 622 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 623 * protocol type (IPPROTO_UDP) placed in the message following the address. 
624 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 625 * (Called as writer.) 626 * 627 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 628 * without setting SO_REUSEADDR. This is needed so that they 629 * can be viewed as two independent transport protocols. 630 * However, anonymouns ports are allocated from the same range to avoid 631 * duplicating the us->us_next_port_to_try. 632 */ 633 static void 634 udp_tpi_bind(queue_t *q, mblk_t *mp) 635 { 636 sin_t *sin; 637 sin6_t *sin6; 638 mblk_t *mp1; 639 struct T_bind_req *tbr; 640 conn_t *connp; 641 udp_t *udp; 642 int error; 643 struct sockaddr *sa; 644 cred_t *cr; 645 646 /* 647 * All Solaris components should pass a db_credp 648 * for this TPI message, hence we ASSERT. 649 * But in case there is some other M_PROTO that looks 650 * like a TPI message sent by some other kernel 651 * component, we check and return an error. 652 */ 653 cr = msg_getcred(mp, NULL); 654 ASSERT(cr != NULL); 655 if (cr == NULL) { 656 udp_err_ack(q, mp, TSYSERR, EINVAL); 657 return; 658 } 659 660 connp = Q_TO_CONN(q); 661 udp = connp->conn_udp; 662 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 663 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 664 "udp_bind: bad req, len %u", 665 (uint_t)(mp->b_wptr - mp->b_rptr)); 666 udp_err_ack(q, mp, TPROTO, 0); 667 return; 668 } 669 if (udp->udp_state != TS_UNBND) { 670 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 671 "udp_bind: bad state, %u", udp->udp_state); 672 udp_err_ack(q, mp, TOUTSTATE, 0); 673 return; 674 } 675 /* 676 * Reallocate the message to make sure we have enough room for an 677 * address and the protocol type. 678 */ 679 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 680 if (!mp1) { 681 udp_err_ack(q, mp, TSYSERR, ENOMEM); 682 return; 683 } 684 685 mp = mp1; 686 687 /* Reset the message type in preparation for shipping it back. 
*/ 688 DB_TYPE(mp) = M_PCPROTO; 689 690 tbr = (struct T_bind_req *)mp->b_rptr; 691 switch (tbr->ADDR_length) { 692 case 0: /* Request for a generic port */ 693 tbr->ADDR_offset = sizeof (struct T_bind_req); 694 if (udp->udp_family == AF_INET) { 695 tbr->ADDR_length = sizeof (sin_t); 696 sin = (sin_t *)&tbr[1]; 697 *sin = sin_null; 698 sin->sin_family = AF_INET; 699 mp->b_wptr = (uchar_t *)&sin[1]; 700 sa = (struct sockaddr *)sin; 701 } else { 702 ASSERT(udp->udp_family == AF_INET6); 703 tbr->ADDR_length = sizeof (sin6_t); 704 sin6 = (sin6_t *)&tbr[1]; 705 *sin6 = sin6_null; 706 sin6->sin6_family = AF_INET6; 707 mp->b_wptr = (uchar_t *)&sin6[1]; 708 sa = (struct sockaddr *)sin6; 709 } 710 break; 711 712 case sizeof (sin_t): /* Complete IPv4 address */ 713 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 714 sizeof (sin_t)); 715 if (sa == NULL || !OK_32PTR((char *)sa)) { 716 udp_err_ack(q, mp, TSYSERR, EINVAL); 717 return; 718 } 719 if (udp->udp_family != AF_INET || 720 sa->sa_family != AF_INET) { 721 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 722 return; 723 } 724 break; 725 726 case sizeof (sin6_t): /* complete IPv6 address */ 727 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 728 sizeof (sin6_t)); 729 if (sa == NULL || !OK_32PTR((char *)sa)) { 730 udp_err_ack(q, mp, TSYSERR, EINVAL); 731 return; 732 } 733 if (udp->udp_family != AF_INET6 || 734 sa->sa_family != AF_INET6) { 735 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 736 return; 737 } 738 break; 739 740 default: /* Invalid request */ 741 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 742 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 743 udp_err_ack(q, mp, TBADADDR, 0); 744 return; 745 } 746 747 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 748 tbr->PRIM_type != O_T_BIND_REQ); 749 750 if (error != 0) { 751 if (error > 0) { 752 udp_err_ack(q, mp, TSYSERR, error); 753 } else { 754 udp_err_ack(q, mp, -error, 0); 755 } 756 } else { 757 tbr->PRIM_type = T_BIND_ACK; 758 
qreply(q, mp); 759 } 760 } 761 762 /* 763 * This routine handles each T_CONN_REQ message passed to udp. It 764 * associates a default destination address with the stream. 765 * 766 * This routine sends down a T_BIND_REQ to IP with the following mblks: 767 * T_BIND_REQ - specifying local and remote address/port 768 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 769 * T_OK_ACK - for the T_CONN_REQ 770 * T_CONN_CON - to keep the TPI user happy 771 * 772 * The connect completes in udp_do_connect. 773 * When a T_BIND_ACK is received information is extracted from the IRE 774 * and the two appended messages are sent to the TPI user. 775 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 776 * convert it to an error ack for the appropriate primitive. 777 */ 778 static void 779 udp_tpi_connect(queue_t *q, mblk_t *mp) 780 { 781 mblk_t *mp1; 782 udp_t *udp; 783 conn_t *connp = Q_TO_CONN(q); 784 int error; 785 socklen_t len; 786 struct sockaddr *sa; 787 struct T_conn_req *tcr; 788 cred_t *cr; 789 790 /* 791 * All Solaris components should pass a db_credp 792 * for this TPI message, hence we ASSERT. 793 * But in case there is some other M_PROTO that looks 794 * like a TPI message sent by some other kernel 795 * component, we check and return an error. 796 */ 797 cr = msg_getcred(mp, NULL); 798 ASSERT(cr != NULL); 799 if (cr == NULL) { 800 udp_err_ack(q, mp, TSYSERR, EINVAL); 801 return; 802 } 803 804 udp = connp->conn_udp; 805 tcr = (struct T_conn_req *)mp->b_rptr; 806 807 /* A bit of sanity checking */ 808 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 809 udp_err_ack(q, mp, TPROTO, 0); 810 return; 811 } 812 813 if (tcr->OPT_length != 0) { 814 udp_err_ack(q, mp, TBADOPT, 0); 815 return; 816 } 817 818 /* 819 * Determine packet type based on type of address passed in 820 * the request should contain an IPv4 or IPv6 address. 
821 * Make sure that address family matches the type of 822 * family of the the address passed down 823 */ 824 len = tcr->DEST_length; 825 switch (tcr->DEST_length) { 826 default: 827 udp_err_ack(q, mp, TBADADDR, 0); 828 return; 829 830 case sizeof (sin_t): 831 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 832 sizeof (sin_t)); 833 break; 834 835 case sizeof (sin6_t): 836 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 837 sizeof (sin6_t)); 838 break; 839 } 840 841 error = proto_verify_ip_addr(udp->udp_family, sa, len); 842 if (error != 0) { 843 udp_err_ack(q, mp, TSYSERR, error); 844 return; 845 } 846 847 /* 848 * We have to send a connection confirmation to 849 * keep TLI happy. 850 */ 851 if (udp->udp_family == AF_INET) { 852 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 853 sizeof (sin_t), NULL, 0); 854 } else { 855 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 856 sizeof (sin6_t), NULL, 0); 857 } 858 if (mp1 == NULL) { 859 udp_err_ack(q, mp, TSYSERR, ENOMEM); 860 return; 861 } 862 863 /* 864 * Allocate the largest primitive we need to send back 865 * T_error_ack is > than T_ok_ack 866 */ 867 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 868 if (mp == NULL) { 869 /* Unable to reuse the T_CONN_REQ for the ack. 
*/ 870 freemsg(mp1); 871 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 872 return; 873 } 874 875 error = udp_do_connect(connp, sa, len, cr); 876 if (error != 0) { 877 freeb(mp1); 878 if (error < 0) 879 udp_err_ack(q, mp, -error, 0); 880 else 881 udp_err_ack(q, mp, TSYSERR, error); 882 } else { 883 mp = mi_tpi_ok_ack_alloc(mp); 884 ASSERT(mp != NULL); 885 putnext(connp->conn_rq, mp); 886 putnext(connp->conn_rq, mp1); 887 } 888 } 889 890 static int 891 udp_tpi_close(queue_t *q, int flags) 892 { 893 conn_t *connp; 894 895 if (flags & SO_FALLBACK) { 896 /* 897 * stream is being closed while in fallback 898 * simply free the resources that were allocated 899 */ 900 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 901 qprocsoff(q); 902 goto done; 903 } 904 905 connp = Q_TO_CONN(q); 906 udp_do_close(connp); 907 done: 908 q->q_ptr = WR(q)->q_ptr = NULL; 909 return (0); 910 } 911 912 /* 913 * Called in the close path to quiesce the conn 914 */ 915 void 916 udp_quiesce_conn(conn_t *connp) 917 { 918 udp_t *udp = connp->conn_udp; 919 920 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 921 /* 922 * Running in cluster mode - register unbind information 923 */ 924 if (udp->udp_ipversion == IPV4_VERSION) { 925 (*cl_inet_unbind)( 926 connp->conn_netstack->netstack_stackid, 927 IPPROTO_UDP, AF_INET, 928 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 929 (in_port_t)udp->udp_port, NULL); 930 } else { 931 (*cl_inet_unbind)( 932 connp->conn_netstack->netstack_stackid, 933 IPPROTO_UDP, AF_INET6, 934 (uint8_t *)(&(udp->udp_v6src)), 935 (in_port_t)udp->udp_port, NULL); 936 } 937 } 938 939 udp_bind_hash_remove(udp, B_FALSE); 940 941 } 942 943 void 944 udp_close_free(conn_t *connp) 945 { 946 udp_t *udp = connp->conn_udp; 947 948 /* If there are any options associated with the stream, free them. 
*/ 949 if (udp->udp_ip_snd_options != NULL) { 950 mi_free((char *)udp->udp_ip_snd_options); 951 udp->udp_ip_snd_options = NULL; 952 udp->udp_ip_snd_options_len = 0; 953 } 954 955 if (udp->udp_ip_rcv_options != NULL) { 956 mi_free((char *)udp->udp_ip_rcv_options); 957 udp->udp_ip_rcv_options = NULL; 958 udp->udp_ip_rcv_options_len = 0; 959 } 960 961 /* Free memory associated with sticky options */ 962 if (udp->udp_sticky_hdrs_len != 0) { 963 kmem_free(udp->udp_sticky_hdrs, 964 udp->udp_sticky_hdrs_len); 965 udp->udp_sticky_hdrs = NULL; 966 udp->udp_sticky_hdrs_len = 0; 967 } 968 969 ip6_pkt_free(&udp->udp_sticky_ipp); 970 971 /* 972 * Clear any fields which the kmem_cache constructor clears. 973 * Only udp_connp needs to be preserved. 974 * TBD: We should make this more efficient to avoid clearing 975 * everything. 976 */ 977 ASSERT(udp->udp_connp == connp); 978 bzero(udp, sizeof (udp_t)); 979 udp->udp_connp = connp; 980 } 981 982 static int 983 udp_do_disconnect(conn_t *connp) 984 { 985 udp_t *udp; 986 mblk_t *ire_mp; 987 udp_fanout_t *udpf; 988 udp_stack_t *us; 989 int error; 990 991 udp = connp->conn_udp; 992 us = udp->udp_us; 993 rw_enter(&udp->udp_rwlock, RW_WRITER); 994 if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) { 995 rw_exit(&udp->udp_rwlock); 996 return (-TOUTSTATE); 997 } 998 udp->udp_pending_op = T_DISCON_REQ; 999 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 1000 us->us_bind_fanout_size)]; 1001 mutex_enter(&udpf->uf_lock); 1002 udp->udp_v6src = udp->udp_bound_v6src; 1003 udp->udp_state = TS_IDLE; 1004 mutex_exit(&udpf->uf_lock); 1005 1006 if (udp->udp_family == AF_INET6) { 1007 /* Rebuild the header template */ 1008 error = udp_build_hdrs(udp); 1009 if (error != 0) { 1010 udp->udp_pending_op = -1; 1011 rw_exit(&udp->udp_rwlock); 1012 return (error); 1013 } 1014 } 1015 1016 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 1017 if (ire_mp == NULL) { 1018 mutex_enter(&udpf->uf_lock); 1019 udp->udp_pending_op = -1; 1020 
mutex_exit(&udpf->uf_lock); 1021 rw_exit(&udp->udp_rwlock); 1022 return (ENOMEM); 1023 } 1024 1025 rw_exit(&udp->udp_rwlock); 1026 1027 if (udp->udp_family == AF_INET6) { 1028 error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP, 1029 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 1030 } else { 1031 error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP, 1032 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE); 1033 } 1034 1035 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 1036 } 1037 1038 1039 static void 1040 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 1041 { 1042 conn_t *connp = Q_TO_CONN(q); 1043 int error; 1044 1045 /* 1046 * Allocate the largest primitive we need to send back 1047 * T_error_ack is > than T_ok_ack 1048 */ 1049 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 1050 if (mp == NULL) { 1051 /* Unable to reuse the T_DISCON_REQ for the ack. */ 1052 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 1053 return; 1054 } 1055 1056 error = udp_do_disconnect(connp); 1057 1058 if (error != 0) { 1059 if (error < 0) { 1060 udp_err_ack(q, mp, -error, 0); 1061 } else { 1062 udp_err_ack(q, mp, TSYSERR, error); 1063 } 1064 } else { 1065 mp = mi_tpi_ok_ack_alloc(mp); 1066 ASSERT(mp != NULL); 1067 qreply(q, mp); 1068 } 1069 } 1070 1071 int 1072 udp_disconnect(conn_t *connp) 1073 { 1074 int error; 1075 udp_t *udp = connp->conn_udp; 1076 1077 udp->udp_dgram_errind = B_FALSE; 1078 1079 error = udp_do_disconnect(connp); 1080 1081 if (error < 0) 1082 error = proto_tlitosyserr(-error); 1083 1084 return (error); 1085 } 1086 1087 /* This routine creates a T_ERROR_ACK message and passes it upstream. 
*/ 1088 static void 1089 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1090 { 1091 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1092 qreply(q, mp); 1093 } 1094 1095 /* Shorthand to generate and send TPI error acks to our client */ 1096 static void 1097 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 1098 int sys_error) 1099 { 1100 struct T_error_ack *teackp; 1101 1102 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1103 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1104 teackp = (struct T_error_ack *)mp->b_rptr; 1105 teackp->ERROR_prim = primitive; 1106 teackp->TLI_error = t_error; 1107 teackp->UNIX_error = sys_error; 1108 qreply(q, mp); 1109 } 1110 } 1111 1112 /*ARGSUSED*/ 1113 static int 1114 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 1115 { 1116 int i; 1117 udp_t *udp = Q_TO_UDP(q); 1118 udp_stack_t *us = udp->udp_us; 1119 1120 for (i = 0; i < us->us_num_epriv_ports; i++) { 1121 if (us->us_epriv_ports[i] != 0) 1122 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 1123 } 1124 return (0); 1125 } 1126 1127 /* ARGSUSED */ 1128 static int 1129 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1130 cred_t *cr) 1131 { 1132 long new_value; 1133 int i; 1134 udp_t *udp = Q_TO_UDP(q); 1135 udp_stack_t *us = udp->udp_us; 1136 1137 /* 1138 * Fail the request if the new value does not lie within the 1139 * port number limits. 
1140 */ 1141 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1142 new_value <= 0 || new_value >= 65536) { 1143 return (EINVAL); 1144 } 1145 1146 /* Check if the value is already in the list */ 1147 for (i = 0; i < us->us_num_epriv_ports; i++) { 1148 if (new_value == us->us_epriv_ports[i]) { 1149 return (EEXIST); 1150 } 1151 } 1152 /* Find an empty slot */ 1153 for (i = 0; i < us->us_num_epriv_ports; i++) { 1154 if (us->us_epriv_ports[i] == 0) 1155 break; 1156 } 1157 if (i == us->us_num_epriv_ports) { 1158 return (EOVERFLOW); 1159 } 1160 1161 /* Set the new value */ 1162 us->us_epriv_ports[i] = (in_port_t)new_value; 1163 return (0); 1164 } 1165 1166 /* ARGSUSED */ 1167 static int 1168 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1169 cred_t *cr) 1170 { 1171 long new_value; 1172 int i; 1173 udp_t *udp = Q_TO_UDP(q); 1174 udp_stack_t *us = udp->udp_us; 1175 1176 /* 1177 * Fail the request if the new value does not lie within the 1178 * port number limits. 1179 */ 1180 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1181 new_value <= 0 || new_value >= 65536) { 1182 return (EINVAL); 1183 } 1184 1185 /* Check that the value is already in the list */ 1186 for (i = 0; i < us->us_num_epriv_ports; i++) { 1187 if (us->us_epriv_ports[i] == new_value) 1188 break; 1189 } 1190 if (i == us->us_num_epriv_ports) { 1191 return (ESRCH); 1192 } 1193 1194 /* Clear the value */ 1195 us->us_epriv_ports[i] = 0; 1196 return (0); 1197 } 1198 1199 /* At minimum we need 4 bytes of UDP header */ 1200 #define ICMP_MIN_UDP_HDR 4 1201 1202 /* 1203 * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP. 1204 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1205 * Assumes that IP has pulled up everything up to and including the ICMP header. 
1206 */ 1207 static void 1208 udp_icmp_error(conn_t *connp, mblk_t *mp) 1209 { 1210 icmph_t *icmph; 1211 ipha_t *ipha; 1212 int iph_hdr_length; 1213 udpha_t *udpha; 1214 sin_t sin; 1215 sin6_t sin6; 1216 mblk_t *mp1; 1217 int error = 0; 1218 udp_t *udp = connp->conn_udp; 1219 1220 mp1 = NULL; 1221 ipha = (ipha_t *)mp->b_rptr; 1222 1223 ASSERT(OK_32PTR(mp->b_rptr)); 1224 1225 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1226 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1227 udp_icmp_error_ipv6(connp, mp); 1228 return; 1229 } 1230 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 1231 1232 /* Skip past the outer IP and ICMP headers */ 1233 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1234 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1235 ipha = (ipha_t *)&icmph[1]; 1236 1237 /* Skip past the inner IP and find the ULP header */ 1238 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1239 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1240 1241 switch (icmph->icmph_type) { 1242 case ICMP_DEST_UNREACHABLE: 1243 switch (icmph->icmph_code) { 1244 case ICMP_FRAGMENTATION_NEEDED: 1245 /* 1246 * IP has already adjusted the path MTU. 1247 */ 1248 break; 1249 case ICMP_PORT_UNREACHABLE: 1250 case ICMP_PROTOCOL_UNREACHABLE: 1251 error = ECONNREFUSED; 1252 break; 1253 default: 1254 /* Transient errors */ 1255 break; 1256 } 1257 break; 1258 default: 1259 /* Transient errors */ 1260 break; 1261 } 1262 if (error == 0) { 1263 freemsg(mp); 1264 return; 1265 } 1266 1267 /* 1268 * Deliver T_UDERROR_IND when the application has asked for it. 1269 * The socket layer enables this automatically when connected. 
1270 */ 1271 if (!udp->udp_dgram_errind) { 1272 freemsg(mp); 1273 return; 1274 } 1275 1276 1277 switch (udp->udp_family) { 1278 case AF_INET: 1279 sin = sin_null; 1280 sin.sin_family = AF_INET; 1281 sin.sin_addr.s_addr = ipha->ipha_dst; 1282 sin.sin_port = udpha->uha_dst_port; 1283 if (IPCL_IS_NONSTR(connp)) { 1284 rw_enter(&udp->udp_rwlock, RW_WRITER); 1285 if (udp->udp_state == TS_DATA_XFER) { 1286 if (sin.sin_port == udp->udp_dstport && 1287 sin.sin_addr.s_addr == 1288 V4_PART_OF_V6(udp->udp_v6dst)) { 1289 1290 rw_exit(&udp->udp_rwlock); 1291 (*connp->conn_upcalls->su_set_error) 1292 (connp->conn_upper_handle, error); 1293 goto done; 1294 } 1295 } else { 1296 udp->udp_delayed_error = error; 1297 *((sin_t *)&udp->udp_delayed_addr) = sin; 1298 } 1299 rw_exit(&udp->udp_rwlock); 1300 } else { 1301 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1302 NULL, 0, error); 1303 } 1304 break; 1305 case AF_INET6: 1306 sin6 = sin6_null; 1307 sin6.sin6_family = AF_INET6; 1308 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1309 sin6.sin6_port = udpha->uha_dst_port; 1310 if (IPCL_IS_NONSTR(connp)) { 1311 rw_enter(&udp->udp_rwlock, RW_WRITER); 1312 if (udp->udp_state == TS_DATA_XFER) { 1313 if (sin6.sin6_port == udp->udp_dstport && 1314 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1315 &udp->udp_v6dst)) { 1316 rw_exit(&udp->udp_rwlock); 1317 (*connp->conn_upcalls->su_set_error) 1318 (connp->conn_upper_handle, error); 1319 goto done; 1320 } 1321 } else { 1322 udp->udp_delayed_error = error; 1323 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1324 } 1325 rw_exit(&udp->udp_rwlock); 1326 } else { 1327 1328 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1329 NULL, 0, error); 1330 } 1331 break; 1332 } 1333 if (mp1 != NULL) 1334 putnext(connp->conn_rq, mp1); 1335 done: 1336 freemsg(mp); 1337 } 1338 1339 /* 1340 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1341 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 
1342 * Assumes that IP has pulled up all the extension headers as well as the 1343 * ICMPv6 header. 1344 */ 1345 static void 1346 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1347 { 1348 icmp6_t *icmp6; 1349 ip6_t *ip6h, *outer_ip6h; 1350 uint16_t iph_hdr_length; 1351 uint8_t *nexthdrp; 1352 udpha_t *udpha; 1353 sin6_t sin6; 1354 mblk_t *mp1; 1355 int error = 0; 1356 udp_t *udp = connp->conn_udp; 1357 udp_stack_t *us = udp->udp_us; 1358 1359 outer_ip6h = (ip6_t *)mp->b_rptr; 1360 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1361 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1362 else 1363 iph_hdr_length = IPV6_HDR_LEN; 1364 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1365 ip6h = (ip6_t *)&icmp6[1]; 1366 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1367 freemsg(mp); 1368 return; 1369 } 1370 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1371 1372 switch (icmp6->icmp6_type) { 1373 case ICMP6_DST_UNREACH: 1374 switch (icmp6->icmp6_code) { 1375 case ICMP6_DST_UNREACH_NOPORT: 1376 error = ECONNREFUSED; 1377 break; 1378 case ICMP6_DST_UNREACH_ADMIN: 1379 case ICMP6_DST_UNREACH_NOROUTE: 1380 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1381 case ICMP6_DST_UNREACH_ADDR: 1382 /* Transient errors */ 1383 break; 1384 default: 1385 break; 1386 } 1387 break; 1388 case ICMP6_PACKET_TOO_BIG: { 1389 struct T_unitdata_ind *tudi; 1390 struct T_opthdr *toh; 1391 size_t udi_size; 1392 mblk_t *newmp; 1393 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1394 sizeof (struct ip6_mtuinfo); 1395 sin6_t *sin6; 1396 struct ip6_mtuinfo *mtuinfo; 1397 1398 /* 1399 * If the application has requested to receive path mtu 1400 * information, send up an empty message containing an 1401 * IPV6_PATHMTU ancillary data item. 
1402 */ 1403 if (!udp->udp_ipv6_recvpathmtu) 1404 break; 1405 1406 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1407 opt_length; 1408 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1409 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1410 break; 1411 } 1412 1413 /* 1414 * newmp->b_cont is left to NULL on purpose. This is an 1415 * empty message containing only ancillary data. 1416 */ 1417 newmp->b_datap->db_type = M_PROTO; 1418 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1419 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1420 tudi->PRIM_type = T_UNITDATA_IND; 1421 tudi->SRC_length = sizeof (sin6_t); 1422 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1423 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1424 tudi->OPT_length = opt_length; 1425 1426 sin6 = (sin6_t *)&tudi[1]; 1427 bzero(sin6, sizeof (sin6_t)); 1428 sin6->sin6_family = AF_INET6; 1429 sin6->sin6_addr = udp->udp_v6dst; 1430 1431 toh = (struct T_opthdr *)&sin6[1]; 1432 toh->level = IPPROTO_IPV6; 1433 toh->name = IPV6_PATHMTU; 1434 toh->len = opt_length; 1435 toh->status = 0; 1436 1437 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1438 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1439 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1440 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1441 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1442 /* 1443 * We've consumed everything we need from the original 1444 * message. Free it, then send our empty message. 
1445 */ 1446 freemsg(mp); 1447 if (!IPCL_IS_NONSTR(connp)) { 1448 putnext(connp->conn_rq, newmp); 1449 } else { 1450 (*connp->conn_upcalls->su_recv) 1451 (connp->conn_upper_handle, newmp, 0, 0, &error, 1452 NULL); 1453 } 1454 return; 1455 } 1456 case ICMP6_TIME_EXCEEDED: 1457 /* Transient errors */ 1458 break; 1459 case ICMP6_PARAM_PROB: 1460 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1461 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1462 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1463 (uchar_t *)nexthdrp) { 1464 error = ECONNREFUSED; 1465 break; 1466 } 1467 break; 1468 } 1469 if (error == 0) { 1470 freemsg(mp); 1471 return; 1472 } 1473 1474 /* 1475 * Deliver T_UDERROR_IND when the application has asked for it. 1476 * The socket layer enables this automatically when connected. 1477 */ 1478 if (!udp->udp_dgram_errind) { 1479 freemsg(mp); 1480 return; 1481 } 1482 1483 sin6 = sin6_null; 1484 sin6.sin6_family = AF_INET6; 1485 sin6.sin6_addr = ip6h->ip6_dst; 1486 sin6.sin6_port = udpha->uha_dst_port; 1487 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1488 1489 if (IPCL_IS_NONSTR(connp)) { 1490 rw_enter(&udp->udp_rwlock, RW_WRITER); 1491 if (udp->udp_state == TS_DATA_XFER) { 1492 if (sin6.sin6_port == udp->udp_dstport && 1493 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1494 &udp->udp_v6dst)) { 1495 rw_exit(&udp->udp_rwlock); 1496 (*connp->conn_upcalls->su_set_error) 1497 (connp->conn_upper_handle, error); 1498 goto done; 1499 } 1500 } else { 1501 udp->udp_delayed_error = error; 1502 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1503 } 1504 rw_exit(&udp->udp_rwlock); 1505 } else { 1506 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1507 NULL, 0, error); 1508 if (mp1 != NULL) 1509 putnext(connp->conn_rq, mp1); 1510 } 1511 1512 done: 1513 freemsg(mp); 1514 } 1515 1516 /* 1517 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1518 * The local address is filled in if endpoint is bound. 
The remote address 1519 * is filled in if remote address has been precified ("connected endpoint") 1520 * (The concept of connected CLTS sockets is alien to published TPI 1521 * but we support it anyway). 1522 */ 1523 static void 1524 udp_addr_req(queue_t *q, mblk_t *mp) 1525 { 1526 sin_t *sin; 1527 sin6_t *sin6; 1528 mblk_t *ackmp; 1529 struct T_addr_ack *taa; 1530 udp_t *udp = Q_TO_UDP(q); 1531 1532 /* Make it large enough for worst case */ 1533 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1534 2 * sizeof (sin6_t), 1); 1535 if (ackmp == NULL) { 1536 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1537 return; 1538 } 1539 taa = (struct T_addr_ack *)ackmp->b_rptr; 1540 1541 bzero(taa, sizeof (struct T_addr_ack)); 1542 ackmp->b_wptr = (uchar_t *)&taa[1]; 1543 1544 taa->PRIM_type = T_ADDR_ACK; 1545 ackmp->b_datap->db_type = M_PCPROTO; 1546 rw_enter(&udp->udp_rwlock, RW_READER); 1547 /* 1548 * Note: Following code assumes 32 bit alignment of basic 1549 * data structures like sin_t and struct T_addr_ack. 1550 */ 1551 if (udp->udp_state != TS_UNBND) { 1552 /* 1553 * Fill in local address first 1554 */ 1555 taa->LOCADDR_offset = sizeof (*taa); 1556 if (udp->udp_family == AF_INET) { 1557 taa->LOCADDR_length = sizeof (sin_t); 1558 sin = (sin_t *)&taa[1]; 1559 /* Fill zeroes and then initialize non-zero fields */ 1560 *sin = sin_null; 1561 sin->sin_family = AF_INET; 1562 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 1563 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1564 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 1565 sin->sin_addr.s_addr); 1566 } else { 1567 /* 1568 * INADDR_ANY 1569 * udp_v6src is not set, we might be bound to 1570 * broadcast/multicast. 
Use udp_bound_v6src as 1571 * local address instead (that could 1572 * also still be INADDR_ANY) 1573 */ 1574 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 1575 sin->sin_addr.s_addr); 1576 } 1577 sin->sin_port = udp->udp_port; 1578 ackmp->b_wptr = (uchar_t *)&sin[1]; 1579 if (udp->udp_state == TS_DATA_XFER) { 1580 /* 1581 * connected, fill remote address too 1582 */ 1583 taa->REMADDR_length = sizeof (sin_t); 1584 /* assumed 32-bit alignment */ 1585 taa->REMADDR_offset = taa->LOCADDR_offset + 1586 taa->LOCADDR_length; 1587 1588 sin = (sin_t *)(ackmp->b_rptr + 1589 taa->REMADDR_offset); 1590 /* initialize */ 1591 *sin = sin_null; 1592 sin->sin_family = AF_INET; 1593 sin->sin_addr.s_addr = 1594 V4_PART_OF_V6(udp->udp_v6dst); 1595 sin->sin_port = udp->udp_dstport; 1596 ackmp->b_wptr = (uchar_t *)&sin[1]; 1597 } 1598 } else { 1599 taa->LOCADDR_length = sizeof (sin6_t); 1600 sin6 = (sin6_t *)&taa[1]; 1601 /* Fill zeroes and then initialize non-zero fields */ 1602 *sin6 = sin6_null; 1603 sin6->sin6_family = AF_INET6; 1604 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1605 sin6->sin6_addr = udp->udp_v6src; 1606 } else { 1607 /* 1608 * UNSPECIFIED 1609 * udp_v6src is not set, we might be bound to 1610 * broadcast/multicast. 
Use udp_bound_v6src as 1611 * local address instead (that could 1612 * also still be UNSPECIFIED) 1613 */ 1614 sin6->sin6_addr = 1615 udp->udp_bound_v6src; 1616 } 1617 sin6->sin6_port = udp->udp_port; 1618 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1619 if (udp->udp_state == TS_DATA_XFER) { 1620 /* 1621 * connected, fill remote address too 1622 */ 1623 taa->REMADDR_length = sizeof (sin6_t); 1624 /* assumed 32-bit alignment */ 1625 taa->REMADDR_offset = taa->LOCADDR_offset + 1626 taa->LOCADDR_length; 1627 1628 sin6 = (sin6_t *)(ackmp->b_rptr + 1629 taa->REMADDR_offset); 1630 /* initialize */ 1631 *sin6 = sin6_null; 1632 sin6->sin6_family = AF_INET6; 1633 sin6->sin6_addr = udp->udp_v6dst; 1634 sin6->sin6_port = udp->udp_dstport; 1635 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1636 } 1637 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1638 } 1639 } 1640 rw_exit(&udp->udp_rwlock); 1641 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1642 qreply(q, ackmp); 1643 } 1644 1645 static void 1646 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1647 { 1648 if (udp->udp_family == AF_INET) { 1649 *tap = udp_g_t_info_ack_ipv4; 1650 } else { 1651 *tap = udp_g_t_info_ack_ipv6; 1652 } 1653 tap->CURRENT_state = udp->udp_state; 1654 tap->OPT_size = udp_max_optsize; 1655 } 1656 1657 static void 1658 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1659 t_uscalar_t cap_bits1) 1660 { 1661 tcap->CAP_bits1 = 0; 1662 1663 if (cap_bits1 & TC1_INFO) { 1664 udp_copy_info(&tcap->INFO_ack, udp); 1665 tcap->CAP_bits1 |= TC1_INFO; 1666 } 1667 } 1668 1669 /* 1670 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1671 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1672 * udp_g_t_info_ack. The current state of the stream is copied from 1673 * udp_state. 
1674 */ 1675 static void 1676 udp_capability_req(queue_t *q, mblk_t *mp) 1677 { 1678 t_uscalar_t cap_bits1; 1679 struct T_capability_ack *tcap; 1680 udp_t *udp = Q_TO_UDP(q); 1681 1682 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1683 1684 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1685 mp->b_datap->db_type, T_CAPABILITY_ACK); 1686 if (!mp) 1687 return; 1688 1689 tcap = (struct T_capability_ack *)mp->b_rptr; 1690 udp_do_capability_ack(udp, tcap, cap_bits1); 1691 1692 qreply(q, mp); 1693 } 1694 1695 /* 1696 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1697 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1698 * The current state of the stream is copied from udp_state. 1699 */ 1700 static void 1701 udp_info_req(queue_t *q, mblk_t *mp) 1702 { 1703 udp_t *udp = Q_TO_UDP(q); 1704 1705 /* Create a T_INFO_ACK message. */ 1706 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1707 T_INFO_ACK); 1708 if (!mp) 1709 return; 1710 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1711 qreply(q, mp); 1712 } 1713 1714 /* 1715 * IP recognizes seven kinds of bind requests: 1716 * 1717 * - A zero-length address binds only to the protocol number. 1718 * 1719 * - A 4-byte address is treated as a request to 1720 * validate that the address is a valid local IPv4 1721 * address, appropriate for an application to bind to. 1722 * IP does the verification, but does not make any note 1723 * of the address at this time. 1724 * 1725 * - A 16-byte address contains is treated as a request 1726 * to validate a local IPv6 address, as the 4-byte 1727 * address case above. 1728 * 1729 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1730 * use it for the inbound fanout of packets. 1731 * 1732 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1733 * use it for the inbound fanout of packets. 
1734 * 1735 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1736 * information consisting of local and remote addresses 1737 * and ports. In this case, the addresses are both 1738 * validated as appropriate for this operation, and, if 1739 * so, the information is retained for use in the 1740 * inbound fanout. 1741 * 1742 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1743 * fanout information, like the 12-byte case above. 1744 * 1745 * IP will also fill in the IRE request mblk with information 1746 * regarding our peer. In all cases, we notify IP of our protocol 1747 * type by appending a single protocol byte to the bind request. 1748 */ 1749 static mblk_t * 1750 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 1751 { 1752 char *cp; 1753 mblk_t *mp; 1754 struct T_bind_req *tbr; 1755 ipa_conn_t *ac; 1756 ipa6_conn_t *ac6; 1757 sin_t *sin; 1758 sin6_t *sin6; 1759 1760 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1761 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 1762 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1763 if (!mp) 1764 return (mp); 1765 mp->b_datap->db_type = M_PROTO; 1766 tbr = (struct T_bind_req *)mp->b_rptr; 1767 tbr->PRIM_type = bind_prim; 1768 tbr->ADDR_offset = sizeof (*tbr); 1769 tbr->CONIND_number = 0; 1770 tbr->ADDR_length = addr_length; 1771 cp = (char *)&tbr[1]; 1772 switch (addr_length) { 1773 case sizeof (ipa_conn_t): 1774 ASSERT(udp->udp_family == AF_INET); 1775 /* Append a request for an IRE */ 1776 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1777 if (!mp->b_cont) { 1778 freemsg(mp); 1779 return (NULL); 1780 } 1781 mp->b_cont->b_wptr += sizeof (ire_t); 1782 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1783 1784 /* cp known to be 32 bit aligned */ 1785 ac = (ipa_conn_t *)cp; 1786 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 1787 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 1788 ac->ac_fport = udp->udp_dstport; 1789 ac->ac_lport = udp->udp_port; 1790 
break; 1791 1792 case sizeof (ipa6_conn_t): 1793 ASSERT(udp->udp_family == AF_INET6); 1794 /* Append a request for an IRE */ 1795 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1796 if (!mp->b_cont) { 1797 freemsg(mp); 1798 return (NULL); 1799 } 1800 mp->b_cont->b_wptr += sizeof (ire_t); 1801 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1802 1803 /* cp known to be 32 bit aligned */ 1804 ac6 = (ipa6_conn_t *)cp; 1805 ac6->ac6_laddr = udp->udp_v6src; 1806 ac6->ac6_faddr = udp->udp_v6dst; 1807 ac6->ac6_fport = udp->udp_dstport; 1808 ac6->ac6_lport = udp->udp_port; 1809 break; 1810 1811 case sizeof (sin_t): 1812 ASSERT(udp->udp_family == AF_INET); 1813 /* Append a request for an IRE */ 1814 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1815 if (!mp->b_cont) { 1816 freemsg(mp); 1817 return (NULL); 1818 } 1819 mp->b_cont->b_wptr += sizeof (ire_t); 1820 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1821 1822 sin = (sin_t *)cp; 1823 *sin = sin_null; 1824 sin->sin_family = AF_INET; 1825 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 1826 sin->sin_port = udp->udp_port; 1827 break; 1828 1829 case sizeof (sin6_t): 1830 ASSERT(udp->udp_family == AF_INET6); 1831 /* Append a request for an IRE */ 1832 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1833 if (!mp->b_cont) { 1834 freemsg(mp); 1835 return (NULL); 1836 } 1837 mp->b_cont->b_wptr += sizeof (ire_t); 1838 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1839 1840 sin6 = (sin6_t *)cp; 1841 *sin6 = sin6_null; 1842 sin6->sin6_family = AF_INET6; 1843 sin6->sin6_addr = udp->udp_bound_v6src; 1844 sin6->sin6_port = udp->udp_port; 1845 break; 1846 } 1847 /* Add protocol number to end */ 1848 cp[addr_length] = (char)IPPROTO_UDP; 1849 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1850 return (mp); 1851 } 1852 1853 /* For /dev/udp aka AF_INET open */ 1854 static int 1855 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1856 { 1857 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1858 } 
1859 1860 /* For /dev/udp6 aka AF_INET6 open */ 1861 static int 1862 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1863 { 1864 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1865 } 1866 1867 /* 1868 * This is the open routine for udp. It allocates a udp_t structure for 1869 * the stream and, on the first open of the module, creates an ND table. 1870 */ 1871 /*ARGSUSED2*/ 1872 static int 1873 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1874 boolean_t isv6) 1875 { 1876 int error; 1877 udp_t *udp; 1878 conn_t *connp; 1879 dev_t conn_dev; 1880 udp_stack_t *us; 1881 vmem_t *minor_arena; 1882 1883 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 1884 1885 /* If the stream is already open, return immediately. */ 1886 if (q->q_ptr != NULL) 1887 return (0); 1888 1889 if (sflag == MODOPEN) 1890 return (EINVAL); 1891 1892 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1893 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1894 minor_arena = ip_minor_arena_la; 1895 } else { 1896 /* 1897 * Either minor numbers in the large arena were exhausted 1898 * or a non socket application is doing the open. 1899 * Try to allocate from the small arena. 
1900 */ 1901 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1902 return (EBUSY); 1903 1904 minor_arena = ip_minor_arena_sa; 1905 } 1906 1907 if (flag & SO_FALLBACK) { 1908 /* 1909 * Non streams socket needs a stream to fallback to 1910 */ 1911 RD(q)->q_ptr = (void *)conn_dev; 1912 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1913 WR(q)->q_ptr = (void *)minor_arena; 1914 qprocson(q); 1915 return (0); 1916 } 1917 1918 connp = udp_do_open(credp, isv6, KM_SLEEP); 1919 if (connp == NULL) { 1920 inet_minor_free(minor_arena, conn_dev); 1921 return (ENOMEM); 1922 } 1923 udp = connp->conn_udp; 1924 us = udp->udp_us; 1925 1926 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1927 connp->conn_dev = conn_dev; 1928 connp->conn_minor_arena = minor_arena; 1929 1930 /* 1931 * Initialize the udp_t structure for this stream. 1932 */ 1933 q->q_ptr = connp; 1934 WR(q)->q_ptr = connp; 1935 connp->conn_rq = q; 1936 connp->conn_wq = WR(q); 1937 1938 rw_enter(&udp->udp_rwlock, RW_WRITER); 1939 ASSERT(connp->conn_ulp == IPPROTO_UDP); 1940 ASSERT(connp->conn_udp == udp); 1941 ASSERT(udp->udp_connp == connp); 1942 1943 if (flag & SO_SOCKSTR) { 1944 connp->conn_flags |= IPCL_SOCKET; 1945 udp->udp_issocket = B_TRUE; 1946 udp->udp_direct_sockfs = B_TRUE; 1947 } 1948 1949 q->q_hiwat = us->us_recv_hiwat; 1950 WR(q)->q_hiwat = us->us_xmit_hiwat; 1951 WR(q)->q_lowat = us->us_xmit_lowat; 1952 1953 qprocson(q); 1954 1955 if (udp->udp_family == AF_INET6) { 1956 /* Build initial header template for transmit */ 1957 if ((error = udp_build_hdrs(udp)) != 0) { 1958 rw_exit(&udp->udp_rwlock); 1959 qprocsoff(q); 1960 inet_minor_free(minor_arena, conn_dev); 1961 ipcl_conn_destroy(connp); 1962 return (error); 1963 } 1964 } 1965 rw_exit(&udp->udp_rwlock); 1966 1967 /* Set the Stream head write offset and high watermark. */ 1968 (void) proto_set_tx_wroff(q, connp, 1969 udp->udp_max_hdr_len + us->us_wroff_extra); 1970 /* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? 
*/ 1971 (void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat)); 1972 1973 mutex_enter(&connp->conn_lock); 1974 connp->conn_state_flags &= ~CONN_INCIPIENT; 1975 mutex_exit(&connp->conn_lock); 1976 return (0); 1977 } 1978 1979 /* 1980 * Which UDP options OK to set through T_UNITDATA_REQ... 1981 */ 1982 /* ARGSUSED */ 1983 static boolean_t 1984 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1985 { 1986 return (B_TRUE); 1987 } 1988 1989 /* 1990 * This routine gets default values of certain options whose default 1991 * values are maintained by protcol specific code 1992 */ 1993 /* ARGSUSED */ 1994 int 1995 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1996 { 1997 udp_t *udp = Q_TO_UDP(q); 1998 udp_stack_t *us = udp->udp_us; 1999 int *i1 = (int *)ptr; 2000 2001 switch (level) { 2002 case IPPROTO_IP: 2003 switch (name) { 2004 case IP_MULTICAST_TTL: 2005 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 2006 return (sizeof (uchar_t)); 2007 case IP_MULTICAST_LOOP: 2008 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 2009 return (sizeof (uchar_t)); 2010 } 2011 break; 2012 case IPPROTO_IPV6: 2013 switch (name) { 2014 case IPV6_MULTICAST_HOPS: 2015 *i1 = IP_DEFAULT_MULTICAST_TTL; 2016 return (sizeof (int)); 2017 case IPV6_MULTICAST_LOOP: 2018 *i1 = IP_DEFAULT_MULTICAST_LOOP; 2019 return (sizeof (int)); 2020 case IPV6_UNICAST_HOPS: 2021 *i1 = us->us_ipv6_hoplimit; 2022 return (sizeof (int)); 2023 } 2024 break; 2025 } 2026 return (-1); 2027 } 2028 2029 /* 2030 * This routine retrieves the current status of socket options. 2031 * It returns the size of the option retrieved. 
2032 */ 2033 static int 2034 udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 2035 { 2036 udp_t *udp = connp->conn_udp; 2037 udp_stack_t *us = udp->udp_us; 2038 int *i1 = (int *)ptr; 2039 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 2040 int len; 2041 2042 ASSERT(RW_READ_HELD(&udp->udp_rwlock)); 2043 switch (level) { 2044 case SOL_SOCKET: 2045 switch (name) { 2046 case SO_DEBUG: 2047 *i1 = udp->udp_debug; 2048 break; /* goto sizeof (int) option return */ 2049 case SO_REUSEADDR: 2050 *i1 = udp->udp_reuseaddr; 2051 break; /* goto sizeof (int) option return */ 2052 case SO_TYPE: 2053 *i1 = SOCK_DGRAM; 2054 break; /* goto sizeof (int) option return */ 2055 2056 /* 2057 * The following three items are available here, 2058 * but are only meaningful to IP. 2059 */ 2060 case SO_DONTROUTE: 2061 *i1 = udp->udp_dontroute; 2062 break; /* goto sizeof (int) option return */ 2063 case SO_USELOOPBACK: 2064 *i1 = udp->udp_useloopback; 2065 break; /* goto sizeof (int) option return */ 2066 case SO_BROADCAST: 2067 *i1 = udp->udp_broadcast; 2068 break; /* goto sizeof (int) option return */ 2069 2070 case SO_SNDBUF: 2071 *i1 = udp->udp_xmit_hiwat; 2072 break; /* goto sizeof (int) option return */ 2073 case SO_RCVBUF: 2074 *i1 = udp->udp_rcv_disply_hiwat; 2075 break; /* goto sizeof (int) option return */ 2076 case SO_DGRAM_ERRIND: 2077 *i1 = udp->udp_dgram_errind; 2078 break; /* goto sizeof (int) option return */ 2079 case SO_RECVUCRED: 2080 *i1 = udp->udp_recvucred; 2081 break; /* goto sizeof (int) option return */ 2082 case SO_TIMESTAMP: 2083 *i1 = udp->udp_timestamp; 2084 break; /* goto sizeof (int) option return */ 2085 case SO_ANON_MLP: 2086 *i1 = connp->conn_anon_mlp; 2087 break; /* goto sizeof (int) option return */ 2088 case SO_MAC_EXEMPT: 2089 *i1 = connp->conn_mac_exempt; 2090 break; /* goto sizeof (int) option return */ 2091 case SO_ALLZONES: 2092 *i1 = connp->conn_allzones; 2093 break; /* goto sizeof (int) option return */ 2094 case SO_EXCLBIND: 2095 *i1 = 
udp->udp_exclbind ? SO_EXCLBIND : 0; 2096 break; 2097 case SO_PROTOTYPE: 2098 *i1 = IPPROTO_UDP; 2099 break; 2100 case SO_DOMAIN: 2101 *i1 = udp->udp_family; 2102 break; 2103 default: 2104 return (-1); 2105 } 2106 break; 2107 case IPPROTO_IP: 2108 if (udp->udp_family != AF_INET) 2109 return (-1); 2110 switch (name) { 2111 case IP_OPTIONS: 2112 case T_IP_OPTIONS: 2113 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 2114 if (len > 0) { 2115 bcopy(udp->udp_ip_rcv_options + 2116 udp->udp_label_len, ptr, len); 2117 } 2118 return (len); 2119 case IP_TOS: 2120 case T_IP_TOS: 2121 *i1 = (int)udp->udp_type_of_service; 2122 break; /* goto sizeof (int) option return */ 2123 case IP_TTL: 2124 *i1 = (int)udp->udp_ttl; 2125 break; /* goto sizeof (int) option return */ 2126 case IP_DHCPINIT_IF: 2127 return (-EINVAL); 2128 case IP_NEXTHOP: 2129 case IP_RECVPKTINFO: 2130 /* 2131 * This also handles IP_PKTINFO. 2132 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2133 * Differentiation is based on the size of the argument 2134 * passed in. 2135 * This option is handled in IP which will return an 2136 * error for IP_PKTINFO as it's not supported as a 2137 * sticky option. 
2138 */ 2139 return (-EINVAL); 2140 case IP_MULTICAST_IF: 2141 /* 0 address if not set */ 2142 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 2143 return (sizeof (ipaddr_t)); 2144 case IP_MULTICAST_TTL: 2145 *(uchar_t *)ptr = udp->udp_multicast_ttl; 2146 return (sizeof (uchar_t)); 2147 case IP_MULTICAST_LOOP: 2148 *ptr = connp->conn_multicast_loop; 2149 return (sizeof (uint8_t)); 2150 case IP_RECVOPTS: 2151 *i1 = udp->udp_recvopts; 2152 break; /* goto sizeof (int) option return */ 2153 case IP_RECVDSTADDR: 2154 *i1 = udp->udp_recvdstaddr; 2155 break; /* goto sizeof (int) option return */ 2156 case IP_RECVIF: 2157 *i1 = udp->udp_recvif; 2158 break; /* goto sizeof (int) option return */ 2159 case IP_RECVSLLA: 2160 *i1 = udp->udp_recvslla; 2161 break; /* goto sizeof (int) option return */ 2162 case IP_RECVTTL: 2163 *i1 = udp->udp_recvttl; 2164 break; /* goto sizeof (int) option return */ 2165 case IP_ADD_MEMBERSHIP: 2166 case IP_DROP_MEMBERSHIP: 2167 case IP_BLOCK_SOURCE: 2168 case IP_UNBLOCK_SOURCE: 2169 case IP_ADD_SOURCE_MEMBERSHIP: 2170 case IP_DROP_SOURCE_MEMBERSHIP: 2171 case MCAST_JOIN_GROUP: 2172 case MCAST_LEAVE_GROUP: 2173 case MCAST_BLOCK_SOURCE: 2174 case MCAST_UNBLOCK_SOURCE: 2175 case MCAST_JOIN_SOURCE_GROUP: 2176 case MCAST_LEAVE_SOURCE_GROUP: 2177 /* cannot "get" the value for these */ 2178 return (-1); 2179 case IP_BOUND_IF: 2180 /* Zero if not set */ 2181 *i1 = udp->udp_bound_if; 2182 break; /* goto sizeof (int) option return */ 2183 case IP_UNSPEC_SRC: 2184 *i1 = udp->udp_unspec_source; 2185 break; /* goto sizeof (int) option return */ 2186 case IP_BROADCAST_TTL: 2187 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 2188 return (sizeof (uchar_t)); 2189 default: 2190 return (-1); 2191 } 2192 break; 2193 case IPPROTO_IPV6: 2194 if (udp->udp_family != AF_INET6) 2195 return (-1); 2196 switch (name) { 2197 case IPV6_UNICAST_HOPS: 2198 *i1 = (unsigned int)udp->udp_ttl; 2199 break; /* goto sizeof (int) option return */ 2200 case IPV6_MULTICAST_IF: 2201 /* 0 
index if not set */ 2202 *i1 = udp->udp_multicast_if_index; 2203 break; /* goto sizeof (int) option return */ 2204 case IPV6_MULTICAST_HOPS: 2205 *i1 = udp->udp_multicast_ttl; 2206 break; /* goto sizeof (int) option return */ 2207 case IPV6_MULTICAST_LOOP: 2208 *i1 = connp->conn_multicast_loop; 2209 break; /* goto sizeof (int) option return */ 2210 case IPV6_JOIN_GROUP: 2211 case IPV6_LEAVE_GROUP: 2212 case MCAST_JOIN_GROUP: 2213 case MCAST_LEAVE_GROUP: 2214 case MCAST_BLOCK_SOURCE: 2215 case MCAST_UNBLOCK_SOURCE: 2216 case MCAST_JOIN_SOURCE_GROUP: 2217 case MCAST_LEAVE_SOURCE_GROUP: 2218 /* cannot "get" the value for these */ 2219 return (-1); 2220 case IPV6_BOUND_IF: 2221 /* Zero if not set */ 2222 *i1 = udp->udp_bound_if; 2223 break; /* goto sizeof (int) option return */ 2224 case IPV6_UNSPEC_SRC: 2225 *i1 = udp->udp_unspec_source; 2226 break; /* goto sizeof (int) option return */ 2227 case IPV6_RECVPKTINFO: 2228 *i1 = udp->udp_ip_recvpktinfo; 2229 break; /* goto sizeof (int) option return */ 2230 case IPV6_RECVTCLASS: 2231 *i1 = udp->udp_ipv6_recvtclass; 2232 break; /* goto sizeof (int) option return */ 2233 case IPV6_RECVPATHMTU: 2234 *i1 = udp->udp_ipv6_recvpathmtu; 2235 break; /* goto sizeof (int) option return */ 2236 case IPV6_RECVHOPLIMIT: 2237 *i1 = udp->udp_ipv6_recvhoplimit; 2238 break; /* goto sizeof (int) option return */ 2239 case IPV6_RECVHOPOPTS: 2240 *i1 = udp->udp_ipv6_recvhopopts; 2241 break; /* goto sizeof (int) option return */ 2242 case IPV6_RECVDSTOPTS: 2243 *i1 = udp->udp_ipv6_recvdstopts; 2244 break; /* goto sizeof (int) option return */ 2245 case _OLD_IPV6_RECVDSTOPTS: 2246 *i1 = udp->udp_old_ipv6_recvdstopts; 2247 break; /* goto sizeof (int) option return */ 2248 case IPV6_RECVRTHDRDSTOPTS: 2249 *i1 = udp->udp_ipv6_recvrthdrdstopts; 2250 break; /* goto sizeof (int) option return */ 2251 case IPV6_RECVRTHDR: 2252 *i1 = udp->udp_ipv6_recvrthdr; 2253 break; /* goto sizeof (int) option return */ 2254 case IPV6_PKTINFO: { 2255 /* XXX assumes 
that caller has room for max size! */ 2256 struct in6_pktinfo *pkti; 2257 2258 pkti = (struct in6_pktinfo *)ptr; 2259 if (ipp->ipp_fields & IPPF_IFINDEX) 2260 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2261 else 2262 pkti->ipi6_ifindex = 0; 2263 if (ipp->ipp_fields & IPPF_ADDR) 2264 pkti->ipi6_addr = ipp->ipp_addr; 2265 else 2266 pkti->ipi6_addr = ipv6_all_zeros; 2267 return (sizeof (struct in6_pktinfo)); 2268 } 2269 case IPV6_TCLASS: 2270 if (ipp->ipp_fields & IPPF_TCLASS) 2271 *i1 = ipp->ipp_tclass; 2272 else 2273 *i1 = IPV6_FLOW_TCLASS( 2274 IPV6_DEFAULT_VERS_AND_FLOW); 2275 break; /* goto sizeof (int) option return */ 2276 case IPV6_NEXTHOP: { 2277 sin6_t *sin6 = (sin6_t *)ptr; 2278 2279 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2280 return (0); 2281 *sin6 = sin6_null; 2282 sin6->sin6_family = AF_INET6; 2283 sin6->sin6_addr = ipp->ipp_nexthop; 2284 return (sizeof (sin6_t)); 2285 } 2286 case IPV6_HOPOPTS: 2287 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2288 return (0); 2289 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 2290 return (0); 2291 /* 2292 * The cipso/label option is added by kernel. 2293 * User is not usually aware of this option. 2294 * We copy out the hbh opt after the label option. 
2295 */ 2296 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 2297 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2298 if (udp->udp_label_len_v6 > 0) { 2299 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2300 ptr[1] = (ipp->ipp_hopoptslen - 2301 udp->udp_label_len_v6 + 7) / 8 - 1; 2302 } 2303 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2304 case IPV6_RTHDRDSTOPTS: 2305 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2306 return (0); 2307 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2308 return (ipp->ipp_rtdstoptslen); 2309 case IPV6_RTHDR: 2310 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2311 return (0); 2312 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2313 return (ipp->ipp_rthdrlen); 2314 case IPV6_DSTOPTS: 2315 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 2316 return (0); 2317 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2318 return (ipp->ipp_dstoptslen); 2319 case IPV6_PATHMTU: 2320 return (ip_fill_mtuinfo(&udp->udp_v6dst, 2321 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 2322 us->us_netstack)); 2323 default: 2324 return (-1); 2325 } 2326 break; 2327 case IPPROTO_UDP: 2328 switch (name) { 2329 case UDP_ANONPRIVBIND: 2330 *i1 = udp->udp_anon_priv_bind; 2331 break; 2332 case UDP_EXCLBIND: 2333 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 2334 break; 2335 case UDP_RCVHDR: 2336 *i1 = udp->udp_rcvhdr ? 1 : 0; 2337 break; 2338 case UDP_NAT_T_ENDPOINT: 2339 *i1 = udp->udp_nat_t_endpoint; 2340 break; 2341 default: 2342 return (-1); 2343 } 2344 break; 2345 default: 2346 return (-1); 2347 } 2348 return (sizeof (int)); 2349 } 2350 2351 int 2352 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2353 { 2354 udp_t *udp; 2355 int err; 2356 2357 udp = Q_TO_UDP(q); 2358 2359 rw_enter(&udp->udp_rwlock, RW_READER); 2360 err = udp_opt_get(Q_TO_CONN(q), level, name, ptr); 2361 rw_exit(&udp->udp_rwlock); 2362 return (err); 2363 } 2364 2365 /* 2366 * This routine sets socket options. 
2367 */ 2368 /* ARGSUSED */ 2369 static int 2370 udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2371 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2372 void *thisdg_attrs, boolean_t checkonly) 2373 { 2374 udpattrs_t *attrs = thisdg_attrs; 2375 int *i1 = (int *)invalp; 2376 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2377 udp_t *udp = connp->conn_udp; 2378 udp_stack_t *us = udp->udp_us; 2379 int error; 2380 uint_t newlen; 2381 size_t sth_wroff; 2382 2383 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 2384 /* 2385 * For fixed length options, no sanity check 2386 * of passed in length is done. It is assumed *_optcom_req() 2387 * routines do the right thing. 2388 */ 2389 switch (level) { 2390 case SOL_SOCKET: 2391 switch (name) { 2392 case SO_REUSEADDR: 2393 if (!checkonly) { 2394 udp->udp_reuseaddr = onoff; 2395 PASS_OPT_TO_IP(connp); 2396 } 2397 break; 2398 case SO_DEBUG: 2399 if (!checkonly) 2400 udp->udp_debug = onoff; 2401 break; 2402 /* 2403 * The following three items are available here, 2404 * but are only meaningful to IP. 
2405 */ 2406 case SO_DONTROUTE: 2407 if (!checkonly) { 2408 udp->udp_dontroute = onoff; 2409 PASS_OPT_TO_IP(connp); 2410 } 2411 break; 2412 case SO_USELOOPBACK: 2413 if (!checkonly) { 2414 udp->udp_useloopback = onoff; 2415 PASS_OPT_TO_IP(connp); 2416 } 2417 break; 2418 case SO_BROADCAST: 2419 if (!checkonly) { 2420 udp->udp_broadcast = onoff; 2421 PASS_OPT_TO_IP(connp); 2422 } 2423 break; 2424 2425 case SO_SNDBUF: 2426 if (*i1 > us->us_max_buf) { 2427 *outlenp = 0; 2428 return (ENOBUFS); 2429 } 2430 if (!checkonly) { 2431 udp->udp_xmit_hiwat = *i1; 2432 connp->conn_wq->q_hiwat = *i1; 2433 } 2434 break; 2435 case SO_RCVBUF: 2436 if (*i1 > us->us_max_buf) { 2437 *outlenp = 0; 2438 return (ENOBUFS); 2439 } 2440 if (!checkonly) { 2441 int size; 2442 2443 udp->udp_rcv_disply_hiwat = *i1; 2444 size = udp_set_rcv_hiwat(udp, *i1); 2445 rw_exit(&udp->udp_rwlock); 2446 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2447 size); 2448 rw_enter(&udp->udp_rwlock, RW_WRITER); 2449 } 2450 break; 2451 case SO_DGRAM_ERRIND: 2452 if (!checkonly) 2453 udp->udp_dgram_errind = onoff; 2454 break; 2455 case SO_RECVUCRED: 2456 if (!checkonly) 2457 udp->udp_recvucred = onoff; 2458 break; 2459 case SO_ALLZONES: 2460 /* 2461 * "soft" error (negative) 2462 * option not handled at this level 2463 * Do not modify *outlenp. 
2464 */ 2465 return (-EINVAL); 2466 case SO_TIMESTAMP: 2467 if (!checkonly) 2468 udp->udp_timestamp = onoff; 2469 break; 2470 case SO_ANON_MLP: 2471 if (!checkonly) { 2472 connp->conn_anon_mlp = onoff; 2473 PASS_OPT_TO_IP(connp); 2474 } 2475 break; 2476 case SO_MAC_EXEMPT: 2477 if (secpolicy_net_mac_aware(cr) != 0 || 2478 udp->udp_state != TS_UNBND) 2479 return (EACCES); 2480 if (!checkonly) { 2481 connp->conn_mac_exempt = onoff; 2482 PASS_OPT_TO_IP(connp); 2483 } 2484 break; 2485 case SCM_UCRED: { 2486 struct ucred_s *ucr; 2487 cred_t *cr, *newcr; 2488 ts_label_t *tsl; 2489 2490 /* 2491 * Only sockets that have proper privileges and are 2492 * bound to MLPs will have any other value here, so 2493 * this implicitly tests for privilege to set label. 2494 */ 2495 if (connp->conn_mlp_type == mlptSingle) 2496 break; 2497 ucr = (struct ucred_s *)invalp; 2498 if (inlen != ucredsize || 2499 ucr->uc_labeloff < sizeof (*ucr) || 2500 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 2501 return (EINVAL); 2502 if (!checkonly) { 2503 mblk_t *mb; 2504 pid_t cpid; 2505 2506 if (attrs == NULL || 2507 (mb = attrs->udpattr_mb) == NULL) 2508 return (EINVAL); 2509 if ((cr = msg_getcred(mb, &cpid)) == NULL) 2510 cr = udp->udp_connp->conn_cred; 2511 ASSERT(cr != NULL); 2512 if ((tsl = crgetlabel(cr)) == NULL) 2513 return (EINVAL); 2514 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 2515 tsl->tsl_doi, KM_NOSLEEP); 2516 if (newcr == NULL) 2517 return (ENOSR); 2518 mblk_setcred(mb, newcr, cpid); 2519 attrs->udpattr_credset = B_TRUE; 2520 crfree(newcr); 2521 } 2522 break; 2523 } 2524 case SO_EXCLBIND: 2525 if (!checkonly) 2526 udp->udp_exclbind = onoff; 2527 break; 2528 case SO_RCVTIMEO: 2529 case SO_SNDTIMEO: 2530 /* 2531 * Pass these two options in order for third part 2532 * protocol usage. Here just return directly. 
2533 */ 2534 return (0); 2535 default: 2536 *outlenp = 0; 2537 return (EINVAL); 2538 } 2539 break; 2540 case IPPROTO_IP: 2541 if (udp->udp_family != AF_INET) { 2542 *outlenp = 0; 2543 return (ENOPROTOOPT); 2544 } 2545 switch (name) { 2546 case IP_OPTIONS: 2547 case T_IP_OPTIONS: 2548 /* Save options for use by IP. */ 2549 newlen = inlen + udp->udp_label_len; 2550 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 2551 *outlenp = 0; 2552 return (EINVAL); 2553 } 2554 if (checkonly) 2555 break; 2556 2557 /* 2558 * Update the stored options taking into account 2559 * any CIPSO option which we should not overwrite. 2560 */ 2561 if (!tsol_option_set(&udp->udp_ip_snd_options, 2562 &udp->udp_ip_snd_options_len, 2563 udp->udp_label_len, invalp, inlen)) { 2564 *outlenp = 0; 2565 return (ENOMEM); 2566 } 2567 2568 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2569 UDPH_SIZE + udp->udp_ip_snd_options_len; 2570 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 2571 rw_exit(&udp->udp_rwlock); 2572 (void) proto_set_tx_wroff(connp->conn_rq, connp, 2573 sth_wroff); 2574 rw_enter(&udp->udp_rwlock, RW_WRITER); 2575 break; 2576 2577 case IP_TTL: 2578 if (!checkonly) { 2579 udp->udp_ttl = (uchar_t)*i1; 2580 } 2581 break; 2582 case IP_TOS: 2583 case T_IP_TOS: 2584 if (!checkonly) { 2585 udp->udp_type_of_service = (uchar_t)*i1; 2586 } 2587 break; 2588 case IP_MULTICAST_IF: { 2589 /* 2590 * TODO should check OPTMGMT reply and undo this if 2591 * there is an error. 
2592 */ 2593 struct in_addr *inap = (struct in_addr *)invalp; 2594 if (!checkonly) { 2595 udp->udp_multicast_if_addr = 2596 inap->s_addr; 2597 PASS_OPT_TO_IP(connp); 2598 } 2599 break; 2600 } 2601 case IP_MULTICAST_TTL: 2602 if (!checkonly) 2603 udp->udp_multicast_ttl = *invalp; 2604 break; 2605 case IP_MULTICAST_LOOP: 2606 if (!checkonly) { 2607 connp->conn_multicast_loop = *invalp; 2608 PASS_OPT_TO_IP(connp); 2609 } 2610 break; 2611 case IP_RECVOPTS: 2612 if (!checkonly) 2613 udp->udp_recvopts = onoff; 2614 break; 2615 case IP_RECVDSTADDR: 2616 if (!checkonly) 2617 udp->udp_recvdstaddr = onoff; 2618 break; 2619 case IP_RECVIF: 2620 if (!checkonly) { 2621 udp->udp_recvif = onoff; 2622 PASS_OPT_TO_IP(connp); 2623 } 2624 break; 2625 case IP_RECVSLLA: 2626 if (!checkonly) { 2627 udp->udp_recvslla = onoff; 2628 PASS_OPT_TO_IP(connp); 2629 } 2630 break; 2631 case IP_RECVTTL: 2632 if (!checkonly) 2633 udp->udp_recvttl = onoff; 2634 break; 2635 case IP_PKTINFO: { 2636 /* 2637 * This also handles IP_RECVPKTINFO. 2638 * IP_PKTINFO and IP_RECVPKTINFO have same value. 2639 * Differentiation is based on the size of the 2640 * argument passed in. 2641 */ 2642 struct in_pktinfo *pktinfop; 2643 ip4_pkt_t *attr_pktinfop; 2644 2645 if (checkonly) 2646 break; 2647 2648 if (inlen == sizeof (int)) { 2649 /* 2650 * This is IP_RECVPKTINFO option. 2651 * Keep a local copy of whether this option is 2652 * set or not and pass it down to IP for 2653 * processing. 2654 */ 2655 2656 udp->udp_ip_recvpktinfo = onoff; 2657 return (-EINVAL); 2658 } 2659 2660 if (attrs == NULL || 2661 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 2662 /* 2663 * sticky option or no buffer to return 2664 * the results. 
2665 */ 2666 return (EINVAL); 2667 } 2668 2669 if (inlen != sizeof (struct in_pktinfo)) 2670 return (EINVAL); 2671 2672 pktinfop = (struct in_pktinfo *)invalp; 2673 2674 /* 2675 * At least one of the values should be specified 2676 */ 2677 if (pktinfop->ipi_ifindex == 0 && 2678 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2679 return (EINVAL); 2680 } 2681 2682 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2683 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2684 2685 break; 2686 } 2687 case IP_ADD_MEMBERSHIP: 2688 case IP_DROP_MEMBERSHIP: 2689 case IP_BLOCK_SOURCE: 2690 case IP_UNBLOCK_SOURCE: 2691 case IP_ADD_SOURCE_MEMBERSHIP: 2692 case IP_DROP_SOURCE_MEMBERSHIP: 2693 case MCAST_JOIN_GROUP: 2694 case MCAST_LEAVE_GROUP: 2695 case MCAST_BLOCK_SOURCE: 2696 case MCAST_UNBLOCK_SOURCE: 2697 case MCAST_JOIN_SOURCE_GROUP: 2698 case MCAST_LEAVE_SOURCE_GROUP: 2699 case IP_SEC_OPT: 2700 case IP_NEXTHOP: 2701 case IP_DHCPINIT_IF: 2702 /* 2703 * "soft" error (negative) 2704 * option not handled at this level 2705 * Do not modify *outlenp. 
2706 */ 2707 return (-EINVAL); 2708 case IP_BOUND_IF: 2709 if (!checkonly) { 2710 udp->udp_bound_if = *i1; 2711 PASS_OPT_TO_IP(connp); 2712 } 2713 break; 2714 case IP_UNSPEC_SRC: 2715 if (!checkonly) { 2716 udp->udp_unspec_source = onoff; 2717 PASS_OPT_TO_IP(connp); 2718 } 2719 break; 2720 case IP_BROADCAST_TTL: 2721 if (!checkonly) 2722 connp->conn_broadcast_ttl = *invalp; 2723 break; 2724 default: 2725 *outlenp = 0; 2726 return (EINVAL); 2727 } 2728 break; 2729 case IPPROTO_IPV6: { 2730 ip6_pkt_t *ipp; 2731 boolean_t sticky; 2732 2733 if (udp->udp_family != AF_INET6) { 2734 *outlenp = 0; 2735 return (ENOPROTOOPT); 2736 } 2737 /* 2738 * Deal with both sticky options and ancillary data 2739 */ 2740 sticky = B_FALSE; 2741 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 2742 NULL) { 2743 /* sticky options, or none */ 2744 ipp = &udp->udp_sticky_ipp; 2745 sticky = B_TRUE; 2746 } 2747 2748 switch (name) { 2749 case IPV6_MULTICAST_IF: 2750 if (!checkonly) { 2751 udp->udp_multicast_if_index = *i1; 2752 PASS_OPT_TO_IP(connp); 2753 } 2754 break; 2755 case IPV6_UNICAST_HOPS: 2756 /* -1 means use default */ 2757 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2758 *outlenp = 0; 2759 return (EINVAL); 2760 } 2761 if (!checkonly) { 2762 if (*i1 == -1) { 2763 udp->udp_ttl = ipp->ipp_unicast_hops = 2764 us->us_ipv6_hoplimit; 2765 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2766 /* Pass modified value to IP. 
*/ 2767 *i1 = udp->udp_ttl; 2768 } else { 2769 udp->udp_ttl = ipp->ipp_unicast_hops = 2770 (uint8_t)*i1; 2771 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2772 } 2773 /* Rebuild the header template */ 2774 error = udp_build_hdrs(udp); 2775 if (error != 0) { 2776 *outlenp = 0; 2777 return (error); 2778 } 2779 } 2780 break; 2781 case IPV6_MULTICAST_HOPS: 2782 /* -1 means use default */ 2783 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2784 *outlenp = 0; 2785 return (EINVAL); 2786 } 2787 if (!checkonly) { 2788 if (*i1 == -1) { 2789 udp->udp_multicast_ttl = 2790 ipp->ipp_multicast_hops = 2791 IP_DEFAULT_MULTICAST_TTL; 2792 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2793 /* Pass modified value to IP. */ 2794 *i1 = udp->udp_multicast_ttl; 2795 } else { 2796 udp->udp_multicast_ttl = 2797 ipp->ipp_multicast_hops = 2798 (uint8_t)*i1; 2799 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2800 } 2801 } 2802 break; 2803 case IPV6_MULTICAST_LOOP: 2804 if (*i1 != 0 && *i1 != 1) { 2805 *outlenp = 0; 2806 return (EINVAL); 2807 } 2808 if (!checkonly) { 2809 connp->conn_multicast_loop = *i1; 2810 PASS_OPT_TO_IP(connp); 2811 } 2812 break; 2813 case IPV6_JOIN_GROUP: 2814 case IPV6_LEAVE_GROUP: 2815 case MCAST_JOIN_GROUP: 2816 case MCAST_LEAVE_GROUP: 2817 case MCAST_BLOCK_SOURCE: 2818 case MCAST_UNBLOCK_SOURCE: 2819 case MCAST_JOIN_SOURCE_GROUP: 2820 case MCAST_LEAVE_SOURCE_GROUP: 2821 /* 2822 * "soft" error (negative) 2823 * option not handled at this level 2824 * Note: Do not modify *outlenp 2825 */ 2826 return (-EINVAL); 2827 case IPV6_BOUND_IF: 2828 if (!checkonly) { 2829 udp->udp_bound_if = *i1; 2830 PASS_OPT_TO_IP(connp); 2831 } 2832 break; 2833 case IPV6_UNSPEC_SRC: 2834 if (!checkonly) { 2835 udp->udp_unspec_source = onoff; 2836 PASS_OPT_TO_IP(connp); 2837 } 2838 break; 2839 /* 2840 * Set boolean switches for ancillary data delivery 2841 */ 2842 case IPV6_RECVPKTINFO: 2843 if (!checkonly) { 2844 udp->udp_ip_recvpktinfo = onoff; 2845 PASS_OPT_TO_IP(connp); 2846 } 2847 break; 2848 case IPV6_RECVTCLASS: 
2849 if (!checkonly) { 2850 udp->udp_ipv6_recvtclass = onoff; 2851 PASS_OPT_TO_IP(connp); 2852 } 2853 break; 2854 case IPV6_RECVPATHMTU: 2855 if (!checkonly) { 2856 udp->udp_ipv6_recvpathmtu = onoff; 2857 PASS_OPT_TO_IP(connp); 2858 } 2859 break; 2860 case IPV6_RECVHOPLIMIT: 2861 if (!checkonly) { 2862 udp->udp_ipv6_recvhoplimit = onoff; 2863 PASS_OPT_TO_IP(connp); 2864 } 2865 break; 2866 case IPV6_RECVHOPOPTS: 2867 if (!checkonly) { 2868 udp->udp_ipv6_recvhopopts = onoff; 2869 PASS_OPT_TO_IP(connp); 2870 } 2871 break; 2872 case IPV6_RECVDSTOPTS: 2873 if (!checkonly) { 2874 udp->udp_ipv6_recvdstopts = onoff; 2875 PASS_OPT_TO_IP(connp); 2876 } 2877 break; 2878 case _OLD_IPV6_RECVDSTOPTS: 2879 if (!checkonly) 2880 udp->udp_old_ipv6_recvdstopts = onoff; 2881 break; 2882 case IPV6_RECVRTHDRDSTOPTS: 2883 if (!checkonly) { 2884 udp->udp_ipv6_recvrthdrdstopts = onoff; 2885 PASS_OPT_TO_IP(connp); 2886 } 2887 break; 2888 case IPV6_RECVRTHDR: 2889 if (!checkonly) { 2890 udp->udp_ipv6_recvrthdr = onoff; 2891 PASS_OPT_TO_IP(connp); 2892 } 2893 break; 2894 /* 2895 * Set sticky options or ancillary data. 2896 * If sticky options, (re)build any extension headers 2897 * that might be needed as a result. 2898 */ 2899 case IPV6_PKTINFO: 2900 /* 2901 * The source address and ifindex are verified 2902 * in ip_opt_set(). For ancillary data the 2903 * source address is checked in ip_wput_v6. 
2904 */ 2905 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2906 return (EINVAL); 2907 if (checkonly) 2908 break; 2909 2910 if (inlen == 0) { 2911 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2912 ipp->ipp_sticky_ignored |= 2913 (IPPF_IFINDEX|IPPF_ADDR); 2914 } else { 2915 struct in6_pktinfo *pkti; 2916 2917 pkti = (struct in6_pktinfo *)invalp; 2918 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2919 ipp->ipp_addr = pkti->ipi6_addr; 2920 if (ipp->ipp_ifindex != 0) 2921 ipp->ipp_fields |= IPPF_IFINDEX; 2922 else 2923 ipp->ipp_fields &= ~IPPF_IFINDEX; 2924 if (!IN6_IS_ADDR_UNSPECIFIED( 2925 &ipp->ipp_addr)) 2926 ipp->ipp_fields |= IPPF_ADDR; 2927 else 2928 ipp->ipp_fields &= ~IPPF_ADDR; 2929 } 2930 if (sticky) { 2931 error = udp_build_hdrs(udp); 2932 if (error != 0) 2933 return (error); 2934 PASS_OPT_TO_IP(connp); 2935 } 2936 break; 2937 case IPV6_HOPLIMIT: 2938 if (sticky) 2939 return (EINVAL); 2940 if (inlen != 0 && inlen != sizeof (int)) 2941 return (EINVAL); 2942 if (checkonly) 2943 break; 2944 2945 if (inlen == 0) { 2946 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2947 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2948 } else { 2949 if (*i1 > 255 || *i1 < -1) 2950 return (EINVAL); 2951 if (*i1 == -1) 2952 ipp->ipp_hoplimit = 2953 us->us_ipv6_hoplimit; 2954 else 2955 ipp->ipp_hoplimit = *i1; 2956 ipp->ipp_fields |= IPPF_HOPLIMIT; 2957 } 2958 break; 2959 case IPV6_TCLASS: 2960 if (inlen != 0 && inlen != sizeof (int)) 2961 return (EINVAL); 2962 if (checkonly) 2963 break; 2964 2965 if (inlen == 0) { 2966 ipp->ipp_fields &= ~IPPF_TCLASS; 2967 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2968 } else { 2969 if (*i1 > 255 || *i1 < -1) 2970 return (EINVAL); 2971 if (*i1 == -1) 2972 ipp->ipp_tclass = 0; 2973 else 2974 ipp->ipp_tclass = *i1; 2975 ipp->ipp_fields |= IPPF_TCLASS; 2976 } 2977 if (sticky) { 2978 error = udp_build_hdrs(udp); 2979 if (error != 0) 2980 return (error); 2981 } 2982 break; 2983 case IPV6_NEXTHOP: 2984 /* 2985 * IP will verify that the nexthop is reachable 2986 * 
and fail for sticky options. 2987 */ 2988 if (inlen != 0 && inlen != sizeof (sin6_t)) 2989 return (EINVAL); 2990 if (checkonly) 2991 break; 2992 2993 if (inlen == 0) { 2994 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2995 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2996 } else { 2997 sin6_t *sin6 = (sin6_t *)invalp; 2998 2999 if (sin6->sin6_family != AF_INET6) { 3000 return (EAFNOSUPPORT); 3001 } 3002 if (IN6_IS_ADDR_V4MAPPED( 3003 &sin6->sin6_addr)) 3004 return (EADDRNOTAVAIL); 3005 ipp->ipp_nexthop = sin6->sin6_addr; 3006 if (!IN6_IS_ADDR_UNSPECIFIED( 3007 &ipp->ipp_nexthop)) 3008 ipp->ipp_fields |= IPPF_NEXTHOP; 3009 else 3010 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3011 } 3012 if (sticky) { 3013 error = udp_build_hdrs(udp); 3014 if (error != 0) 3015 return (error); 3016 PASS_OPT_TO_IP(connp); 3017 } 3018 break; 3019 case IPV6_HOPOPTS: { 3020 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 3021 /* 3022 * Sanity checks - minimum size, size a multiple of 3023 * eight bytes, and matching size passed in. 3024 */ 3025 if (inlen != 0 && 3026 inlen != (8 * (hopts->ip6h_len + 1))) 3027 return (EINVAL); 3028 3029 if (checkonly) 3030 break; 3031 3032 error = optcom_pkt_set(invalp, inlen, sticky, 3033 (uchar_t **)&ipp->ipp_hopopts, 3034 &ipp->ipp_hopoptslen, 3035 sticky ? udp->udp_label_len_v6 : 0); 3036 if (error != 0) 3037 return (error); 3038 if (ipp->ipp_hopoptslen == 0) { 3039 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3040 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3041 } else { 3042 ipp->ipp_fields |= IPPF_HOPOPTS; 3043 } 3044 if (sticky) { 3045 error = udp_build_hdrs(udp); 3046 if (error != 0) 3047 return (error); 3048 } 3049 break; 3050 } 3051 case IPV6_RTHDRDSTOPTS: { 3052 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3053 3054 /* 3055 * Sanity checks - minimum size, size a multiple of 3056 * eight bytes, and matching size passed in. 
3057 */ 3058 if (inlen != 0 && 3059 inlen != (8 * (dopts->ip6d_len + 1))) 3060 return (EINVAL); 3061 3062 if (checkonly) 3063 break; 3064 3065 if (inlen == 0) { 3066 if (sticky && 3067 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 3068 kmem_free(ipp->ipp_rtdstopts, 3069 ipp->ipp_rtdstoptslen); 3070 ipp->ipp_rtdstopts = NULL; 3071 ipp->ipp_rtdstoptslen = 0; 3072 } 3073 3074 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 3075 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 3076 } else { 3077 error = optcom_pkt_set(invalp, inlen, sticky, 3078 (uchar_t **)&ipp->ipp_rtdstopts, 3079 &ipp->ipp_rtdstoptslen, 0); 3080 if (error != 0) 3081 return (error); 3082 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3083 } 3084 if (sticky) { 3085 error = udp_build_hdrs(udp); 3086 if (error != 0) 3087 return (error); 3088 } 3089 break; 3090 } 3091 case IPV6_DSTOPTS: { 3092 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3093 3094 /* 3095 * Sanity checks - minimum size, size a multiple of 3096 * eight bytes, and matching size passed in. 3097 */ 3098 if (inlen != 0 && 3099 inlen != (8 * (dopts->ip6d_len + 1))) 3100 return (EINVAL); 3101 3102 if (checkonly) 3103 break; 3104 3105 if (inlen == 0) { 3106 if (sticky && 3107 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3108 kmem_free(ipp->ipp_dstopts, 3109 ipp->ipp_dstoptslen); 3110 ipp->ipp_dstopts = NULL; 3111 ipp->ipp_dstoptslen = 0; 3112 } 3113 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3114 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3115 } else { 3116 error = optcom_pkt_set(invalp, inlen, sticky, 3117 (uchar_t **)&ipp->ipp_dstopts, 3118 &ipp->ipp_dstoptslen, 0); 3119 if (error != 0) 3120 return (error); 3121 ipp->ipp_fields |= IPPF_DSTOPTS; 3122 } 3123 if (sticky) { 3124 error = udp_build_hdrs(udp); 3125 if (error != 0) 3126 return (error); 3127 } 3128 break; 3129 } 3130 case IPV6_RTHDR: { 3131 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3132 3133 /* 3134 * Sanity checks - minimum size, size a multiple of 3135 * eight bytes, and matching size passed in. 
3136 */ 3137 if (inlen != 0 && 3138 inlen != (8 * (rt->ip6r_len + 1))) 3139 return (EINVAL); 3140 3141 if (checkonly) 3142 break; 3143 3144 if (inlen == 0) { 3145 if (sticky && 3146 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3147 kmem_free(ipp->ipp_rthdr, 3148 ipp->ipp_rthdrlen); 3149 ipp->ipp_rthdr = NULL; 3150 ipp->ipp_rthdrlen = 0; 3151 } 3152 ipp->ipp_fields &= ~IPPF_RTHDR; 3153 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3154 } else { 3155 error = optcom_pkt_set(invalp, inlen, sticky, 3156 (uchar_t **)&ipp->ipp_rthdr, 3157 &ipp->ipp_rthdrlen, 0); 3158 if (error != 0) 3159 return (error); 3160 ipp->ipp_fields |= IPPF_RTHDR; 3161 } 3162 if (sticky) { 3163 error = udp_build_hdrs(udp); 3164 if (error != 0) 3165 return (error); 3166 } 3167 break; 3168 } 3169 3170 case IPV6_DONTFRAG: 3171 if (checkonly) 3172 break; 3173 3174 if (onoff) { 3175 ipp->ipp_fields |= IPPF_DONTFRAG; 3176 } else { 3177 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3178 } 3179 break; 3180 3181 case IPV6_USE_MIN_MTU: 3182 if (inlen != sizeof (int)) 3183 return (EINVAL); 3184 3185 if (*i1 < -1 || *i1 > 1) 3186 return (EINVAL); 3187 3188 if (checkonly) 3189 break; 3190 3191 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3192 ipp->ipp_use_min_mtu = *i1; 3193 break; 3194 3195 case IPV6_SEC_OPT: 3196 case IPV6_SRC_PREFERENCES: 3197 case IPV6_V6ONLY: 3198 /* Handled at the IP level */ 3199 return (-EINVAL); 3200 default: 3201 *outlenp = 0; 3202 return (EINVAL); 3203 } 3204 break; 3205 } /* end IPPROTO_IPV6 */ 3206 case IPPROTO_UDP: 3207 switch (name) { 3208 case UDP_ANONPRIVBIND: 3209 if ((error = secpolicy_net_privaddr(cr, 0, 3210 IPPROTO_UDP)) != 0) { 3211 *outlenp = 0; 3212 return (error); 3213 } 3214 if (!checkonly) { 3215 udp->udp_anon_priv_bind = onoff; 3216 } 3217 break; 3218 case UDP_EXCLBIND: 3219 if (!checkonly) 3220 udp->udp_exclbind = onoff; 3221 break; 3222 case UDP_RCVHDR: 3223 if (!checkonly) 3224 udp->udp_rcvhdr = onoff; 3225 break; 3226 case UDP_NAT_T_ENDPOINT: 3227 if ((error = secpolicy_ip_config(cr, 
B_FALSE)) != 0) { 3228 *outlenp = 0; 3229 return (error); 3230 } 3231 3232 /* 3233 * Use udp_family instead so we can avoid ambiguitites 3234 * with AF_INET6 sockets that may switch from IPv4 3235 * to IPv6. 3236 */ 3237 if (udp->udp_family != AF_INET) { 3238 *outlenp = 0; 3239 return (EAFNOSUPPORT); 3240 } 3241 3242 if (!checkonly) { 3243 int size; 3244 3245 udp->udp_nat_t_endpoint = onoff; 3246 3247 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3248 UDPH_SIZE + udp->udp_ip_snd_options_len; 3249 3250 /* Also, adjust wroff */ 3251 if (onoff) { 3252 udp->udp_max_hdr_len += 3253 sizeof (uint32_t); 3254 } 3255 size = udp->udp_max_hdr_len + 3256 us->us_wroff_extra; 3257 (void) proto_set_tx_wroff(connp->conn_rq, connp, 3258 size); 3259 } 3260 break; 3261 default: 3262 *outlenp = 0; 3263 return (EINVAL); 3264 } 3265 break; 3266 default: 3267 *outlenp = 0; 3268 return (EINVAL); 3269 } 3270 /* 3271 * Common case of OK return with outval same as inval. 3272 */ 3273 if (invalp != outvalp) { 3274 /* don't trust bcopy for identical src/dst */ 3275 (void) bcopy(invalp, outvalp, inlen); 3276 } 3277 *outlenp = inlen; 3278 return (0); 3279 } 3280 3281 int 3282 udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3283 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3284 void *thisdg_attrs, cred_t *cr) 3285 { 3286 int error; 3287 boolean_t checkonly; 3288 3289 error = 0; 3290 switch (optset_context) { 3291 case SETFN_OPTCOM_CHECKONLY: 3292 checkonly = B_TRUE; 3293 /* 3294 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3295 * inlen != 0 implies value supplied and 3296 * we have to "pretend" to set it. 3297 * inlen == 0 implies that there is no 3298 * value part in T_CHECK request and just validation 3299 * done elsewhere should be enough, we just return here. 
3300 */ 3301 if (inlen == 0) { 3302 *outlenp = 0; 3303 goto done; 3304 } 3305 break; 3306 case SETFN_OPTCOM_NEGOTIATE: 3307 checkonly = B_FALSE; 3308 break; 3309 case SETFN_UD_NEGOTIATE: 3310 case SETFN_CONN_NEGOTIATE: 3311 checkonly = B_FALSE; 3312 /* 3313 * Negotiating local and "association-related" options 3314 * through T_UNITDATA_REQ. 3315 * 3316 * Following routine can filter out ones we do not 3317 * want to be "set" this way. 3318 */ 3319 if (!udp_opt_allow_udr_set(level, name)) { 3320 *outlenp = 0; 3321 error = EINVAL; 3322 goto done; 3323 } 3324 break; 3325 default: 3326 /* 3327 * We should never get here 3328 */ 3329 *outlenp = 0; 3330 error = EINVAL; 3331 goto done; 3332 } 3333 3334 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3335 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3336 3337 error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3338 outvalp, cr, thisdg_attrs, checkonly); 3339 done: 3340 return (error); 3341 } 3342 3343 /* ARGSUSED */ 3344 int 3345 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3346 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3347 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3348 { 3349 conn_t *connp = Q_TO_CONN(q); 3350 int error; 3351 udp_t *udp = connp->conn_udp; 3352 3353 rw_enter(&udp->udp_rwlock, RW_WRITER); 3354 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 3355 outlenp, outvalp, thisdg_attrs, cr); 3356 rw_exit(&udp->udp_rwlock); 3357 return (error); 3358 } 3359 3360 /* 3361 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3362 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3363 * headers, and the udp header. 3364 * Returns failure if can't allocate memory. 
3365 */ 3366 static int 3367 udp_build_hdrs(udp_t *udp) 3368 { 3369 udp_stack_t *us = udp->udp_us; 3370 uchar_t *hdrs; 3371 uint_t hdrs_len; 3372 ip6_t *ip6h; 3373 ip6i_t *ip6i; 3374 udpha_t *udpha; 3375 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3376 size_t sth_wroff; 3377 conn_t *connp = udp->udp_connp; 3378 3379 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 3380 ASSERT(connp != NULL); 3381 3382 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3383 ASSERT(hdrs_len != 0); 3384 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3385 /* Need to reallocate */ 3386 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3387 if (hdrs == NULL) 3388 return (ENOMEM); 3389 3390 if (udp->udp_sticky_hdrs_len != 0) { 3391 kmem_free(udp->udp_sticky_hdrs, 3392 udp->udp_sticky_hdrs_len); 3393 } 3394 udp->udp_sticky_hdrs = hdrs; 3395 udp->udp_sticky_hdrs_len = hdrs_len; 3396 } 3397 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3398 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3399 3400 /* Set header fields not in ipp */ 3401 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3402 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3403 ip6h = (ip6_t *)&ip6i[1]; 3404 } else { 3405 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3406 } 3407 3408 if (!(ipp->ipp_fields & IPPF_ADDR)) 3409 ip6h->ip6_src = udp->udp_v6src; 3410 3411 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3412 udpha->uha_src_port = udp->udp_port; 3413 3414 /* Try to get everything in a single mblk */ 3415 if (hdrs_len > udp->udp_max_hdr_len) { 3416 udp->udp_max_hdr_len = hdrs_len; 3417 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3418 rw_exit(&udp->udp_rwlock); 3419 (void) proto_set_tx_wroff(udp->udp_connp->conn_rq, 3420 udp->udp_connp, sth_wroff); 3421 rw_enter(&udp->udp_rwlock, RW_WRITER); 3422 } 3423 return (0); 3424 } 3425 3426 /* 3427 * This routine retrieves the value of an ND variable in a udpparam_t 3428 * structure. It is called through nd_getset when a user reads the 3429 * variable. 
3430 */ 3431 /* ARGSUSED */ 3432 static int 3433 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3434 { 3435 udpparam_t *udppa = (udpparam_t *)cp; 3436 3437 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 3438 return (0); 3439 } 3440 3441 /* 3442 * Walk through the param array specified registering each element with the 3443 * named dispatch (ND) handler. 3444 */ 3445 static boolean_t 3446 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3447 { 3448 for (; cnt-- > 0; udppa++) { 3449 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3450 if (!nd_load(ndp, udppa->udp_param_name, 3451 udp_param_get, udp_param_set, 3452 (caddr_t)udppa)) { 3453 nd_free(ndp); 3454 return (B_FALSE); 3455 } 3456 } 3457 } 3458 if (!nd_load(ndp, "udp_extra_priv_ports", 3459 udp_extra_priv_ports_get, NULL, NULL)) { 3460 nd_free(ndp); 3461 return (B_FALSE); 3462 } 3463 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3464 NULL, udp_extra_priv_ports_add, NULL)) { 3465 nd_free(ndp); 3466 return (B_FALSE); 3467 } 3468 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3469 NULL, udp_extra_priv_ports_del, NULL)) { 3470 nd_free(ndp); 3471 return (B_FALSE); 3472 } 3473 if (!nd_load(ndp, "udp_status", udp_status_report, NULL, 3474 NULL)) { 3475 nd_free(ndp); 3476 return (B_FALSE); 3477 } 3478 if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL, 3479 NULL)) { 3480 nd_free(ndp); 3481 return (B_FALSE); 3482 } 3483 return (B_TRUE); 3484 } 3485 3486 /* This routine sets an ND variable in a udpparam_t structure. */ 3487 /* ARGSUSED */ 3488 static int 3489 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3490 { 3491 long new_value; 3492 udpparam_t *udppa = (udpparam_t *)cp; 3493 3494 /* 3495 * Fail the request if the new value does not lie within the 3496 * required bounds. 
3497 */ 3498 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3499 new_value < udppa->udp_param_min || 3500 new_value > udppa->udp_param_max) { 3501 return (EINVAL); 3502 } 3503 3504 /* Set the new value */ 3505 udppa->udp_param_value = new_value; 3506 return (0); 3507 } 3508 3509 /* 3510 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 3511 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to 3512 * just count the length needed for allocation. If 'dbuf' is non-NULL, 3513 * then it's assumed to be allocated to be large enough. 3514 * 3515 * Returns zero if trimming of the security option causes all options to go 3516 * away. 3517 */ 3518 static size_t 3519 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 3520 { 3521 struct T_opthdr *toh; 3522 size_t hol = ipp->ipp_hopoptslen; 3523 ip6_hbh_t *dstopt = NULL; 3524 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 3525 size_t tlen, olen, plen; 3526 boolean_t deleting; 3527 const struct ip6_opt *sopt, *lastpad; 3528 struct ip6_opt *dopt; 3529 3530 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 3531 toh->level = IPPROTO_IPV6; 3532 toh->name = IPV6_HOPOPTS; 3533 toh->status = 0; 3534 dstopt = (ip6_hbh_t *)(toh + 1); 3535 } 3536 3537 /* 3538 * If labeling is enabled, then skip the label option 3539 * but get other options if there are any. 3540 */ 3541 if (is_system_labeled()) { 3542 dopt = NULL; 3543 if (dstopt != NULL) { 3544 /* will fill in ip6h_len later */ 3545 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 3546 dopt = (struct ip6_opt *)(dstopt + 1); 3547 } 3548 sopt = (const struct ip6_opt *)(srcopt + 1); 3549 hol -= sizeof (*srcopt); 3550 tlen = sizeof (*dstopt); 3551 lastpad = NULL; 3552 deleting = B_FALSE; 3553 /* 3554 * This loop finds the first (lastpad pointer) of any number of 3555 * pads that preceeds the security option, then treats the 3556 * security option as though it were a pad, and then finds the 3557 * next non-pad option (or end of list). 
3558 * 3559 * It then treats the entire block as one big pad. To preserve 3560 * alignment of any options that follow, or just the end of the 3561 * list, it computes a minimal new padding size that keeps the 3562 * same alignment for the next option. 3563 * 3564 * If it encounters just a sequence of pads with no security 3565 * option, those are copied as-is rather than collapsed. 3566 * 3567 * Note that to handle the end of list case, the code makes one 3568 * loop with 'hol' set to zero. 3569 */ 3570 for (;;) { 3571 if (hol > 0) { 3572 if (sopt->ip6o_type == IP6OPT_PAD1) { 3573 if (lastpad == NULL) 3574 lastpad = sopt; 3575 sopt = (const struct ip6_opt *) 3576 &sopt->ip6o_len; 3577 hol--; 3578 continue; 3579 } 3580 olen = sopt->ip6o_len + sizeof (*sopt); 3581 if (olen > hol) 3582 olen = hol; 3583 if (sopt->ip6o_type == IP6OPT_PADN || 3584 sopt->ip6o_type == ip6opt_ls) { 3585 if (sopt->ip6o_type == ip6opt_ls) 3586 deleting = B_TRUE; 3587 if (lastpad == NULL) 3588 lastpad = sopt; 3589 sopt = (const struct ip6_opt *) 3590 ((const char *)sopt + olen); 3591 hol -= olen; 3592 continue; 3593 } 3594 } else { 3595 /* if nothing was copied at all, then delete */ 3596 if (tlen == sizeof (*dstopt)) 3597 return (0); 3598 /* last pass; pick up any trailing padding */ 3599 olen = 0; 3600 } 3601 if (deleting) { 3602 /* 3603 * compute aligning effect of deleted material 3604 * to reproduce with pad. 
3605 */ 3606 plen = ((const char *)sopt - 3607 (const char *)lastpad) & 7; 3608 tlen += plen; 3609 if (dopt != NULL) { 3610 if (plen == 1) { 3611 dopt->ip6o_type = IP6OPT_PAD1; 3612 } else if (plen > 1) { 3613 plen -= sizeof (*dopt); 3614 dopt->ip6o_type = IP6OPT_PADN; 3615 dopt->ip6o_len = plen; 3616 if (plen > 0) 3617 bzero(dopt + 1, plen); 3618 } 3619 dopt = (struct ip6_opt *) 3620 ((char *)dopt + plen); 3621 } 3622 deleting = B_FALSE; 3623 lastpad = NULL; 3624 } 3625 /* if there's uncopied padding, then copy that now */ 3626 if (lastpad != NULL) { 3627 olen += (const char *)sopt - 3628 (const char *)lastpad; 3629 sopt = lastpad; 3630 lastpad = NULL; 3631 } 3632 if (dopt != NULL && olen > 0) { 3633 bcopy(sopt, dopt, olen); 3634 dopt = (struct ip6_opt *)((char *)dopt + olen); 3635 } 3636 if (hol == 0) 3637 break; 3638 tlen += olen; 3639 sopt = (const struct ip6_opt *) 3640 ((const char *)sopt + olen); 3641 hol -= olen; 3642 } 3643 /* go back and patch up the length value, rounded upward */ 3644 if (dstopt != NULL) 3645 dstopt->ip6h_len = (tlen - 1) >> 3; 3646 } else { 3647 tlen = hol; 3648 if (dstopt != NULL) 3649 bcopy(srcopt, dstopt, hol); 3650 } 3651 3652 tlen += sizeof (*toh); 3653 if (toh != NULL) 3654 toh->len = tlen; 3655 3656 return (tlen); 3657 } 3658 3659 /* 3660 * Update udp_rcv_opt_len from the packet. 3661 * Called when options received, and when no options received but 3662 * udp_ip_recv_opt_len has previously recorded options. 
3663 */ 3664 static void 3665 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) 3666 { 3667 /* Save the options if any */ 3668 if (opt_len > 0) { 3669 if (opt_len > udp->udp_ip_rcv_options_len) { 3670 /* Need to allocate larger buffer */ 3671 if (udp->udp_ip_rcv_options_len != 0) 3672 mi_free((char *)udp->udp_ip_rcv_options); 3673 udp->udp_ip_rcv_options_len = 0; 3674 udp->udp_ip_rcv_options = 3675 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 3676 if (udp->udp_ip_rcv_options != NULL) 3677 udp->udp_ip_rcv_options_len = opt_len; 3678 } 3679 if (udp->udp_ip_rcv_options_len != 0) { 3680 bcopy(opt, udp->udp_ip_rcv_options, opt_len); 3681 /* Adjust length if we are resusing the space */ 3682 udp->udp_ip_rcv_options_len = opt_len; 3683 } 3684 } else if (udp->udp_ip_rcv_options_len != 0) { 3685 /* Clear out previously recorded options */ 3686 mi_free((char *)udp->udp_ip_rcv_options); 3687 udp->udp_ip_rcv_options = NULL; 3688 udp->udp_ip_rcv_options_len = 0; 3689 } 3690 } 3691 3692 static void 3693 udp_queue_fallback(udp_t *udp, mblk_t *mp) 3694 { 3695 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 3696 if (IPCL_IS_NONSTR(udp->udp_connp)) { 3697 /* 3698 * fallback has started but messages have not been moved yet 3699 */ 3700 if (udp->udp_fallback_queue_head == NULL) { 3701 ASSERT(udp->udp_fallback_queue_tail == NULL); 3702 udp->udp_fallback_queue_head = mp; 3703 udp->udp_fallback_queue_tail = mp; 3704 } else { 3705 ASSERT(udp->udp_fallback_queue_tail != NULL); 3706 udp->udp_fallback_queue_tail->b_next = mp; 3707 udp->udp_fallback_queue_tail = mp; 3708 } 3709 mutex_exit(&udp->udp_recv_lock); 3710 } else { 3711 /* 3712 * no more fallbacks possible, ok to drop lock. 
3713 */ 3714 mutex_exit(&udp->udp_recv_lock); 3715 putnext(udp->udp_connp->conn_rq, mp); 3716 } 3717 } 3718 3719 /* ARGSUSED2 */ 3720 static void 3721 udp_input(void *arg1, mblk_t *mp, void *arg2) 3722 { 3723 conn_t *connp = (conn_t *)arg1; 3724 struct T_unitdata_ind *tudi; 3725 uchar_t *rptr; /* Pointer to IP header */ 3726 int hdr_length; /* Length of IP+UDP headers */ 3727 int opt_len; 3728 int udi_size; /* Size of T_unitdata_ind */ 3729 int mp_len; 3730 udp_t *udp; 3731 udpha_t *udpha; 3732 int ipversion; 3733 ip6_pkt_t ipp; 3734 ip6_t *ip6h; 3735 ip6i_t *ip6i; 3736 mblk_t *mp1; 3737 mblk_t *options_mp = NULL; 3738 ip_pktinfo_t *pinfo = NULL; 3739 cred_t *cr = NULL; 3740 pid_t cpid; 3741 uint32_t udp_ip_rcv_options_len; 3742 udp_bits_t udp_bits; 3743 cred_t *rcr = connp->conn_cred; 3744 udp_stack_t *us; 3745 3746 ASSERT(connp->conn_flags & IPCL_UDPCONN); 3747 3748 udp = connp->conn_udp; 3749 us = udp->udp_us; 3750 rptr = mp->b_rptr; 3751 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3752 ASSERT(OK_32PTR(rptr)); 3753 3754 /* 3755 * IP should have prepended the options data in an M_CTL 3756 * Check M_CTL "type" to make sure are not here bcos of 3757 * a valid ICMP message 3758 */ 3759 if (DB_TYPE(mp) == M_CTL) { 3760 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3761 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3762 IN_PKTINFO) { 3763 /* 3764 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3765 * has been prepended to the packet by IP. We need to 3766 * extract the mblk and adjust the rptr 3767 */ 3768 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3769 options_mp = mp; 3770 mp = mp->b_cont; 3771 rptr = mp->b_rptr; 3772 UDP_STAT(us, udp_in_pktinfo); 3773 } else { 3774 /* 3775 * ICMP messages. 3776 */ 3777 udp_icmp_error(connp, mp); 3778 return; 3779 } 3780 } 3781 3782 mp_len = msgdsize(mp); 3783 /* 3784 * This is the inbound data path. 
3785 * First, we check to make sure the IP version number is correct, 3786 * and then pull the IP and UDP headers into the first mblk. 3787 */ 3788 3789 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 3790 ipp.ipp_fields = 0; 3791 3792 ipversion = IPH_HDR_VERSION(rptr); 3793 3794 rw_enter(&udp->udp_rwlock, RW_READER); 3795 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 3796 udp_bits = udp->udp_bits; 3797 rw_exit(&udp->udp_rwlock); 3798 3799 switch (ipversion) { 3800 case IPV4_VERSION: 3801 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 3802 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 3803 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 3804 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 3805 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 3806 udp->udp_family == AF_INET) { 3807 /* 3808 * Record/update udp_ip_rcv_options with the lock 3809 * held. Not needed for AF_INET6 sockets 3810 * since they don't support a getsockopt of IP_OPTIONS. 3811 */ 3812 rw_enter(&udp->udp_rwlock, RW_WRITER); 3813 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 3814 opt_len); 3815 rw_exit(&udp->udp_rwlock); 3816 } 3817 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. */ 3818 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 3819 udp->udp_ip_recvpktinfo) { 3820 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 3821 ipp.ipp_fields |= IPPF_IFINDEX; 3822 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 3823 } 3824 } 3825 break; 3826 case IPV6_VERSION: 3827 /* 3828 * IPv6 packets can only be received by applications 3829 * that are prepared to receive IPv6 addresses. 3830 * The IP fanout must ensure this. 
3831 */ 3832 ASSERT(udp->udp_family == AF_INET6); 3833 3834 ip6h = (ip6_t *)rptr; 3835 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 3836 3837 if (ip6h->ip6_nxt != IPPROTO_UDP) { 3838 uint8_t nexthdrp; 3839 /* Look for ifindex information */ 3840 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3841 ip6i = (ip6i_t *)ip6h; 3842 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 3843 goto tossit; 3844 3845 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3846 ASSERT(ip6i->ip6i_ifindex != 0); 3847 ipp.ipp_fields |= IPPF_IFINDEX; 3848 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3849 } 3850 rptr = (uchar_t *)&ip6i[1]; 3851 mp->b_rptr = rptr; 3852 if (rptr == mp->b_wptr) { 3853 mp1 = mp->b_cont; 3854 freeb(mp); 3855 mp = mp1; 3856 rptr = mp->b_rptr; 3857 } 3858 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 3859 goto tossit; 3860 ip6h = (ip6_t *)rptr; 3861 mp_len = msgdsize(mp); 3862 } 3863 /* 3864 * Find any potentially interesting extension headers 3865 * as well as the length of the IPv6 + extension 3866 * headers. 3867 */ 3868 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 3869 UDPH_SIZE; 3870 ASSERT(nexthdrp == IPPROTO_UDP); 3871 } else { 3872 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 3873 ip6i = NULL; 3874 } 3875 break; 3876 default: 3877 ASSERT(0); 3878 } 3879 3880 /* 3881 * IP inspected the UDP header thus all of it must be in the mblk. 3882 * UDP length check is performed for IPv6 packets and IPv4 packets 3883 * to check if the size of the packet as specified 3884 * by the header is the same as the physical size of the packet. 3885 * FIXME? Didn't IP already check this? 3886 */ 3887 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 3888 if ((MBLKL(mp) < hdr_length) || 3889 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 3890 goto tossit; 3891 } 3892 3893 3894 /* Walk past the headers unless UDP_RCVHDR was set. */ 3895 if (!udp_bits.udpb_rcvhdr) { 3896 mp->b_rptr = rptr + hdr_length; 3897 mp_len -= hdr_length; 3898 } 3899 3900 /* 3901 * This is the inbound data path. 
Packets are passed upstream as 3902 * T_UNITDATA_IND messages with full IP headers still attached. 3903 */ 3904 if (udp->udp_family == AF_INET) { 3905 sin_t *sin; 3906 3907 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 3908 3909 /* 3910 * Normally only send up the source address. 3911 * If IP_RECVDSTADDR is set we include the destination IP 3912 * address as an option. With IP_RECVOPTS we include all 3913 * the IP options. 3914 */ 3915 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3916 if (udp_bits.udpb_recvdstaddr) { 3917 udi_size += sizeof (struct T_opthdr) + 3918 sizeof (struct in_addr); 3919 UDP_STAT(us, udp_in_recvdstaddr); 3920 } 3921 3922 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 3923 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3924 udi_size += sizeof (struct T_opthdr) + 3925 sizeof (struct in_pktinfo); 3926 UDP_STAT(us, udp_ip_rcvpktinfo); 3927 } 3928 3929 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 3930 udi_size += sizeof (struct T_opthdr) + opt_len; 3931 UDP_STAT(us, udp_in_recvopts); 3932 } 3933 3934 /* 3935 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 3936 * space accordingly 3937 */ 3938 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 3939 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3940 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 3941 UDP_STAT(us, udp_in_recvif); 3942 } 3943 3944 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 3945 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 3946 udi_size += sizeof (struct T_opthdr) + 3947 sizeof (struct sockaddr_dl); 3948 UDP_STAT(us, udp_in_recvslla); 3949 } 3950 3951 if ((udp_bits.udpb_recvucred) && 3952 (cr = msg_getcred(mp, &cpid)) != NULL) { 3953 udi_size += sizeof (struct T_opthdr) + ucredsize; 3954 UDP_STAT(us, udp_in_recvucred); 3955 } 3956 3957 /* 3958 * If SO_TIMESTAMP is set allocate the appropriate sized 3959 * buffer. 
Since gethrestime() expects a pointer aligned 3960 * argument, we allocate space necessary for extra 3961 * alignment (even though it might not be used). 3962 */ 3963 if (udp_bits.udpb_timestamp) { 3964 udi_size += sizeof (struct T_opthdr) + 3965 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3966 UDP_STAT(us, udp_in_timestamp); 3967 } 3968 3969 /* 3970 * If IP_RECVTTL is set allocate the appropriate sized buffer 3971 */ 3972 if (udp_bits.udpb_recvttl) { 3973 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 3974 UDP_STAT(us, udp_in_recvttl); 3975 } 3976 3977 /* Allocate a message block for the T_UNITDATA_IND structure. */ 3978 mp1 = allocb(udi_size, BPRI_MED); 3979 if (mp1 == NULL) { 3980 freemsg(mp); 3981 if (options_mp != NULL) 3982 freeb(options_mp); 3983 BUMP_MIB(&us->us_udp_mib, udpInErrors); 3984 return; 3985 } 3986 mp1->b_cont = mp; 3987 mp = mp1; 3988 mp->b_datap->db_type = M_PROTO; 3989 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3990 mp->b_wptr = (uchar_t *)tudi + udi_size; 3991 tudi->PRIM_type = T_UNITDATA_IND; 3992 tudi->SRC_length = sizeof (sin_t); 3993 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3994 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3995 sizeof (sin_t); 3996 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3997 tudi->OPT_length = udi_size; 3998 sin = (sin_t *)&tudi[1]; 3999 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4000 sin->sin_port = udpha->uha_src_port; 4001 sin->sin_family = udp->udp_family; 4002 *(uint32_t *)&sin->sin_zero[0] = 0; 4003 *(uint32_t *)&sin->sin_zero[4] = 0; 4004 4005 /* 4006 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4007 * IP_RECVTTL has been set. 4008 */ 4009 if (udi_size != 0) { 4010 /* 4011 * Copy in destination address before options to avoid 4012 * any padding issues. 
4013 */ 4014 char *dstopt; 4015 4016 dstopt = (char *)&sin[1]; 4017 if (udp_bits.udpb_recvdstaddr) { 4018 struct T_opthdr *toh; 4019 ipaddr_t *dstptr; 4020 4021 toh = (struct T_opthdr *)dstopt; 4022 toh->level = IPPROTO_IP; 4023 toh->name = IP_RECVDSTADDR; 4024 toh->len = sizeof (struct T_opthdr) + 4025 sizeof (ipaddr_t); 4026 toh->status = 0; 4027 dstopt += sizeof (struct T_opthdr); 4028 dstptr = (ipaddr_t *)dstopt; 4029 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4030 dstopt += sizeof (ipaddr_t); 4031 udi_size -= toh->len; 4032 } 4033 4034 if (udp_bits.udpb_recvopts && opt_len > 0) { 4035 struct T_opthdr *toh; 4036 4037 toh = (struct T_opthdr *)dstopt; 4038 toh->level = IPPROTO_IP; 4039 toh->name = IP_RECVOPTS; 4040 toh->len = sizeof (struct T_opthdr) + opt_len; 4041 toh->status = 0; 4042 dstopt += sizeof (struct T_opthdr); 4043 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 4044 opt_len); 4045 dstopt += opt_len; 4046 udi_size -= toh->len; 4047 } 4048 4049 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 4050 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4051 struct T_opthdr *toh; 4052 struct in_pktinfo *pktinfop; 4053 4054 toh = (struct T_opthdr *)dstopt; 4055 toh->level = IPPROTO_IP; 4056 toh->name = IP_PKTINFO; 4057 toh->len = sizeof (struct T_opthdr) + 4058 sizeof (*pktinfop); 4059 toh->status = 0; 4060 dstopt += sizeof (struct T_opthdr); 4061 pktinfop = (struct in_pktinfo *)dstopt; 4062 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 4063 pktinfop->ipi_spec_dst = 4064 pinfo->ip_pkt_match_addr; 4065 pktinfop->ipi_addr.s_addr = 4066 ((ipha_t *)rptr)->ipha_dst; 4067 4068 dstopt += sizeof (struct in_pktinfo); 4069 udi_size -= toh->len; 4070 } 4071 4072 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4073 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4074 4075 struct T_opthdr *toh; 4076 struct sockaddr_dl *dstptr; 4077 4078 toh = (struct T_opthdr *)dstopt; 4079 toh->level = IPPROTO_IP; 4080 toh->name = IP_RECVSLLA; 4081 toh->len = sizeof (struct T_opthdr) + 4082 sizeof 
(struct sockaddr_dl); 4083 toh->status = 0; 4084 dstopt += sizeof (struct T_opthdr); 4085 dstptr = (struct sockaddr_dl *)dstopt; 4086 bcopy(&pinfo->ip_pkt_slla, dstptr, 4087 sizeof (struct sockaddr_dl)); 4088 dstopt += sizeof (struct sockaddr_dl); 4089 udi_size -= toh->len; 4090 } 4091 4092 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4093 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4094 4095 struct T_opthdr *toh; 4096 uint_t *dstptr; 4097 4098 toh = (struct T_opthdr *)dstopt; 4099 toh->level = IPPROTO_IP; 4100 toh->name = IP_RECVIF; 4101 toh->len = sizeof (struct T_opthdr) + 4102 sizeof (uint_t); 4103 toh->status = 0; 4104 dstopt += sizeof (struct T_opthdr); 4105 dstptr = (uint_t *)dstopt; 4106 *dstptr = pinfo->ip_pkt_ifindex; 4107 dstopt += sizeof (uint_t); 4108 udi_size -= toh->len; 4109 } 4110 4111 if (cr != NULL) { 4112 struct T_opthdr *toh; 4113 4114 toh = (struct T_opthdr *)dstopt; 4115 toh->level = SOL_SOCKET; 4116 toh->name = SCM_UCRED; 4117 toh->len = sizeof (struct T_opthdr) + ucredsize; 4118 toh->status = 0; 4119 dstopt += sizeof (struct T_opthdr); 4120 (void) cred2ucred(cr, cpid, dstopt, rcr); 4121 dstopt += ucredsize; 4122 udi_size -= toh->len; 4123 } 4124 4125 if (udp_bits.udpb_timestamp) { 4126 struct T_opthdr *toh; 4127 4128 toh = (struct T_opthdr *)dstopt; 4129 toh->level = SOL_SOCKET; 4130 toh->name = SCM_TIMESTAMP; 4131 toh->len = sizeof (struct T_opthdr) + 4132 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4133 toh->status = 0; 4134 dstopt += sizeof (struct T_opthdr); 4135 /* Align for gethrestime() */ 4136 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4137 sizeof (intptr_t)); 4138 gethrestime((timestruc_t *)dstopt); 4139 dstopt = (char *)toh + toh->len; 4140 udi_size -= toh->len; 4141 } 4142 4143 /* 4144 * CAUTION: 4145 * Due to aligment issues 4146 * Processing of IP_RECVTTL option 4147 * should always be the last. Adding 4148 * any option processing after this will 4149 * cause alignment panic. 
4150 */ 4151 if (udp_bits.udpb_recvttl) { 4152 struct T_opthdr *toh; 4153 uint8_t *dstptr; 4154 4155 toh = (struct T_opthdr *)dstopt; 4156 toh->level = IPPROTO_IP; 4157 toh->name = IP_RECVTTL; 4158 toh->len = sizeof (struct T_opthdr) + 4159 sizeof (uint8_t); 4160 toh->status = 0; 4161 dstopt += sizeof (struct T_opthdr); 4162 dstptr = (uint8_t *)dstopt; 4163 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4164 dstopt += sizeof (uint8_t); 4165 udi_size -= toh->len; 4166 } 4167 4168 /* Consumed all of allocated space */ 4169 ASSERT(udi_size == 0); 4170 } 4171 } else { 4172 sin6_t *sin6; 4173 4174 /* 4175 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4176 * 4177 * Normally we only send up the address. If receiving of any 4178 * optional receive side information is enabled, we also send 4179 * that up as options. 4180 */ 4181 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4182 4183 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4184 IPPF_RTHDR|IPPF_IFINDEX)) { 4185 if ((udp_bits.udpb_ipv6_recvhopopts) && 4186 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4187 size_t hlen; 4188 4189 UDP_STAT(us, udp_in_recvhopopts); 4190 hlen = copy_hop_opts(&ipp, NULL); 4191 if (hlen == 0) 4192 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4193 udi_size += hlen; 4194 } 4195 if (((udp_bits.udpb_ipv6_recvdstopts) || 4196 udp_bits.udpb_old_ipv6_recvdstopts) && 4197 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4198 udi_size += sizeof (struct T_opthdr) + 4199 ipp.ipp_dstoptslen; 4200 UDP_STAT(us, udp_in_recvdstopts); 4201 } 4202 if ((((udp_bits.udpb_ipv6_recvdstopts) && 4203 udp_bits.udpb_ipv6_recvrthdr && 4204 (ipp.ipp_fields & IPPF_RTHDR)) || 4205 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 4206 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4207 udi_size += sizeof (struct T_opthdr) + 4208 ipp.ipp_rtdstoptslen; 4209 UDP_STAT(us, udp_in_recvrtdstopts); 4210 } 4211 if ((udp_bits.udpb_ipv6_recvrthdr) && 4212 (ipp.ipp_fields & IPPF_RTHDR)) { 4213 udi_size += sizeof (struct T_opthdr) + 4214 ipp.ipp_rthdrlen; 
4215 UDP_STAT(us, udp_in_recvrthdr); 4216 } 4217 if ((udp_bits.udpb_ip_recvpktinfo) && 4218 (ipp.ipp_fields & IPPF_IFINDEX)) { 4219 udi_size += sizeof (struct T_opthdr) + 4220 sizeof (struct in6_pktinfo); 4221 UDP_STAT(us, udp_in_recvpktinfo); 4222 } 4223 4224 } 4225 if ((udp_bits.udpb_recvucred) && 4226 (cr = msg_getcred(mp, &cpid)) != NULL) { 4227 udi_size += sizeof (struct T_opthdr) + ucredsize; 4228 UDP_STAT(us, udp_in_recvucred); 4229 } 4230 4231 /* 4232 * If SO_TIMESTAMP is set allocate the appropriate sized 4233 * buffer. Since gethrestime() expects a pointer aligned 4234 * argument, we allocate space necessary for extra 4235 * alignment (even though it might not be used). 4236 */ 4237 if (udp_bits.udpb_timestamp) { 4238 udi_size += sizeof (struct T_opthdr) + 4239 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4240 UDP_STAT(us, udp_in_timestamp); 4241 } 4242 4243 if (udp_bits.udpb_ipv6_recvhoplimit) { 4244 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4245 UDP_STAT(us, udp_in_recvhoplimit); 4246 } 4247 4248 if (udp_bits.udpb_ipv6_recvtclass) { 4249 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4250 UDP_STAT(us, udp_in_recvtclass); 4251 } 4252 4253 mp1 = allocb(udi_size, BPRI_MED); 4254 if (mp1 == NULL) { 4255 freemsg(mp); 4256 if (options_mp != NULL) 4257 freeb(options_mp); 4258 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4259 return; 4260 } 4261 mp1->b_cont = mp; 4262 mp = mp1; 4263 mp->b_datap->db_type = M_PROTO; 4264 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4265 mp->b_wptr = (uchar_t *)tudi + udi_size; 4266 tudi->PRIM_type = T_UNITDATA_IND; 4267 tudi->SRC_length = sizeof (sin6_t); 4268 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4269 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4270 sizeof (sin6_t); 4271 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4272 tudi->OPT_length = udi_size; 4273 sin6 = (sin6_t *)&tudi[1]; 4274 if (ipversion == IPV4_VERSION) { 4275 in6_addr_t v6dst; 4276 4277 IN6_IPADDR_TO_V4MAPPED(((ipha_t 
*)rptr)->ipha_src, 4278 &sin6->sin6_addr); 4279 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4280 &v6dst); 4281 sin6->sin6_flowinfo = 0; 4282 sin6->sin6_scope_id = 0; 4283 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4284 connp->conn_zoneid, us->us_netstack); 4285 } else { 4286 sin6->sin6_addr = ip6h->ip6_src; 4287 /* No sin6_flowinfo per API */ 4288 sin6->sin6_flowinfo = 0; 4289 /* For link-scope source pass up scope id */ 4290 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4291 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4292 sin6->sin6_scope_id = ipp.ipp_ifindex; 4293 else 4294 sin6->sin6_scope_id = 0; 4295 sin6->__sin6_src_id = ip_srcid_find_addr( 4296 &ip6h->ip6_dst, connp->conn_zoneid, 4297 us->us_netstack); 4298 } 4299 sin6->sin6_port = udpha->uha_src_port; 4300 sin6->sin6_family = udp->udp_family; 4301 4302 if (udi_size != 0) { 4303 uchar_t *dstopt; 4304 4305 dstopt = (uchar_t *)&sin6[1]; 4306 if ((udp_bits.udpb_ip_recvpktinfo) && 4307 (ipp.ipp_fields & IPPF_IFINDEX)) { 4308 struct T_opthdr *toh; 4309 struct in6_pktinfo *pkti; 4310 4311 toh = (struct T_opthdr *)dstopt; 4312 toh->level = IPPROTO_IPV6; 4313 toh->name = IPV6_PKTINFO; 4314 toh->len = sizeof (struct T_opthdr) + 4315 sizeof (*pkti); 4316 toh->status = 0; 4317 dstopt += sizeof (struct T_opthdr); 4318 pkti = (struct in6_pktinfo *)dstopt; 4319 if (ipversion == IPV6_VERSION) 4320 pkti->ipi6_addr = ip6h->ip6_dst; 4321 else 4322 IN6_IPADDR_TO_V4MAPPED( 4323 ((ipha_t *)rptr)->ipha_dst, 4324 &pkti->ipi6_addr); 4325 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4326 dstopt += sizeof (*pkti); 4327 udi_size -= toh->len; 4328 } 4329 if (udp_bits.udpb_ipv6_recvhoplimit) { 4330 struct T_opthdr *toh; 4331 4332 toh = (struct T_opthdr *)dstopt; 4333 toh->level = IPPROTO_IPV6; 4334 toh->name = IPV6_HOPLIMIT; 4335 toh->len = sizeof (struct T_opthdr) + 4336 sizeof (uint_t); 4337 toh->status = 0; 4338 dstopt += sizeof (struct T_opthdr); 4339 if (ipversion == IPV6_VERSION) 4340 *(uint_t *)dstopt = ip6h->ip6_hops; 4341 else 4342 
*(uint_t *)dstopt = 4343 ((ipha_t *)rptr)->ipha_ttl; 4344 dstopt += sizeof (uint_t); 4345 udi_size -= toh->len; 4346 } 4347 if (udp_bits.udpb_ipv6_recvtclass) { 4348 struct T_opthdr *toh; 4349 4350 toh = (struct T_opthdr *)dstopt; 4351 toh->level = IPPROTO_IPV6; 4352 toh->name = IPV6_TCLASS; 4353 toh->len = sizeof (struct T_opthdr) + 4354 sizeof (uint_t); 4355 toh->status = 0; 4356 dstopt += sizeof (struct T_opthdr); 4357 if (ipversion == IPV6_VERSION) { 4358 *(uint_t *)dstopt = 4359 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4360 } else { 4361 ipha_t *ipha = (ipha_t *)rptr; 4362 *(uint_t *)dstopt = 4363 ipha->ipha_type_of_service; 4364 } 4365 dstopt += sizeof (uint_t); 4366 udi_size -= toh->len; 4367 } 4368 if ((udp_bits.udpb_ipv6_recvhopopts) && 4369 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4370 size_t hlen; 4371 4372 hlen = copy_hop_opts(&ipp, dstopt); 4373 dstopt += hlen; 4374 udi_size -= hlen; 4375 } 4376 if ((udp_bits.udpb_ipv6_recvdstopts) && 4377 (udp_bits.udpb_ipv6_recvrthdr) && 4378 (ipp.ipp_fields & IPPF_RTHDR) && 4379 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4380 struct T_opthdr *toh; 4381 4382 toh = (struct T_opthdr *)dstopt; 4383 toh->level = IPPROTO_IPV6; 4384 toh->name = IPV6_DSTOPTS; 4385 toh->len = sizeof (struct T_opthdr) + 4386 ipp.ipp_rtdstoptslen; 4387 toh->status = 0; 4388 dstopt += sizeof (struct T_opthdr); 4389 bcopy(ipp.ipp_rtdstopts, dstopt, 4390 ipp.ipp_rtdstoptslen); 4391 dstopt += ipp.ipp_rtdstoptslen; 4392 udi_size -= toh->len; 4393 } 4394 if ((udp_bits.udpb_ipv6_recvrthdr) && 4395 (ipp.ipp_fields & IPPF_RTHDR)) { 4396 struct T_opthdr *toh; 4397 4398 toh = (struct T_opthdr *)dstopt; 4399 toh->level = IPPROTO_IPV6; 4400 toh->name = IPV6_RTHDR; 4401 toh->len = sizeof (struct T_opthdr) + 4402 ipp.ipp_rthdrlen; 4403 toh->status = 0; 4404 dstopt += sizeof (struct T_opthdr); 4405 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4406 dstopt += ipp.ipp_rthdrlen; 4407 udi_size -= toh->len; 4408 } 4409 if ((udp_bits.udpb_ipv6_recvdstopts) && 4410 (ipp.ipp_fields & 
IPPF_DSTOPTS)) { 4411 struct T_opthdr *toh; 4412 4413 toh = (struct T_opthdr *)dstopt; 4414 toh->level = IPPROTO_IPV6; 4415 toh->name = IPV6_DSTOPTS; 4416 toh->len = sizeof (struct T_opthdr) + 4417 ipp.ipp_dstoptslen; 4418 toh->status = 0; 4419 dstopt += sizeof (struct T_opthdr); 4420 bcopy(ipp.ipp_dstopts, dstopt, 4421 ipp.ipp_dstoptslen); 4422 dstopt += ipp.ipp_dstoptslen; 4423 udi_size -= toh->len; 4424 } 4425 if (cr != NULL) { 4426 struct T_opthdr *toh; 4427 4428 toh = (struct T_opthdr *)dstopt; 4429 toh->level = SOL_SOCKET; 4430 toh->name = SCM_UCRED; 4431 toh->len = sizeof (struct T_opthdr) + ucredsize; 4432 toh->status = 0; 4433 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4434 dstopt += toh->len; 4435 udi_size -= toh->len; 4436 } 4437 if (udp_bits.udpb_timestamp) { 4438 struct T_opthdr *toh; 4439 4440 toh = (struct T_opthdr *)dstopt; 4441 toh->level = SOL_SOCKET; 4442 toh->name = SCM_TIMESTAMP; 4443 toh->len = sizeof (struct T_opthdr) + 4444 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4445 toh->status = 0; 4446 dstopt += sizeof (struct T_opthdr); 4447 /* Align for gethrestime() */ 4448 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4449 sizeof (intptr_t)); 4450 gethrestime((timestruc_t *)dstopt); 4451 dstopt = (uchar_t *)toh + toh->len; 4452 udi_size -= toh->len; 4453 } 4454 4455 /* Consumed all of allocated space */ 4456 ASSERT(udi_size == 0); 4457 } 4458 #undef sin6 4459 /* No IP_RECVDSTADDR for IPv6. 
*/ 4460 } 4461 4462 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4463 if (options_mp != NULL) 4464 freeb(options_mp); 4465 4466 if (IPCL_IS_NONSTR(connp)) { 4467 int error; 4468 4469 if ((*connp->conn_upcalls->su_recv) 4470 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 4471 NULL) < 0) { 4472 mutex_enter(&udp->udp_recv_lock); 4473 if (error == ENOSPC) { 4474 /* 4475 * let's confirm while holding the lock 4476 */ 4477 if ((*connp->conn_upcalls->su_recv) 4478 (connp->conn_upper_handle, NULL, 0, 0, 4479 &error, NULL) < 0) { 4480 if (error == ENOSPC) { 4481 connp->conn_flow_cntrld = 4482 B_TRUE; 4483 } else { 4484 ASSERT(error == EOPNOTSUPP); 4485 } 4486 } 4487 mutex_exit(&udp->udp_recv_lock); 4488 } else { 4489 ASSERT(error == EOPNOTSUPP); 4490 udp_queue_fallback(udp, mp); 4491 } 4492 } 4493 } else { 4494 putnext(connp->conn_rq, mp); 4495 } 4496 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 4497 return; 4498 4499 tossit: 4500 freemsg(mp); 4501 if (options_mp != NULL) 4502 freeb(options_mp); 4503 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4504 } 4505 4506 /* 4507 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 4508 * information that can be changing beneath us. 
4509 */ 4510 mblk_t * 4511 udp_snmp_get(queue_t *q, mblk_t *mpctl) 4512 { 4513 mblk_t *mpdata; 4514 mblk_t *mp_conn_ctl; 4515 mblk_t *mp_attr_ctl; 4516 mblk_t *mp6_conn_ctl; 4517 mblk_t *mp6_attr_ctl; 4518 mblk_t *mp_conn_tail; 4519 mblk_t *mp_attr_tail; 4520 mblk_t *mp6_conn_tail; 4521 mblk_t *mp6_attr_tail; 4522 struct opthdr *optp; 4523 mib2_udpEntry_t ude; 4524 mib2_udp6Entry_t ude6; 4525 mib2_transportMLPEntry_t mlp; 4526 int state; 4527 zoneid_t zoneid; 4528 int i; 4529 connf_t *connfp; 4530 conn_t *connp = Q_TO_CONN(q); 4531 int v4_conn_idx; 4532 int v6_conn_idx; 4533 boolean_t needattr; 4534 udp_t *udp; 4535 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4536 udp_stack_t *us = connp->conn_netstack->netstack_udp; 4537 mblk_t *mp2ctl; 4538 4539 /* 4540 * make a copy of the original message 4541 */ 4542 mp2ctl = copymsg(mpctl); 4543 4544 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 4545 if (mpctl == NULL || 4546 (mpdata = mpctl->b_cont) == NULL || 4547 (mp_conn_ctl = copymsg(mpctl)) == NULL || 4548 (mp_attr_ctl = copymsg(mpctl)) == NULL || 4549 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 4550 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 4551 freemsg(mp_conn_ctl); 4552 freemsg(mp_attr_ctl); 4553 freemsg(mp6_conn_ctl); 4554 freemsg(mpctl); 4555 freemsg(mp2ctl); 4556 return (0); 4557 } 4558 4559 zoneid = connp->conn_zoneid; 4560 4561 /* fixed length structure for IPv4 and IPv6 counters */ 4562 SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 4563 SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 4564 /* synchronize 64- and 32-bit counters */ 4565 SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams); 4566 SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams); 4567 4568 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4569 optp->level = MIB2_UDP; 4570 optp->name = 0; 4571 (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib, 4572 sizeof (us->us_udp_mib)); 4573 optp->len = 
msgdsize(mpdata); 4574 qreply(q, mpctl); 4575 4576 mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 4577 v4_conn_idx = v6_conn_idx = 0; 4578 4579 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4580 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4581 connp = NULL; 4582 4583 while ((connp = ipcl_get_next_conn(connfp, connp, 4584 IPCL_UDPCONN))) { 4585 udp = connp->conn_udp; 4586 if (zoneid != connp->conn_zoneid) 4587 continue; 4588 4589 /* 4590 * Note that the port numbers are sent in 4591 * host byte order 4592 */ 4593 4594 if (udp->udp_state == TS_UNBND) 4595 state = MIB2_UDP_unbound; 4596 else if (udp->udp_state == TS_IDLE) 4597 state = MIB2_UDP_idle; 4598 else if (udp->udp_state == TS_DATA_XFER) 4599 state = MIB2_UDP_connected; 4600 else 4601 state = MIB2_UDP_unknown; 4602 4603 needattr = B_FALSE; 4604 bzero(&mlp, sizeof (mlp)); 4605 if (connp->conn_mlp_type != mlptSingle) { 4606 if (connp->conn_mlp_type == mlptShared || 4607 connp->conn_mlp_type == mlptBoth) 4608 mlp.tme_flags |= MIB2_TMEF_SHARED; 4609 if (connp->conn_mlp_type == mlptPrivate || 4610 connp->conn_mlp_type == mlptBoth) 4611 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 4612 needattr = B_TRUE; 4613 } 4614 4615 /* 4616 * Create an IPv4 table entry for IPv4 entries and also 4617 * any IPv6 entries which are bound to in6addr_any 4618 * (i.e. anything a IPv4 peer could connect/send to). 
4619 */ 4620 if (udp->udp_ipversion == IPV4_VERSION || 4621 (udp->udp_state <= TS_IDLE && 4622 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 4623 ude.udpEntryInfo.ue_state = state; 4624 /* 4625 * If in6addr_any this will set it to 4626 * INADDR_ANY 4627 */ 4628 ude.udpLocalAddress = 4629 V4_PART_OF_V6(udp->udp_v6src); 4630 ude.udpLocalPort = ntohs(udp->udp_port); 4631 if (udp->udp_state == TS_DATA_XFER) { 4632 /* 4633 * Can potentially get here for 4634 * v6 socket if another process 4635 * (say, ping) has just done a 4636 * sendto(), changing the state 4637 * from the TS_IDLE above to 4638 * TS_DATA_XFER by the time we hit 4639 * this part of the code. 4640 */ 4641 ude.udpEntryInfo.ue_RemoteAddress = 4642 V4_PART_OF_V6(udp->udp_v6dst); 4643 ude.udpEntryInfo.ue_RemotePort = 4644 ntohs(udp->udp_dstport); 4645 } else { 4646 ude.udpEntryInfo.ue_RemoteAddress = 0; 4647 ude.udpEntryInfo.ue_RemotePort = 0; 4648 } 4649 4650 /* 4651 * We make the assumption that all udp_t 4652 * structs will be created within an address 4653 * region no larger than 32-bits. 4654 */ 4655 ude.udpInstance = (uint32_t)(uintptr_t)udp; 4656 ude.udpCreationProcess = 4657 (udp->udp_open_pid < 0) ? 
4658 MIB2_UNKNOWN_PROCESS : 4659 udp->udp_open_pid; 4660 ude.udpCreationTime = udp->udp_open_time; 4661 4662 (void) snmp_append_data2(mp_conn_ctl->b_cont, 4663 &mp_conn_tail, (char *)&ude, sizeof (ude)); 4664 mlp.tme_connidx = v4_conn_idx++; 4665 if (needattr) 4666 (void) snmp_append_data2( 4667 mp_attr_ctl->b_cont, &mp_attr_tail, 4668 (char *)&mlp, sizeof (mlp)); 4669 } 4670 if (udp->udp_ipversion == IPV6_VERSION) { 4671 ude6.udp6EntryInfo.ue_state = state; 4672 ude6.udp6LocalAddress = udp->udp_v6src; 4673 ude6.udp6LocalPort = ntohs(udp->udp_port); 4674 ude6.udp6IfIndex = udp->udp_bound_if; 4675 if (udp->udp_state == TS_DATA_XFER) { 4676 ude6.udp6EntryInfo.ue_RemoteAddress = 4677 udp->udp_v6dst; 4678 ude6.udp6EntryInfo.ue_RemotePort = 4679 ntohs(udp->udp_dstport); 4680 } else { 4681 ude6.udp6EntryInfo.ue_RemoteAddress = 4682 sin6_null.sin6_addr; 4683 ude6.udp6EntryInfo.ue_RemotePort = 0; 4684 } 4685 /* 4686 * We make the assumption that all udp_t 4687 * structs will be created within an address 4688 * region no larger than 32-bits. 4689 */ 4690 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 4691 ude6.udp6CreationProcess = 4692 (udp->udp_open_pid < 0) ? 4693 MIB2_UNKNOWN_PROCESS : 4694 udp->udp_open_pid; 4695 ude6.udp6CreationTime = udp->udp_open_time; 4696 4697 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 4698 &mp6_conn_tail, (char *)&ude6, 4699 sizeof (ude6)); 4700 mlp.tme_connidx = v6_conn_idx++; 4701 if (needattr) 4702 (void) snmp_append_data2( 4703 mp6_attr_ctl->b_cont, 4704 &mp6_attr_tail, (char *)&mlp, 4705 sizeof (mlp)); 4706 } 4707 } 4708 } 4709 4710 /* IPv4 UDP endpoints */ 4711 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 4712 sizeof (struct T_optmgmt_ack)]; 4713 optp->level = MIB2_UDP; 4714 optp->name = MIB2_UDP_ENTRY; 4715 optp->len = msgdsize(mp_conn_ctl->b_cont); 4716 qreply(q, mp_conn_ctl); 4717 4718 /* table of MLP attributes... 
*/ 4719 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 4720 sizeof (struct T_optmgmt_ack)]; 4721 optp->level = MIB2_UDP; 4722 optp->name = EXPER_XPORT_MLP; 4723 optp->len = msgdsize(mp_attr_ctl->b_cont); 4724 if (optp->len == 0) 4725 freemsg(mp_attr_ctl); 4726 else 4727 qreply(q, mp_attr_ctl); 4728 4729 /* IPv6 UDP endpoints */ 4730 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 4731 sizeof (struct T_optmgmt_ack)]; 4732 optp->level = MIB2_UDP6; 4733 optp->name = MIB2_UDP6_ENTRY; 4734 optp->len = msgdsize(mp6_conn_ctl->b_cont); 4735 qreply(q, mp6_conn_ctl); 4736 4737 /* table of MLP attributes... */ 4738 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 4739 sizeof (struct T_optmgmt_ack)]; 4740 optp->level = MIB2_UDP6; 4741 optp->name = EXPER_XPORT_MLP; 4742 optp->len = msgdsize(mp6_attr_ctl->b_cont); 4743 if (optp->len == 0) 4744 freemsg(mp6_attr_ctl); 4745 else 4746 qreply(q, mp6_attr_ctl); 4747 4748 return (mp2ctl); 4749 } 4750 4751 /* 4752 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 4753 * NOTE: Per MIB-II, UDP has no writable data. 4754 * TODO: If this ever actually tries to set anything, it needs to be 4755 * to do the appropriate locking. 
4756 */ 4757 /* ARGSUSED */ 4758 int 4759 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4760 uchar_t *ptr, int len) 4761 { 4762 switch (level) { 4763 case MIB2_UDP: 4764 return (0); 4765 default: 4766 return (1); 4767 } 4768 } 4769 4770 static void 4771 udp_report_item(mblk_t *mp, udp_t *udp) 4772 { 4773 char *state; 4774 char addrbuf1[INET6_ADDRSTRLEN]; 4775 char addrbuf2[INET6_ADDRSTRLEN]; 4776 uint_t print_len, buf_len; 4777 4778 buf_len = mp->b_datap->db_lim - mp->b_wptr; 4779 ASSERT(buf_len >= 0); 4780 if (buf_len == 0) 4781 return; 4782 4783 if (udp->udp_state == TS_UNBND) 4784 state = "UNBOUND"; 4785 else if (udp->udp_state == TS_IDLE) 4786 state = "IDLE"; 4787 else if (udp->udp_state == TS_DATA_XFER) 4788 state = "CONNECTED"; 4789 else 4790 state = "UnkState"; 4791 print_len = snprintf((char *)mp->b_wptr, buf_len, 4792 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 4793 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 4794 inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)), 4795 inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)), 4796 ntohs(udp->udp_dstport), state); 4797 if (print_len < buf_len) { 4798 mp->b_wptr += print_len; 4799 } else { 4800 mp->b_wptr += buf_len; 4801 } 4802 } 4803 4804 /* Report for ndd "udp_status" */ 4805 /* ARGSUSED */ 4806 static int 4807 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4808 { 4809 zoneid_t zoneid; 4810 connf_t *connfp; 4811 conn_t *connp = Q_TO_CONN(q); 4812 udp_t *udp = connp->conn_udp; 4813 int i; 4814 udp_stack_t *us = udp->udp_us; 4815 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4816 4817 /* 4818 * Because of the ndd constraint, at most we can have 64K buffer 4819 * to put in all UDP info. So to be more efficient, just 4820 * allocate a 64K buffer here, assuming we need that large buffer. 4821 * This may be a problem as any user can read udp_status. 
Therefore 4822 * we limit the rate of doing this using us_ndd_get_info_interval. 4823 * This should be OK as normal users should not do this too often. 4824 */ 4825 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 4826 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 4827 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 4828 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 4829 return (0); 4830 } 4831 } 4832 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 4833 /* The following may work even if we cannot get a large buf. */ 4834 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 4835 return (0); 4836 } 4837 (void) mi_mpprintf(mp, 4838 "UDP " MI_COL_HDRPAD_STR 4839 /* 12345678[89ABCDEF] */ 4840 " zone lport src addr dest addr port state"); 4841 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 4842 4843 zoneid = connp->conn_zoneid; 4844 4845 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4846 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4847 connp = NULL; 4848 4849 while ((connp = ipcl_get_next_conn(connfp, connp, 4850 IPCL_UDPCONN))) { 4851 udp = connp->conn_udp; 4852 if (zoneid != GLOBAL_ZONEID && 4853 zoneid != connp->conn_zoneid) 4854 continue; 4855 4856 udp_report_item(mp->b_cont, udp); 4857 } 4858 } 4859 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 4860 return (0); 4861 } 4862 4863 /* 4864 * This routine creates a T_UDERROR_IND message and passes it upstream. 4865 * The address and options are copied from the T_UNITDATA_REQ message 4866 * passed in mp. This message is freed. 
4867 */ 4868 static void 4869 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 4870 t_scalar_t err) 4871 { 4872 struct T_unitdata_req *tudr; 4873 mblk_t *mp1; 4874 uchar_t *optaddr; 4875 t_scalar_t optlen; 4876 4877 if (DB_TYPE(mp) == M_DATA) { 4878 ASSERT(destaddr != NULL && destlen != 0); 4879 optaddr = NULL; 4880 optlen = 0; 4881 } else { 4882 if ((mp->b_wptr < mp->b_rptr) || 4883 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 4884 goto done; 4885 } 4886 tudr = (struct T_unitdata_req *)mp->b_rptr; 4887 destaddr = mp->b_rptr + tudr->DEST_offset; 4888 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 4889 destaddr + tudr->DEST_length < mp->b_rptr || 4890 destaddr + tudr->DEST_length > mp->b_wptr) { 4891 goto done; 4892 } 4893 optaddr = mp->b_rptr + tudr->OPT_offset; 4894 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 4895 optaddr + tudr->OPT_length < mp->b_rptr || 4896 optaddr + tudr->OPT_length > mp->b_wptr) { 4897 goto done; 4898 } 4899 destlen = tudr->DEST_length; 4900 optlen = tudr->OPT_length; 4901 } 4902 4903 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 4904 (char *)optaddr, optlen, err); 4905 if (mp1 != NULL) 4906 qreply(q, mp1); 4907 4908 done: 4909 freemsg(mp); 4910 } 4911 4912 /* 4913 * This routine removes a port number association from a stream. It 4914 * is called by udp_wput to handle T_UNBIND_REQ messages. 4915 */ 4916 static void 4917 udp_tpi_unbind(queue_t *q, mblk_t *mp) 4918 { 4919 conn_t *connp = Q_TO_CONN(q); 4920 int error; 4921 4922 error = udp_do_unbind(connp); 4923 if (error) { 4924 if (error < 0) 4925 udp_err_ack(q, mp, -error, 0); 4926 else 4927 udp_err_ack(q, mp, TSYSERR, error); 4928 return; 4929 } 4930 4931 mp = mi_tpi_ok_ack_alloc(mp); 4932 ASSERT(mp != NULL); 4933 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4934 qreply(q, mp); 4935 } 4936 4937 /* 4938 * Don't let port fall into the privileged range. 
4939 * Since the extra privileged ports can be arbitrary we also 4940 * ensure that we exclude those from consideration. 4941 * us->us_epriv_ports is not sorted thus we loop over it until 4942 * there are no changes. 4943 */ 4944 static in_port_t 4945 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 4946 { 4947 int i; 4948 in_port_t nextport; 4949 boolean_t restart = B_FALSE; 4950 udp_stack_t *us = udp->udp_us; 4951 4952 if (random && udp_random_anon_port != 0) { 4953 (void) random_get_pseudo_bytes((uint8_t *)&port, 4954 sizeof (in_port_t)); 4955 /* 4956 * Unless changed by a sys admin, the smallest anon port 4957 * is 32768 and the largest anon port is 65535. It is 4958 * very likely (50%) for the random port to be smaller 4959 * than the smallest anon port. When that happens, 4960 * add port % (anon port range) to the smallest anon 4961 * port to get the random port. It should fall into the 4962 * valid anon port range. 4963 */ 4964 if (port < us->us_smallest_anon_port) { 4965 port = us->us_smallest_anon_port + 4966 port % (us->us_largest_anon_port - 4967 us->us_smallest_anon_port); 4968 } 4969 } 4970 4971 retry: 4972 if (port < us->us_smallest_anon_port) 4973 port = us->us_smallest_anon_port; 4974 4975 if (port > us->us_largest_anon_port) { 4976 port = us->us_smallest_anon_port; 4977 if (restart) 4978 return (0); 4979 restart = B_TRUE; 4980 } 4981 4982 if (port < us->us_smallest_nonpriv_port) 4983 port = us->us_smallest_nonpriv_port; 4984 4985 for (i = 0; i < us->us_num_epriv_ports; i++) { 4986 if (port == us->us_epriv_ports[i]) { 4987 port++; 4988 /* 4989 * Make sure that the port is in the 4990 * valid range. 
4991 */ 4992 goto retry; 4993 } 4994 } 4995 4996 if (is_system_labeled() && 4997 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 4998 port, IPPROTO_UDP, B_TRUE)) != 0) { 4999 port = nextport; 5000 goto retry; 5001 } 5002 5003 return (port); 5004 } 5005 5006 static int 5007 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst, 5008 boolean_t *update_lastdst) 5009 { 5010 int err; 5011 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 5012 udp_t *udp = Q_TO_UDP(wq); 5013 udp_stack_t *us = udp->udp_us; 5014 cred_t *cr; 5015 5016 /* 5017 * All Solaris components should pass a db_credp 5018 * for this message, hence we ASSERT. 5019 * On production kernels we return an error to be robust against 5020 * random streams modules sitting on top of us. 5021 */ 5022 cr = msg_getcred(mp, NULL); 5023 ASSERT(cr != NULL); 5024 if (cr == NULL) 5025 return (EINVAL); 5026 5027 /* Note that we use the cred/label from the message to handle MLP */ 5028 err = tsol_compute_label(cr, dst, 5029 opt_storage, udp->udp_connp->conn_mac_exempt, 5030 us->us_netstack->netstack_ip); 5031 if (err == 0) { 5032 err = tsol_update_options(&udp->udp_ip_snd_options, 5033 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 5034 opt_storage); 5035 } 5036 if (err != 0) { 5037 DTRACE_PROBE4( 5038 tx__ip__log__info__updatelabel__udp, 5039 char *, "queue(1) failed to update options(2) on mp(3)", 5040 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5041 } else { 5042 *update_lastdst = B_TRUE; 5043 } 5044 return (err); 5045 } 5046 5047 static mblk_t * 5048 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 5049 uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg, 5050 cred_t *cr, pid_t pid) 5051 { 5052 udp_t *udp = connp->conn_udp; 5053 mblk_t *mp1 = mp; 5054 mblk_t *mp2; 5055 ipha_t *ipha; 5056 int ip_hdr_length; 5057 uint32_t ip_len; 5058 udpha_t *udpha; 5059 boolean_t lock_held = B_FALSE; 5060 in_port_t uha_src_port; 5061 udpattrs_t attrs; 5062 uchar_t 
ip_snd_opt[IP_MAX_OPT_LENGTH]; 5063 uint32_t ip_snd_opt_len = 0; 5064 ip4_pkt_t pktinfo; 5065 ip4_pkt_t *pktinfop = &pktinfo; 5066 ip_opt_info_t optinfo; 5067 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5068 udp_stack_t *us = udp->udp_us; 5069 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 5070 queue_t *q = connp->conn_wq; 5071 ire_t *ire; 5072 in6_addr_t v6dst; 5073 boolean_t update_lastdst = B_FALSE; 5074 5075 *error = 0; 5076 pktinfop->ip4_ill_index = 0; 5077 pktinfop->ip4_addr = INADDR_ANY; 5078 optinfo.ip_opt_flags = 0; 5079 optinfo.ip_opt_ill_index = 0; 5080 5081 if (v4dst == INADDR_ANY) 5082 v4dst = htonl(INADDR_LOOPBACK); 5083 5084 /* 5085 * If options passed in, feed it for verification and handling 5086 */ 5087 attrs.udpattr_credset = B_FALSE; 5088 if (IPCL_IS_NONSTR(connp)) { 5089 if (msg->msg_controllen != 0) { 5090 attrs.udpattr_ipp4 = pktinfop; 5091 attrs.udpattr_mb = mp; 5092 5093 rw_enter(&udp->udp_rwlock, RW_WRITER); 5094 *error = process_auxiliary_options(connp, 5095 msg->msg_control, msg->msg_controllen, 5096 &attrs, &udp_opt_obj, udp_opt_set, cr); 5097 rw_exit(&udp->udp_rwlock); 5098 if (*error) 5099 goto done; 5100 } 5101 } else { 5102 if (DB_TYPE(mp) != M_DATA) { 5103 mp1 = mp->b_cont; 5104 if (((struct T_unitdata_req *) 5105 mp->b_rptr)->OPT_length != 0) { 5106 attrs.udpattr_ipp4 = pktinfop; 5107 attrs.udpattr_mb = mp; 5108 if (udp_unitdata_opt_process(q, mp, error, 5109 &attrs) < 0) 5110 goto done; 5111 /* 5112 * Note: success in processing options. 5113 * mp option buffer represented by 5114 * OPT_length/offset now potentially modified 5115 * and contain option setting results 5116 */ 5117 ASSERT(*error == 0); 5118 } 5119 } 5120 } 5121 5122 /* mp1 points to the M_DATA mblk carrying the packet */ 5123 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 5124 5125 /* 5126 * Determine whether we need to mark the mblk with the user's 5127 * credentials. 5128 * If labeled then sockfs would have already done this. 
5129 */ 5130 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 5131 5132 ire = connp->conn_ire_cache; 5133 if (CLASSD(v4dst) || (ire == NULL) || (ire->ire_addr != v4dst) || 5134 (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) { 5135 if (cr != NULL && msg_getcred(mp, NULL) == NULL) 5136 mblk_setcred(mp, cr, pid); 5137 } 5138 5139 rw_enter(&udp->udp_rwlock, RW_READER); 5140 lock_held = B_TRUE; 5141 5142 /* 5143 * Cluster and TSOL note: 5144 * udp.udp_v6lastdst is shared by Cluster and TSOL 5145 * udp.udp_lastdstport is used by Cluster 5146 * 5147 * Both Cluster and TSOL need to update the dest addr and/or port. 5148 * Updating is done after both Cluster and TSOL checks, protected 5149 * by conn_lock. 5150 */ 5151 mutex_enter(&connp->conn_lock); 5152 5153 if (cl_inet_connect2 != NULL && 5154 (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5155 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5156 udp->udp_lastdstport != port)) { 5157 mutex_exit(&connp->conn_lock); 5158 *error = 0; 5159 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 5160 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error); 5161 if (*error != 0) { 5162 *error = EHOSTUNREACH; 5163 goto done; 5164 } 5165 update_lastdst = B_TRUE; 5166 mutex_enter(&connp->conn_lock); 5167 } 5168 5169 /* 5170 * Check if our saved options are valid; update if not. 5171 * TSOL Note: Since we are not in WRITER mode, UDP packets 5172 * to different destination may require different labels, 5173 * or worse, UDP packets to same IP address may require 5174 * different labels due to use of shared all-zones address. 5175 * We use conn_lock to ensure that lastdst, ip_snd_options, 5176 * and ip_snd_options_len are consistent for the current 5177 * destination and are updated atomically. 
5178 */ 5179 if (is_system_labeled()) { 5180 /* Using UDP MLP requires SCM_UCRED from user */ 5181 if (connp->conn_mlp_type != mlptSingle && 5182 !attrs.udpattr_credset) { 5183 mutex_exit(&connp->conn_lock); 5184 DTRACE_PROBE4( 5185 tx__ip__log__info__output__udp, 5186 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 5187 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 5188 *error = ECONNREFUSED; 5189 goto done; 5190 } 5191 /* 5192 * update label option for this UDP socket if 5193 * - the destination has changed, or 5194 * - the UDP socket is MLP 5195 */ 5196 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5197 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5198 connp->conn_mlp_type != mlptSingle) && 5199 (*error = udp_update_label(q, mp, v4dst, &update_lastdst)) 5200 != 0) { 5201 mutex_exit(&connp->conn_lock); 5202 goto done; 5203 } 5204 } 5205 if (update_lastdst) { 5206 IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst); 5207 udp->udp_lastdstport = port; 5208 } 5209 if (udp->udp_ip_snd_options_len > 0) { 5210 ip_snd_opt_len = udp->udp_ip_snd_options_len; 5211 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 5212 } 5213 mutex_exit(&connp->conn_lock); 5214 5215 /* Add an IP header */ 5216 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + 5217 (insert_spi ? sizeof (uint32_t) : 0); 5218 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5219 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5220 !OK_32PTR(ipha)) { 5221 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 5222 if (mp2 == NULL) { 5223 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5224 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5225 *error = ENOMEM; 5226 goto done; 5227 } 5228 mp2->b_wptr = DB_LIM(mp2); 5229 mp2->b_cont = mp1; 5230 mp1 = mp2; 5231 if (DB_TYPE(mp) != M_DATA) 5232 mp->b_cont = mp1; 5233 else 5234 mp = mp1; 5235 5236 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5237 } 5238 ip_hdr_length -= (UDPH_SIZE + (insert_spi ? 
sizeof (uint32_t) : 0)); 5239 #ifdef _BIG_ENDIAN 5240 /* Set version, header length, and tos */ 5241 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5242 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5243 udp->udp_type_of_service); 5244 /* Set ttl and protocol */ 5245 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5246 #else 5247 /* Set version, header length, and tos */ 5248 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5249 ((udp->udp_type_of_service << 8) | 5250 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5251 /* Set ttl and protocol */ 5252 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5253 #endif 5254 if (pktinfop->ip4_addr != INADDR_ANY) { 5255 ipha->ipha_src = pktinfop->ip4_addr; 5256 optinfo.ip_opt_flags = IP_VERIFY_SRC; 5257 } else { 5258 /* 5259 * Copy our address into the packet. If this is zero, 5260 * first look at __sin6_src_id for a hint. If we leave the 5261 * source as INADDR_ANY then ip will fill in the real source 5262 * address. 
5263 */ 5264 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5265 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5266 in6_addr_t v6src; 5267 5268 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 5269 us->us_netstack); 5270 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5271 } 5272 } 5273 uha_src_port = udp->udp_port; 5274 if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) { 5275 rw_exit(&udp->udp_rwlock); 5276 lock_held = B_FALSE; 5277 } 5278 5279 if (pktinfop->ip4_ill_index != 0) { 5280 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 5281 } 5282 5283 ipha->ipha_fragment_offset_and_flags = 0; 5284 ipha->ipha_ident = 0; 5285 5286 mp1->b_rptr = (uchar_t *)ipha; 5287 5288 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5289 (uintptr_t)UINT_MAX); 5290 5291 /* Determine length of packet */ 5292 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5293 if ((mp2 = mp1->b_cont) != NULL) { 5294 do { 5295 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5296 ip_len += (uint32_t)MBLKL(mp2); 5297 } while ((mp2 = mp2->b_cont) != NULL); 5298 } 5299 /* 5300 * If the size of the packet is greater than the maximum allowed by 5301 * ip, return an error. Passing this down could cause panics because 5302 * the size will have wrapped and be inconsistent with the msg size. 5303 */ 5304 if (ip_len > IP_MAXPACKET) { 5305 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5306 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5307 *error = EMSGSIZE; 5308 goto done; 5309 } 5310 ipha->ipha_length = htons((uint16_t)ip_len); 5311 ip_len -= ip_hdr_length; 5312 ip_len = htons((uint16_t)ip_len); 5313 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5314 5315 /* Insert all-0s SPI now. */ 5316 if (insert_spi) 5317 *((uint32_t *)(udpha + 1)) = 0; 5318 5319 /* 5320 * Copy in the destination address 5321 */ 5322 ipha->ipha_dst = v4dst; 5323 5324 /* 5325 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 
5326 */ 5327 if (CLASSD(v4dst)) 5328 ipha->ipha_ttl = udp->udp_multicast_ttl; 5329 5330 udpha->uha_dst_port = port; 5331 udpha->uha_src_port = uha_src_port; 5332 5333 if (ip_snd_opt_len > 0) { 5334 uint32_t cksum; 5335 5336 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 5337 lock_held = B_FALSE; 5338 rw_exit(&udp->udp_rwlock); 5339 /* 5340 * Massage source route putting first source route in ipha_dst. 5341 * Ignore the destination in T_unitdata_req. 5342 * Create a checksum adjustment for a source route, if any. 5343 */ 5344 cksum = ip_massage_options(ipha, us->us_netstack); 5345 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5346 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5347 (ipha->ipha_dst & 0xFFFF); 5348 if ((int)cksum < 0) 5349 cksum--; 5350 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5351 /* 5352 * IP does the checksum if uha_checksum is non-zero, 5353 * We make it easy for IP to include our pseudo header 5354 * by putting our length in uha_checksum. 5355 */ 5356 cksum += ip_len; 5357 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5358 /* There might be a carry. */ 5359 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5360 #ifdef _LITTLE_ENDIAN 5361 if (us->us_do_checksum) 5362 ip_len = (cksum << 16) | ip_len; 5363 #else 5364 if (us->us_do_checksum) 5365 ip_len = (ip_len << 16) | cksum; 5366 else 5367 ip_len <<= 16; 5368 #endif 5369 } else { 5370 /* 5371 * IP does the checksum if uha_checksum is non-zero, 5372 * We make it easy for IP to include our pseudo header 5373 * by putting our length in uha_checksum. 
5374 */ 5375 if (us->us_do_checksum) 5376 ip_len |= (ip_len << 16); 5377 #ifndef _LITTLE_ENDIAN 5378 else 5379 ip_len <<= 16; 5380 #endif 5381 } 5382 ASSERT(!lock_held); 5383 /* Set UDP length and checksum */ 5384 *((uint32_t *)&udpha->uha_length) = ip_len; 5385 5386 if (DB_TYPE(mp) != M_DATA) { 5387 cred_t *cr; 5388 pid_t cpid; 5389 5390 /* Move any cred from the T_UNITDATA_REQ to the packet */ 5391 cr = msg_extractcred(mp, &cpid); 5392 if (cr != NULL) { 5393 if (mp1->b_datap->db_credp != NULL) 5394 crfree(mp1->b_datap->db_credp); 5395 mp1->b_datap->db_credp = cr; 5396 mp1->b_datap->db_cpid = cpid; 5397 } 5398 ASSERT(mp != mp1); 5399 freeb(mp); 5400 } 5401 5402 /* mp has been consumed and we'll return success */ 5403 ASSERT(*error == 0); 5404 mp = NULL; 5405 5406 /* We're done. Pass the packet to ip. */ 5407 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 5408 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5409 "udp_wput_end: q %p (%S)", q, "end"); 5410 5411 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 5412 CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || 5413 connp->conn_dontroute || 5414 connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || 5415 optinfo.ip_opt_ill_index != 0 || 5416 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 5417 IPP_ENABLED(IPP_LOCAL_OUT, ipst) || 5418 ipst->ips_ip_g_mrouter != NULL) { 5419 UDP_STAT(us, udp_ip_send); 5420 ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, 5421 &optinfo); 5422 } else { 5423 udp_send_data(udp, connp->conn_wq, mp1, ipha); 5424 } 5425 5426 done: 5427 if (lock_held) 5428 rw_exit(&udp->udp_rwlock); 5429 if (*error != 0) { 5430 ASSERT(mp != NULL); 5431 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5432 } 5433 return (mp); 5434 } 5435 5436 static void 5437 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 5438 { 5439 conn_t *connp = udp->udp_connp; 5440 ipaddr_t src, dst; 5441 ire_t *ire; 5442 ipif_t *ipif = NULL; 5443 mblk_t *ire_fp_mp; 5444 boolean_t retry_caching; 5445 udp_stack_t 
*us = udp->udp_us; 5446 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5447 5448 dst = ipha->ipha_dst; 5449 src = ipha->ipha_src; 5450 ASSERT(ipha->ipha_ident == 0); 5451 5452 if (CLASSD(dst)) { 5453 int err; 5454 5455 ipif = conn_get_held_ipif(connp, 5456 &connp->conn_multicast_ipif, &err); 5457 5458 if (ipif == NULL || ipif->ipif_isv6 || 5459 (ipif->ipif_ill->ill_phyint->phyint_flags & 5460 PHYI_LOOPBACK)) { 5461 if (ipif != NULL) 5462 ipif_refrele(ipif); 5463 UDP_STAT(us, udp_ip_send); 5464 ip_output(connp, mp, q, IP_WPUT); 5465 return; 5466 } 5467 } 5468 5469 retry_caching = B_FALSE; 5470 mutex_enter(&connp->conn_lock); 5471 ire = connp->conn_ire_cache; 5472 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 5473 5474 if (ire == NULL || ire->ire_addr != dst || 5475 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 5476 retry_caching = B_TRUE; 5477 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 5478 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 5479 5480 ASSERT(ipif != NULL); 5481 if (!IS_ON_SAME_LAN(stq_ill, ipif->ipif_ill)) 5482 retry_caching = B_TRUE; 5483 } 5484 5485 if (!retry_caching) { 5486 ASSERT(ire != NULL); 5487 IRE_REFHOLD(ire); 5488 mutex_exit(&connp->conn_lock); 5489 } else { 5490 boolean_t cached = B_FALSE; 5491 5492 connp->conn_ire_cache = NULL; 5493 mutex_exit(&connp->conn_lock); 5494 5495 /* Release the old ire */ 5496 if (ire != NULL) { 5497 IRE_REFRELE_NOTR(ire); 5498 ire = NULL; 5499 } 5500 5501 if (CLASSD(dst)) { 5502 ASSERT(ipif != NULL); 5503 ire = ire_ctable_lookup(dst, 0, 0, ipif, 5504 connp->conn_zoneid, msg_getlabel(mp), 5505 MATCH_IRE_ILL, ipst); 5506 } else { 5507 ASSERT(ipif == NULL); 5508 ire = ire_cache_lookup(dst, connp->conn_zoneid, 5509 msg_getlabel(mp), ipst); 5510 } 5511 5512 if (ire == NULL) { 5513 if (ipif != NULL) 5514 ipif_refrele(ipif); 5515 UDP_STAT(us, udp_ire_null); 5516 ip_output(connp, mp, q, IP_WPUT); 5517 return; 5518 } 5519 IRE_REFHOLD_NOTR(ire); 5520 5521 mutex_enter(&connp->conn_lock); 5522 if 
(CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL && 5523 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 5524 irb_t *irb = ire->ire_bucket; 5525 5526 /* 5527 * IRE's created for non-connection oriented transports 5528 * are normally initialized with IRE_MARK_TEMPORARY set 5529 * in the ire_marks. These IRE's are preferentially 5530 * reaped when the hash chain length in the cache 5531 * bucket exceeds the maximum value specified in 5532 * ip[6]_ire_max_bucket_cnt. This can severely affect 5533 * UDP performance if IRE cache entries that we need 5534 * to reuse are continually removed. To remedy this, 5535 * when we cache the IRE in the conn_t, we remove the 5536 * IRE_MARK_TEMPORARY bit from the ire_marks if it was 5537 * set. 5538 */ 5539 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5540 rw_enter(&irb->irb_lock, RW_WRITER); 5541 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5542 ire->ire_marks &= ~IRE_MARK_TEMPORARY; 5543 irb->irb_tmp_ire_cnt--; 5544 } 5545 rw_exit(&irb->irb_lock); 5546 } 5547 connp->conn_ire_cache = ire; 5548 cached = B_TRUE; 5549 } 5550 mutex_exit(&connp->conn_lock); 5551 5552 /* 5553 * We can continue to use the ire but since it was not 5554 * cached, we should drop the extra reference. 5555 */ 5556 if (!cached) 5557 IRE_REFRELE_NOTR(ire); 5558 } 5559 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 5560 ASSERT(!CLASSD(dst) || ipif != NULL); 5561 5562 /* 5563 * Check if we can take the fast-path. 
5564 * Note that "incomplete" ire's (where the link-layer for next hop 5565 * is not resolved, or where the fast-path header in nce_fp_mp is not 5566 * available yet) are sent down the legacy (slow) path 5567 */ 5568 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 5569 (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || 5570 (ire->ire_max_frag < ntohs(ipha->ipha_length)) || 5571 ((ire->ire_nce == NULL) || 5572 ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) || 5573 connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) { 5574 if (ipif != NULL) 5575 ipif_refrele(ipif); 5576 UDP_STAT(us, udp_ip_ire_send); 5577 IRE_REFRELE(ire); 5578 ip_output(connp, mp, q, IP_WPUT); 5579 return; 5580 } 5581 5582 if (src == INADDR_ANY && !connp->conn_unspec_src) { 5583 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 5584 ipha->ipha_src = ipif->ipif_src_addr; 5585 else 5586 ipha->ipha_src = ire->ire_src_addr; 5587 } 5588 5589 if (ipif != NULL) 5590 ipif_refrele(ipif); 5591 5592 udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid); 5593 } 5594 5595 static void 5596 udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) 5597 { 5598 ipaddr_t src, dst; 5599 ill_t *ill; 5600 mblk_t *ire_fp_mp; 5601 uint_t ire_fp_mp_len; 5602 uint16_t *up; 5603 uint32_t cksum, hcksum_txflags; 5604 queue_t *dev_q; 5605 udp_t *udp = connp->conn_udp; 5606 ipha_t *ipha = (ipha_t *)mp->b_rptr; 5607 udp_stack_t *us = udp->udp_us; 5608 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5609 boolean_t ll_multicast = B_FALSE; 5610 boolean_t direct_send; 5611 5612 dev_q = ire->ire_stq->q_next; 5613 ASSERT(dev_q != NULL); 5614 5615 ill = ire_to_ill(ire); 5616 ASSERT(ill != NULL); 5617 5618 /* 5619 * For the direct send case, if resetting of conn_direct_blocked 5620 * was missed, it is still ok because the putq() would enable 5621 * the queue and write service will drain it out. 
5622 */ 5623 direct_send = ILL_DIRECT_CAPABLE(ill); 5624 5625 /* is queue flow controlled? */ 5626 if ((!direct_send) && (q->q_first != NULL || connp->conn_draining || 5627 DEV_Q_FLOW_BLOCKED(dev_q))) { 5628 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 5629 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 5630 if (ipst->ips_ip_output_queue) { 5631 DTRACE_PROBE1(udp__xmit__putq, conn_t *, connp); 5632 (void) putq(connp->conn_wq, mp); 5633 } else { 5634 freemsg(mp); 5635 } 5636 ire_refrele(ire); 5637 return; 5638 } 5639 5640 ire_fp_mp = ire->ire_nce->nce_fp_mp; 5641 ire_fp_mp_len = MBLKL(ire_fp_mp); 5642 ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len); 5643 5644 dst = ipha->ipha_dst; 5645 src = ipha->ipha_src; 5646 5647 5648 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 5649 5650 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 5651 #ifndef _BIG_ENDIAN 5652 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 5653 #endif 5654 5655 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 5656 ASSERT(ill->ill_hcksum_capab != NULL); 5657 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 5658 } else { 5659 hcksum_txflags = 0; 5660 } 5661 5662 /* pseudo-header checksum (do it in parts for IP header checksum) */ 5663 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 5664 5665 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 5666 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 5667 if (*up != 0) { 5668 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 5669 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 5670 ntohs(ipha->ipha_length), cksum); 5671 5672 /* Software checksum? 
*/ 5673 if (DB_CKSUMFLAGS(mp) == 0) { 5674 UDP_STAT(us, udp_out_sw_cksum); 5675 UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, 5676 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 5677 } 5678 } 5679 5680 if (!CLASSD(dst)) { 5681 ipha->ipha_fragment_offset_and_flags |= 5682 (uint32_t)htons(ire->ire_frag_flag); 5683 } 5684 5685 /* Calculate IP header checksum if hardware isn't capable */ 5686 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 5687 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 5688 ((uint16_t *)ipha)[4]); 5689 } 5690 5691 if (CLASSD(dst)) { 5692 if (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL) { 5693 ip_multicast_loopback(q, ill, mp, 5694 connp->conn_multicast_loop ? 0 : 5695 IP_FF_NO_MCAST_LOOP, zoneid); 5696 } 5697 5698 /* If multicast TTL is 0 then we are done */ 5699 if (ipha->ipha_ttl == 0) { 5700 freemsg(mp); 5701 ire_refrele(ire); 5702 return; 5703 } 5704 ll_multicast = B_TRUE; 5705 } 5706 5707 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 5708 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 5709 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 5710 5711 UPDATE_OB_PKT_COUNT(ire); 5712 ire->ire_last_used_time = lbolt; 5713 5714 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); 5715 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 5716 ntohs(ipha->ipha_length)); 5717 5718 DTRACE_PROBE4(ip4__physical__out__start, 5719 ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); 5720 FW_HOOKS(ipst->ips_ip4_physical_out_event, 5721 ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp, 5722 ll_multicast, ipst); 5723 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 5724 if (ipst->ips_ipobs_enabled && mp != NULL) { 5725 zoneid_t szone; 5726 5727 szone = ip_get_zoneid_v4(ipha->ipha_src, mp, 5728 ipst, ALL_ZONES); 5729 ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, 5730 ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst); 5731 } 5732 5733 if (mp == NULL) 5734 goto bail; 5735 5736 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 5737 void_ip_t *, ipha, 
__dtrace_ipsr_ill_t *, ill, 5738 ipha_t *, ipha, ip6_t *, NULL, int, 0); 5739 5740 if (direct_send) { 5741 uintptr_t cookie; 5742 ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct; 5743 5744 cookie = idd->idd_tx_df(idd->idd_tx_dh, mp, 5745 (uintptr_t)connp, 0); 5746 if (cookie != NULL) { 5747 idl_tx_list_t *idl_txl; 5748 5749 /* 5750 * Flow controlled. 5751 */ 5752 DTRACE_PROBE2(non__null__cookie, uintptr_t, 5753 cookie, conn_t *, connp); 5754 idl_txl = &ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)]; 5755 mutex_enter(&idl_txl->txl_lock); 5756 /* 5757 * Check again after holding txl_lock to see if Tx 5758 * ring is still blocked and only then insert the 5759 * connp into the drain list. 5760 */ 5761 if (connp->conn_direct_blocked || 5762 (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, 5763 cookie) == 0)) { 5764 mutex_exit(&idl_txl->txl_lock); 5765 goto bail; 5766 } 5767 if (idl_txl->txl_cookie != NULL && 5768 idl_txl->txl_cookie != cookie) { 5769 DTRACE_PROBE2(udp__xmit__collision, 5770 uintptr_t, cookie, 5771 uintptr_t, idl_txl->txl_cookie); 5772 UDP_STAT(us, udp_cookie_coll); 5773 } else { 5774 connp->conn_direct_blocked = B_TRUE; 5775 idl_txl->txl_cookie = cookie; 5776 conn_drain_insert(connp, idl_txl); 5777 DTRACE_PROBE1(udp__xmit__insert, 5778 conn_t *, connp); 5779 } 5780 mutex_exit(&idl_txl->txl_lock); 5781 } 5782 } else { 5783 DTRACE_PROBE1(udp__xmit__putnext, mblk_t *, mp); 5784 putnext(ire->ire_stq, mp); 5785 } 5786 bail: 5787 IRE_REFRELE(ire); 5788 } 5789 5790 static boolean_t 5791 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst, 5792 boolean_t *update_lastdst) 5793 { 5794 udp_t *udp = Q_TO_UDP(wq); 5795 int err; 5796 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 5797 udp_stack_t *us = udp->udp_us; 5798 cred_t *cr; 5799 5800 /* 5801 * All Solaris components should pass a db_credp 5802 * for this message, hence we ASSERT. 5803 * On production kernels we return an error to be robust against 5804 * random streams modules sitting on top of us. 
5805 */ 5806 cr = msg_getcred(mp, NULL); 5807 ASSERT(cr != NULL); 5808 if (cr == NULL) 5809 return (EINVAL); 5810 5811 /* Note that we use the cred/label from the message to handle MLP */ 5812 err = tsol_compute_label_v6(cr, 5813 dst, opt_storage, udp->udp_connp->conn_mac_exempt, 5814 us->us_netstack->netstack_ip); 5815 if (err == 0) { 5816 err = tsol_update_sticky(&udp->udp_sticky_ipp, 5817 &udp->udp_label_len_v6, opt_storage); 5818 } 5819 if (err != 0) { 5820 DTRACE_PROBE4( 5821 tx__ip__log__drop__updatelabel__udp6, 5822 char *, "queue(1) failed to update options(2) on mp(3)", 5823 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5824 } else { 5825 *update_lastdst = B_TRUE; 5826 } 5827 return (err); 5828 } 5829 5830 static int 5831 udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr, 5832 pid_t pid) 5833 { 5834 udp_t *udp = connp->conn_udp; 5835 udp_stack_t *us = udp->udp_us; 5836 ipaddr_t v4dst; 5837 in_port_t dstport; 5838 boolean_t mapped_addr; 5839 struct sockaddr_storage ss; 5840 sin_t *sin; 5841 sin6_t *sin6; 5842 struct sockaddr *addr; 5843 socklen_t addrlen; 5844 int error; 5845 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5846 5847 /* M_DATA for connected socket */ 5848 5849 ASSERT(udp->udp_issocket || IPCL_IS_NONSTR(connp)); 5850 UDP_DBGSTAT(us, udp_data_conn); 5851 5852 mutex_enter(&connp->conn_lock); 5853 if (udp->udp_state != TS_DATA_XFER) { 5854 mutex_exit(&connp->conn_lock); 5855 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5856 UDP_STAT(us, udp_out_err_notconn); 5857 freemsg(mp); 5858 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5859 "udp_wput_end: connp %p (%S)", connp, 5860 "not-connected; address required"); 5861 return (EDESTADDRREQ); 5862 } 5863 5864 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 5865 if (mapped_addr) 5866 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 5867 5868 /* Initialize addr and addrlen as if they're passed in */ 5869 if (udp->udp_family == AF_INET) { 5870 sin = (sin_t *)&ss; 5871 
sin->sin_family = AF_INET; 5872 dstport = sin->sin_port = udp->udp_dstport; 5873 ASSERT(mapped_addr); 5874 sin->sin_addr.s_addr = v4dst; 5875 addr = (struct sockaddr *)sin; 5876 addrlen = sizeof (*sin); 5877 } else { 5878 sin6 = (sin6_t *)&ss; 5879 sin6->sin6_family = AF_INET6; 5880 dstport = sin6->sin6_port = udp->udp_dstport; 5881 sin6->sin6_flowinfo = udp->udp_flowinfo; 5882 sin6->sin6_addr = udp->udp_v6dst; 5883 sin6->sin6_scope_id = 0; 5884 sin6->__sin6_src_id = 0; 5885 addr = (struct sockaddr *)sin6; 5886 addrlen = sizeof (*sin6); 5887 } 5888 mutex_exit(&connp->conn_lock); 5889 5890 if (mapped_addr) { 5891 /* 5892 * Handle both AF_INET and AF_INET6; the latter 5893 * for IPV4 mapped destination addresses. Note 5894 * here that both addr and addrlen point to the 5895 * corresponding struct depending on the address 5896 * family of the socket. 5897 */ 5898 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 5899 insert_spi, msg, cr, pid); 5900 } else { 5901 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid); 5902 } 5903 if (error == 0) { 5904 ASSERT(mp == NULL); 5905 return (0); 5906 } 5907 5908 UDP_STAT(us, udp_out_err_output); 5909 ASSERT(mp != NULL); 5910 if (IPCL_IS_NONSTR(connp)) { 5911 freemsg(mp); 5912 return (error); 5913 } else { 5914 /* mp is freed by the following routine */ 5915 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, 5916 (t_scalar_t)addrlen, (t_scalar_t)error); 5917 return (0); 5918 } 5919 } 5920 5921 /* ARGSUSED */ 5922 static int 5923 udp_send_not_connected(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 5924 socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid) 5925 { 5926 5927 udp_t *udp = connp->conn_udp; 5928 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5929 int error = 0; 5930 sin6_t *sin6; 5931 sin_t *sin; 5932 uint_t srcid; 5933 uint16_t port; 5934 ipaddr_t v4dst; 5935 5936 5937 ASSERT(addr != NULL); 5938 5939 switch (udp->udp_family) { 5940 case AF_INET6: 5941 sin6 = (sin6_t *)addr; 5942 if 
(!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 5943 /* 5944 * Destination is a non-IPv4-compatible IPv6 address. 5945 * Send out an IPv6 format packet. 5946 */ 5947 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, 5948 pid); 5949 if (error != 0) 5950 goto ud_error; 5951 5952 return (0); 5953 } 5954 /* 5955 * If the local address is not zero or a mapped address 5956 * return an error. It would be possible to send an IPv4 5957 * packet but the response would never make it back to the 5958 * application since it is bound to a non-mapped address. 5959 */ 5960 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 5961 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 5962 error = EADDRNOTAVAIL; 5963 goto ud_error; 5964 } 5965 /* Send IPv4 packet without modifying udp_ipversion */ 5966 /* Extract port and ipaddr */ 5967 port = sin6->sin6_port; 5968 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 5969 srcid = sin6->__sin6_src_id; 5970 break; 5971 5972 case AF_INET: 5973 sin = (sin_t *)addr; 5974 /* Extract port and ipaddr */ 5975 port = sin->sin_port; 5976 v4dst = sin->sin_addr.s_addr; 5977 srcid = 0; 5978 break; 5979 } 5980 5981 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi, 5982 msg, cr, pid); 5983 5984 if (error == 0) { 5985 ASSERT(mp == NULL); 5986 return (0); 5987 } 5988 5989 ud_error: 5990 ASSERT(mp != NULL); 5991 5992 return (error); 5993 } 5994 5995 /* 5996 * This routine handles all messages passed downstream. It either 5997 * consumes the message or passes it downstream; it never queues a 5998 * a message. 5999 * 6000 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 6001 * is valid when we are directly beneath the stream head, and thus sockfs 6002 * is able to bypass STREAMS and directly call us, passing along the sockaddr 6003 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 6004 * connected endpoints. 
6005 */ 6006 void 6007 udp_wput(queue_t *q, mblk_t *mp) 6008 { 6009 conn_t *connp = Q_TO_CONN(q); 6010 udp_t *udp = connp->conn_udp; 6011 int error = 0; 6012 struct sockaddr *addr; 6013 socklen_t addrlen; 6014 udp_stack_t *us = udp->udp_us; 6015 6016 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6017 "udp_wput_start: queue %p mp %p", q, mp); 6018 6019 /* 6020 * We directly handle several cases here: T_UNITDATA_REQ message 6021 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 6022 * socket. 6023 */ 6024 switch (DB_TYPE(mp)) { 6025 case M_DATA: 6026 /* 6027 * Quick check for error cases. Checks will be done again 6028 * under the lock later on 6029 */ 6030 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 6031 /* Not connected; address is required */ 6032 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6033 UDP_STAT(us, udp_out_err_notconn); 6034 freemsg(mp); 6035 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6036 "udp_wput_end: connp %p (%S)", connp, 6037 "not-connected; address required"); 6038 return; 6039 } 6040 (void) udp_send_connected(connp, mp, NULL, NULL, -1); 6041 return; 6042 6043 case M_PROTO: 6044 case M_PCPROTO: { 6045 struct T_unitdata_req *tudr; 6046 6047 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 6048 tudr = (struct T_unitdata_req *)mp->b_rptr; 6049 6050 /* Handle valid T_UNITDATA_REQ here */ 6051 if (MBLKL(mp) >= sizeof (*tudr) && 6052 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 6053 if (mp->b_cont == NULL) { 6054 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6055 "udp_wput_end: q %p (%S)", q, "badaddr"); 6056 error = EPROTO; 6057 goto ud_error; 6058 } 6059 6060 if (!MBLKIN(mp, 0, tudr->DEST_offset + 6061 tudr->DEST_length)) { 6062 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6063 "udp_wput_end: q %p (%S)", q, "badaddr"); 6064 error = EADDRNOTAVAIL; 6065 goto ud_error; 6066 } 6067 /* 6068 * If a port has not been bound to the stream, fail. 
6069 * This is not a problem when sockfs is directly 6070 * above us, because it will ensure that the socket 6071 * is first bound before allowing data to be sent. 6072 */ 6073 if (udp->udp_state == TS_UNBND) { 6074 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6075 "udp_wput_end: q %p (%S)", q, "outstate"); 6076 error = EPROTO; 6077 goto ud_error; 6078 } 6079 addr = (struct sockaddr *) 6080 &mp->b_rptr[tudr->DEST_offset]; 6081 addrlen = tudr->DEST_length; 6082 if (tudr->OPT_length != 0) 6083 UDP_STAT(us, udp_out_opt); 6084 break; 6085 } 6086 /* FALLTHRU */ 6087 } 6088 default: 6089 udp_wput_other(q, mp); 6090 return; 6091 } 6092 ASSERT(addr != NULL); 6093 6094 error = udp_send_not_connected(connp, mp, addr, addrlen, NULL, NULL, 6095 -1); 6096 if (error != 0) { 6097 ud_error: 6098 UDP_STAT(us, udp_out_err_output); 6099 ASSERT(mp != NULL); 6100 /* mp is freed by the following routine */ 6101 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6102 (t_scalar_t)error); 6103 } 6104 } 6105 6106 /* ARGSUSED */ 6107 static void 6108 udp_wput_fallback(queue_t *wq, mblk_t *mp) 6109 { 6110 #ifdef DEBUG 6111 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 6112 #endif 6113 freemsg(mp); 6114 } 6115 6116 6117 /* 6118 * udp_output_v6(): 6119 * Assumes that udp_wput did some sanity checking on the destination 6120 * address. 
6121 */ 6122 static mblk_t * 6123 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, 6124 struct nmsghdr *msg, cred_t *cr, pid_t pid) 6125 { 6126 ip6_t *ip6h; 6127 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6128 mblk_t *mp1 = mp; 6129 mblk_t *mp2; 6130 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6131 size_t ip_len; 6132 udpha_t *udph; 6133 udp_t *udp = connp->conn_udp; 6134 udp_stack_t *us = udp->udp_us; 6135 queue_t *q = connp->conn_wq; 6136 ip6_pkt_t ipp_s; /* For ancillary data options */ 6137 ip6_pkt_t *ipp = &ipp_s; 6138 ip6_pkt_t *tipp; /* temporary ipp */ 6139 uint32_t csum = 0; 6140 uint_t ignore = 0; 6141 uint_t option_exists = 0, is_sticky = 0; 6142 uint8_t *cp; 6143 uint8_t *nxthdr_ptr; 6144 in6_addr_t ip6_dst; 6145 in_port_t port; 6146 udpattrs_t attrs; 6147 boolean_t opt_present; 6148 ip6_hbh_t *hopoptsptr = NULL; 6149 uint_t hopoptslen = 0; 6150 boolean_t is_ancillary = B_FALSE; 6151 size_t sth_wroff = 0; 6152 ire_t *ire; 6153 boolean_t update_lastdst = B_FALSE; 6154 6155 *error = 0; 6156 6157 /* 6158 * If the local address is a mapped address return 6159 * an error. 6160 * It would be possible to send an IPv6 packet but the 6161 * response would never make it back to the application 6162 * since it is bound to a mapped address. 
6163 */ 6164 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6165 *error = EADDRNOTAVAIL; 6166 goto done; 6167 } 6168 6169 ipp->ipp_fields = 0; 6170 ipp->ipp_sticky_ignored = 0; 6171 6172 /* 6173 * If TPI options passed in, feed it for verification and handling 6174 */ 6175 attrs.udpattr_credset = B_FALSE; 6176 opt_present = B_FALSE; 6177 if (IPCL_IS_NONSTR(connp)) { 6178 if (msg->msg_controllen != 0) { 6179 attrs.udpattr_ipp6 = ipp; 6180 attrs.udpattr_mb = mp; 6181 6182 rw_enter(&udp->udp_rwlock, RW_WRITER); 6183 *error = process_auxiliary_options(connp, 6184 msg->msg_control, msg->msg_controllen, 6185 &attrs, &udp_opt_obj, udp_opt_set, cr); 6186 rw_exit(&udp->udp_rwlock); 6187 if (*error) 6188 goto done; 6189 ASSERT(*error == 0); 6190 opt_present = B_TRUE; 6191 } 6192 } else { 6193 if (DB_TYPE(mp) != M_DATA) { 6194 mp1 = mp->b_cont; 6195 if (((struct T_unitdata_req *) 6196 mp->b_rptr)->OPT_length != 0) { 6197 attrs.udpattr_ipp6 = ipp; 6198 attrs.udpattr_mb = mp; 6199 if (udp_unitdata_opt_process(q, mp, error, 6200 &attrs) < 0) { 6201 goto done; 6202 } 6203 ASSERT(*error == 0); 6204 opt_present = B_TRUE; 6205 } 6206 } 6207 } 6208 6209 /* 6210 * Determine whether we need to mark the mblk with the user's 6211 * credentials. 6212 * If labeled then sockfs would have already done this. 
6213 */ 6214 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 6215 ire = connp->conn_ire_cache; 6216 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || (ire == NULL) || 6217 (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) || 6218 (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) { 6219 if (cr != NULL && msg_getcred(mp, NULL) == NULL) 6220 mblk_setcred(mp, cr, pid); 6221 } 6222 6223 rw_enter(&udp->udp_rwlock, RW_READER); 6224 ignore = ipp->ipp_sticky_ignored; 6225 6226 /* mp1 points to the M_DATA mblk carrying the packet */ 6227 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6228 6229 if (sin6->sin6_scope_id != 0 && 6230 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6231 /* 6232 * IPPF_SCOPE_ID is special. It's neither a sticky 6233 * option nor ancillary data. It needs to be 6234 * explicitly set in options_exists. 6235 */ 6236 option_exists |= IPPF_SCOPE_ID; 6237 } 6238 6239 /* 6240 * Compute the destination address 6241 */ 6242 ip6_dst = sin6->sin6_addr; 6243 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6244 ip6_dst = ipv6_loopback; 6245 6246 port = sin6->sin6_port; 6247 6248 /* 6249 * Cluster and TSOL notes, Cluster check: 6250 * see comments in udp_output_v4(). 6251 */ 6252 mutex_enter(&connp->conn_lock); 6253 6254 if (cl_inet_connect2 != NULL && 6255 (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) || 6256 port != udp->udp_lastdstport)) { 6257 mutex_exit(&connp->conn_lock); 6258 *error = 0; 6259 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error); 6260 if (*error != 0) { 6261 *error = EHOSTUNREACH; 6262 rw_exit(&udp->udp_rwlock); 6263 goto done; 6264 } 6265 update_lastdst = B_TRUE; 6266 mutex_enter(&connp->conn_lock); 6267 } 6268 6269 /* 6270 * If we're not going to the same destination as last time, then 6271 * recompute the label required. This is done in a separate routine to 6272 * avoid blowing up our stack here. 
6273 * 6274 * TSOL Note: Since we are not in WRITER mode, UDP packets 6275 * to different destination may require different labels, 6276 * or worse, UDP packets to same IP address may require 6277 * different labels due to use of shared all-zones address. 6278 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6279 * and sticky ipp_hopoptslen are consistent for the current 6280 * destination and are updated atomically. 6281 */ 6282 if (is_system_labeled()) { 6283 /* Using UDP MLP requires SCM_UCRED from user */ 6284 if (connp->conn_mlp_type != mlptSingle && 6285 !attrs.udpattr_credset) { 6286 DTRACE_PROBE4( 6287 tx__ip__log__info__output__udp6, 6288 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6289 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6290 *error = ECONNREFUSED; 6291 rw_exit(&udp->udp_rwlock); 6292 mutex_exit(&connp->conn_lock); 6293 goto done; 6294 } 6295 /* 6296 * update label option for this UDP socket if 6297 * - the destination has changed, or 6298 * - the UDP socket is MLP 6299 */ 6300 if ((opt_present || 6301 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6302 connp->conn_mlp_type != mlptSingle) && 6303 (*error = udp_update_label_v6(q, mp, &ip6_dst, 6304 &update_lastdst)) != 0) { 6305 rw_exit(&udp->udp_rwlock); 6306 mutex_exit(&connp->conn_lock); 6307 goto done; 6308 } 6309 } 6310 6311 if (update_lastdst) { 6312 udp->udp_v6lastdst = ip6_dst; 6313 udp->udp_lastdstport = port; 6314 } 6315 6316 /* 6317 * If there's a security label here, then we ignore any options the 6318 * user may try to set. We keep the peer's label as a hidden sticky 6319 * option. We make a private copy of this label before releasing the 6320 * lock so that label is kept consistent with the destination addr. 
6321 */ 6322 if (udp->udp_label_len_v6 > 0) { 6323 ignore &= ~IPPF_HOPOPTS; 6324 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6325 } 6326 6327 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6328 /* No sticky options nor ancillary data. */ 6329 mutex_exit(&connp->conn_lock); 6330 goto no_options; 6331 } 6332 6333 /* 6334 * Go through the options figuring out where each is going to 6335 * come from and build two masks. The first mask indicates if 6336 * the option exists at all. The second mask indicates if the 6337 * option is sticky or ancillary. 6338 */ 6339 if (!(ignore & IPPF_HOPOPTS)) { 6340 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6341 option_exists |= IPPF_HOPOPTS; 6342 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6343 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6344 option_exists |= IPPF_HOPOPTS; 6345 is_sticky |= IPPF_HOPOPTS; 6346 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6347 hopoptsptr = kmem_alloc( 6348 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6349 if (hopoptsptr == NULL) { 6350 *error = ENOMEM; 6351 mutex_exit(&connp->conn_lock); 6352 goto done; 6353 } 6354 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6355 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6356 hopoptslen); 6357 udp_ip_hdr_len += hopoptslen; 6358 } 6359 } 6360 mutex_exit(&connp->conn_lock); 6361 6362 if (!(ignore & IPPF_RTHDR)) { 6363 if (ipp->ipp_fields & IPPF_RTHDR) { 6364 option_exists |= IPPF_RTHDR; 6365 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6366 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6367 option_exists |= IPPF_RTHDR; 6368 is_sticky |= IPPF_RTHDR; 6369 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6370 } 6371 } 6372 6373 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6374 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6375 option_exists |= IPPF_RTDSTOPTS; 6376 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6377 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6378 option_exists |= IPPF_RTDSTOPTS; 6379 
is_sticky |= IPPF_RTDSTOPTS; 6380 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6381 } 6382 } 6383 6384 if (!(ignore & IPPF_DSTOPTS)) { 6385 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6386 option_exists |= IPPF_DSTOPTS; 6387 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6388 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6389 option_exists |= IPPF_DSTOPTS; 6390 is_sticky |= IPPF_DSTOPTS; 6391 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6392 } 6393 } 6394 6395 if (!(ignore & IPPF_IFINDEX)) { 6396 if (ipp->ipp_fields & IPPF_IFINDEX) { 6397 option_exists |= IPPF_IFINDEX; 6398 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6399 option_exists |= IPPF_IFINDEX; 6400 is_sticky |= IPPF_IFINDEX; 6401 } 6402 } 6403 6404 if (!(ignore & IPPF_ADDR)) { 6405 if (ipp->ipp_fields & IPPF_ADDR) { 6406 option_exists |= IPPF_ADDR; 6407 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6408 option_exists |= IPPF_ADDR; 6409 is_sticky |= IPPF_ADDR; 6410 } 6411 } 6412 6413 if (!(ignore & IPPF_DONTFRAG)) { 6414 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6415 option_exists |= IPPF_DONTFRAG; 6416 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6417 option_exists |= IPPF_DONTFRAG; 6418 is_sticky |= IPPF_DONTFRAG; 6419 } 6420 } 6421 6422 if (!(ignore & IPPF_USE_MIN_MTU)) { 6423 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6424 option_exists |= IPPF_USE_MIN_MTU; 6425 } else if (udp->udp_sticky_ipp.ipp_fields & 6426 IPPF_USE_MIN_MTU) { 6427 option_exists |= IPPF_USE_MIN_MTU; 6428 is_sticky |= IPPF_USE_MIN_MTU; 6429 } 6430 } 6431 6432 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6433 option_exists |= IPPF_HOPLIMIT; 6434 /* IPV6_HOPLIMIT can never be sticky */ 6435 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6436 6437 if (!(ignore & IPPF_UNICAST_HOPS) && 6438 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6439 option_exists |= IPPF_UNICAST_HOPS; 6440 is_sticky |= IPPF_UNICAST_HOPS; 6441 } 6442 6443 if 
(!(ignore & IPPF_MULTICAST_HOPS) && 6444 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6445 option_exists |= IPPF_MULTICAST_HOPS; 6446 is_sticky |= IPPF_MULTICAST_HOPS; 6447 } 6448 6449 if (!(ignore & IPPF_TCLASS)) { 6450 if (ipp->ipp_fields & IPPF_TCLASS) { 6451 option_exists |= IPPF_TCLASS; 6452 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6453 option_exists |= IPPF_TCLASS; 6454 is_sticky |= IPPF_TCLASS; 6455 } 6456 } 6457 6458 if (!(ignore & IPPF_NEXTHOP) && 6459 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 6460 option_exists |= IPPF_NEXTHOP; 6461 is_sticky |= IPPF_NEXTHOP; 6462 } 6463 6464 no_options: 6465 6466 /* 6467 * If any options carried in the ip6i_t were specified, we 6468 * need to account for the ip6i_t in the data we'll be sending 6469 * down. 6470 */ 6471 if (option_exists & IPPF_HAS_IP6I) 6472 udp_ip_hdr_len += sizeof (ip6i_t); 6473 6474 /* check/fix buffer config, setup pointers into it */ 6475 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6476 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6477 !OK_32PTR(ip6h)) { 6478 6479 /* Try to get everything in a single mblk next time */ 6480 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6481 udp->udp_max_hdr_len = udp_ip_hdr_len; 6482 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 6483 } 6484 6485 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 6486 if (mp2 == NULL) { 6487 *error = ENOMEM; 6488 rw_exit(&udp->udp_rwlock); 6489 goto done; 6490 } 6491 mp2->b_wptr = DB_LIM(mp2); 6492 mp2->b_cont = mp1; 6493 mp1 = mp2; 6494 if (DB_TYPE(mp) != M_DATA) 6495 mp->b_cont = mp1; 6496 else 6497 mp = mp1; 6498 6499 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6500 } 6501 mp1->b_rptr = (unsigned char *)ip6h; 6502 ip6i = (ip6i_t *)ip6h; 6503 6504 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&udp->udp_sticky_ipp : ipp) 6505 if (option_exists & IPPF_HAS_IP6I) { 6506 ip6h = (ip6_t *)&ip6i[1]; 6507 ip6i->ip6i_flags = 0; 6508 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6509 6510 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6511 if (option_exists & IPPF_SCOPE_ID) { 6512 ip6i->ip6i_flags |= IP6I_IFINDEX; 6513 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6514 } else if (option_exists & IPPF_IFINDEX) { 6515 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6516 ASSERT(tipp->ipp_ifindex != 0); 6517 ip6i->ip6i_flags |= IP6I_IFINDEX; 6518 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6519 } 6520 6521 if (option_exists & IPPF_ADDR) { 6522 /* 6523 * Enable per-packet source address verification if 6524 * IPV6_PKTINFO specified the source address. 6525 * ip6_src is set in the transport's _wput function. 6526 */ 6527 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6528 } 6529 6530 if (option_exists & IPPF_DONTFRAG) { 6531 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6532 } 6533 6534 if (option_exists & IPPF_USE_MIN_MTU) { 6535 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6536 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6537 } 6538 6539 if (option_exists & IPPF_NEXTHOP) { 6540 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6541 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6542 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6543 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6544 } 6545 6546 /* 6547 * tell IP this is an ip6i_t private header 6548 */ 6549 ip6i->ip6i_nxt = IPPROTO_RAW; 6550 } 6551 6552 /* Initialize IPv6 header */ 6553 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6554 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6555 6556 /* Set the hoplimit of the outgoing packet. */ 6557 if (option_exists & IPPF_HOPLIMIT) { 6558 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
*/ 6559 ip6h->ip6_hops = ipp->ipp_hoplimit; 6560 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6561 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6562 ip6h->ip6_hops = udp->udp_multicast_ttl; 6563 if (option_exists & IPPF_MULTICAST_HOPS) 6564 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6565 } else { 6566 ip6h->ip6_hops = udp->udp_ttl; 6567 if (option_exists & IPPF_UNICAST_HOPS) 6568 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6569 } 6570 6571 if (option_exists & IPPF_ADDR) { 6572 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6573 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6574 ip6h->ip6_src = tipp->ipp_addr; 6575 } else { 6576 /* 6577 * The source address was not set using IPV6_PKTINFO. 6578 * First look at the bound source. 6579 * If unspecified fallback to __sin6_src_id. 6580 */ 6581 ip6h->ip6_src = udp->udp_v6src; 6582 if (sin6->__sin6_src_id != 0 && 6583 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6584 ip_srcid_find_id(sin6->__sin6_src_id, 6585 &ip6h->ip6_src, connp->conn_zoneid, 6586 us->us_netstack); 6587 } 6588 } 6589 6590 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6591 cp = (uint8_t *)&ip6h[1]; 6592 6593 /* 6594 * Here's where we have to start stringing together 6595 * any extension headers in the right order: 6596 * Hop-by-hop, destination, routing, and final destination opts. 
6597 */ 6598 if (option_exists & IPPF_HOPOPTS) { 6599 /* Hop-by-hop options */ 6600 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6601 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6602 if (hopoptslen == 0) { 6603 hopoptsptr = tipp->ipp_hopopts; 6604 hopoptslen = tipp->ipp_hopoptslen; 6605 is_ancillary = B_TRUE; 6606 } 6607 6608 *nxthdr_ptr = IPPROTO_HOPOPTS; 6609 nxthdr_ptr = &hbh->ip6h_nxt; 6610 6611 bcopy(hopoptsptr, cp, hopoptslen); 6612 cp += hopoptslen; 6613 6614 if (hopoptsptr != NULL && !is_ancillary) { 6615 kmem_free(hopoptsptr, hopoptslen); 6616 hopoptsptr = NULL; 6617 hopoptslen = 0; 6618 } 6619 } 6620 /* 6621 * En-route destination options 6622 * Only do them if there's a routing header as well 6623 */ 6624 if (option_exists & IPPF_RTDSTOPTS) { 6625 ip6_dest_t *dst = (ip6_dest_t *)cp; 6626 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6627 6628 *nxthdr_ptr = IPPROTO_DSTOPTS; 6629 nxthdr_ptr = &dst->ip6d_nxt; 6630 6631 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6632 cp += tipp->ipp_rtdstoptslen; 6633 } 6634 /* 6635 * Routing header next 6636 */ 6637 if (option_exists & IPPF_RTHDR) { 6638 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6639 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6640 6641 *nxthdr_ptr = IPPROTO_ROUTING; 6642 nxthdr_ptr = &rt->ip6r_nxt; 6643 6644 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6645 cp += tipp->ipp_rthdrlen; 6646 } 6647 /* 6648 * Do ultimate destination options 6649 */ 6650 if (option_exists & IPPF_DSTOPTS) { 6651 ip6_dest_t *dest = (ip6_dest_t *)cp; 6652 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6653 6654 *nxthdr_ptr = IPPROTO_DSTOPTS; 6655 nxthdr_ptr = &dest->ip6d_nxt; 6656 6657 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6658 cp += tipp->ipp_dstoptslen; 6659 } 6660 /* 6661 * Now set the last header pointer to the proto passed in 6662 */ 6663 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6664 *nxthdr_ptr = IPPROTO_UDP; 6665 6666 /* Update UDP header */ 6667 udph = (udpha_t *)((uchar_t *)ip6i + 
udp_ip_hdr_len - UDPH_SIZE); 6668 udph->uha_dst_port = sin6->sin6_port; 6669 udph->uha_src_port = udp->udp_port; 6670 6671 /* 6672 * Copy in the destination address 6673 */ 6674 ip6h->ip6_dst = ip6_dst; 6675 6676 ip6h->ip6_vcf = 6677 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 6678 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6679 6680 if (option_exists & IPPF_TCLASS) { 6681 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6682 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6683 tipp->ipp_tclass); 6684 } 6685 rw_exit(&udp->udp_rwlock); 6686 6687 if (option_exists & IPPF_RTHDR) { 6688 ip6_rthdr_t *rth; 6689 6690 /* 6691 * Perform any processing needed for source routing. 6692 * We know that all extension headers will be in the same mblk 6693 * as the IPv6 header. 6694 */ 6695 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6696 if (rth != NULL && rth->ip6r_segleft != 0) { 6697 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6698 /* 6699 * Drop packet - only support Type 0 routing. 6700 * Notify the application as well. 6701 */ 6702 *error = EPROTO; 6703 goto done; 6704 } 6705 6706 /* 6707 * rth->ip6r_len is twice the number of 6708 * addresses in the header. Thus it must be even. 6709 */ 6710 if (rth->ip6r_len & 0x1) { 6711 *error = EPROTO; 6712 goto done; 6713 } 6714 /* 6715 * Shuffle the routing header and ip6_dst 6716 * addresses, and get the checksum difference 6717 * between the first hop (in ip6_dst) and 6718 * the destination (in the last routing hdr entry). 6719 */ 6720 csum = ip_massage_options_v6(ip6h, rth, 6721 us->us_netstack); 6722 /* 6723 * Verify that the first hop isn't a mapped address. 6724 * Routers along the path need to do this verification 6725 * for subsequent hops. 
6726 */ 6727 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6728 *error = EADDRNOTAVAIL; 6729 goto done; 6730 } 6731 6732 cp += (rth->ip6r_len + 1)*8; 6733 } 6734 } 6735 6736 /* count up length of UDP packet */ 6737 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6738 if ((mp2 = mp1->b_cont) != NULL) { 6739 do { 6740 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6741 ip_len += (uint32_t)MBLKL(mp2); 6742 } while ((mp2 = mp2->b_cont) != NULL); 6743 } 6744 6745 /* 6746 * If the size of the packet is greater than the maximum allowed by 6747 * ip, return an error. Passing this down could cause panics because 6748 * the size will have wrapped and be inconsistent with the msg size. 6749 */ 6750 if (ip_len > IP_MAXPACKET) { 6751 *error = EMSGSIZE; 6752 goto done; 6753 } 6754 6755 /* Store the UDP length. Subtract length of extension hdrs */ 6756 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 6757 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 6758 6759 /* 6760 * We make it easy for IP to include our pseudo header 6761 * by putting our length in uh_checksum, modified (if 6762 * we have a routing header) by the checksum difference 6763 * between the ultimate destination and first hop addresses. 6764 * Note: UDP over IPv6 must always checksum the packet. 
	 */
	csum += udph->uha_length;
	csum = (csum & 0xFFFF) + (csum >> 16);
	udph->uha_checksum = (uint16_t)csum;

#ifdef _LITTLE_ENDIAN
	ip_len = htons(ip_len);
#endif
	ip6h->ip6_plen = ip_len;

	if (DB_TYPE(mp) != M_DATA) {
		cred_t *cr;
		pid_t cpid;

		/* Move any cred from the T_UNITDATA_REQ to the packet */
		cr = msg_extractcred(mp, &cpid);
		if (cr != NULL) {
			if (mp1->b_datap->db_credp != NULL)
				crfree(mp1->b_datap->db_credp);
			mp1->b_datap->db_credp = cr;
			mp1->b_datap->db_cpid = cpid;
		}

		ASSERT(mp != mp1);
		freeb(mp);
	}

	/* mp has been consumed and we'll return success */
	ASSERT(*error == 0);
	mp = NULL;

	/* We're done.  Pass the packet to IP */
	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
	ip_output_v6(connp, mp1, q, IP_WPUT);

done:
	if (sth_wroff != 0) {
		(void) proto_set_tx_wroff(RD(q), connp,
		    udp->udp_max_hdr_len + us->us_wroff_extra);
	}
	if (hopoptsptr != NULL && !is_ancillary) {
		kmem_free(hopoptsptr, hopoptslen);
		hopoptsptr = NULL;
	}
	if (*error != 0) {
		ASSERT(mp != NULL);
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
	}
	return (mp);
}


/*
 * Fill in the peer (default destination) address of a UDP endpoint.
 *
 * The caller must hold udp_rwlock (asserted below).  "sa" is written as a
 * sin_t for AF_INET endpoints and as a sin6_t for AF_INET6 endpoints, using
 * udp_dstport / udp_v6dst (plus udp_flowinfo for IPv6).  On entry *salenp is
 * the size of the caller's buffer; on success it is set to the size of the
 * structure that was written.
 *
 * Returns 0 on success, ENOTCONN if the endpoint is not in TS_DATA_XFER
 * state, or EINVAL if the supplied buffer is too small.  For any other
 * udp_family value the switch matches nothing and 0 is returned with "sa"
 * and *salenp untouched.
 */
static int
i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
{
	/* Two typed views of the same caller-supplied buffer. */
	sin_t *sin = (sin_t *)sa;
	sin6_t *sin6 = (sin6_t *)sa;

	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));

	/* Without a default destination there is no peer to report. */
	if (udp->udp_state != TS_DATA_XFER)
		return (ENOTCONN);

	switch (udp->udp_family) {
	case AF_INET:
		ASSERT(udp->udp_ipversion == IPV4_VERSION);

		if (*salenp < sizeof (sin_t))
			return (EINVAL);

		*salenp = sizeof (sin_t);
		/* Start from the all-zero template so unused fields are clean. */
		*sin = sin_null;
		sin->sin_family = AF_INET;
		sin->sin_port = udp->udp_dstport;
		sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst);
		break;

	case AF_INET6:
		if (*salenp < sizeof (sin6_t))
			return (EINVAL);

		*salenp = sizeof (sin6_t);
		*sin6 = sin6_null;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_port = udp->udp_dstport;
		sin6->sin6_addr = udp->udp_v6dst;
		sin6->sin6_flowinfo = udp->udp_flowinfo;
		break;
	}

	return (0);
}

/*
 * Fill in the local address of a UDP endpoint.
 *
 * The caller must hold udp_rwlock (asserted below).  "sa" is written as a
 * sin_t for AF_INET endpoints and as a sin6_t for AF_INET6 endpoints.  The
 * port is udp_port; the address is udp_v6src when that is set, otherwise
 * udp_bound_v6src.  On entry *salenp is the size of the caller's buffer; on
 * success it is set to the size of the structure that was written.
 *
 * Returns 0 on success or EINVAL if the supplied buffer is too small.  For
 * any other udp_family value 0 is returned with "sa" and *salenp untouched.
 */
static int
udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp)
{
	/* Two typed views of the same caller-supplied buffer. */
	sin_t *sin = (sin_t *)sa;
	sin6_t *sin6 = (sin6_t *)sa;

	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));

	switch (udp->udp_family) {
	case AF_INET:
		ASSERT(udp->udp_ipversion == IPV4_VERSION);

		if (*salenp < sizeof (sin_t))
			return (EINVAL);

		*salenp = sizeof (sin_t);
		*sin = sin_null;
		sin->sin_family = AF_INET;
		sin->sin_port = udp->udp_port;

		/*
		 * If udp_v6src is unspecified, we might be bound to broadcast
		 * / multicast.  Use udp_bound_v6src as local address instead
		 * (that could also still be unspecified).
		 */
		if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
		    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
			sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src);
		} else {
			sin->sin_addr.s_addr =
			    V4_PART_OF_V6(udp->udp_bound_v6src);
		}
		break;

	case AF_INET6:
		if (*salenp < sizeof (sin6_t))
			return (EINVAL);

		*salenp = sizeof (sin6_t);
		*sin6 = sin6_null;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_port = udp->udp_port;
		sin6->sin6_flowinfo = udp->udp_flowinfo;

		/*
		 * If udp_v6src is unspecified, we might be bound to broadcast
		 * / multicast.  Use udp_bound_v6src as local address instead
		 * (that could also still be unspecified).
		 */
		if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))
			sin6->sin6_addr = udp->udp_v6src;
		else
			sin6->sin6_addr = udp->udp_bound_v6src;
		break;
	}

	return (0);
}

/*
 * Handle special out-of-band ioctl requests (see PSARC/2008/265).
 *
 * mp is the command block (a cmdblk_t at b_rptr) and mp->b_cont is the
 * buffer the answer is written into.  The result code is stored in
 * cb_error and the same message is sent back with qreply().  A missing
 * data block, or one shorter than cb_len, is answered with EPROTO;
 * commands other than TI_GETPEERNAME / TI_GETMYNAME get EINVAL.
 */
static void
udp_wput_cmdblk(queue_t *q, mblk_t *mp)
{
	void	*data;
	mblk_t	*datamp = mp->b_cont;
	udp_t	*udp = Q_TO_UDP(q);
	cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;

	if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
		cmdp->cb_error = EPROTO;
		qreply(q, mp);
		return;
	}
	data = datamp->b_rptr;

	/* The name helpers assert that udp_rwlock is held. */
	rw_enter(&udp->udp_rwlock, RW_READER);
	switch (cmdp->cb_cmd) {
	case TI_GETPEERNAME:
		cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len);
		break;
	case TI_GETMYNAME:
		cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len);
		break;
	default:
		cmdp->cb_error = EINVAL;
		break;
	}
	rw_exit(&udp->udp_rwlock);

	qreply(q, mp);
}

/*
 * Mark the endpoint as no longer being a socket and, if the read-side
 * synchronous stream (direct sockfs) interface is active, switch it off by
 * draining the receive list upstream.
 */
static void
udp_disable_direct_sockfs(udp_t *udp)
{
	udp->udp_issocket = B_FALSE;
	if (udp->udp_direct_sockfs) {
		/*
		 * Disable read-side synchronous stream interface and
		 * drain any queued data.
		 */
		udp_rcv_drain(udp->udp_connp->conn_rq, udp, B_FALSE);
		ASSERT(!udp->udp_direct_sockfs);
		UDP_STAT(udp->udp_us, udp_sock_fallback);
	}
}

/*
 * Write-side handler for everything that is not plain data: M_CMD blocks,
 * TPI primitives (M_PROTO / M_PCPROTO), M_FLUSH, M_IOCTL and M_IOCDATA.
 * Each recognised request is handed to its handler and the function
 * returns; anything that is not consumed here falls out of the switch and
 * is passed to ip_output() at the bottom.
 */
static void
udp_wput_other(queue_t *q, mblk_t *mp)
{
	uchar_t	*rptr = mp->b_rptr;
	struct datab *db;
	struct iocblk *iocp;
	cred_t	*cr;
	conn_t	*connp = Q_TO_CONN(q);
	udp_t	*udp = connp->conn_udp;
	udp_stack_t *us;

	TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START,
	    "udp_wput_other_start: q %p", q);

	us = udp->udp_us;
	db = mp->b_datap;

	switch (db->db_type) {
	case M_CMD:
		udp_wput_cmdblk(q, mp);
		return;

	case M_PROTO:
	case M_PCPROTO:
		/* Too short to even carry a TPI primitive type: drop it. */
		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
			freemsg(mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "protoshort");
			return;
		}
		switch (((t_primp_t)rptr)->type) {
		case T_ADDR_REQ:
			udp_addr_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "addrreq");
			return;
		case O_T_BIND_REQ:
		case T_BIND_REQ:
			udp_tpi_bind(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "bindreq");
			return;
		case T_CONN_REQ:
			udp_tpi_connect(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "connreq");
			return;
		case T_CAPABILITY_REQ:
			udp_capability_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "capabreq");
			return;
		case T_INFO_REQ:
			udp_info_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "inforeq");
			return;
		case T_UNITDATA_REQ:
			/*
			 * If a T_UNITDATA_REQ gets here, the address must
			 * be bad.  Valid T_UNITDATA_REQs are handled
			 * in udp_wput.
			 */
			udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "unitdatareq");
			return;
		case T_UNBIND_REQ:
			udp_tpi_unbind(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "unbindreq");
			return;
		case T_SVR4_OPTMGMT_REQ:
			/*
			 * All Solaris components should pass a db_credp
			 * for this TPI message, hence we ASSERT.
			 * But in case there is some other M_PROTO that looks
			 * like a TPI message sent by some other kernel
			 * component, we check and return an error.
			 */
			cr = msg_getcred(mp, NULL);
			ASSERT(cr != NULL);
			if (cr == NULL) {
				udp_err_ack(q, mp, TSYSERR, EINVAL);
				return;
			}
			/* Only fall back to option processing if SNMP declines. */
			if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
			    cr)) {
				(void) svr4_optcom_req(q,
				    mp, cr, &udp_opt_obj, B_TRUE);
			}
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "optmgmtreq");
			return;

		case T_OPTMGMT_REQ:
			/*
			 * All Solaris components should pass a db_credp
			 * for this TPI message, hence we ASSERT.
			 * But in case there is some other M_PROTO that looks
			 * like a TPI message sent by some other kernel
			 * component, we check and return an error.
			 */
			cr = msg_getcred(mp, NULL);
			ASSERT(cr != NULL);
			if (cr == NULL) {
				udp_err_ack(q, mp, TSYSERR, EINVAL);
				return;
			}
			(void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "optmgmtreq");
			return;

		case T_DISCON_REQ:
			udp_tpi_disconnect(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "disconreq");
			return;

		/* The following TPI message is not supported by udp. */
		case O_T_CONN_RES:
		case T_CONN_RES:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q,
			    "connres/disconreq");
			return;

		/* The following 3 TPI messages are illegal for udp. */
		case T_DATA_REQ:
		case T_EXDATA_REQ:
		case T_ORDREL_REQ:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q,
			    "data/exdata/ordrel");
			return;
		default:
			break;
		}
		break;
	case M_FLUSH:
		/* Flush our write queue, then let the message continue down. */
		if (*rptr & FLUSHW)
			flushq(q, FLUSHDATA);
		break;
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		switch (iocp->ioc_cmd) {
		case TI_GETPEERNAME:
			if (udp->udp_state != TS_DATA_XFER) {
				/*
				 * If a default destination address has not
				 * been associated with the stream, then we
				 * don't know the peer's name.
				 */
				iocp->ioc_error = ENOTCONN;
				iocp->ioc_count = 0;
				mp->b_datap->db_type = M_IOCACK;
				qreply(q, mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
				    "udp_wput_other_end: q %p (%S)", q,
				    "getpeername");
				return;
			}
			/* FALLTHRU */
		case TI_GETMYNAME: {
			/*
			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
			 * need to copyin the user's strbuf structure.
			 * Processing will continue in the M_IOCDATA case
			 * below.
			 */
			mi_copyin(q, mp, NULL,
			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "getmyname");
			return;
			}
		case ND_SET:
			/* nd_getset performs the necessary checking */
		case ND_GET:
			/* If nd_getset() declines, the ioctl goes on to IP. */
			if (nd_getset(q, us->us_nd, mp)) {
				qreply(q, mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
				    "udp_wput_other_end: q %p (%S)", q, "get");
				return;
			}
			break;
		case _SIOCSOCKFALLBACK:
			/*
			 * Either sockmod is about to be popped and the
			 * socket would now be treated as a plain stream,
			 * or a module is about to be pushed so we could
			 * no longer use read-side synchronous stream.
			 * Drain any queued data and disable direct sockfs
			 * interface from now on.
			 */
			if (!udp->udp_issocket) {
				DB_TYPE(mp) = M_IOCNAK;
				iocp->ioc_error = EINVAL;
			} else {
				udp_disable_direct_sockfs(udp);

				DB_TYPE(mp) = M_IOCACK;
				iocp->ioc_error = 0;
			}
			iocp->ioc_count = 0;
			iocp->ioc_rval = 0;
			qreply(q, mp);
			return;
		default:
			break;
		}
		break;
	case M_IOCDATA:
		udp_wput_iocdata(q, mp);
		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
		    "udp_wput_other_end: q %p (%S)", q, "iocdata");
		return;
	default:
		/* Unrecognized messages are passed through without change. */
		break;
	}
	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
	    "udp_wput_other_end: q %p (%S)", q, "end");
	ip_output(connp, mp, q, IP_WPUT);
}

/*
 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
 * messages.
 *
 * Only the TI_GETMYNAME / TI_GETPEERNAME continuations are handled here;
 * M_IOCDATA for any other ioctl is passed to ip_output().  The copy state
 * reported by mi_copy_state() selects the step: after the strbuf has been
 * copied in, the address is built and copied out; after that the updated
 * strbuf is copied out; after that the ioctl is completed.
 */
static void
udp_wput_iocdata(queue_t *q, mblk_t *mp)
{
	mblk_t	*mp1;
	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
	STRUCT_HANDLE(strbuf, sb);
	udp_t	*udp = Q_TO_UDP(q);
	/*
	 * NOTE(review): "error" is assigned only inside the two-case switch
	 * further down.  The ioc_cmd filter just below guarantees one of the
	 * cases matches, but a compiler cannot see that.
	 */
	int	error;
	uint_t	addrlen;

	/* Make sure it is one of ours. */
	switch (iocp->ioc_cmd) {
	case TI_GETMYNAME:
	case TI_GETPEERNAME:
		break;
	default:
		ip_output(udp->udp_connp, mp, q, IP_WPUT);
		return;
	}

	switch (mi_copy_state(q, mp, &mp1)) {
	case -1:
		return;
	case MI_COPY_CASE(MI_COPY_IN, 1):
		break;
	case MI_COPY_CASE(MI_COPY_OUT, 1):
		/*
		 * The address has been copied out, so now
		 * copyout the strbuf.
		 */
		mi_copyout(q, mp);
		return;
	case MI_COPY_CASE(MI_COPY_OUT, 2):
		/*
		 * The address and strbuf have been copied out.
		 * We're done, so just acknowledge the original
		 * M_IOCTL.
		 */
		mi_copy_done(q, mp, 0);
		return;
	default:
		/*
		 * Something strange has happened, so acknowledge
		 * the original M_IOCTL with an EPROTO error.
		 */
		mi_copy_done(q, mp, EPROTO);
		return;
	}

	/*
	 * Now we have the strbuf structure for TI_GETMYNAME
	 * and TI_GETPEERNAME.  Next we copyout the requested
	 * address and then we'll copyout the strbuf.
	 */
	STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);
	addrlen = udp->udp_family == AF_INET ? sizeof (sin_t) : sizeof (sin6_t);
	/* The user's buffer must be able to hold the whole address. */
	if (STRUCT_FGET(sb, maxlen) < addrlen) {
		mi_copy_done(q, mp, EINVAL);
		return;
	}

	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);

	if (mp1 == NULL)
		return;

	rw_enter(&udp->udp_rwlock, RW_READER);
	switch (iocp->ioc_cmd) {
	case TI_GETMYNAME:
		error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen);
		break;
	case TI_GETPEERNAME:
		error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen);
		break;
	}
	rw_exit(&udp->udp_rwlock);

	if (error != 0) {
		mi_copy_done(q, mp, error);
	} else {
		/* Account for the address just written and report its length. */
		mp1->b_wptr += addrlen;
		STRUCT_FSET(sb, len, addrlen);

		/* Copy out the address */
		mi_copyout(q, mp);
	}
}

/*
 * Run the options carried in a T_UNITDATA_REQ through tpi_optcom_buf().
 *
 * The credential attached to the message is used when there is one,
 * otherwise the credential of the conn.  Returns 0 on success; on failure
 * returns -1 with the error code stored in *errorp.
 */
static int
udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
    udpattrs_t *udpattrs)
{
	struct T_unitdata_req *udreqp;
	int is_absreq_failure;
	cred_t *cr;

	ASSERT(((t_primp_t)mp->b_rptr)->type);

	/*
	 * All Solaris components should pass a db_credp
	 * for this TPI message, hence we should ASSERT.
	 * However, RPC (svc_clts_ksend) does this odd thing where it
	 * passes the options from a T_UNITDATA_IND unchanged in a
	 * T_UNITDATA_REQ.  While that is the right thing to do for
	 * some options, SCM_UCRED being the key one, this also makes it
	 * pass down IP_RECVDSTADDR.  Hence we can't ASSERT here.
	 */
	cr = msg_getcred(mp, NULL);
	if (cr == NULL) {
		cr = Q_TO_CONN(q)->conn_cred;
	}
	udreqp = (struct T_unitdata_req *)mp->b_rptr;

	*errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length,
	    udreqp->OPT_offset, cr, &udp_opt_obj,
	    udpattrs, &is_absreq_failure);

	if (*errorp != 0) {
		/*
		 * Note: No special action needed in this
		 * module for "is_absreq_failure"
		 */
		return (-1);		/* failure */
	}
	ASSERT(is_absreq_failure == 0);
	return (0);	/* success */
}

/*
 * Global UDP initialization: compute the maximum option size from the
 * option table and register the per-netstack create/destroy callbacks.
 */
void
udp_ddi_g_init(void)
{
	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
	    udp_opt_obj.odb_opt_arr_cnt);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of udp_stack_t's.
	 */
	netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
}

/*
 * Global UDP teardown: undo the netstack registration made in
 * udp_ddi_g_init().
 */
void
udp_ddi_g_destroy(void)
{
	netstack_unregister(NS_UDP);
}

/* Module name whose major number is used for the per-stack LDI identifier. */
#define	INET_NAME	"ip"

/*
 * Initialize the UDP stack instance.
 *
 * Allocates and returns the udp_stack_t for netstack "ns": default extra
 * privileged ports, the bind fanout table and its locks, a private copy of
 * the tunable parameter array registered with ND, both kstats, and an LDI
 * identifier.
 */
static void *
udp_stack_init(netstackid_t stackid, netstack_t *ns)
{
	udp_stack_t	*us;
	udpparam_t	*pa;
	int		i;
	int		error = 0;
	major_t		major;

	us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
	us->us_netstack = ns;

	us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
	us->us_epriv_ports[0] = 2049;
	us->us_epriv_ports[1] = 4045;

	/*
	 * The smallest anonymous port in the privileged port range which UDP
	 * looks for free port.  Use in the option UDP_ANONPRIVBIND.
	 */
	us->us_min_anonpriv_port = 512;

	us->us_bind_fanout_size = udp_bind_fanout_size;

	/* Roundup variable that might have been modified in /etc/system */
	if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) {
		/* Not a power of two. Round up to nearest power of two */
		/*
		 * NOTE(review): if no power of two up to 1 << 30 exceeds the
		 * size, the loop ends with i == 31 and the assignment below
		 * evaluates 1 << 31 on an int -- confirm the tunable is
		 * bounded elsewhere.
		 */
		for (i = 0; i < 31; i++) {
			if (us->us_bind_fanout_size < (1 << i))
				break;
		}
		us->us_bind_fanout_size = 1 << i;
	}
	us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
	    sizeof (udp_fanout_t), KM_SLEEP);
	for (i = 0; i < us->us_bind_fanout_size; i++) {
		mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
		    NULL);
	}

	/* Each stack gets its own writable copy of the parameter table. */
	pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP);

	us->us_param_arr = pa;
	bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr));

	(void) udp_param_register(&us->us_nd,
	    us->us_param_arr, A_CNT(udp_param_arr));

	us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics);
	us->us_mibkp = udp_kstat_init(stackid);

	major = mod_name_to_major(INET_NAME);
	error = ldi_ident_from_major(major, &us->us_ldi_ident);
	ASSERT(error == 0);
	return (us);
}

/*
 * Free the UDP stack instance.
7412 */ 7413 static void 7414 udp_stack_fini(netstackid_t stackid, void *arg) 7415 { 7416 udp_stack_t *us = (udp_stack_t *)arg; 7417 int i; 7418 7419 for (i = 0; i < us->us_bind_fanout_size; i++) { 7420 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 7421 } 7422 7423 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 7424 sizeof (udp_fanout_t)); 7425 7426 us->us_bind_fanout = NULL; 7427 7428 nd_free(&us->us_nd); 7429 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 7430 us->us_param_arr = NULL; 7431 7432 udp_kstat_fini(stackid, us->us_mibkp); 7433 us->us_mibkp = NULL; 7434 7435 udp_kstat2_fini(stackid, us->us_kstat); 7436 us->us_kstat = NULL; 7437 bzero(&us->us_statistics, sizeof (us->us_statistics)); 7438 7439 ldi_ident_release(us->us_ldi_ident); 7440 kmem_free(us, sizeof (*us)); 7441 } 7442 7443 static void * 7444 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 7445 { 7446 kstat_t *ksp; 7447 7448 udp_stat_t template = { 7449 { "udp_ip_send", KSTAT_DATA_UINT64 }, 7450 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 7451 { "udp_ire_null", KSTAT_DATA_UINT64 }, 7452 { "udp_drain", KSTAT_DATA_UINT64 }, 7453 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 7454 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 7455 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 7456 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7457 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7458 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7459 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7460 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7461 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7462 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7463 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7464 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7465 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7466 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7467 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7468 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7469 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7470 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 
7471 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7472 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7473 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 7474 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7475 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7476 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7477 #ifdef DEBUG 7478 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7479 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7480 #endif 7481 }; 7482 7483 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7484 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 7485 KSTAT_FLAG_VIRTUAL, stackid); 7486 7487 if (ksp == NULL) 7488 return (NULL); 7489 7490 bcopy(&template, us_statisticsp, sizeof (template)); 7491 ksp->ks_data = (void *)us_statisticsp; 7492 ksp->ks_private = (void *)(uintptr_t)stackid; 7493 7494 kstat_install(ksp); 7495 return (ksp); 7496 } 7497 7498 static void 7499 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 7500 { 7501 if (ksp != NULL) { 7502 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7503 kstat_delete_netstack(ksp, stackid); 7504 } 7505 } 7506 7507 static void * 7508 udp_kstat_init(netstackid_t stackid) 7509 { 7510 kstat_t *ksp; 7511 7512 udp_named_kstat_t template = { 7513 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 7514 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7515 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 7516 { "entrySize", KSTAT_DATA_INT32, 0 }, 7517 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7518 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7519 }; 7520 7521 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 7522 KSTAT_TYPE_NAMED, 7523 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 7524 7525 if (ksp == NULL || ksp->ks_data == NULL) 7526 return (NULL); 7527 7528 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7529 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7530 7531 bcopy(&template, ksp->ks_data, sizeof (template)); 7532 ksp->ks_update = udp_kstat_update; 7533 ksp->ks_private = (void 
*)(uintptr_t)stackid; 7534 7535 kstat_install(ksp); 7536 return (ksp); 7537 } 7538 7539 static void 7540 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 7541 { 7542 if (ksp != NULL) { 7543 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7544 kstat_delete_netstack(ksp, stackid); 7545 } 7546 } 7547 7548 static int 7549 udp_kstat_update(kstat_t *kp, int rw) 7550 { 7551 udp_named_kstat_t *udpkp; 7552 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 7553 netstack_t *ns; 7554 udp_stack_t *us; 7555 7556 if ((kp == NULL) || (kp->ks_data == NULL)) 7557 return (EIO); 7558 7559 if (rw == KSTAT_WRITE) 7560 return (EACCES); 7561 7562 ns = netstack_find_by_stackid(stackid); 7563 if (ns == NULL) 7564 return (-1); 7565 us = ns->netstack_udp; 7566 if (us == NULL) { 7567 netstack_rele(ns); 7568 return (-1); 7569 } 7570 udpkp = (udp_named_kstat_t *)kp->ks_data; 7571 7572 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 7573 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 7574 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 7575 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 7576 netstack_rele(ns); 7577 return (0); 7578 } 7579 7580 /* 7581 * Read-side synchronous stream info entry point, called as a 7582 * result of handling certain STREAMS ioctl operations. 
7583 */ 7584 static int 7585 udp_rinfop(queue_t *q, infod_t *dp) 7586 { 7587 mblk_t *mp; 7588 uint_t cmd = dp->d_cmd; 7589 int res = 0; 7590 int error = 0; 7591 udp_t *udp = Q_TO_UDP(q); 7592 struct stdata *stp = STREAM(q); 7593 7594 mutex_enter(&udp->udp_drain_lock); 7595 /* If shutdown on read has happened, return nothing */ 7596 mutex_enter(&stp->sd_lock); 7597 if (stp->sd_flag & STREOF) { 7598 mutex_exit(&stp->sd_lock); 7599 goto done; 7600 } 7601 mutex_exit(&stp->sd_lock); 7602 7603 if ((mp = udp->udp_rcv_list_head) == NULL) 7604 goto done; 7605 7606 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7607 7608 if (cmd & INFOD_COUNT) { 7609 /* 7610 * Return the number of messages. 7611 */ 7612 dp->d_count += udp->udp_rcv_msgcnt; 7613 res |= INFOD_COUNT; 7614 } 7615 if (cmd & INFOD_BYTES) { 7616 /* 7617 * Return size of all data messages. 7618 */ 7619 dp->d_bytes += udp->udp_rcv_cnt; 7620 res |= INFOD_BYTES; 7621 } 7622 if (cmd & INFOD_FIRSTBYTES) { 7623 /* 7624 * Return size of first data message. 7625 */ 7626 dp->d_bytes = msgdsize(mp); 7627 res |= INFOD_FIRSTBYTES; 7628 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7629 } 7630 if (cmd & INFOD_COPYOUT) { 7631 mblk_t *mp1 = mp->b_cont; 7632 int n; 7633 /* 7634 * Return data contents of first message. 7635 */ 7636 ASSERT(DB_TYPE(mp1) == M_DATA); 7637 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7638 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7639 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7640 UIO_READ, dp->d_uiop)) != 0) { 7641 goto done; 7642 } 7643 mp1 = mp1->b_cont; 7644 } 7645 res |= INFOD_COPYOUT; 7646 dp->d_cmd &= ~INFOD_COPYOUT; 7647 } 7648 done: 7649 mutex_exit(&udp->udp_drain_lock); 7650 7651 dp->d_res |= res; 7652 7653 return (error); 7654 } 7655 7656 /* 7657 * Read-side synchronous stream entry point. This is called as a result 7658 * of recv/read operation done at sockfs, and is guaranteed to execute 7659 * outside of the interrupt thread context. 
It returns a single datagram 7660 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7661 */ 7662 static int 7663 udp_rrw(queue_t *q, struiod_t *dp) 7664 { 7665 mblk_t *mp; 7666 udp_t *udp = Q_TO_UDP(q); 7667 udp_stack_t *us = udp->udp_us; 7668 7669 /* 7670 * Dequeue datagram from the head of the list and return 7671 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7672 * set/cleared depending on whether or not there's data 7673 * remaining in the list. 7674 */ 7675 mutex_enter(&udp->udp_drain_lock); 7676 if (!udp->udp_direct_sockfs) { 7677 mutex_exit(&udp->udp_drain_lock); 7678 UDP_STAT(us, udp_rrw_busy); 7679 return (EBUSY); 7680 } 7681 if ((mp = udp->udp_rcv_list_head) != NULL) { 7682 uint_t size = msgdsize(mp); 7683 7684 /* Last datagram in the list? */ 7685 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 7686 udp->udp_rcv_list_tail = NULL; 7687 mp->b_next = NULL; 7688 7689 udp->udp_rcv_cnt -= size; 7690 udp->udp_rcv_msgcnt--; 7691 UDP_STAT(us, udp_rrw_msgcnt); 7692 7693 /* No longer flow-controlling? */ 7694 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 7695 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 7696 udp->udp_drain_qfull = B_FALSE; 7697 } 7698 if (udp->udp_rcv_list_head == NULL) { 7699 /* 7700 * Either we just dequeued the last datagram or 7701 * we get here from sockfs and have nothing to 7702 * return; in this case clear RSLEEP. 7703 */ 7704 ASSERT(udp->udp_rcv_cnt == 0); 7705 ASSERT(udp->udp_rcv_msgcnt == 0); 7706 ASSERT(udp->udp_rcv_list_tail == NULL); 7707 STR_WAKEUP_CLEAR(STREAM(q)); 7708 } else { 7709 /* 7710 * More data follows; we need udp_rrw() to be 7711 * called in future to pick up the rest. 
7712 */ 7713 STR_WAKEUP_SET(STREAM(q)); 7714 } 7715 mutex_exit(&udp->udp_drain_lock); 7716 dp->d_mp = mp; 7717 return (0); 7718 } 7719 7720 /* 7721 * Enqueue a completely-built T_UNITDATA_IND message into the receive 7722 * list; this is typically executed within the interrupt thread context 7723 * and so we do things as quickly as possible. 7724 */ 7725 static void 7726 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 7727 { 7728 ASSERT(q == RD(q)); 7729 ASSERT(pkt_len == msgdsize(mp)); 7730 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 7731 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 7732 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 7733 7734 mutex_enter(&udp->udp_drain_lock); 7735 /* 7736 * Wake up and signal the receiving app; it is okay to do this 7737 * before enqueueing the mp because we are holding the drain lock. 7738 * One of the advantages of synchronous stream is the ability for 7739 * us to find out when the application performs a read on the 7740 * socket by way of udp_rrw() entry point being called. We need 7741 * to generate SIGPOLL/SIGIO for each received data in the case 7742 * of asynchronous socket just as in the strrput() case. However, 7743 * we only wake the application up when necessary, i.e. during the 7744 * first enqueue. When udp_rrw() is called, we send up a single 7745 * datagram upstream and call STR_WAKEUP_SET() again when there 7746 * are still data remaining in our receive queue. 7747 */ 7748 STR_WAKEUP_SENDSIG(STREAM(q), udp->udp_rcv_list_head); 7749 if (udp->udp_rcv_list_head == NULL) 7750 udp->udp_rcv_list_head = mp; 7751 else 7752 udp->udp_rcv_list_tail->b_next = mp; 7753 udp->udp_rcv_list_tail = mp; 7754 udp->udp_rcv_cnt += pkt_len; 7755 udp->udp_rcv_msgcnt++; 7756 7757 /* Need to flow-control? 
*/ 7758 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 7759 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 7760 udp->udp_drain_qfull = B_TRUE; 7761 7762 mutex_exit(&udp->udp_drain_lock); 7763 } 7764 7765 /* 7766 * Drain the contents of receive list to the module upstream; we do 7767 * this during close or when we fallback to the slow mode due to 7768 * sockmod being popped or a module being pushed on top of us. 7769 */ 7770 static void 7771 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 7772 { 7773 mblk_t *mp; 7774 udp_stack_t *us = udp->udp_us; 7775 7776 mutex_enter(&udp->udp_drain_lock); 7777 /* 7778 * There is no race with a concurrent udp_input() sending 7779 * up packets using putnext() after we have cleared the 7780 * udp_direct_sockfs flag but before we have completed 7781 * sending up the packets in udp_rcv_list, since we are 7782 * either a writer or we have quiesced the conn. 7783 */ 7784 udp->udp_direct_sockfs = B_FALSE; 7785 mutex_exit(&udp->udp_drain_lock); 7786 7787 if (udp->udp_rcv_list_head != NULL) 7788 UDP_STAT(us, udp_drain); 7789 7790 /* 7791 * Send up everything via putnext(); note here that we 7792 * don't need the udp_drain_lock to protect us since 7793 * nothing can enter udp_rrw() and that we currently 7794 * have exclusive access to this udp. 
7795 */ 7796 while ((mp = udp->udp_rcv_list_head) != NULL) { 7797 udp->udp_rcv_list_head = mp->b_next; 7798 mp->b_next = NULL; 7799 udp->udp_rcv_cnt -= msgdsize(mp); 7800 udp->udp_rcv_msgcnt--; 7801 if (closing) { 7802 freemsg(mp); 7803 } else { 7804 ASSERT(q == RD(q)); 7805 putnext(q, mp); 7806 } 7807 } 7808 ASSERT(udp->udp_rcv_cnt == 0); 7809 ASSERT(udp->udp_rcv_msgcnt == 0); 7810 ASSERT(udp->udp_rcv_list_head == NULL); 7811 udp->udp_rcv_list_tail = NULL; 7812 udp->udp_drain_qfull = B_FALSE; 7813 } 7814 7815 static size_t 7816 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7817 { 7818 udp_stack_t *us = udp->udp_us; 7819 7820 /* We add a bit of extra buffering */ 7821 size += size >> 1; 7822 if (size > us->us_max_buf) 7823 size = us->us_max_buf; 7824 7825 udp->udp_rcv_hiwat = size; 7826 return (size); 7827 } 7828 7829 /* 7830 * For the lower queue so that UDP can be a dummy mux. 7831 * Nobody should be sending 7832 * packets up this stream 7833 */ 7834 static void 7835 udp_lrput(queue_t *q, mblk_t *mp) 7836 { 7837 mblk_t *mp1; 7838 7839 switch (mp->b_datap->db_type) { 7840 case M_FLUSH: 7841 /* Turn around */ 7842 if (*mp->b_rptr & FLUSHW) { 7843 *mp->b_rptr &= ~FLUSHR; 7844 qreply(q, mp); 7845 return; 7846 } 7847 break; 7848 } 7849 /* Could receive messages that passed through ar_rput */ 7850 for (mp1 = mp; mp1; mp1 = mp1->b_cont) 7851 mp1->b_prev = mp1->b_next = NULL; 7852 freemsg(mp); 7853 } 7854 7855 /* 7856 * For the lower queue so that UDP can be a dummy mux. 7857 * Nobody should be sending packets down this stream. 7858 */ 7859 /* ARGSUSED */ 7860 void 7861 udp_lwput(queue_t *q, mblk_t *mp) 7862 { 7863 freemsg(mp); 7864 } 7865 7866 /* 7867 * Below routines for UDP socket module. 
7868 */ 7869 7870 static conn_t * 7871 udp_do_open(cred_t *credp, boolean_t isv6, int flags) 7872 { 7873 udp_t *udp; 7874 conn_t *connp; 7875 zoneid_t zoneid; 7876 netstack_t *ns; 7877 udp_stack_t *us; 7878 7879 ns = netstack_find_by_cred(credp); 7880 ASSERT(ns != NULL); 7881 us = ns->netstack_udp; 7882 ASSERT(us != NULL); 7883 7884 /* 7885 * For exclusive stacks we set the zoneid to zero 7886 * to make UDP operate as if in the global zone. 7887 */ 7888 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 7889 zoneid = GLOBAL_ZONEID; 7890 else 7891 zoneid = crgetzoneid(credp); 7892 7893 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 7894 7895 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 7896 if (connp == NULL) { 7897 netstack_rele(ns); 7898 return (NULL); 7899 } 7900 udp = connp->conn_udp; 7901 7902 /* 7903 * ipcl_conn_create did a netstack_hold. Undo the hold that was 7904 * done by netstack_find_by_cred() 7905 */ 7906 netstack_rele(ns); 7907 7908 rw_enter(&udp->udp_rwlock, RW_WRITER); 7909 ASSERT(connp->conn_ulp == IPPROTO_UDP); 7910 ASSERT(connp->conn_udp == udp); 7911 ASSERT(udp->udp_connp == connp); 7912 7913 /* Set the initial state of the stream and the privilege status. 
*/ 7914 udp->udp_state = TS_UNBND; 7915 if (isv6) { 7916 udp->udp_family = AF_INET6; 7917 udp->udp_ipversion = IPV6_VERSION; 7918 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 7919 udp->udp_ttl = us->us_ipv6_hoplimit; 7920 connp->conn_af_isv6 = B_TRUE; 7921 connp->conn_flags |= IPCL_ISV6; 7922 } else { 7923 udp->udp_family = AF_INET; 7924 udp->udp_ipversion = IPV4_VERSION; 7925 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 7926 udp->udp_ttl = us->us_ipv4_ttl; 7927 connp->conn_af_isv6 = B_FALSE; 7928 connp->conn_flags &= ~IPCL_ISV6; 7929 } 7930 7931 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 7932 udp->udp_pending_op = -1; 7933 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 7934 connp->conn_zoneid = zoneid; 7935 7936 udp->udp_open_time = lbolt64; 7937 udp->udp_open_pid = curproc->p_pid; 7938 7939 /* 7940 * If the caller has the process-wide flag set, then default to MAC 7941 * exempt mode. This allows read-down to unlabeled hosts. 7942 */ 7943 if (getpflags(NET_MAC_AWARE, credp) != 0) 7944 connp->conn_mac_exempt = B_TRUE; 7945 7946 connp->conn_ulp_labeled = is_system_labeled(); 7947 7948 udp->udp_us = us; 7949 7950 connp->conn_recv = udp_input; 7951 crhold(credp); 7952 connp->conn_cred = credp; 7953 7954 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 7955 7956 rw_exit(&udp->udp_rwlock); 7957 7958 return (connp); 7959 } 7960 7961 /* ARGSUSED */ 7962 sock_lower_handle_t 7963 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 7964 uint_t *smodep, int *errorp, int flags, cred_t *credp) 7965 { 7966 udp_t *udp = NULL; 7967 udp_stack_t *us; 7968 conn_t *connp; 7969 boolean_t isv6; 7970 7971 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 7972 (proto != 0 && proto != IPPROTO_UDP)) { 7973 *errorp = EPROTONOSUPPORT; 7974 return (NULL); 7975 } 7976 7977 if (family == AF_INET6) 7978 isv6 = B_TRUE; 7979 else 7980 isv6 = B_FALSE; 7981 7982 connp = udp_do_open(credp, isv6, flags); 7983 if (connp == 
NULL) { 7984 *errorp = ENOMEM; 7985 return (NULL); 7986 } 7987 7988 udp = connp->conn_udp; 7989 ASSERT(udp != NULL); 7990 us = udp->udp_us; 7991 ASSERT(us != NULL); 7992 7993 connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET; 7994 7995 /* Set flow control */ 7996 rw_enter(&udp->udp_rwlock, RW_WRITER); 7997 (void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat); 7998 udp->udp_rcv_disply_hiwat = us->us_recv_hiwat; 7999 udp->udp_rcv_lowat = udp_mod_info.mi_lowat; 8000 udp->udp_xmit_hiwat = us->us_xmit_hiwat; 8001 udp->udp_xmit_lowat = us->us_xmit_lowat; 8002 8003 if (udp->udp_family == AF_INET6) { 8004 /* Build initial header template for transmit */ 8005 if ((*errorp = udp_build_hdrs(udp)) != 0) { 8006 rw_exit(&udp->udp_rwlock); 8007 ipcl_conn_destroy(connp); 8008 return (NULL); 8009 } 8010 } 8011 rw_exit(&udp->udp_rwlock); 8012 8013 connp->conn_flow_cntrld = B_FALSE; 8014 8015 ASSERT(us->us_ldi_ident != NULL); 8016 8017 if ((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) { 8018 ip1dbg(("udp_create: create of IP helper stream failed\n")); 8019 udp_do_close(connp); 8020 return (NULL); 8021 } 8022 8023 /* Set the send flow control */ 8024 connp->conn_wq->q_hiwat = us->us_xmit_hiwat; 8025 connp->conn_wq->q_lowat = us->us_xmit_lowat; 8026 8027 mutex_enter(&connp->conn_lock); 8028 connp->conn_state_flags &= ~CONN_INCIPIENT; 8029 mutex_exit(&connp->conn_lock); 8030 8031 *errorp = 0; 8032 *smodep = SM_ATOMIC; 8033 *sock_downcalls = &sock_udp_downcalls; 8034 return ((sock_lower_handle_t)connp); 8035 } 8036 8037 /* ARGSUSED */ 8038 void 8039 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 8040 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 8041 { 8042 conn_t *connp = (conn_t *)proto_handle; 8043 udp_t *udp = connp->conn_udp; 8044 udp_stack_t *us = udp->udp_us; 8045 struct sock_proto_props sopp; 8046 8047 /* All Solaris components should pass a cred for this operation. 
*/ 8048 ASSERT(cr != NULL); 8049 8050 connp->conn_upcalls = sock_upcalls; 8051 connp->conn_upper_handle = sock_handle; 8052 8053 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | 8054 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 8055 sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 8056 sopp.sopp_maxblk = INFPSZ; 8057 sopp.sopp_rxhiwat = udp->udp_rcv_hiwat; 8058 sopp.sopp_maxaddrlen = sizeof (sin6_t); 8059 sopp.sopp_maxpsz = 8060 (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 8061 UDP_MAXPACKET_IPV6; 8062 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 : 8063 udp_mod_info.mi_minpsz; 8064 8065 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 8066 &sopp); 8067 } 8068 8069 static void 8070 udp_do_close(conn_t *connp) 8071 { 8072 udp_t *udp; 8073 8074 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 8075 udp = connp->conn_udp; 8076 8077 udp_quiesce_conn(connp); 8078 ip_quiesce_conn(connp); 8079 8080 if (!IPCL_IS_NONSTR(connp)) { 8081 /* 8082 * Disable read-side synchronous stream 8083 * interface and drain any queued data. 8084 */ 8085 ASSERT(connp->conn_wq != NULL); 8086 udp_rcv_drain(connp->conn_wq, udp, B_TRUE); 8087 ASSERT(!udp->udp_direct_sockfs); 8088 8089 ASSERT(connp->conn_rq != NULL); 8090 qprocsoff(connp->conn_rq); 8091 } 8092 8093 ASSERT(udp->udp_rcv_cnt == 0); 8094 ASSERT(udp->udp_rcv_msgcnt == 0); 8095 ASSERT(udp->udp_rcv_list_head == NULL); 8096 ASSERT(udp->udp_rcv_list_tail == NULL); 8097 8098 udp_close_free(connp); 8099 8100 /* 8101 * Now we are truly single threaded on this stream, and can 8102 * delete the things hanging off the connp, and finally the connp. 8103 * We removed this connp from the fanout list, it cannot be 8104 * accessed thru the fanouts, and we already waited for the 8105 * conn_ref to drop to 0. We are already in close, so 8106 * there cannot be any other thread from the top. qprocsoff 8107 * has completed, and service has completed or won't run in 8108 * future. 
8109 */ 8110 ASSERT(connp->conn_ref == 1); 8111 if (!IPCL_IS_NONSTR(connp)) { 8112 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 8113 } else { 8114 ip_free_helper_stream(connp); 8115 } 8116 8117 connp->conn_ref--; 8118 ipcl_conn_destroy(connp); 8119 } 8120 8121 /* ARGSUSED */ 8122 int 8123 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 8124 { 8125 conn_t *connp = (conn_t *)proto_handle; 8126 8127 /* All Solaris components should pass a cred for this operation. */ 8128 ASSERT(cr != NULL); 8129 8130 udp_do_close(connp); 8131 return (0); 8132 } 8133 8134 static int 8135 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 8136 boolean_t bind_to_req_port_only) 8137 { 8138 sin_t *sin; 8139 sin6_t *sin6; 8140 sin6_t sin6addr; 8141 in_port_t port; /* Host byte order */ 8142 in_port_t requested_port; /* Host byte order */ 8143 int count; 8144 in6_addr_t v6src; 8145 int loopmax; 8146 udp_fanout_t *udpf; 8147 in_port_t lport; /* Network byte order */ 8148 zoneid_t zoneid; 8149 udp_t *udp; 8150 boolean_t is_inaddr_any; 8151 mlp_type_t addrtype, mlptype; 8152 udp_stack_t *us; 8153 int error = 0; 8154 mblk_t *mp = NULL; 8155 8156 udp = connp->conn_udp; 8157 us = udp->udp_us; 8158 8159 if (udp->udp_state != TS_UNBND) { 8160 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8161 "udp_bind: bad state, %u", udp->udp_state); 8162 return (-TOUTSTATE); 8163 } 8164 8165 switch (len) { 8166 case 0: 8167 if (udp->udp_family == AF_INET) { 8168 sin = (sin_t *)&sin6addr; 8169 *sin = sin_null; 8170 sin->sin_family = AF_INET; 8171 sin->sin_addr.s_addr = INADDR_ANY; 8172 udp->udp_ipversion = IPV4_VERSION; 8173 } else { 8174 ASSERT(udp->udp_family == AF_INET6); 8175 sin6 = (sin6_t *)&sin6addr; 8176 *sin6 = sin6_null; 8177 sin6->sin6_family = AF_INET6; 8178 V6_SET_ZERO(sin6->sin6_addr); 8179 udp->udp_ipversion = IPV6_VERSION; 8180 } 8181 port = 0; 8182 break; 8183 8184 case sizeof (sin_t): /* Complete IPv4 address */ 8185 sin = (sin_t *)sa; 
8186 8187 if (sin == NULL || !OK_32PTR((char *)sin)) 8188 return (EINVAL); 8189 8190 if (udp->udp_family != AF_INET || 8191 sin->sin_family != AF_INET) { 8192 return (EAFNOSUPPORT); 8193 } 8194 port = ntohs(sin->sin_port); 8195 break; 8196 8197 case sizeof (sin6_t): /* complete IPv6 address */ 8198 sin6 = (sin6_t *)sa; 8199 8200 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 8201 return (EINVAL); 8202 8203 if (udp->udp_family != AF_INET6 || 8204 sin6->sin6_family != AF_INET6) { 8205 return (EAFNOSUPPORT); 8206 } 8207 port = ntohs(sin6->sin6_port); 8208 break; 8209 8210 default: /* Invalid request */ 8211 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8212 "udp_bind: bad ADDR_length length %u", len); 8213 return (-TBADADDR); 8214 } 8215 8216 requested_port = port; 8217 8218 if (requested_port == 0 || !bind_to_req_port_only) 8219 bind_to_req_port_only = B_FALSE; 8220 else /* T_BIND_REQ and requested_port != 0 */ 8221 bind_to_req_port_only = B_TRUE; 8222 8223 if (requested_port == 0) { 8224 /* 8225 * If the application passed in zero for the port number, it 8226 * doesn't care which port number we bind to. Get one in the 8227 * valid range. 8228 */ 8229 if (udp->udp_anon_priv_bind) { 8230 port = udp_get_next_priv_port(udp); 8231 } else { 8232 port = udp_update_next_port(udp, 8233 us->us_next_port_to_try, B_TRUE); 8234 } 8235 } else { 8236 /* 8237 * If the port is in the well-known privileged range, 8238 * make sure the caller was privileged. 8239 */ 8240 int i; 8241 boolean_t priv = B_FALSE; 8242 8243 if (port < us->us_smallest_nonpriv_port) { 8244 priv = B_TRUE; 8245 } else { 8246 for (i = 0; i < us->us_num_epriv_ports; i++) { 8247 if (port == us->us_epriv_ports[i]) { 8248 priv = B_TRUE; 8249 break; 8250 } 8251 } 8252 } 8253 8254 if (priv) { 8255 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 8256 return (-TACCES); 8257 } 8258 } 8259 8260 if (port == 0) 8261 return (-TNOADDR); 8262 8263 /* 8264 * The state must be TS_UNBND. 
TPI mandates that users must send 8265 * TPI primitives only 1 at a time and wait for the response before 8266 * sending the next primitive. 8267 */ 8268 rw_enter(&udp->udp_rwlock, RW_WRITER); 8269 if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) { 8270 rw_exit(&udp->udp_rwlock); 8271 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8272 "udp_bind: bad state, %u", udp->udp_state); 8273 return (-TOUTSTATE); 8274 } 8275 /* XXX how to remove the T_BIND_REQ? Should set it before calling */ 8276 udp->udp_pending_op = T_BIND_REQ; 8277 /* 8278 * Copy the source address into our udp structure. This address 8279 * may still be zero; if so, IP will fill in the correct address 8280 * each time an outbound packet is passed to it. Since the udp is 8281 * not yet in the bind hash list, we don't grab the uf_lock to 8282 * change udp_ipversion 8283 */ 8284 if (udp->udp_family == AF_INET) { 8285 ASSERT(sin != NULL); 8286 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8287 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8288 udp->udp_ip_snd_options_len; 8289 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 8290 } else { 8291 ASSERT(sin6 != NULL); 8292 v6src = sin6->sin6_addr; 8293 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 8294 /* 8295 * no need to hold the uf_lock to set the udp_ipversion 8296 * since we are not yet in the fanout list 8297 */ 8298 udp->udp_ipversion = IPV4_VERSION; 8299 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 8300 UDPH_SIZE + udp->udp_ip_snd_options_len; 8301 } else { 8302 udp->udp_ipversion = IPV6_VERSION; 8303 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8304 } 8305 } 8306 8307 /* 8308 * If udp_reuseaddr is not set, then we have to make sure that 8309 * the IP address and port number the application requested 8310 * (or we selected for the application) is not being used by 8311 * another stream. If another stream is already using the 8312 * requested IP address and port, the behavior depends on 8313 * "bind_to_req_port_only". 
If set the bind fails; otherwise we 8314 * search for any an unused port to bind to the the stream. 8315 * 8316 * As per the BSD semantics, as modified by the Deering multicast 8317 * changes, if udp_reuseaddr is set, then we allow multiple binds 8318 * to the same port independent of the local IP address. 8319 * 8320 * This is slightly different than in SunOS 4.X which did not 8321 * support IP multicast. Note that the change implemented by the 8322 * Deering multicast code effects all binds - not only binding 8323 * to IP multicast addresses. 8324 * 8325 * Note that when binding to port zero we ignore SO_REUSEADDR in 8326 * order to guarantee a unique port. 8327 */ 8328 8329 count = 0; 8330 if (udp->udp_anon_priv_bind) { 8331 /* 8332 * loopmax = (IPPORT_RESERVED-1) - 8333 * us->us_min_anonpriv_port + 1 8334 */ 8335 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 8336 } else { 8337 loopmax = us->us_largest_anon_port - 8338 us->us_smallest_anon_port + 1; 8339 } 8340 8341 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 8342 zoneid = connp->conn_zoneid; 8343 8344 for (;;) { 8345 udp_t *udp1; 8346 boolean_t found_exclbind = B_FALSE; 8347 8348 /* 8349 * Walk through the list of udp streams bound to 8350 * requested port with the same IP address. 8351 */ 8352 lport = htons(port); 8353 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 8354 us->us_bind_fanout_size)]; 8355 mutex_enter(&udpf->uf_lock); 8356 for (udp1 = udpf->uf_udp; udp1 != NULL; 8357 udp1 = udp1->udp_bind_hash) { 8358 if (lport != udp1->udp_port) 8359 continue; 8360 8361 /* 8362 * On a labeled system, we must treat bindings to ports 8363 * on shared IP addresses by sockets with MAC exemption 8364 * privilege as being in all zones, as there's 8365 * otherwise no way to identify the right receiver. 
8366 */ 8367 if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) || 8368 IPCL_ZONE_MATCH(connp, 8369 udp1->udp_connp->conn_zoneid)) && 8370 !connp->conn_mac_exempt && \ 8371 !udp1->udp_connp->conn_mac_exempt) 8372 continue; 8373 8374 /* 8375 * If UDP_EXCLBIND is set for either the bound or 8376 * binding endpoint, the semantics of bind 8377 * is changed according to the following chart. 8378 * 8379 * spec = specified address (v4 or v6) 8380 * unspec = unspecified address (v4 or v6) 8381 * A = specified addresses are different for endpoints 8382 * 8383 * bound bind to allowed? 8384 * ------------------------------------- 8385 * unspec unspec no 8386 * unspec spec no 8387 * spec unspec no 8388 * spec spec yes if A 8389 * 8390 * For labeled systems, SO_MAC_EXEMPT behaves the same 8391 * as UDP_EXCLBIND, except that zoneid is ignored. 8392 */ 8393 if (udp1->udp_exclbind || udp->udp_exclbind || 8394 udp1->udp_connp->conn_mac_exempt || 8395 connp->conn_mac_exempt) { 8396 if (V6_OR_V4_INADDR_ANY( 8397 udp1->udp_bound_v6src) || 8398 is_inaddr_any || 8399 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 8400 &v6src)) { 8401 found_exclbind = B_TRUE; 8402 break; 8403 } 8404 continue; 8405 } 8406 8407 /* 8408 * Check ipversion to allow IPv4 and IPv6 sockets to 8409 * have disjoint port number spaces. 8410 */ 8411 if (udp->udp_ipversion != udp1->udp_ipversion) { 8412 8413 /* 8414 * On the first time through the loop, if the 8415 * the user intentionally specified a 8416 * particular port number, then ignore any 8417 * bindings of the other protocol that may 8418 * conflict. This allows the user to bind IPv6 8419 * alone and get both v4 and v6, or bind both 8420 * both and get each seperately. On subsequent 8421 * times through the loop, we're checking a 8422 * port that we chose (not the user) and thus 8423 * we do not allow casual duplicate bindings. 8424 */ 8425 if (count == 0 && requested_port != 0) 8426 continue; 8427 } 8428 8429 /* 8430 * No difference depending on SO_REUSEADDR. 
8431 * 8432 * If existing port is bound to a 8433 * non-wildcard IP address and 8434 * the requesting stream is bound to 8435 * a distinct different IP addresses 8436 * (non-wildcard, also), keep going. 8437 */ 8438 if (!is_inaddr_any && 8439 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 8440 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 8441 &v6src)) { 8442 continue; 8443 } 8444 break; 8445 } 8446 8447 if (!found_exclbind && 8448 (udp->udp_reuseaddr && requested_port != 0)) { 8449 break; 8450 } 8451 8452 if (udp1 == NULL) { 8453 /* 8454 * No other stream has this IP address 8455 * and port number. We can use it. 8456 */ 8457 break; 8458 } 8459 mutex_exit(&udpf->uf_lock); 8460 if (bind_to_req_port_only) { 8461 /* 8462 * We get here only when requested port 8463 * is bound (and only first of the for() 8464 * loop iteration). 8465 * 8466 * The semantics of this bind request 8467 * require it to fail so we return from 8468 * the routine (and exit the loop). 8469 * 8470 */ 8471 udp->udp_pending_op = -1; 8472 rw_exit(&udp->udp_rwlock); 8473 return (-TADDRBUSY); 8474 } 8475 8476 if (udp->udp_anon_priv_bind) { 8477 port = udp_get_next_priv_port(udp); 8478 } else { 8479 if ((count == 0) && (requested_port != 0)) { 8480 /* 8481 * If the application wants us to find 8482 * a port, get one to start with. Set 8483 * requested_port to 0, so that we will 8484 * update us->us_next_port_to_try below. 8485 */ 8486 port = udp_update_next_port(udp, 8487 us->us_next_port_to_try, B_TRUE); 8488 requested_port = 0; 8489 } else { 8490 port = udp_update_next_port(udp, port + 1, 8491 B_FALSE); 8492 } 8493 } 8494 8495 if (port == 0 || ++count >= loopmax) { 8496 /* 8497 * We've tried every possible port number and 8498 * there are none available, so send an error 8499 * to the user. 8500 */ 8501 udp->udp_pending_op = -1; 8502 rw_exit(&udp->udp_rwlock); 8503 return (-TNOADDR); 8504 } 8505 } 8506 8507 /* 8508 * Copy the source address into our udp structure. 
This address 8509 * may still be zero; if so, ip will fill in the correct address 8510 * each time an outbound packet is passed to it. 8511 * If we are binding to a broadcast or multicast address then 8512 * udp_post_ip_bind_connect will clear the source address 8513 * when udp_do_bind success. 8514 */ 8515 udp->udp_v6src = udp->udp_bound_v6src = v6src; 8516 udp->udp_port = lport; 8517 /* 8518 * Now reset the the next anonymous port if the application requested 8519 * an anonymous port, or we handed out the next anonymous port. 8520 */ 8521 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 8522 us->us_next_port_to_try = port + 1; 8523 } 8524 8525 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */ 8526 if (udp->udp_family == AF_INET) { 8527 sin->sin_port = udp->udp_port; 8528 } else { 8529 sin6->sin6_port = udp->udp_port; 8530 /* Rebuild the header template */ 8531 error = udp_build_hdrs(udp); 8532 if (error != 0) { 8533 udp->udp_pending_op = -1; 8534 rw_exit(&udp->udp_rwlock); 8535 mutex_exit(&udpf->uf_lock); 8536 return (error); 8537 } 8538 } 8539 udp->udp_state = TS_IDLE; 8540 udp_bind_hash_insert(udpf, udp); 8541 mutex_exit(&udpf->uf_lock); 8542 rw_exit(&udp->udp_rwlock); 8543 8544 if (cl_inet_bind) { 8545 /* 8546 * Running in cluster mode - register bind information 8547 */ 8548 if (udp->udp_ipversion == IPV4_VERSION) { 8549 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8550 IPPROTO_UDP, AF_INET, 8551 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8552 (in_port_t)udp->udp_port, NULL); 8553 } else { 8554 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8555 IPPROTO_UDP, AF_INET6, 8556 (uint8_t *)&(udp->udp_v6src), 8557 (in_port_t)udp->udp_port, NULL); 8558 } 8559 } 8560 8561 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 8562 if (is_system_labeled() && (!connp->conn_anon_port || 8563 connp->conn_anon_mlp)) { 8564 uint16_t mlpport; 8565 zone_t *zone; 8566 8567 zone = crgetzone(cr); 8568 connp->conn_mlp_type = 
udp->udp_recvucred ? mlptBoth : 8569 mlptSingle; 8570 addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION, 8571 &v6src, us->us_netstack->netstack_ip); 8572 if (addrtype == mlptSingle) { 8573 rw_enter(&udp->udp_rwlock, RW_WRITER); 8574 udp->udp_pending_op = -1; 8575 rw_exit(&udp->udp_rwlock); 8576 connp->conn_anon_port = B_FALSE; 8577 connp->conn_mlp_type = mlptSingle; 8578 return (-TNOADDR); 8579 } 8580 mlpport = connp->conn_anon_port ? PMAPPORT : port; 8581 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 8582 addrtype); 8583 if (mlptype != mlptSingle && 8584 (connp->conn_mlp_type == mlptSingle || 8585 secpolicy_net_bindmlp(cr) != 0)) { 8586 if (udp->udp_debug) { 8587 (void) strlog(UDP_MOD_ID, 0, 1, 8588 SL_ERROR|SL_TRACE, 8589 "udp_bind: no priv for multilevel port %d", 8590 mlpport); 8591 } 8592 rw_enter(&udp->udp_rwlock, RW_WRITER); 8593 udp->udp_pending_op = -1; 8594 rw_exit(&udp->udp_rwlock); 8595 connp->conn_anon_port = B_FALSE; 8596 connp->conn_mlp_type = mlptSingle; 8597 return (-TACCES); 8598 } 8599 8600 /* 8601 * If we're specifically binding a shared IP address and the 8602 * port is MLP on shared addresses, then check to see if this 8603 * zone actually owns the MLP. Reject if not. 8604 */ 8605 if (mlptype == mlptShared && addrtype == mlptShared) { 8606 /* 8607 * No need to handle exclusive-stack zones since 8608 * ALL_ZONES only applies to the shared stack. 
8609 */ 8610 zoneid_t mlpzone; 8611 8612 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 8613 htons(mlpport)); 8614 if (connp->conn_zoneid != mlpzone) { 8615 if (udp->udp_debug) { 8616 (void) strlog(UDP_MOD_ID, 0, 1, 8617 SL_ERROR|SL_TRACE, 8618 "udp_bind: attempt to bind port " 8619 "%d on shared addr in zone %d " 8620 "(should be %d)", 8621 mlpport, connp->conn_zoneid, 8622 mlpzone); 8623 } 8624 rw_enter(&udp->udp_rwlock, RW_WRITER); 8625 udp->udp_pending_op = -1; 8626 rw_exit(&udp->udp_rwlock); 8627 connp->conn_anon_port = B_FALSE; 8628 connp->conn_mlp_type = mlptSingle; 8629 return (-TACCES); 8630 } 8631 } 8632 if (connp->conn_anon_port) { 8633 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 8634 port, B_TRUE); 8635 if (error != 0) { 8636 if (udp->udp_debug) { 8637 (void) strlog(UDP_MOD_ID, 0, 1, 8638 SL_ERROR|SL_TRACE, 8639 "udp_bind: cannot establish anon " 8640 "MLP for port %d", port); 8641 } 8642 rw_enter(&udp->udp_rwlock, RW_WRITER); 8643 udp->udp_pending_op = -1; 8644 rw_exit(&udp->udp_rwlock); 8645 connp->conn_anon_port = B_FALSE; 8646 connp->conn_mlp_type = mlptSingle; 8647 return (-TACCES); 8648 } 8649 } 8650 connp->conn_mlp_type = mlptype; 8651 } 8652 8653 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8654 /* 8655 * Append a request for an IRE if udp_v6src not 8656 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 
8657 */ 8658 mp = allocb(sizeof (ire_t), BPRI_HI); 8659 if (!mp) { 8660 rw_enter(&udp->udp_rwlock, RW_WRITER); 8661 udp->udp_pending_op = -1; 8662 rw_exit(&udp->udp_rwlock); 8663 return (ENOMEM); 8664 } 8665 mp->b_wptr += sizeof (ire_t); 8666 mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8667 } 8668 if (udp->udp_family == AF_INET6) { 8669 ASSERT(udp->udp_connp->conn_af_isv6); 8670 error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP, 8671 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 8672 } else { 8673 ASSERT(!udp->udp_connp->conn_af_isv6); 8674 error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP, 8675 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, 8676 B_TRUE); 8677 } 8678 8679 (void) udp_post_ip_bind_connect(udp, mp, error); 8680 return (error); 8681 } 8682 8683 int 8684 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 8685 socklen_t len, cred_t *cr) 8686 { 8687 int error; 8688 conn_t *connp; 8689 8690 /* All Solaris components should pass a cred for this operation. */ 8691 ASSERT(cr != NULL); 8692 8693 connp = (conn_t *)proto_handle; 8694 8695 if (sa == NULL) 8696 error = udp_do_unbind(connp); 8697 else 8698 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 8699 8700 if (error < 0) { 8701 if (error == -TOUTSTATE) 8702 error = EINVAL; 8703 else 8704 error = proto_tlitosyserr(-error); 8705 } 8706 8707 return (error); 8708 } 8709 8710 static int 8711 udp_implicit_bind(conn_t *connp, cred_t *cr) 8712 { 8713 int error; 8714 8715 /* All Solaris components should pass a cred for this operation. */ 8716 ASSERT(cr != NULL); 8717 8718 error = udp_do_bind(connp, NULL, 0, cr, B_FALSE); 8719 return ((error < 0) ? proto_tlitosyserr(-error) : error); 8720 } 8721 8722 /* 8723 * This routine removes a port number association from a stream. It 8724 * is called by udp_unbind and udp_tpi_unbind. 
8725 */ 8726 static int 8727 udp_do_unbind(conn_t *connp) 8728 { 8729 udp_t *udp = connp->conn_udp; 8730 udp_fanout_t *udpf; 8731 udp_stack_t *us = udp->udp_us; 8732 8733 if (cl_inet_unbind != NULL) { 8734 /* 8735 * Running in cluster mode - register unbind information 8736 */ 8737 if (udp->udp_ipversion == IPV4_VERSION) { 8738 (*cl_inet_unbind)( 8739 connp->conn_netstack->netstack_stackid, 8740 IPPROTO_UDP, AF_INET, 8741 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8742 (in_port_t)udp->udp_port, NULL); 8743 } else { 8744 (*cl_inet_unbind)( 8745 connp->conn_netstack->netstack_stackid, 8746 IPPROTO_UDP, AF_INET6, 8747 (uint8_t *)&(udp->udp_v6src), 8748 (in_port_t)udp->udp_port, NULL); 8749 } 8750 } 8751 8752 rw_enter(&udp->udp_rwlock, RW_WRITER); 8753 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8754 rw_exit(&udp->udp_rwlock); 8755 return (-TOUTSTATE); 8756 } 8757 udp->udp_pending_op = T_UNBIND_REQ; 8758 rw_exit(&udp->udp_rwlock); 8759 8760 /* 8761 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 8762 * and therefore ip_unbind must never return NULL. 8763 */ 8764 ip_unbind(connp); 8765 8766 /* 8767 * Once we're unbound from IP, the pending operation may be cleared 8768 * here. 
8769 */ 8770 rw_enter(&udp->udp_rwlock, RW_WRITER); 8771 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8772 us->us_bind_fanout_size)]; 8773 8774 mutex_enter(&udpf->uf_lock); 8775 udp_bind_hash_remove(udp, B_TRUE); 8776 V6_SET_ZERO(udp->udp_v6src); 8777 V6_SET_ZERO(udp->udp_bound_v6src); 8778 udp->udp_port = 0; 8779 mutex_exit(&udpf->uf_lock); 8780 8781 udp->udp_pending_op = -1; 8782 udp->udp_state = TS_UNBND; 8783 if (udp->udp_family == AF_INET6) 8784 (void) udp_build_hdrs(udp); 8785 rw_exit(&udp->udp_rwlock); 8786 8787 return (0); 8788 } 8789 8790 static int 8791 udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error) 8792 { 8793 ire_t *ire; 8794 udp_fanout_t *udpf; 8795 udp_stack_t *us = udp->udp_us; 8796 8797 ASSERT(udp->udp_pending_op != -1); 8798 rw_enter(&udp->udp_rwlock, RW_WRITER); 8799 if (error == 0) { 8800 /* For udp_do_connect() success */ 8801 /* udp_do_bind() success will do nothing in here */ 8802 /* 8803 * If a broadcast/multicast address was bound, set 8804 * the source address to 0. 8805 * This ensures no datagrams with broadcast address 8806 * as source address are emitted (which would violate 8807 * RFC1122 - Hosts requirements) 8808 * 8809 * Note that when connecting the returned IRE is 8810 * for the destination address and we only perform 8811 * the broadcast check for the source address (it 8812 * is OK to connect to a broadcast/multicast address.) 8813 */ 8814 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 8815 ire = (ire_t *)ire_mp->b_rptr; 8816 8817 /* 8818 * Note: we get IRE_BROADCAST for IPv6 to "mark" a 8819 * multicast local address. 8820 */ 8821 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8822 us->us_bind_fanout_size)]; 8823 if (ire->ire_type == IRE_BROADCAST && 8824 udp->udp_state != TS_DATA_XFER) { 8825 ASSERT(udp->udp_pending_op == T_BIND_REQ || 8826 udp->udp_pending_op == O_T_BIND_REQ); 8827 /* 8828 * This was just a local bind to a broadcast 8829 * addr. 
8830 */ 8831 mutex_enter(&udpf->uf_lock); 8832 V6_SET_ZERO(udp->udp_v6src); 8833 mutex_exit(&udpf->uf_lock); 8834 if (udp->udp_family == AF_INET6) 8835 (void) udp_build_hdrs(udp); 8836 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8837 if (udp->udp_family == AF_INET6) 8838 (void) udp_build_hdrs(udp); 8839 } 8840 } 8841 } else { 8842 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8843 us->us_bind_fanout_size)]; 8844 mutex_enter(&udpf->uf_lock); 8845 8846 if (udp->udp_state == TS_DATA_XFER) { 8847 /* Connect failed */ 8848 /* Revert back to the bound source */ 8849 udp->udp_v6src = udp->udp_bound_v6src; 8850 udp->udp_state = TS_IDLE; 8851 } else { 8852 /* For udp_do_bind() failed */ 8853 V6_SET_ZERO(udp->udp_v6src); 8854 V6_SET_ZERO(udp->udp_bound_v6src); 8855 udp->udp_state = TS_UNBND; 8856 udp_bind_hash_remove(udp, B_TRUE); 8857 udp->udp_port = 0; 8858 } 8859 mutex_exit(&udpf->uf_lock); 8860 if (udp->udp_family == AF_INET6) 8861 (void) udp_build_hdrs(udp); 8862 } 8863 udp->udp_pending_op = -1; 8864 rw_exit(&udp->udp_rwlock); 8865 if (ire_mp != NULL) 8866 freeb(ire_mp); 8867 return (error); 8868 } 8869 8870 /* 8871 * It associates a default destination address with the stream. 
8872 */ 8873 static int 8874 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 8875 cred_t *cr) 8876 { 8877 sin6_t *sin6; 8878 sin_t *sin; 8879 in6_addr_t v6dst; 8880 ipaddr_t v4dst; 8881 uint16_t dstport; 8882 uint32_t flowinfo; 8883 mblk_t *ire_mp; 8884 udp_fanout_t *udpf; 8885 udp_t *udp, *udp1; 8886 ushort_t ipversion; 8887 udp_stack_t *us; 8888 int error; 8889 8890 udp = connp->conn_udp; 8891 us = udp->udp_us; 8892 8893 /* 8894 * Address has been verified by the caller 8895 */ 8896 switch (len) { 8897 default: 8898 /* 8899 * Should never happen 8900 */ 8901 return (EINVAL); 8902 8903 case sizeof (sin_t): 8904 sin = (sin_t *)sa; 8905 v4dst = sin->sin_addr.s_addr; 8906 dstport = sin->sin_port; 8907 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8908 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8909 ipversion = IPV4_VERSION; 8910 break; 8911 8912 case sizeof (sin6_t): 8913 sin6 = (sin6_t *)sa; 8914 v6dst = sin6->sin6_addr; 8915 dstport = sin6->sin6_port; 8916 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 8917 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 8918 ipversion = IPV4_VERSION; 8919 flowinfo = 0; 8920 } else { 8921 ipversion = IPV6_VERSION; 8922 flowinfo = sin6->sin6_flowinfo; 8923 } 8924 break; 8925 } 8926 8927 if (dstport == 0) 8928 return (-TBADADDR); 8929 8930 rw_enter(&udp->udp_rwlock, RW_WRITER); 8931 8932 /* 8933 * This UDP must have bound to a port already before doing a connect. 8934 * TPI mandates that users must send TPI primitives only 1 at a time 8935 * and wait for the response before sending the next primitive. 
8936 */ 8937 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8938 rw_exit(&udp->udp_rwlock); 8939 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8940 "udp_connect: bad state, %u", udp->udp_state); 8941 return (-TOUTSTATE); 8942 } 8943 udp->udp_pending_op = T_CONN_REQ; 8944 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 8945 8946 if (ipversion == IPV4_VERSION) { 8947 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8948 udp->udp_ip_snd_options_len; 8949 } else { 8950 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8951 } 8952 8953 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8954 us->us_bind_fanout_size)]; 8955 8956 mutex_enter(&udpf->uf_lock); 8957 if (udp->udp_state == TS_DATA_XFER) { 8958 /* Already connected - clear out state */ 8959 udp->udp_v6src = udp->udp_bound_v6src; 8960 udp->udp_state = TS_IDLE; 8961 } 8962 8963 /* 8964 * Create a default IP header with no IP options. 8965 */ 8966 udp->udp_dstport = dstport; 8967 udp->udp_ipversion = ipversion; 8968 if (ipversion == IPV4_VERSION) { 8969 /* 8970 * Interpret a zero destination to mean loopback. 8971 * Update the T_CONN_REQ (sin/sin6) since it is used to 8972 * generate the T_CONN_CON. 8973 */ 8974 if (v4dst == INADDR_ANY) { 8975 v4dst = htonl(INADDR_LOOPBACK); 8976 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8977 if (udp->udp_family == AF_INET) { 8978 sin->sin_addr.s_addr = v4dst; 8979 } else { 8980 sin6->sin6_addr = v6dst; 8981 } 8982 } 8983 udp->udp_v6dst = v6dst; 8984 udp->udp_flowinfo = 0; 8985 8986 /* 8987 * If the destination address is multicast and 8988 * an outgoing multicast interface has been set, 8989 * use the address of that interface as our 8990 * source address if no source address has been set. 
8991 */ 8992 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 8993 CLASSD(v4dst) && 8994 udp->udp_multicast_if_addr != INADDR_ANY) { 8995 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 8996 &udp->udp_v6src); 8997 } 8998 } else { 8999 ASSERT(udp->udp_ipversion == IPV6_VERSION); 9000 /* 9001 * Interpret a zero destination to mean loopback. 9002 * Update the T_CONN_REQ (sin/sin6) since it is used to 9003 * generate the T_CONN_CON. 9004 */ 9005 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 9006 v6dst = ipv6_loopback; 9007 sin6->sin6_addr = v6dst; 9008 } 9009 udp->udp_v6dst = v6dst; 9010 udp->udp_flowinfo = flowinfo; 9011 /* 9012 * If the destination address is multicast and 9013 * an outgoing multicast interface has been set, 9014 * then the ip bind logic will pick the correct source 9015 * address (i.e. matching the outgoing multicast interface). 9016 */ 9017 } 9018 9019 /* 9020 * Verify that the src/port/dst/port is unique for all 9021 * connections in TS_DATA_XFER 9022 */ 9023 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 9024 if (udp1->udp_state != TS_DATA_XFER) 9025 continue; 9026 if (udp->udp_port != udp1->udp_port || 9027 udp->udp_ipversion != udp1->udp_ipversion || 9028 dstport != udp1->udp_dstport || 9029 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 9030 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) || 9031 !(IPCL_ZONE_MATCH(udp->udp_connp, 9032 udp1->udp_connp->conn_zoneid) || 9033 IPCL_ZONE_MATCH(udp1->udp_connp, 9034 udp->udp_connp->conn_zoneid))) 9035 continue; 9036 mutex_exit(&udpf->uf_lock); 9037 udp->udp_pending_op = -1; 9038 rw_exit(&udp->udp_rwlock); 9039 return (-TBADADDR); 9040 } 9041 9042 if (cl_inet_connect2 != NULL) { 9043 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error); 9044 if (error != 0) { 9045 mutex_exit(&udpf->uf_lock); 9046 udp->udp_pending_op = -1; 9047 rw_exit(&udp->udp_rwlock); 9048 return (-TBADADDR); 9049 } 9050 } 9051 9052 udp->udp_state = TS_DATA_XFER; 9053 
mutex_exit(&udpf->uf_lock); 9054 9055 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 9056 if (ire_mp == NULL) { 9057 mutex_enter(&udpf->uf_lock); 9058 udp->udp_state = TS_IDLE; 9059 udp->udp_pending_op = -1; 9060 mutex_exit(&udpf->uf_lock); 9061 rw_exit(&udp->udp_rwlock); 9062 return (ENOMEM); 9063 } 9064 9065 rw_exit(&udp->udp_rwlock); 9066 9067 ire_mp->b_wptr += sizeof (ire_t); 9068 ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE; 9069 9070 if (udp->udp_family == AF_INET) { 9071 error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP, 9072 &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port, 9073 V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport, 9074 B_TRUE, B_TRUE, cr); 9075 } else { 9076 error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP, 9077 &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst, 9078 &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE, cr); 9079 } 9080 9081 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 9082 } 9083 9084 /* ARGSUSED */ 9085 static int 9086 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 9087 socklen_t len, sock_connid_t *id, cred_t *cr) 9088 { 9089 conn_t *connp = (conn_t *)proto_handle; 9090 udp_t *udp = connp->conn_udp; 9091 int error; 9092 boolean_t did_bind = B_FALSE; 9093 9094 /* All Solaris components should pass a cred for this operation. */ 9095 ASSERT(cr != NULL); 9096 9097 if (sa == NULL) { 9098 /* 9099 * Disconnect 9100 * Make sure we are connected 9101 */ 9102 if (udp->udp_state != TS_DATA_XFER) 9103 return (EINVAL); 9104 9105 error = udp_disconnect(connp); 9106 return (error); 9107 } 9108 9109 error = proto_verify_ip_addr(udp->udp_family, sa, len); 9110 if (error != 0) 9111 goto done; 9112 9113 /* do an implicit bind if necessary */ 9114 if (udp->udp_state == TS_UNBND) { 9115 error = udp_implicit_bind(connp, cr); 9116 /* 9117 * We could be racing with an actual bind, in which case 9118 * we would see EPROTO. We cross our fingers and try 9119 * to connect. 
9120 */ 9121 if (!(error == 0 || error == EPROTO)) 9122 goto done; 9123 did_bind = B_TRUE; 9124 } 9125 /* 9126 * set SO_DGRAM_ERRIND 9127 */ 9128 udp->udp_dgram_errind = B_TRUE; 9129 9130 error = udp_do_connect(connp, sa, len, cr); 9131 9132 if (error != 0 && did_bind) { 9133 int unbind_err; 9134 9135 unbind_err = udp_do_unbind(connp); 9136 ASSERT(unbind_err == 0); 9137 } 9138 9139 if (error == 0) { 9140 *id = 0; 9141 (*connp->conn_upcalls->su_connected) 9142 (connp->conn_upper_handle, 0, NULL, -1); 9143 } else if (error < 0) { 9144 error = proto_tlitosyserr(-error); 9145 } 9146 9147 done: 9148 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 9149 /* 9150 * No need to hold locks to set state 9151 * after connect failure socket state is undefined 9152 * We set the state only to imitate old sockfs behavior 9153 */ 9154 udp->udp_state = TS_IDLE; 9155 } 9156 return (error); 9157 } 9158 9159 /* ARGSUSED */ 9160 int 9161 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 9162 cred_t *cr) 9163 { 9164 conn_t *connp = (conn_t *)proto_handle; 9165 udp_t *udp = connp->conn_udp; 9166 udp_stack_t *us = udp->udp_us; 9167 int error = 0; 9168 9169 ASSERT(DB_TYPE(mp) == M_DATA); 9170 9171 /* All Solaris components should pass a cred for this operation. */ 9172 ASSERT(cr != NULL); 9173 9174 /* If labeled then sockfs should have already set db_credp */ 9175 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 9176 9177 /* 9178 * If the socket is connected and no change in destination 9179 */ 9180 if (msg->msg_namelen == 0) { 9181 error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid); 9182 if (error == EDESTADDRREQ) 9183 return (error); 9184 else 9185 return (udp->udp_dgram_errind ? error : 0); 9186 } 9187 9188 /* 9189 * Do an implicit bind if necessary. 9190 */ 9191 if (udp->udp_state == TS_UNBND) { 9192 error = udp_implicit_bind(connp, cr); 9193 /* 9194 * We could be racing with an actual bind, in which case 9195 * we would see EPROTO. 
We cross our fingers and try 9196 * to send. 9197 */ 9198 if (!(error == 0 || error == EPROTO)) { 9199 freemsg(mp); 9200 return (error); 9201 } 9202 } 9203 9204 rw_enter(&udp->udp_rwlock, RW_WRITER); 9205 9206 if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) { 9207 rw_exit(&udp->udp_rwlock); 9208 freemsg(mp); 9209 return (EISCONN); 9210 } 9211 9212 9213 if (udp->udp_delayed_error != 0) { 9214 boolean_t match; 9215 9216 error = udp->udp_delayed_error; 9217 match = B_FALSE; 9218 udp->udp_delayed_error = 0; 9219 switch (udp->udp_family) { 9220 case AF_INET: { 9221 /* Compare just IP address and port */ 9222 sin_t *sin1 = (sin_t *)msg->msg_name; 9223 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 9224 9225 if (msg->msg_namelen == sizeof (sin_t) && 9226 sin1->sin_port == sin2->sin_port && 9227 sin1->sin_addr.s_addr == sin2->sin_addr.s_addr) 9228 match = B_TRUE; 9229 9230 break; 9231 } 9232 case AF_INET6: { 9233 sin6_t *sin1 = (sin6_t *)msg->msg_name; 9234 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 9235 9236 if (msg->msg_namelen == sizeof (sin6_t) && 9237 sin1->sin6_port == sin2->sin6_port && 9238 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 9239 &sin2->sin6_addr)) 9240 match = B_TRUE; 9241 break; 9242 } 9243 default: 9244 ASSERT(0); 9245 } 9246 9247 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 9248 9249 if (match) { 9250 rw_exit(&udp->udp_rwlock); 9251 freemsg(mp); 9252 return (error); 9253 } 9254 } 9255 9256 error = proto_verify_ip_addr(udp->udp_family, 9257 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 9258 rw_exit(&udp->udp_rwlock); 9259 9260 if (error != 0) { 9261 freemsg(mp); 9262 return (error); 9263 } 9264 9265 error = udp_send_not_connected(connp, mp, 9266 (struct sockaddr *)msg->msg_name, msg->msg_namelen, msg, cr, 9267 curproc->p_pid); 9268 if (error != 0) { 9269 UDP_STAT(us, udp_out_err_output); 9270 freemsg(mp); 9271 } 9272 return (udp->udp_dgram_errind ? 
error : 0); 9273 } 9274 9275 void 9276 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 9277 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb) 9278 { 9279 conn_t *connp = (conn_t *)proto_handle; 9280 udp_t *udp; 9281 struct T_capability_ack tca; 9282 struct sockaddr_in6 laddr, faddr; 9283 socklen_t laddrlen, faddrlen; 9284 short opts; 9285 struct stroptions *stropt; 9286 mblk_t *stropt_mp; 9287 int error; 9288 9289 udp = connp->conn_udp; 9290 9291 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 9292 9293 /* 9294 * setup the fallback stream that was allocated 9295 */ 9296 connp->conn_dev = (dev_t)RD(q)->q_ptr; 9297 connp->conn_minor_arena = WR(q)->q_ptr; 9298 9299 RD(q)->q_ptr = WR(q)->q_ptr = connp; 9300 9301 WR(q)->q_qinfo = &udp_winit; 9302 9303 connp->conn_rq = RD(q); 9304 connp->conn_wq = WR(q); 9305 9306 /* Notify stream head about options before sending up data */ 9307 stropt_mp->b_datap->db_type = M_SETOPTS; 9308 stropt_mp->b_wptr += sizeof (*stropt); 9309 stropt = (struct stroptions *)stropt_mp->b_rptr; 9310 stropt->so_flags = SO_WROFF | SO_HIWAT; 9311 stropt->so_wroff = 9312 (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra); 9313 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 9314 putnext(RD(q), stropt_mp); 9315 9316 /* 9317 * Free the helper stream 9318 */ 9319 ip_free_helper_stream(connp); 9320 9321 if (!direct_sockfs) 9322 udp_disable_direct_sockfs(udp); 9323 9324 /* 9325 * Collect the information needed to sync with the sonode 9326 */ 9327 udp_do_capability_ack(udp, &tca, TC1_INFO); 9328 9329 laddrlen = faddrlen = sizeof (sin6_t); 9330 (void) udp_getsockname((sock_lower_handle_t)connp, 9331 (struct sockaddr *)&laddr, &laddrlen, CRED()); 9332 error = udp_getpeername((sock_lower_handle_t)connp, 9333 (struct sockaddr *)&faddr, &faddrlen, CRED()); 9334 if (error != 0) 9335 faddrlen = 0; 9336 9337 opts = 0; 9338 if (udp->udp_dgram_errind) 9339 opts |= SO_DGRAM_ERRIND; 9340 if (udp->udp_dontroute) 9341 
opts |= SO_DONTROUTE; 9342 9343 /* 9344 * Once we grab the drain lock, no data will be send up 9345 * to the socket. So we notify the socket that the endpoint 9346 * is quiescent and it's therefore safe move data from 9347 * the socket to the stream head. 9348 */ 9349 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 9350 (struct sockaddr *)&laddr, laddrlen, 9351 (struct sockaddr *)&faddr, faddrlen, opts); 9352 9353 /* 9354 * push up any packets that were queued in udp_t 9355 */ 9356 9357 mutex_enter(&udp->udp_recv_lock); 9358 while (udp->udp_fallback_queue_head != NULL) { 9359 mblk_t *mp; 9360 mp = udp->udp_fallback_queue_head; 9361 udp->udp_fallback_queue_head = mp->b_next; 9362 mutex_exit(&udp->udp_recv_lock); 9363 mp->b_next = NULL; 9364 putnext(RD(q), mp); 9365 mutex_enter(&udp->udp_recv_lock); 9366 } 9367 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 9368 /* 9369 * No longer a streams less socket 9370 */ 9371 connp->conn_flags &= ~IPCL_NONSTR; 9372 mutex_exit(&udp->udp_recv_lock); 9373 9374 ASSERT(connp->conn_ref >= 1); 9375 } 9376 9377 static int 9378 udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9379 { 9380 sin_t *sin = (sin_t *)sa; 9381 sin6_t *sin6 = (sin6_t *)sa; 9382 9383 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9384 ASSERT(udp != NULL); 9385 9386 if (udp->udp_state != TS_DATA_XFER) 9387 return (ENOTCONN); 9388 9389 switch (udp->udp_family) { 9390 case AF_INET: 9391 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9392 9393 if (*salenp < sizeof (sin_t)) 9394 return (EINVAL); 9395 9396 *salenp = sizeof (sin_t); 9397 *sin = sin_null; 9398 sin->sin_family = AF_INET; 9399 sin->sin_port = udp->udp_dstport; 9400 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 9401 break; 9402 case AF_INET6: 9403 if (*salenp < sizeof (sin6_t)) 9404 return (EINVAL); 9405 9406 *salenp = sizeof (sin6_t); 9407 *sin6 = sin6_null; 9408 sin6->sin6_family = AF_INET6; 9409 sin6->sin6_port = udp->udp_dstport; 9410 sin6->sin6_addr = udp->udp_v6dst; 
9411 sin6->sin6_flowinfo = udp->udp_flowinfo; 9412 break; 9413 } 9414 9415 return (0); 9416 } 9417 9418 /* ARGSUSED */ 9419 int 9420 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9421 socklen_t *salenp, cred_t *cr) 9422 { 9423 conn_t *connp = (conn_t *)proto_handle; 9424 udp_t *udp = connp->conn_udp; 9425 int error; 9426 9427 /* All Solaris components should pass a cred for this operation. */ 9428 ASSERT(cr != NULL); 9429 9430 ASSERT(udp != NULL); 9431 9432 rw_enter(&udp->udp_rwlock, RW_READER); 9433 9434 error = udp_do_getpeername(udp, sa, salenp); 9435 9436 rw_exit(&udp->udp_rwlock); 9437 9438 return (error); 9439 } 9440 9441 static int 9442 udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9443 { 9444 sin_t *sin = (sin_t *)sa; 9445 sin6_t *sin6 = (sin6_t *)sa; 9446 9447 ASSERT(udp != NULL); 9448 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9449 9450 switch (udp->udp_family) { 9451 case AF_INET: 9452 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9453 9454 if (*salenp < sizeof (sin_t)) 9455 return (EINVAL); 9456 9457 *salenp = sizeof (sin_t); 9458 *sin = sin_null; 9459 sin->sin_family = AF_INET; 9460 if (udp->udp_state == TS_UNBND) { 9461 break; 9462 } 9463 sin->sin_port = udp->udp_port; 9464 9465 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 9466 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9467 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 9468 } else { 9469 /* 9470 * INADDR_ANY 9471 * udp_v6src is not set, we might be bound to 9472 * broadcast/multicast. 
Use udp_bound_v6src as 9473 * local address instead (that could 9474 * also still be INADDR_ANY) 9475 */ 9476 sin->sin_addr.s_addr = 9477 V4_PART_OF_V6(udp->udp_bound_v6src); 9478 } 9479 break; 9480 9481 case AF_INET6: 9482 if (*salenp < sizeof (sin6_t)) 9483 return (EINVAL); 9484 9485 *salenp = sizeof (sin6_t); 9486 *sin6 = sin6_null; 9487 sin6->sin6_family = AF_INET6; 9488 if (udp->udp_state == TS_UNBND) { 9489 break; 9490 } 9491 sin6->sin6_port = udp->udp_port; 9492 9493 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9494 sin6->sin6_addr = udp->udp_v6src; 9495 } else { 9496 /* 9497 * UNSPECIFIED 9498 * udp_v6src is not set, we might be bound to 9499 * broadcast/multicast. Use udp_bound_v6src as 9500 * local address instead (that could 9501 * also still be UNSPECIFIED) 9502 */ 9503 sin6->sin6_addr = udp->udp_bound_v6src; 9504 } 9505 } 9506 return (0); 9507 } 9508 9509 /* ARGSUSED */ 9510 int 9511 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9512 socklen_t *salenp, cred_t *cr) 9513 { 9514 conn_t *connp = (conn_t *)proto_handle; 9515 udp_t *udp = connp->conn_udp; 9516 int error; 9517 9518 /* All Solaris components should pass a cred for this operation. */ 9519 ASSERT(cr != NULL); 9520 9521 ASSERT(udp != NULL); 9522 rw_enter(&udp->udp_rwlock, RW_READER); 9523 9524 error = udp_do_getsockname(udp, sa, salenp); 9525 9526 rw_exit(&udp->udp_rwlock); 9527 9528 return (error); 9529 } 9530 9531 int 9532 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9533 void *optvalp, socklen_t *optlen, cred_t *cr) 9534 { 9535 conn_t *connp = (conn_t *)proto_handle; 9536 udp_t *udp = connp->conn_udp; 9537 int error; 9538 t_uscalar_t max_optbuf_len; 9539 void *optvalp_buf; 9540 int len; 9541 9542 /* All Solaris components should pass a cred for this operation. 
*/ 9543 ASSERT(cr != NULL); 9544 9545 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 9546 udp_opt_obj.odb_opt_des_arr, 9547 udp_opt_obj.odb_opt_arr_cnt, 9548 udp_opt_obj.odb_topmost_tpiprovider, 9549 B_FALSE, B_TRUE, cr); 9550 if (error != 0) { 9551 if (error < 0) 9552 error = proto_tlitosyserr(-error); 9553 return (error); 9554 } 9555 9556 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 9557 rw_enter(&udp->udp_rwlock, RW_READER); 9558 len = udp_opt_get(connp, level, option_name, optvalp_buf); 9559 rw_exit(&udp->udp_rwlock); 9560 9561 if (len < 0) { 9562 /* 9563 * Pass on to IP 9564 */ 9565 kmem_free(optvalp_buf, max_optbuf_len); 9566 return (ip_get_options(connp, level, option_name, 9567 optvalp, optlen, cr)); 9568 } else { 9569 /* 9570 * update optlen and copy option value 9571 */ 9572 t_uscalar_t size = MIN(len, *optlen); 9573 bcopy(optvalp_buf, optvalp, size); 9574 bcopy(&size, optlen, sizeof (size)); 9575 9576 kmem_free(optvalp_buf, max_optbuf_len); 9577 return (0); 9578 } 9579 } 9580 9581 int 9582 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9583 const void *optvalp, socklen_t optlen, cred_t *cr) 9584 { 9585 conn_t *connp = (conn_t *)proto_handle; 9586 udp_t *udp = connp->conn_udp; 9587 int error; 9588 9589 /* All Solaris components should pass a cred for this operation. 
*/ 9590 ASSERT(cr != NULL); 9591 9592 error = proto_opt_check(level, option_name, optlen, NULL, 9593 udp_opt_obj.odb_opt_des_arr, 9594 udp_opt_obj.odb_opt_arr_cnt, 9595 udp_opt_obj.odb_topmost_tpiprovider, 9596 B_TRUE, B_FALSE, cr); 9597 9598 if (error != 0) { 9599 if (error < 0) 9600 error = proto_tlitosyserr(-error); 9601 return (error); 9602 } 9603 9604 rw_enter(&udp->udp_rwlock, RW_WRITER); 9605 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 9606 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 9607 NULL, cr); 9608 rw_exit(&udp->udp_rwlock); 9609 9610 if (error < 0) { 9611 /* 9612 * Pass on to ip 9613 */ 9614 error = ip_set_options(connp, level, option_name, optvalp, 9615 optlen, cr); 9616 } 9617 9618 return (error); 9619 } 9620 9621 void 9622 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 9623 { 9624 conn_t *connp = (conn_t *)proto_handle; 9625 udp_t *udp = connp->conn_udp; 9626 9627 mutex_enter(&udp->udp_recv_lock); 9628 connp->conn_flow_cntrld = B_FALSE; 9629 mutex_exit(&udp->udp_recv_lock); 9630 } 9631 9632 /* ARGSUSED */ 9633 int 9634 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 9635 { 9636 conn_t *connp = (conn_t *)proto_handle; 9637 9638 /* All Solaris components should pass a cred for this operation. */ 9639 ASSERT(cr != NULL); 9640 9641 /* shut down the send side */ 9642 if (how != SHUT_RD) 9643 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9644 SOCK_OPCTL_SHUT_SEND, 0); 9645 /* shut down the recv side */ 9646 if (how != SHUT_WR) 9647 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9648 SOCK_OPCTL_SHUT_RECV, 0); 9649 return (0); 9650 } 9651 9652 int 9653 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 9654 int mode, int32_t *rvalp, cred_t *cr) 9655 { 9656 conn_t *connp = (conn_t *)proto_handle; 9657 int error; 9658 9659 /* All Solaris components should pass a cred for this operation. 
*/ 9660 ASSERT(cr != NULL); 9661 9662 switch (cmd) { 9663 case ND_SET: 9664 case ND_GET: 9665 case _SIOCSOCKFALLBACK: 9666 case TI_GETPEERNAME: 9667 case TI_GETMYNAME: 9668 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 9669 cmd)); 9670 error = EINVAL; 9671 break; 9672 default: 9673 /* 9674 * Pass on to IP using helper stream 9675 */ 9676 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 9677 cmd, arg, mode, cr, rvalp); 9678 break; 9679 } 9680 return (error); 9681 } 9682 9683 /* ARGSUSED */ 9684 int 9685 udp_accept(sock_lower_handle_t lproto_handle, 9686 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 9687 cred_t *cr) 9688 { 9689 return (EOPNOTSUPP); 9690 } 9691 9692 /* ARGSUSED */ 9693 int 9694 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 9695 { 9696 return (EOPNOTSUPP); 9697 } 9698 9699 sock_downcalls_t sock_udp_downcalls = { 9700 udp_activate, /* sd_activate */ 9701 udp_accept, /* sd_accept */ 9702 udp_bind, /* sd_bind */ 9703 udp_listen, /* sd_listen */ 9704 udp_connect, /* sd_connect */ 9705 udp_getpeername, /* sd_getpeername */ 9706 udp_getsockname, /* sd_getsockname */ 9707 udp_getsockopt, /* sd_getsockopt */ 9708 udp_setsockopt, /* sd_setsockopt */ 9709 udp_send, /* sd_send */ 9710 NULL, /* sd_send_uio */ 9711 NULL, /* sd_recv_uio */ 9712 NULL, /* sd_poll */ 9713 udp_shutdown, /* sd_shutdown */ 9714 udp_clr_flowctrl, /* sd_setflowctrl */ 9715 udp_ioctl, /* sd_ioctl */ 9716 udp_close /* sd_close */ 9717 }; 9718