1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/pattr.h> 31 #include <sys/stropts.h> 32 #include <sys/strlog.h> 33 #include <sys/strsun.h> 34 #include <sys/time.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/suntpi.h> 42 #include <sys/xti_inet.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/ucred.h> 46 #include <sys/zone.h> 47 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/sockio.h> 51 #include <sys/vtrace.h> 52 #include <sys/sdt.h> 53 #include <sys/debug.h> 54 #include <sys/isa_defs.h> 55 #include <sys/random.h> 56 #include <netinet/in.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 #include <netinet/udp.h> 60 #include <net/if.h> 61 #include <net/route.h> 62 63 #include <inet/common.h> 64 #include <inet/ip.h> 65 #include <inet/ip_impl.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_ire.h> 68 #include <inet/ip_if.h> 69 #include <inet/ip_multi.h> 70 #include <inet/ip_ndp.h> 71 #include <inet/proto_set.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/optcom.h> 75 #include <inet/snmpcom.h> 76 #include <inet/kstatcom.h> 77 #include <inet/udp_impl.h> 78 #include <inet/ipclassifier.h> 79 #include <inet/ipsec_impl.h> 80 #include <inet/ipp_common.h> 81 #include <sys/squeue_impl.h> 82 #include <inet/ipnet.h> 83 #include <sys/ethernet.h> 84 85 /* 86 * The ipsec_info.h header file is here since it has the definition for the 87 * M_CTL message types used by IP to convey information to the ULP. The 88 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 89 */ 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 93 #include <sys/tsol/label.h> 94 #include <sys/tsol/tnet.h> 95 #include <rpc/pmap_prot.h> 96 97 /* 98 * Synchronization notes: 99 * 100 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 101 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock. 102 * We also use conn_lock when updating things that affect the IP classifier 103 * lookup. 104 * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock. 105 * 106 * The fanout lock uf_lock: 107 * When a UDP endpoint is bound to a local port, it is inserted into 108 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 109 * The size of the array is controlled by the udp_bind_fanout_size variable. 110 * This variable can be changed in /etc/system if the default value is 111 * not large enough. Each bind hash bucket is protected by a per bucket 112 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 113 * structure and a few other fields in the udp_t. A UDP endpoint is removed 114 * from the bind hash list only when it is being unbound or being closed. 115 * The per bucket lock also protects a UDP endpoint's state changes. 116 * 117 * The udp_rwlock: 118 * This protects most of the other fields in the udp_t. The exact list of 119 * fields which are protected by each of the above locks is documented in 120 * the udp_t structure definition. 121 * 122 * Plumbing notes: 123 * UDP is always a device driver. For compatibility with mibopen() code 124 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 125 * dummy module. 126 * 127 * The above implies that we don't support any intermediate module to 128 * reside in between /dev/ip and udp -- in fact, we never supported such 129 * scenario in the past as the inter-layer communication semantics have 130 * always been private. 131 */ 132 133 /* For /etc/system control */ 134 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 135 136 #define NDD_TOO_QUICK_MSG \ 137 "ndd get info rate too high for non-privileged users, try again " \ 138 "later.\n" 139 #define NDD_OUT_OF_BUF_MSG "<< Out of buffer >>\n" 140 141 /* Option processing attrs */ 142 typedef struct udpattrs_s { 143 union { 144 ip6_pkt_t *udpattr_ipp6; /* For V6 */ 145 ip4_pkt_t *udpattr_ipp4; /* For V4 */ 146 } udpattr_ippu; 147 #define udpattr_ipp6 udpattr_ippu.udpattr_ipp6 148 #define udpattr_ipp4 udpattr_ippu.udpattr_ipp4 149 mblk_t *udpattr_mb; 150 boolean_t udpattr_credset; 151 } udpattrs_t; 152 153 static void udp_addr_req(queue_t *q, mblk_t *mp); 154 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 155 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 156 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 157 static int udp_build_hdrs(udp_t *udp); 158 static void udp_capability_req(queue_t *q, mblk_t *mp); 159 static int udp_tpi_close(queue_t *q, int flags); 160 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 161 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 162 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 163 int sys_error); 164 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 165 t_scalar_t tlierr, int unixerr); 166 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 167 cred_t *cr); 168 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 169 char *value, caddr_t cp, cred_t *cr); 170 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 171 char *value, caddr_t cp, cred_t *cr); 172 static void udp_icmp_error(conn_t *, mblk_t *); 173 static void udp_icmp_error_ipv6(conn_t *, mblk_t *); 174 static void udp_info_req(queue_t *q, mblk_t *mp); 175 static void udp_input(void *, mblk_t *, void *); 176 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 177 t_scalar_t addr_length); 178 static void udp_lrput(queue_t *, mblk_t *); 179 static void udp_lwput(queue_t *, mblk_t *); 180 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 181 cred_t *credp, boolean_t isv6); 182 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 183 cred_t *credp); 184 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 185 cred_t *credp); 186 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 187 int *errorp, udpattrs_t *udpattrs); 188 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 189 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 190 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 191 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 192 cred_t *cr); 193 static void udp_report_item(mblk_t *mp, udp_t *udp); 194 static int udp_rinfop(queue_t *q, infod_t *dp); 195 static int udp_rrw(queue_t *q, struiod_t *dp); 196 static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 197 cred_t *cr); 198 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, 199 ipha_t *ipha); 200 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 201 t_scalar_t destlen, t_scalar_t err); 202 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 203 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 204 boolean_t random); 205 static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, 206 int *, boolean_t, struct nmsghdr *, cred_t *, pid_t); 207 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 208 int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid); 209 static void udp_wput_other(queue_t *q, mblk_t *mp); 210 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 211 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 212 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 213 214 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 215 static void udp_stack_fini(netstackid_t stackid, void *arg); 216 217 static void *udp_kstat_init(netstackid_t stackid); 218 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 219 static void *udp_kstat2_init(netstackid_t, udp_stat_t *); 220 static void udp_kstat2_fini(netstackid_t, kstat_t *); 221 static int udp_kstat_update(kstat_t *kp, int rw); 222 223 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 224 uint_t pkt_len); 225 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 226 static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t); 227 228 static int udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *, 229 cred_t *, pid_t); 230 231 /* Common routine for TPI and socket module */ 232 static conn_t *udp_do_open(cred_t *, boolean_t, int); 233 static void udp_do_close(conn_t *); 234 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 235 boolean_t); 236 static int udp_do_unbind(conn_t *); 237 static int udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *); 238 static int udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *); 239 240 int udp_getsockname(sock_lower_handle_t, 241 struct sockaddr *, socklen_t *, cred_t *); 242 int udp_getpeername(sock_lower_handle_t, 243 struct sockaddr *, socklen_t *, cred_t *); 244 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 245 cred_t *cr); 246 static int udp_post_ip_bind_connect(udp_t *, mblk_t *, int); 247 248 #define UDP_RECV_HIWATER (56 * 1024) 249 #define UDP_RECV_LOWATER 128 250 #define UDP_XMIT_HIWATER (56 * 1024) 251 #define UDP_XMIT_LOWATER 1024 252 253 /* 254 * The following is defined in tcp.c 255 */ 256 extern int (*cl_inet_connect2)(netstackid_t stack_id, 257 uint8_t protocol, boolean_t is_outgoing, 258 sa_family_t addr_family, 259 uint8_t *laddrp, in_port_t lport, 260 uint8_t *faddrp, in_port_t fport, void *args); 261 262 /* 263 * Checks if the given destination addr/port is allowed out. 264 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 265 * Called for each connect() and for sendto()/sendmsg() to a different 266 * destination. 267 * For connect(), called in udp_connect(). 268 * For sendto()/sendmsg(), called in udp_output_v{4,6}(). 269 * 270 * This macro assumes that the cl_inet_connect2 hook is not NULL. 271 * Please check this before calling this macro. 272 * 273 * void 274 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 275 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 276 */ 277 #define CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) { \ 278 (err) = 0; \ 279 /* \ 280 * Running in cluster mode - check and register active \ 281 * "connection" information \ 282 */ \ 283 if ((udp)->udp_ipversion == IPV4_VERSION) \ 284 (err) = (*cl_inet_connect2)( \ 285 (cp)->conn_netstack->netstack_stackid, \ 286 IPPROTO_UDP, is_outgoing, AF_INET, \ 287 (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]), \ 288 (udp)->udp_port, \ 289 (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]), \ 290 (in_port_t)(fport), NULL); \ 291 else \ 292 (err) = (*cl_inet_connect2)( \ 293 (cp)->conn_netstack->netstack_stackid, \ 294 IPPROTO_UDP, is_outgoing, AF_INET6, \ 295 (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port, \ 296 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 297 } 298 299 static struct module_info udp_mod_info = { 300 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 301 }; 302 303 /* 304 * Entry points for UDP as a device. 305 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 306 */ 307 static struct qinit udp_rinitv4 = { 308 NULL, NULL, udp_openv4, udp_tpi_close, NULL, 309 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 310 }; 311 312 static struct qinit udp_rinitv6 = { 313 NULL, NULL, udp_openv6, udp_tpi_close, NULL, 314 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 315 }; 316 317 static struct qinit udp_winit = { 318 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, 319 &udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE 320 }; 321 322 /* UDP entry point during fallback */ 323 struct qinit udp_fallback_sock_winit = { 324 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 325 }; 326 327 /* 328 * UDP needs to handle I_LINK and I_PLINK since ifconfig 329 * likes to use it as a place to hang the various streams. 330 */ 331 static struct qinit udp_lrinit = { 332 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, 333 &udp_mod_info 334 }; 335 336 static struct qinit udp_lwinit = { 337 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, 338 &udp_mod_info 339 }; 340 341 /* For AF_INET aka /dev/udp */ 342 struct streamtab udpinfov4 = { 343 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 344 }; 345 346 /* For AF_INET6 aka /dev/udp6 */ 347 struct streamtab udpinfov6 = { 348 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 349 }; 350 351 static sin_t sin_null; /* Zero address for quick clears */ 352 static sin6_t sin6_null; /* Zero address for quick clears */ 353 354 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 355 356 /* Default structure copied into T_INFO_ACK messages */ 357 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 358 T_INFO_ACK, 359 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 360 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 361 T_INVALID, /* CDATA_size. udp does not support connect data. */ 362 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 363 sizeof (sin_t), /* ADDR_size. */ 364 0, /* OPT_size - not initialized here */ 365 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 366 T_CLTS, /* SERV_type. udp supports connection-less. */ 367 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 368 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 369 }; 370 371 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 372 373 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 374 T_INFO_ACK, 375 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 376 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 377 T_INVALID, /* CDATA_size. udp does not support connect data. */ 378 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 379 sizeof (sin6_t), /* ADDR_size. */ 380 0, /* OPT_size - not initialized here */ 381 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 382 T_CLTS, /* SERV_type. udp supports connection-less. */ 383 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 384 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 385 }; 386 387 /* largest UDP port number */ 388 #define UDP_MAX_PORT 65535 389 390 /* 391 * Table of ND variables supported by udp. These are loaded into us_nd 392 * in udp_open. 393 * All of these are alterable, within the min/max values given, at run time. 394 */ 395 /* BEGIN CSTYLED */ 396 udpparam_t udp_param_arr[] = { 397 /*min max value name */ 398 { 0L, 256, 32, "udp_wroff_extra" }, 399 { 1L, 255, 255, "udp_ipv4_ttl" }, 400 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 401 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 402 { 0, 1, 1, "udp_do_checksum" }, 403 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 404 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 405 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 406 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 407 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 408 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 409 { 100, 60000, 1000, "udp_ndd_get_info_interval"}, 410 }; 411 /* END CSTYLED */ 412 413 /* Setable in /etc/system */ 414 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 415 uint32_t udp_random_anon_port = 1; 416 417 /* 418 * Hook functions to enable cluster networking. 419 * On non-clustered systems these vectors must always be NULL 420 */ 421 422 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol, 423 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 424 void *args) = NULL; 425 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, 426 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 427 void *args) = NULL; 428 429 typedef union T_primitives *t_primp_t; 430 431 /* 432 * Return the next anonymous port in the privileged port range for 433 * bind checking. 434 * 435 * Trusted Extension (TX) notes: TX allows administrator to mark or 436 * reserve ports as Multilevel ports (MLP). MLP has special function 437 * on TX systems. Once a port is made MLP, it's not available as 438 * ordinary port. This creates "holes" in the port name space. It 439 * may be necessary to skip the "holes" find a suitable anon port. 440 */ 441 static in_port_t 442 udp_get_next_priv_port(udp_t *udp) 443 { 444 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 445 in_port_t nextport; 446 boolean_t restart = B_FALSE; 447 udp_stack_t *us = udp->udp_us; 448 449 retry: 450 if (next_priv_port < us->us_min_anonpriv_port || 451 next_priv_port >= IPPORT_RESERVED) { 452 next_priv_port = IPPORT_RESERVED - 1; 453 if (restart) 454 return (0); 455 restart = B_TRUE; 456 } 457 458 if (is_system_labeled() && 459 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 460 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 461 next_priv_port = nextport; 462 goto retry; 463 } 464 465 return (next_priv_port--); 466 } 467 468 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 469 /* ARGSUSED */ 470 static int 471 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 472 { 473 udp_fanout_t *udpf; 474 int i; 475 zoneid_t zoneid; 476 conn_t *connp; 477 udp_t *udp; 478 udp_stack_t *us; 479 480 connp = Q_TO_CONN(q); 481 udp = connp->conn_udp; 482 us = udp->udp_us; 483 484 /* Refer to comments in udp_status_report(). */ 485 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 486 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 487 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 488 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 489 return (0); 490 } 491 } 492 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 493 /* The following may work even if we cannot get a large buf. */ 494 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 495 return (0); 496 } 497 498 (void) mi_mpprintf(mp, 499 "UDP " MI_COL_HDRPAD_STR 500 /* 12345678[89ABCDEF] */ 501 " zone lport src addr dest addr port state"); 502 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 503 504 zoneid = connp->conn_zoneid; 505 506 for (i = 0; i < us->us_bind_fanout_size; i++) { 507 udpf = &us->us_bind_fanout[i]; 508 mutex_enter(&udpf->uf_lock); 509 510 /* Print the hash index. */ 511 udp = udpf->uf_udp; 512 if (zoneid != GLOBAL_ZONEID) { 513 /* skip to first entry in this zone; might be none */ 514 while (udp != NULL && 515 udp->udp_connp->conn_zoneid != zoneid) 516 udp = udp->udp_bind_hash; 517 } 518 if (udp != NULL) { 519 uint_t print_len, buf_len; 520 521 buf_len = mp->b_cont->b_datap->db_lim - 522 mp->b_cont->b_wptr; 523 print_len = snprintf((char *)mp->b_cont->b_wptr, 524 buf_len, "%d\n", i); 525 if (print_len < buf_len) { 526 mp->b_cont->b_wptr += print_len; 527 } else { 528 mp->b_cont->b_wptr += buf_len; 529 } 530 for (; udp != NULL; udp = udp->udp_bind_hash) { 531 if (zoneid == GLOBAL_ZONEID || 532 zoneid == udp->udp_connp->conn_zoneid) 533 udp_report_item(mp->b_cont, udp); 534 } 535 } 536 mutex_exit(&udpf->uf_lock); 537 } 538 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 539 return (0); 540 } 541 542 /* 543 * Hash list removal routine for udp_t structures. 544 */ 545 static void 546 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 547 { 548 udp_t *udpnext; 549 kmutex_t *lockp; 550 udp_stack_t *us = udp->udp_us; 551 552 if (udp->udp_ptpbhn == NULL) 553 return; 554 555 /* 556 * Extract the lock pointer in case there are concurrent 557 * hash_remove's for this instance. 558 */ 559 ASSERT(udp->udp_port != 0); 560 if (!caller_holds_lock) { 561 lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 562 us->us_bind_fanout_size)].uf_lock; 563 ASSERT(lockp != NULL); 564 mutex_enter(lockp); 565 } 566 if (udp->udp_ptpbhn != NULL) { 567 udpnext = udp->udp_bind_hash; 568 if (udpnext != NULL) { 569 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 570 udp->udp_bind_hash = NULL; 571 } 572 *udp->udp_ptpbhn = udpnext; 573 udp->udp_ptpbhn = NULL; 574 } 575 if (!caller_holds_lock) { 576 mutex_exit(lockp); 577 } 578 } 579 580 static void 581 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 582 { 583 udp_t **udpp; 584 udp_t *udpnext; 585 586 ASSERT(MUTEX_HELD(&uf->uf_lock)); 587 ASSERT(udp->udp_ptpbhn == NULL); 588 udpp = &uf->uf_udp; 589 udpnext = udpp[0]; 590 if (udpnext != NULL) { 591 /* 592 * If the new udp bound to the INADDR_ANY address 593 * and the first one in the list is not bound to 594 * INADDR_ANY we skip all entries until we find the 595 * first one bound to INADDR_ANY. 596 * This makes sure that applications binding to a 597 * specific address get preference over those binding to 598 * INADDR_ANY. 599 */ 600 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 601 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 602 while ((udpnext = udpp[0]) != NULL && 603 !V6_OR_V4_INADDR_ANY( 604 udpnext->udp_bound_v6src)) { 605 udpp = &(udpnext->udp_bind_hash); 606 } 607 if (udpnext != NULL) 608 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 609 } else { 610 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 611 } 612 } 613 udp->udp_bind_hash = udpnext; 614 udp->udp_ptpbhn = udpp; 615 udpp[0] = udp; 616 } 617 618 /* 619 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 620 * passed to udp_wput. 621 * It associates a port number and local address with the stream. 622 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 623 * protocol type (IPPROTO_UDP) placed in the message following the address. 624 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 625 * (Called as writer.) 626 * 627 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 628 * without setting SO_REUSEADDR. This is needed so that they 629 * can be viewed as two independent transport protocols. 630 * However, anonymouns ports are allocated from the same range to avoid 631 * duplicating the us->us_next_port_to_try. 632 */ 633 static void 634 udp_tpi_bind(queue_t *q, mblk_t *mp) 635 { 636 sin_t *sin; 637 sin6_t *sin6; 638 mblk_t *mp1; 639 struct T_bind_req *tbr; 640 conn_t *connp; 641 udp_t *udp; 642 int error; 643 struct sockaddr *sa; 644 cred_t *cr; 645 646 /* 647 * All Solaris components should pass a db_credp 648 * for this TPI message, hence we ASSERT. 649 * But in case there is some other M_PROTO that looks 650 * like a TPI message sent by some other kernel 651 * component, we check and return an error. 652 */ 653 cr = msg_getcred(mp, NULL); 654 ASSERT(cr != NULL); 655 if (cr == NULL) { 656 udp_err_ack(q, mp, TSYSERR, EINVAL); 657 return; 658 } 659 660 connp = Q_TO_CONN(q); 661 udp = connp->conn_udp; 662 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 663 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 664 "udp_bind: bad req, len %u", 665 (uint_t)(mp->b_wptr - mp->b_rptr)); 666 udp_err_ack(q, mp, TPROTO, 0); 667 return; 668 } 669 if (udp->udp_state != TS_UNBND) { 670 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 671 "udp_bind: bad state, %u", udp->udp_state); 672 udp_err_ack(q, mp, TOUTSTATE, 0); 673 return; 674 } 675 /* 676 * Reallocate the message to make sure we have enough room for an 677 * address and the protocol type. 678 */ 679 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 680 if (!mp1) { 681 udp_err_ack(q, mp, TSYSERR, ENOMEM); 682 return; 683 } 684 685 mp = mp1; 686 687 /* Reset the message type in preparation for shipping it back. */ 688 DB_TYPE(mp) = M_PCPROTO; 689 690 tbr = (struct T_bind_req *)mp->b_rptr; 691 switch (tbr->ADDR_length) { 692 case 0: /* Request for a generic port */ 693 tbr->ADDR_offset = sizeof (struct T_bind_req); 694 if (udp->udp_family == AF_INET) { 695 tbr->ADDR_length = sizeof (sin_t); 696 sin = (sin_t *)&tbr[1]; 697 *sin = sin_null; 698 sin->sin_family = AF_INET; 699 mp->b_wptr = (uchar_t *)&sin[1]; 700 sa = (struct sockaddr *)sin; 701 } else { 702 ASSERT(udp->udp_family == AF_INET6); 703 tbr->ADDR_length = sizeof (sin6_t); 704 sin6 = (sin6_t *)&tbr[1]; 705 *sin6 = sin6_null; 706 sin6->sin6_family = AF_INET6; 707 mp->b_wptr = (uchar_t *)&sin6[1]; 708 sa = (struct sockaddr *)sin6; 709 } 710 break; 711 712 case sizeof (sin_t): /* Complete IPv4 address */ 713 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 714 sizeof (sin_t)); 715 if (sa == NULL || !OK_32PTR((char *)sa)) { 716 udp_err_ack(q, mp, TSYSERR, EINVAL); 717 return; 718 } 719 if (udp->udp_family != AF_INET || 720 sa->sa_family != AF_INET) { 721 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 722 return; 723 } 724 break; 725 726 case sizeof (sin6_t): /* complete IPv6 address */ 727 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 728 sizeof (sin6_t)); 729 if (sa == NULL || !OK_32PTR((char *)sa)) { 730 udp_err_ack(q, mp, TSYSERR, EINVAL); 731 return; 732 } 733 if (udp->udp_family != AF_INET6 || 734 sa->sa_family != AF_INET6) { 735 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 736 return; 737 } 738 break; 739 740 default: /* Invalid request */ 741 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 742 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 743 udp_err_ack(q, mp, TBADADDR, 0); 744 return; 745 } 746 747 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 748 tbr->PRIM_type != O_T_BIND_REQ); 749 750 if (error != 0) { 751 if (error > 0) { 752 udp_err_ack(q, mp, TSYSERR, error); 753 } else { 754 udp_err_ack(q, mp, -error, 0); 755 } 756 } else { 757 tbr->PRIM_type = T_BIND_ACK; 758 qreply(q, mp); 759 } 760 } 761 762 /* 763 * This routine handles each T_CONN_REQ message passed to udp. It 764 * associates a default destination address with the stream. 765 * 766 * This routine sends down a T_BIND_REQ to IP with the following mblks: 767 * T_BIND_REQ - specifying local and remote address/port 768 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 769 * T_OK_ACK - for the T_CONN_REQ 770 * T_CONN_CON - to keep the TPI user happy 771 * 772 * The connect completes in udp_do_connect. 773 * When a T_BIND_ACK is received information is extracted from the IRE 774 * and the two appended messages are sent to the TPI user. 775 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 776 * convert it to an error ack for the appropriate primitive. 777 */ 778 static void 779 udp_tpi_connect(queue_t *q, mblk_t *mp) 780 { 781 mblk_t *mp1; 782 udp_t *udp; 783 conn_t *connp = Q_TO_CONN(q); 784 int error; 785 socklen_t len; 786 struct sockaddr *sa; 787 struct T_conn_req *tcr; 788 cred_t *cr; 789 790 /* 791 * All Solaris components should pass a db_credp 792 * for this TPI message, hence we ASSERT. 793 * But in case there is some other M_PROTO that looks 794 * like a TPI message sent by some other kernel 795 * component, we check and return an error. 796 */ 797 cr = msg_getcred(mp, NULL); 798 ASSERT(cr != NULL); 799 if (cr == NULL) { 800 udp_err_ack(q, mp, TSYSERR, EINVAL); 801 return; 802 } 803 804 udp = connp->conn_udp; 805 tcr = (struct T_conn_req *)mp->b_rptr; 806 807 /* A bit of sanity checking */ 808 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 809 udp_err_ack(q, mp, TPROTO, 0); 810 return; 811 } 812 813 if (tcr->OPT_length != 0) { 814 udp_err_ack(q, mp, TBADOPT, 0); 815 return; 816 } 817 818 /* 819 * Determine packet type based on type of address passed in 820 * the request should contain an IPv4 or IPv6 address. 821 * Make sure that address family matches the type of 822 * family of the the address passed down 823 */ 824 len = tcr->DEST_length; 825 switch (tcr->DEST_length) { 826 default: 827 udp_err_ack(q, mp, TBADADDR, 0); 828 return; 829 830 case sizeof (sin_t): 831 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 832 sizeof (sin_t)); 833 break; 834 835 case sizeof (sin6_t): 836 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 837 sizeof (sin6_t)); 838 break; 839 } 840 841 error = proto_verify_ip_addr(udp->udp_family, sa, len); 842 if (error != 0) { 843 udp_err_ack(q, mp, TSYSERR, error); 844 return; 845 } 846 847 /* 848 * We have to send a connection confirmation to 849 * keep TLI happy. 850 */ 851 if (udp->udp_family == AF_INET) { 852 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 853 sizeof (sin_t), NULL, 0); 854 } else { 855 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 856 sizeof (sin6_t), NULL, 0); 857 } 858 if (mp1 == NULL) { 859 udp_err_ack(q, mp, TSYSERR, ENOMEM); 860 return; 861 } 862 863 /* 864 * ok_ack for T_CONN_REQ 865 */ 866 mp = mi_tpi_ok_ack_alloc(mp); 867 if (mp == NULL) { 868 /* Unable to reuse the T_CONN_REQ for the ack. */ 869 freemsg(mp1); 870 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 871 return; 872 } 873 874 error = udp_do_connect(connp, sa, len, cr); 875 if (error != 0) { 876 freeb(mp1); 877 if (error < 0) 878 udp_err_ack(q, mp, -error, 0); 879 else 880 udp_err_ack(q, mp, TSYSERR, error); 881 } else { 882 putnext(connp->conn_rq, mp); 883 putnext(connp->conn_rq, mp1); 884 } 885 } 886 887 static int 888 udp_tpi_close(queue_t *q, int flags) 889 { 890 conn_t *connp; 891 892 if (flags & SO_FALLBACK) { 893 /* 894 * stream is being closed while in fallback 895 * simply free the resources that were allocated 896 */ 897 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 898 qprocsoff(q); 899 goto done; 900 } 901 902 connp = Q_TO_CONN(q); 903 udp_do_close(connp); 904 done: 905 q->q_ptr = WR(q)->q_ptr = NULL; 906 return (0); 907 } 908 909 /* 910 * Called in the close path to quiesce the conn 911 */ 912 void 913 udp_quiesce_conn(conn_t *connp) 914 { 915 udp_t *udp = connp->conn_udp; 916 917 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 918 /* 919 * Running in cluster mode - register unbind information 920 */ 921 if (udp->udp_ipversion == IPV4_VERSION) { 922 (*cl_inet_unbind)( 923 connp->conn_netstack->netstack_stackid, 924 IPPROTO_UDP, AF_INET, 925 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 926 (in_port_t)udp->udp_port, NULL); 927 } else { 928 (*cl_inet_unbind)( 929 connp->conn_netstack->netstack_stackid, 930 IPPROTO_UDP, AF_INET6, 931 (uint8_t *)(&(udp->udp_v6src)), 932 (in_port_t)udp->udp_port, NULL); 933 } 934 } 935 936 udp_bind_hash_remove(udp, B_FALSE); 937 938 } 939 940 void 941 udp_close_free(conn_t *connp) 942 { 943 udp_t *udp = connp->conn_udp; 944 945 /* If there are any options associated with the stream, free them. */ 946 if (udp->udp_ip_snd_options != NULL) { 947 mi_free((char *)udp->udp_ip_snd_options); 948 udp->udp_ip_snd_options = NULL; 949 udp->udp_ip_snd_options_len = 0; 950 } 951 952 if (udp->udp_ip_rcv_options != NULL) { 953 mi_free((char *)udp->udp_ip_rcv_options); 954 udp->udp_ip_rcv_options = NULL; 955 udp->udp_ip_rcv_options_len = 0; 956 } 957 958 /* Free memory associated with sticky options */ 959 if (udp->udp_sticky_hdrs_len != 0) { 960 kmem_free(udp->udp_sticky_hdrs, 961 udp->udp_sticky_hdrs_len); 962 udp->udp_sticky_hdrs = NULL; 963 udp->udp_sticky_hdrs_len = 0; 964 } 965 966 ip6_pkt_free(&udp->udp_sticky_ipp); 967 968 /* 969 * Clear any fields which the kmem_cache constructor clears. 970 * Only udp_connp needs to be preserved. 971 * TBD: We should make this more efficient to avoid clearing 972 * everything. 973 */ 974 ASSERT(udp->udp_connp == connp); 975 bzero(udp, sizeof (udp_t)); 976 udp->udp_connp = connp; 977 } 978 979 static int 980 udp_do_disconnect(conn_t *connp) 981 { 982 udp_t *udp; 983 mblk_t *ire_mp; 984 udp_fanout_t *udpf; 985 udp_stack_t *us; 986 int error; 987 988 udp = connp->conn_udp; 989 us = udp->udp_us; 990 rw_enter(&udp->udp_rwlock, RW_WRITER); 991 if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) { 992 rw_exit(&udp->udp_rwlock); 993 return (-TOUTSTATE); 994 } 995 udp->udp_pending_op = T_DISCON_REQ; 996 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 997 us->us_bind_fanout_size)]; 998 mutex_enter(&udpf->uf_lock); 999 udp->udp_v6src = udp->udp_bound_v6src; 1000 udp->udp_state = TS_IDLE; 1001 mutex_exit(&udpf->uf_lock); 1002 1003 if (udp->udp_family == AF_INET6) { 1004 /* Rebuild the header template */ 1005 error = udp_build_hdrs(udp); 1006 if (error != 0) { 1007 udp->udp_pending_op = -1; 1008 rw_exit(&udp->udp_rwlock); 1009 return (error); 1010 } 1011 } 1012 1013 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 1014 if (ire_mp == NULL) { 1015 mutex_enter(&udpf->uf_lock); 1016 udp->udp_pending_op = -1; 1017 mutex_exit(&udpf->uf_lock); 1018 rw_exit(&udp->udp_rwlock); 1019 return (ENOMEM); 1020 } 1021 1022 rw_exit(&udp->udp_rwlock); 1023 1024 if (udp->udp_family == AF_INET6) { 1025 error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP, 1026 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 1027 } else { 1028 error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP, 1029 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE); 1030 } 1031 1032 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 1033 } 1034 1035 1036 static void 1037 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 1038 { 1039 conn_t *connp = Q_TO_CONN(q); 1040 int error; 1041 1042 /* 1043 * Allocate the largest primitive we need to send back 1044 * T_error_ack is > than T_ok_ack 1045 */ 1046 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 1047 if (mp == NULL) { 1048 /* Unable to reuse the T_DISCON_REQ for the ack. */ 1049 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 1050 return; 1051 } 1052 1053 error = udp_do_disconnect(connp); 1054 1055 if (error != 0) { 1056 if (error < 0) { 1057 udp_err_ack(q, mp, -error, 0); 1058 } else { 1059 udp_err_ack(q, mp, TSYSERR, error); 1060 } 1061 } else { 1062 mp = mi_tpi_ok_ack_alloc(mp); 1063 ASSERT(mp != NULL); 1064 qreply(q, mp); 1065 } 1066 } 1067 1068 int 1069 udp_disconnect(conn_t *connp) 1070 { 1071 int error; 1072 udp_t *udp = connp->conn_udp; 1073 1074 udp->udp_dgram_errind = B_FALSE; 1075 1076 error = udp_do_disconnect(connp); 1077 1078 if (error < 0) 1079 error = proto_tlitosyserr(-error); 1080 1081 return (error); 1082 } 1083 1084 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 1085 static void 1086 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1087 { 1088 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1089 qreply(q, mp); 1090 } 1091 1092 /* Shorthand to generate and send TPI error acks to our client */ 1093 static void 1094 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 1095 int sys_error) 1096 { 1097 struct T_error_ack *teackp; 1098 1099 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1100 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1101 teackp = (struct T_error_ack *)mp->b_rptr; 1102 teackp->ERROR_prim = primitive; 1103 teackp->TLI_error = t_error; 1104 teackp->UNIX_error = sys_error; 1105 qreply(q, mp); 1106 } 1107 } 1108 1109 /*ARGSUSED*/ 1110 static int 1111 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 1112 { 1113 int i; 1114 udp_t *udp = Q_TO_UDP(q); 1115 udp_stack_t *us = udp->udp_us; 1116 1117 for (i = 0; i < us->us_num_epriv_ports; i++) { 1118 if (us->us_epriv_ports[i] != 0) 1119 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 1120 } 1121 return (0); 1122 } 1123 1124 /* ARGSUSED */ 1125 static int 1126 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1127 cred_t *cr) 1128 { 1129 long new_value; 1130 int i; 1131 udp_t *udp = Q_TO_UDP(q); 1132 udp_stack_t *us = udp->udp_us; 1133 1134 /* 1135 * Fail the request if the new value does not lie within the 1136 * port number limits. 1137 */ 1138 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1139 new_value <= 0 || new_value >= 65536) { 1140 return (EINVAL); 1141 } 1142 1143 /* Check if the value is already in the list */ 1144 for (i = 0; i < us->us_num_epriv_ports; i++) { 1145 if (new_value == us->us_epriv_ports[i]) { 1146 return (EEXIST); 1147 } 1148 } 1149 /* Find an empty slot */ 1150 for (i = 0; i < us->us_num_epriv_ports; i++) { 1151 if (us->us_epriv_ports[i] == 0) 1152 break; 1153 } 1154 if (i == us->us_num_epriv_ports) { 1155 return (EOVERFLOW); 1156 } 1157 1158 /* Set the new value */ 1159 us->us_epriv_ports[i] = (in_port_t)new_value; 1160 return (0); 1161 } 1162 1163 /* ARGSUSED */ 1164 static int 1165 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1166 cred_t *cr) 1167 { 1168 long new_value; 1169 int i; 1170 udp_t *udp = Q_TO_UDP(q); 1171 udp_stack_t *us = udp->udp_us; 1172 1173 /* 1174 * Fail the request if the new value does not lie within the 1175 * port number limits. 1176 */ 1177 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1178 new_value <= 0 || new_value >= 65536) { 1179 return (EINVAL); 1180 } 1181 1182 /* Check that the value is already in the list */ 1183 for (i = 0; i < us->us_num_epriv_ports; i++) { 1184 if (us->us_epriv_ports[i] == new_value) 1185 break; 1186 } 1187 if (i == us->us_num_epriv_ports) { 1188 return (ESRCH); 1189 } 1190 1191 /* Clear the value */ 1192 us->us_epriv_ports[i] = 0; 1193 return (0); 1194 } 1195 1196 /* At minimum we need 4 bytes of UDP header */ 1197 #define ICMP_MIN_UDP_HDR 4 1198 1199 /* 1200 * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP. 1201 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1202 * Assumes that IP has pulled up everything up to and including the ICMP header. 1203 */ 1204 static void 1205 udp_icmp_error(conn_t *connp, mblk_t *mp) 1206 { 1207 icmph_t *icmph; 1208 ipha_t *ipha; 1209 int iph_hdr_length; 1210 udpha_t *udpha; 1211 sin_t sin; 1212 sin6_t sin6; 1213 mblk_t *mp1; 1214 int error = 0; 1215 udp_t *udp = connp->conn_udp; 1216 1217 mp1 = NULL; 1218 ipha = (ipha_t *)mp->b_rptr; 1219 1220 ASSERT(OK_32PTR(mp->b_rptr)); 1221 1222 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1223 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1224 udp_icmp_error_ipv6(connp, mp); 1225 return; 1226 } 1227 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 1228 1229 /* Skip past the outer IP and ICMP headers */ 1230 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1231 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1232 ipha = (ipha_t *)&icmph[1]; 1233 1234 /* Skip past the inner IP and find the ULP header */ 1235 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1236 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1237 1238 switch (icmph->icmph_type) { 1239 case ICMP_DEST_UNREACHABLE: 1240 switch (icmph->icmph_code) { 1241 case ICMP_FRAGMENTATION_NEEDED: 1242 /* 1243 * IP has already adjusted the path MTU. 1244 */ 1245 break; 1246 case ICMP_PORT_UNREACHABLE: 1247 case ICMP_PROTOCOL_UNREACHABLE: 1248 error = ECONNREFUSED; 1249 break; 1250 default: 1251 /* Transient errors */ 1252 break; 1253 } 1254 break; 1255 default: 1256 /* Transient errors */ 1257 break; 1258 } 1259 if (error == 0) { 1260 freemsg(mp); 1261 return; 1262 } 1263 1264 /* 1265 * Deliver T_UDERROR_IND when the application has asked for it. 1266 * The socket layer enables this automatically when connected. 1267 */ 1268 if (!udp->udp_dgram_errind) { 1269 freemsg(mp); 1270 return; 1271 } 1272 1273 1274 switch (udp->udp_family) { 1275 case AF_INET: 1276 sin = sin_null; 1277 sin.sin_family = AF_INET; 1278 sin.sin_addr.s_addr = ipha->ipha_dst; 1279 sin.sin_port = udpha->uha_dst_port; 1280 if (IPCL_IS_NONSTR(connp)) { 1281 rw_enter(&udp->udp_rwlock, RW_WRITER); 1282 if (udp->udp_state == TS_DATA_XFER) { 1283 if (sin.sin_port == udp->udp_dstport && 1284 sin.sin_addr.s_addr == 1285 V4_PART_OF_V6(udp->udp_v6dst)) { 1286 1287 rw_exit(&udp->udp_rwlock); 1288 (*connp->conn_upcalls->su_set_error) 1289 (connp->conn_upper_handle, error); 1290 goto done; 1291 } 1292 } else { 1293 udp->udp_delayed_error = error; 1294 *((sin_t *)&udp->udp_delayed_addr) = sin; 1295 } 1296 rw_exit(&udp->udp_rwlock); 1297 } else { 1298 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1299 NULL, 0, error); 1300 } 1301 break; 1302 case AF_INET6: 1303 sin6 = sin6_null; 1304 sin6.sin6_family = AF_INET6; 1305 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1306 sin6.sin6_port = udpha->uha_dst_port; 1307 if (IPCL_IS_NONSTR(connp)) { 1308 rw_enter(&udp->udp_rwlock, RW_WRITER); 1309 if (udp->udp_state == TS_DATA_XFER) { 1310 if (sin6.sin6_port == udp->udp_dstport && 1311 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1312 &udp->udp_v6dst)) { 1313 rw_exit(&udp->udp_rwlock); 1314 (*connp->conn_upcalls->su_set_error) 1315 (connp->conn_upper_handle, error); 1316 goto done; 1317 } 1318 } else { 1319 udp->udp_delayed_error = error; 1320 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1321 } 1322 rw_exit(&udp->udp_rwlock); 1323 } else { 1324 1325 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1326 NULL, 0, error); 1327 } 1328 break; 1329 } 1330 if (mp1 != NULL) 1331 putnext(connp->conn_rq, mp1); 1332 done: 1333 freemsg(mp); 1334 } 1335 1336 /* 1337 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1338 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1339 * Assumes that IP has pulled up all the extension headers as well as the 1340 * ICMPv6 header. 1341 */ 1342 static void 1343 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1344 { 1345 icmp6_t *icmp6; 1346 ip6_t *ip6h, *outer_ip6h; 1347 uint16_t iph_hdr_length; 1348 uint8_t *nexthdrp; 1349 udpha_t *udpha; 1350 sin6_t sin6; 1351 mblk_t *mp1; 1352 int error = 0; 1353 udp_t *udp = connp->conn_udp; 1354 udp_stack_t *us = udp->udp_us; 1355 1356 outer_ip6h = (ip6_t *)mp->b_rptr; 1357 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1358 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1359 else 1360 iph_hdr_length = IPV6_HDR_LEN; 1361 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1362 ip6h = (ip6_t *)&icmp6[1]; 1363 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1364 freemsg(mp); 1365 return; 1366 } 1367 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1368 1369 switch (icmp6->icmp6_type) { 1370 case ICMP6_DST_UNREACH: 1371 switch (icmp6->icmp6_code) { 1372 case ICMP6_DST_UNREACH_NOPORT: 1373 error = ECONNREFUSED; 1374 break; 1375 case ICMP6_DST_UNREACH_ADMIN: 1376 case ICMP6_DST_UNREACH_NOROUTE: 1377 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1378 case ICMP6_DST_UNREACH_ADDR: 1379 /* Transient errors */ 1380 break; 1381 default: 1382 break; 1383 } 1384 break; 1385 case ICMP6_PACKET_TOO_BIG: { 1386 struct T_unitdata_ind *tudi; 1387 struct T_opthdr *toh; 1388 size_t udi_size; 1389 mblk_t *newmp; 1390 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1391 sizeof (struct ip6_mtuinfo); 1392 sin6_t *sin6; 1393 struct ip6_mtuinfo *mtuinfo; 1394 1395 /* 1396 * If the application has requested to receive path mtu 1397 * information, send up an empty message containing an 1398 * IPV6_PATHMTU ancillary data item. 1399 */ 1400 if (!udp->udp_ipv6_recvpathmtu) 1401 break; 1402 1403 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1404 opt_length; 1405 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1406 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1407 break; 1408 } 1409 1410 /* 1411 * newmp->b_cont is left to NULL on purpose. This is an 1412 * empty message containing only ancillary data. 1413 */ 1414 newmp->b_datap->db_type = M_PROTO; 1415 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1416 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1417 tudi->PRIM_type = T_UNITDATA_IND; 1418 tudi->SRC_length = sizeof (sin6_t); 1419 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1420 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1421 tudi->OPT_length = opt_length; 1422 1423 sin6 = (sin6_t *)&tudi[1]; 1424 bzero(sin6, sizeof (sin6_t)); 1425 sin6->sin6_family = AF_INET6; 1426 sin6->sin6_addr = udp->udp_v6dst; 1427 1428 toh = (struct T_opthdr *)&sin6[1]; 1429 toh->level = IPPROTO_IPV6; 1430 toh->name = IPV6_PATHMTU; 1431 toh->len = opt_length; 1432 toh->status = 0; 1433 1434 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1435 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1436 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1437 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1438 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1439 /* 1440 * We've consumed everything we need from the original 1441 * message. Free it, then send our empty message. 1442 */ 1443 freemsg(mp); 1444 if (!IPCL_IS_NONSTR(connp)) { 1445 putnext(connp->conn_rq, newmp); 1446 } else { 1447 (*connp->conn_upcalls->su_recv) 1448 (connp->conn_upper_handle, newmp, 0, 0, &error, 1449 NULL); 1450 } 1451 return; 1452 } 1453 case ICMP6_TIME_EXCEEDED: 1454 /* Transient errors */ 1455 break; 1456 case ICMP6_PARAM_PROB: 1457 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1458 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1459 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1460 (uchar_t *)nexthdrp) { 1461 error = ECONNREFUSED; 1462 break; 1463 } 1464 break; 1465 } 1466 if (error == 0) { 1467 freemsg(mp); 1468 return; 1469 } 1470 1471 /* 1472 * Deliver T_UDERROR_IND when the application has asked for it. 1473 * The socket layer enables this automatically when connected. 1474 */ 1475 if (!udp->udp_dgram_errind) { 1476 freemsg(mp); 1477 return; 1478 } 1479 1480 sin6 = sin6_null; 1481 sin6.sin6_family = AF_INET6; 1482 sin6.sin6_addr = ip6h->ip6_dst; 1483 sin6.sin6_port = udpha->uha_dst_port; 1484 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1485 1486 if (IPCL_IS_NONSTR(connp)) { 1487 rw_enter(&udp->udp_rwlock, RW_WRITER); 1488 if (udp->udp_state == TS_DATA_XFER) { 1489 if (sin6.sin6_port == udp->udp_dstport && 1490 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1491 &udp->udp_v6dst)) { 1492 rw_exit(&udp->udp_rwlock); 1493 (*connp->conn_upcalls->su_set_error) 1494 (connp->conn_upper_handle, error); 1495 goto done; 1496 } 1497 } else { 1498 udp->udp_delayed_error = error; 1499 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1500 } 1501 rw_exit(&udp->udp_rwlock); 1502 } else { 1503 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1504 NULL, 0, error); 1505 if (mp1 != NULL) 1506 putnext(connp->conn_rq, mp1); 1507 } 1508 1509 done: 1510 freemsg(mp); 1511 } 1512 1513 /* 1514 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1515 * The local address is filled in if endpoint is bound. The remote address 1516 * is filled in if remote address has been precified ("connected endpoint") 1517 * (The concept of connected CLTS sockets is alien to published TPI 1518 * but we support it anyway). 1519 */ 1520 static void 1521 udp_addr_req(queue_t *q, mblk_t *mp) 1522 { 1523 sin_t *sin; 1524 sin6_t *sin6; 1525 mblk_t *ackmp; 1526 struct T_addr_ack *taa; 1527 udp_t *udp = Q_TO_UDP(q); 1528 1529 /* Make it large enough for worst case */ 1530 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1531 2 * sizeof (sin6_t), 1); 1532 if (ackmp == NULL) { 1533 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1534 return; 1535 } 1536 taa = (struct T_addr_ack *)ackmp->b_rptr; 1537 1538 bzero(taa, sizeof (struct T_addr_ack)); 1539 ackmp->b_wptr = (uchar_t *)&taa[1]; 1540 1541 taa->PRIM_type = T_ADDR_ACK; 1542 ackmp->b_datap->db_type = M_PCPROTO; 1543 rw_enter(&udp->udp_rwlock, RW_READER); 1544 /* 1545 * Note: Following code assumes 32 bit alignment of basic 1546 * data structures like sin_t and struct T_addr_ack. 1547 */ 1548 if (udp->udp_state != TS_UNBND) { 1549 /* 1550 * Fill in local address first 1551 */ 1552 taa->LOCADDR_offset = sizeof (*taa); 1553 if (udp->udp_family == AF_INET) { 1554 taa->LOCADDR_length = sizeof (sin_t); 1555 sin = (sin_t *)&taa[1]; 1556 /* Fill zeroes and then initialize non-zero fields */ 1557 *sin = sin_null; 1558 sin->sin_family = AF_INET; 1559 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 1560 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1561 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 1562 sin->sin_addr.s_addr); 1563 } else { 1564 /* 1565 * INADDR_ANY 1566 * udp_v6src is not set, we might be bound to 1567 * broadcast/multicast. Use udp_bound_v6src as 1568 * local address instead (that could 1569 * also still be INADDR_ANY) 1570 */ 1571 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 1572 sin->sin_addr.s_addr); 1573 } 1574 sin->sin_port = udp->udp_port; 1575 ackmp->b_wptr = (uchar_t *)&sin[1]; 1576 if (udp->udp_state == TS_DATA_XFER) { 1577 /* 1578 * connected, fill remote address too 1579 */ 1580 taa->REMADDR_length = sizeof (sin_t); 1581 /* assumed 32-bit alignment */ 1582 taa->REMADDR_offset = taa->LOCADDR_offset + 1583 taa->LOCADDR_length; 1584 1585 sin = (sin_t *)(ackmp->b_rptr + 1586 taa->REMADDR_offset); 1587 /* initialize */ 1588 *sin = sin_null; 1589 sin->sin_family = AF_INET; 1590 sin->sin_addr.s_addr = 1591 V4_PART_OF_V6(udp->udp_v6dst); 1592 sin->sin_port = udp->udp_dstport; 1593 ackmp->b_wptr = (uchar_t *)&sin[1]; 1594 } 1595 } else { 1596 taa->LOCADDR_length = sizeof (sin6_t); 1597 sin6 = (sin6_t *)&taa[1]; 1598 /* Fill zeroes and then initialize non-zero fields */ 1599 *sin6 = sin6_null; 1600 sin6->sin6_family = AF_INET6; 1601 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1602 sin6->sin6_addr = udp->udp_v6src; 1603 } else { 1604 /* 1605 * UNSPECIFIED 1606 * udp_v6src is not set, we might be bound to 1607 * broadcast/multicast. Use udp_bound_v6src as 1608 * local address instead (that could 1609 * also still be UNSPECIFIED) 1610 */ 1611 sin6->sin6_addr = 1612 udp->udp_bound_v6src; 1613 } 1614 sin6->sin6_port = udp->udp_port; 1615 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1616 if (udp->udp_state == TS_DATA_XFER) { 1617 /* 1618 * connected, fill remote address too 1619 */ 1620 taa->REMADDR_length = sizeof (sin6_t); 1621 /* assumed 32-bit alignment */ 1622 taa->REMADDR_offset = taa->LOCADDR_offset + 1623 taa->LOCADDR_length; 1624 1625 sin6 = (sin6_t *)(ackmp->b_rptr + 1626 taa->REMADDR_offset); 1627 /* initialize */ 1628 *sin6 = sin6_null; 1629 sin6->sin6_family = AF_INET6; 1630 sin6->sin6_addr = udp->udp_v6dst; 1631 sin6->sin6_port = udp->udp_dstport; 1632 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1633 } 1634 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1635 } 1636 } 1637 rw_exit(&udp->udp_rwlock); 1638 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1639 qreply(q, ackmp); 1640 } 1641 1642 static void 1643 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1644 { 1645 if (udp->udp_family == AF_INET) { 1646 *tap = udp_g_t_info_ack_ipv4; 1647 } else { 1648 *tap = udp_g_t_info_ack_ipv6; 1649 } 1650 tap->CURRENT_state = udp->udp_state; 1651 tap->OPT_size = udp_max_optsize; 1652 } 1653 1654 static void 1655 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1656 t_uscalar_t cap_bits1) 1657 { 1658 tcap->CAP_bits1 = 0; 1659 1660 if (cap_bits1 & TC1_INFO) { 1661 udp_copy_info(&tcap->INFO_ack, udp); 1662 tcap->CAP_bits1 |= TC1_INFO; 1663 } 1664 } 1665 1666 /* 1667 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1668 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1669 * udp_g_t_info_ack. The current state of the stream is copied from 1670 * udp_state. 1671 */ 1672 static void 1673 udp_capability_req(queue_t *q, mblk_t *mp) 1674 { 1675 t_uscalar_t cap_bits1; 1676 struct T_capability_ack *tcap; 1677 udp_t *udp = Q_TO_UDP(q); 1678 1679 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1680 1681 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1682 mp->b_datap->db_type, T_CAPABILITY_ACK); 1683 if (!mp) 1684 return; 1685 1686 tcap = (struct T_capability_ack *)mp->b_rptr; 1687 udp_do_capability_ack(udp, tcap, cap_bits1); 1688 1689 qreply(q, mp); 1690 } 1691 1692 /* 1693 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1694 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1695 * The current state of the stream is copied from udp_state. 1696 */ 1697 static void 1698 udp_info_req(queue_t *q, mblk_t *mp) 1699 { 1700 udp_t *udp = Q_TO_UDP(q); 1701 1702 /* Create a T_INFO_ACK message. */ 1703 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1704 T_INFO_ACK); 1705 if (!mp) 1706 return; 1707 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1708 qreply(q, mp); 1709 } 1710 1711 /* 1712 * IP recognizes seven kinds of bind requests: 1713 * 1714 * - A zero-length address binds only to the protocol number. 1715 * 1716 * - A 4-byte address is treated as a request to 1717 * validate that the address is a valid local IPv4 1718 * address, appropriate for an application to bind to. 1719 * IP does the verification, but does not make any note 1720 * of the address at this time. 1721 * 1722 * - A 16-byte address contains is treated as a request 1723 * to validate a local IPv6 address, as the 4-byte 1724 * address case above. 1725 * 1726 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1727 * use it for the inbound fanout of packets. 1728 * 1729 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1730 * use it for the inbound fanout of packets. 1731 * 1732 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1733 * information consisting of local and remote addresses 1734 * and ports. In this case, the addresses are both 1735 * validated as appropriate for this operation, and, if 1736 * so, the information is retained for use in the 1737 * inbound fanout. 1738 * 1739 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1740 * fanout information, like the 12-byte case above. 1741 * 1742 * IP will also fill in the IRE request mblk with information 1743 * regarding our peer. In all cases, we notify IP of our protocol 1744 * type by appending a single protocol byte to the bind request. 1745 */ 1746 static mblk_t * 1747 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 1748 { 1749 char *cp; 1750 mblk_t *mp; 1751 struct T_bind_req *tbr; 1752 ipa_conn_t *ac; 1753 ipa6_conn_t *ac6; 1754 sin_t *sin; 1755 sin6_t *sin6; 1756 1757 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1758 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 1759 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1760 if (!mp) 1761 return (mp); 1762 mp->b_datap->db_type = M_PROTO; 1763 tbr = (struct T_bind_req *)mp->b_rptr; 1764 tbr->PRIM_type = bind_prim; 1765 tbr->ADDR_offset = sizeof (*tbr); 1766 tbr->CONIND_number = 0; 1767 tbr->ADDR_length = addr_length; 1768 cp = (char *)&tbr[1]; 1769 switch (addr_length) { 1770 case sizeof (ipa_conn_t): 1771 ASSERT(udp->udp_family == AF_INET); 1772 /* Append a request for an IRE */ 1773 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1774 if (!mp->b_cont) { 1775 freemsg(mp); 1776 return (NULL); 1777 } 1778 mp->b_cont->b_wptr += sizeof (ire_t); 1779 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1780 1781 /* cp known to be 32 bit aligned */ 1782 ac = (ipa_conn_t *)cp; 1783 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 1784 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 1785 ac->ac_fport = udp->udp_dstport; 1786 ac->ac_lport = udp->udp_port; 1787 break; 1788 1789 case sizeof (ipa6_conn_t): 1790 ASSERT(udp->udp_family == AF_INET6); 1791 /* Append a request for an IRE */ 1792 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1793 if (!mp->b_cont) { 1794 freemsg(mp); 1795 return (NULL); 1796 } 1797 mp->b_cont->b_wptr += sizeof (ire_t); 1798 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1799 1800 /* cp known to be 32 bit aligned */ 1801 ac6 = (ipa6_conn_t *)cp; 1802 ac6->ac6_laddr = udp->udp_v6src; 1803 ac6->ac6_faddr = udp->udp_v6dst; 1804 ac6->ac6_fport = udp->udp_dstport; 1805 ac6->ac6_lport = udp->udp_port; 1806 break; 1807 1808 case sizeof (sin_t): 1809 ASSERT(udp->udp_family == AF_INET); 1810 /* Append a request for an IRE */ 1811 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1812 if (!mp->b_cont) { 1813 freemsg(mp); 1814 return (NULL); 1815 } 1816 mp->b_cont->b_wptr += sizeof (ire_t); 1817 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1818 1819 sin = (sin_t *)cp; 1820 *sin = sin_null; 1821 sin->sin_family = AF_INET; 1822 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 1823 sin->sin_port = udp->udp_port; 1824 break; 1825 1826 case sizeof (sin6_t): 1827 ASSERT(udp->udp_family == AF_INET6); 1828 /* Append a request for an IRE */ 1829 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1830 if (!mp->b_cont) { 1831 freemsg(mp); 1832 return (NULL); 1833 } 1834 mp->b_cont->b_wptr += sizeof (ire_t); 1835 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1836 1837 sin6 = (sin6_t *)cp; 1838 *sin6 = sin6_null; 1839 sin6->sin6_family = AF_INET6; 1840 sin6->sin6_addr = udp->udp_bound_v6src; 1841 sin6->sin6_port = udp->udp_port; 1842 break; 1843 } 1844 /* Add protocol number to end */ 1845 cp[addr_length] = (char)IPPROTO_UDP; 1846 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1847 return (mp); 1848 } 1849 1850 /* For /dev/udp aka AF_INET open */ 1851 static int 1852 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1853 { 1854 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1855 } 1856 1857 /* For /dev/udp6 aka AF_INET6 open */ 1858 static int 1859 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1860 { 1861 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1862 } 1863 1864 /* 1865 * This is the open routine for udp. It allocates a udp_t structure for 1866 * the stream and, on the first open of the module, creates an ND table. 1867 */ 1868 /*ARGSUSED2*/ 1869 static int 1870 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1871 boolean_t isv6) 1872 { 1873 int error; 1874 udp_t *udp; 1875 conn_t *connp; 1876 dev_t conn_dev; 1877 udp_stack_t *us; 1878 vmem_t *minor_arena; 1879 1880 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 1881 1882 /* If the stream is already open, return immediately. */ 1883 if (q->q_ptr != NULL) 1884 return (0); 1885 1886 if (sflag == MODOPEN) 1887 return (EINVAL); 1888 1889 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1890 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1891 minor_arena = ip_minor_arena_la; 1892 } else { 1893 /* 1894 * Either minor numbers in the large arena were exhausted 1895 * or a non socket application is doing the open. 1896 * Try to allocate from the small arena. 1897 */ 1898 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1899 return (EBUSY); 1900 1901 minor_arena = ip_minor_arena_sa; 1902 } 1903 1904 if (flag & SO_FALLBACK) { 1905 /* 1906 * Non streams socket needs a stream to fallback to 1907 */ 1908 RD(q)->q_ptr = (void *)conn_dev; 1909 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1910 WR(q)->q_ptr = (void *)minor_arena; 1911 qprocson(q); 1912 return (0); 1913 } 1914 1915 connp = udp_do_open(credp, isv6, KM_SLEEP); 1916 if (connp == NULL) { 1917 inet_minor_free(minor_arena, conn_dev); 1918 return (ENOMEM); 1919 } 1920 udp = connp->conn_udp; 1921 us = udp->udp_us; 1922 1923 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1924 connp->conn_dev = conn_dev; 1925 connp->conn_minor_arena = minor_arena; 1926 1927 /* 1928 * Initialize the udp_t structure for this stream. 1929 */ 1930 q->q_ptr = connp; 1931 WR(q)->q_ptr = connp; 1932 connp->conn_rq = q; 1933 connp->conn_wq = WR(q); 1934 1935 rw_enter(&udp->udp_rwlock, RW_WRITER); 1936 ASSERT(connp->conn_ulp == IPPROTO_UDP); 1937 ASSERT(connp->conn_udp == udp); 1938 ASSERT(udp->udp_connp == connp); 1939 1940 if (flag & SO_SOCKSTR) { 1941 connp->conn_flags |= IPCL_SOCKET; 1942 udp->udp_issocket = B_TRUE; 1943 udp->udp_direct_sockfs = B_TRUE; 1944 } 1945 1946 q->q_hiwat = us->us_recv_hiwat; 1947 WR(q)->q_hiwat = us->us_xmit_hiwat; 1948 WR(q)->q_lowat = us->us_xmit_lowat; 1949 1950 qprocson(q); 1951 1952 if (udp->udp_family == AF_INET6) { 1953 /* Build initial header template for transmit */ 1954 if ((error = udp_build_hdrs(udp)) != 0) { 1955 rw_exit(&udp->udp_rwlock); 1956 qprocsoff(q); 1957 inet_minor_free(minor_arena, conn_dev); 1958 ipcl_conn_destroy(connp); 1959 return (error); 1960 } 1961 } 1962 rw_exit(&udp->udp_rwlock); 1963 1964 /* Set the Stream head write offset and high watermark. */ 1965 (void) proto_set_tx_wroff(q, connp, 1966 udp->udp_max_hdr_len + us->us_wroff_extra); 1967 /* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? */ 1968 (void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat)); 1969 1970 mutex_enter(&connp->conn_lock); 1971 connp->conn_state_flags &= ~CONN_INCIPIENT; 1972 mutex_exit(&connp->conn_lock); 1973 return (0); 1974 } 1975 1976 /* 1977 * Which UDP options OK to set through T_UNITDATA_REQ... 1978 */ 1979 /* ARGSUSED */ 1980 static boolean_t 1981 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1982 { 1983 return (B_TRUE); 1984 } 1985 1986 /* 1987 * This routine gets default values of certain options whose default 1988 * values are maintained by protcol specific code 1989 */ 1990 /* ARGSUSED */ 1991 int 1992 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1993 { 1994 udp_t *udp = Q_TO_UDP(q); 1995 udp_stack_t *us = udp->udp_us; 1996 int *i1 = (int *)ptr; 1997 1998 switch (level) { 1999 case IPPROTO_IP: 2000 switch (name) { 2001 case IP_MULTICAST_TTL: 2002 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 2003 return (sizeof (uchar_t)); 2004 case IP_MULTICAST_LOOP: 2005 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 2006 return (sizeof (uchar_t)); 2007 } 2008 break; 2009 case IPPROTO_IPV6: 2010 switch (name) { 2011 case IPV6_MULTICAST_HOPS: 2012 *i1 = IP_DEFAULT_MULTICAST_TTL; 2013 return (sizeof (int)); 2014 case IPV6_MULTICAST_LOOP: 2015 *i1 = IP_DEFAULT_MULTICAST_LOOP; 2016 return (sizeof (int)); 2017 case IPV6_UNICAST_HOPS: 2018 *i1 = us->us_ipv6_hoplimit; 2019 return (sizeof (int)); 2020 } 2021 break; 2022 } 2023 return (-1); 2024 } 2025 2026 /* 2027 * This routine retrieves the current status of socket options. 2028 * It returns the size of the option retrieved. 2029 */ 2030 static int 2031 udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 2032 { 2033 udp_t *udp = connp->conn_udp; 2034 udp_stack_t *us = udp->udp_us; 2035 int *i1 = (int *)ptr; 2036 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 2037 int len; 2038 2039 ASSERT(RW_READ_HELD(&udp->udp_rwlock)); 2040 switch (level) { 2041 case SOL_SOCKET: 2042 switch (name) { 2043 case SO_DEBUG: 2044 *i1 = udp->udp_debug; 2045 break; /* goto sizeof (int) option return */ 2046 case SO_REUSEADDR: 2047 *i1 = udp->udp_reuseaddr; 2048 break; /* goto sizeof (int) option return */ 2049 case SO_TYPE: 2050 *i1 = SOCK_DGRAM; 2051 break; /* goto sizeof (int) option return */ 2052 2053 /* 2054 * The following three items are available here, 2055 * but are only meaningful to IP. 2056 */ 2057 case SO_DONTROUTE: 2058 *i1 = udp->udp_dontroute; 2059 break; /* goto sizeof (int) option return */ 2060 case SO_USELOOPBACK: 2061 *i1 = udp->udp_useloopback; 2062 break; /* goto sizeof (int) option return */ 2063 case SO_BROADCAST: 2064 *i1 = udp->udp_broadcast; 2065 break; /* goto sizeof (int) option return */ 2066 2067 case SO_SNDBUF: 2068 *i1 = udp->udp_xmit_hiwat; 2069 break; /* goto sizeof (int) option return */ 2070 case SO_RCVBUF: 2071 *i1 = udp->udp_rcv_disply_hiwat; 2072 break; /* goto sizeof (int) option return */ 2073 case SO_DGRAM_ERRIND: 2074 *i1 = udp->udp_dgram_errind; 2075 break; /* goto sizeof (int) option return */ 2076 case SO_RECVUCRED: 2077 *i1 = udp->udp_recvucred; 2078 break; /* goto sizeof (int) option return */ 2079 case SO_TIMESTAMP: 2080 *i1 = udp->udp_timestamp; 2081 break; /* goto sizeof (int) option return */ 2082 case SO_ANON_MLP: 2083 *i1 = connp->conn_anon_mlp; 2084 break; /* goto sizeof (int) option return */ 2085 case SO_MAC_EXEMPT: 2086 *i1 = connp->conn_mac_exempt; 2087 break; /* goto sizeof (int) option return */ 2088 case SO_ALLZONES: 2089 *i1 = connp->conn_allzones; 2090 break; /* goto sizeof (int) option return */ 2091 case SO_EXCLBIND: 2092 *i1 = udp->udp_exclbind ? SO_EXCLBIND : 0; 2093 break; 2094 case SO_PROTOTYPE: 2095 *i1 = IPPROTO_UDP; 2096 break; 2097 case SO_DOMAIN: 2098 *i1 = udp->udp_family; 2099 break; 2100 default: 2101 return (-1); 2102 } 2103 break; 2104 case IPPROTO_IP: 2105 if (udp->udp_family != AF_INET) 2106 return (-1); 2107 switch (name) { 2108 case IP_OPTIONS: 2109 case T_IP_OPTIONS: 2110 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 2111 if (len > 0) { 2112 bcopy(udp->udp_ip_rcv_options + 2113 udp->udp_label_len, ptr, len); 2114 } 2115 return (len); 2116 case IP_TOS: 2117 case T_IP_TOS: 2118 *i1 = (int)udp->udp_type_of_service; 2119 break; /* goto sizeof (int) option return */ 2120 case IP_TTL: 2121 *i1 = (int)udp->udp_ttl; 2122 break; /* goto sizeof (int) option return */ 2123 case IP_DHCPINIT_IF: 2124 return (-EINVAL); 2125 case IP_NEXTHOP: 2126 case IP_RECVPKTINFO: 2127 /* 2128 * This also handles IP_PKTINFO. 2129 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2130 * Differentiation is based on the size of the argument 2131 * passed in. 2132 * This option is handled in IP which will return an 2133 * error for IP_PKTINFO as it's not supported as a 2134 * sticky option. 2135 */ 2136 return (-EINVAL); 2137 case IP_MULTICAST_IF: 2138 /* 0 address if not set */ 2139 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 2140 return (sizeof (ipaddr_t)); 2141 case IP_MULTICAST_TTL: 2142 *(uchar_t *)ptr = udp->udp_multicast_ttl; 2143 return (sizeof (uchar_t)); 2144 case IP_MULTICAST_LOOP: 2145 *ptr = connp->conn_multicast_loop; 2146 return (sizeof (uint8_t)); 2147 case IP_RECVOPTS: 2148 *i1 = udp->udp_recvopts; 2149 break; /* goto sizeof (int) option return */ 2150 case IP_RECVDSTADDR: 2151 *i1 = udp->udp_recvdstaddr; 2152 break; /* goto sizeof (int) option return */ 2153 case IP_RECVIF: 2154 *i1 = udp->udp_recvif; 2155 break; /* goto sizeof (int) option return */ 2156 case IP_RECVSLLA: 2157 *i1 = udp->udp_recvslla; 2158 break; /* goto sizeof (int) option return */ 2159 case IP_RECVTTL: 2160 *i1 = udp->udp_recvttl; 2161 break; /* goto sizeof (int) option return */ 2162 case IP_ADD_MEMBERSHIP: 2163 case IP_DROP_MEMBERSHIP: 2164 case IP_BLOCK_SOURCE: 2165 case IP_UNBLOCK_SOURCE: 2166 case IP_ADD_SOURCE_MEMBERSHIP: 2167 case IP_DROP_SOURCE_MEMBERSHIP: 2168 case MCAST_JOIN_GROUP: 2169 case MCAST_LEAVE_GROUP: 2170 case MCAST_BLOCK_SOURCE: 2171 case MCAST_UNBLOCK_SOURCE: 2172 case MCAST_JOIN_SOURCE_GROUP: 2173 case MCAST_LEAVE_SOURCE_GROUP: 2174 /* cannot "get" the value for these */ 2175 return (-1); 2176 case IP_BOUND_IF: 2177 /* Zero if not set */ 2178 *i1 = udp->udp_bound_if; 2179 break; /* goto sizeof (int) option return */ 2180 case IP_UNSPEC_SRC: 2181 *i1 = udp->udp_unspec_source; 2182 break; /* goto sizeof (int) option return */ 2183 case IP_BROADCAST_TTL: 2184 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 2185 return (sizeof (uchar_t)); 2186 default: 2187 return (-1); 2188 } 2189 break; 2190 case IPPROTO_IPV6: 2191 if (udp->udp_family != AF_INET6) 2192 return (-1); 2193 switch (name) { 2194 case IPV6_UNICAST_HOPS: 2195 *i1 = (unsigned int)udp->udp_ttl; 2196 break; /* goto sizeof (int) option return */ 2197 case IPV6_MULTICAST_IF: 2198 /* 0 index if not set */ 2199 *i1 = udp->udp_multicast_if_index; 2200 break; /* goto sizeof (int) option return */ 2201 case IPV6_MULTICAST_HOPS: 2202 *i1 = udp->udp_multicast_ttl; 2203 break; /* goto sizeof (int) option return */ 2204 case IPV6_MULTICAST_LOOP: 2205 *i1 = connp->conn_multicast_loop; 2206 break; /* goto sizeof (int) option return */ 2207 case IPV6_JOIN_GROUP: 2208 case IPV6_LEAVE_GROUP: 2209 case MCAST_JOIN_GROUP: 2210 case MCAST_LEAVE_GROUP: 2211 case MCAST_BLOCK_SOURCE: 2212 case MCAST_UNBLOCK_SOURCE: 2213 case MCAST_JOIN_SOURCE_GROUP: 2214 case MCAST_LEAVE_SOURCE_GROUP: 2215 /* cannot "get" the value for these */ 2216 return (-1); 2217 case IPV6_BOUND_IF: 2218 /* Zero if not set */ 2219 *i1 = udp->udp_bound_if; 2220 break; /* goto sizeof (int) option return */ 2221 case IPV6_UNSPEC_SRC: 2222 *i1 = udp->udp_unspec_source; 2223 break; /* goto sizeof (int) option return */ 2224 case IPV6_RECVPKTINFO: 2225 *i1 = udp->udp_ip_recvpktinfo; 2226 break; /* goto sizeof (int) option return */ 2227 case IPV6_RECVTCLASS: 2228 *i1 = udp->udp_ipv6_recvtclass; 2229 break; /* goto sizeof (int) option return */ 2230 case IPV6_RECVPATHMTU: 2231 *i1 = udp->udp_ipv6_recvpathmtu; 2232 break; /* goto sizeof (int) option return */ 2233 case IPV6_RECVHOPLIMIT: 2234 *i1 = udp->udp_ipv6_recvhoplimit; 2235 break; /* goto sizeof (int) option return */ 2236 case IPV6_RECVHOPOPTS: 2237 *i1 = udp->udp_ipv6_recvhopopts; 2238 break; /* goto sizeof (int) option return */ 2239 case IPV6_RECVDSTOPTS: 2240 *i1 = udp->udp_ipv6_recvdstopts; 2241 break; /* goto sizeof (int) option return */ 2242 case _OLD_IPV6_RECVDSTOPTS: 2243 *i1 = udp->udp_old_ipv6_recvdstopts; 2244 break; /* goto sizeof (int) option return */ 2245 case IPV6_RECVRTHDRDSTOPTS: 2246 *i1 = udp->udp_ipv6_recvrthdrdstopts; 2247 break; /* goto sizeof (int) option return */ 2248 case IPV6_RECVRTHDR: 2249 *i1 = udp->udp_ipv6_recvrthdr; 2250 break; /* goto sizeof (int) option return */ 2251 case IPV6_PKTINFO: { 2252 /* XXX assumes that caller has room for max size! */ 2253 struct in6_pktinfo *pkti; 2254 2255 pkti = (struct in6_pktinfo *)ptr; 2256 if (ipp->ipp_fields & IPPF_IFINDEX) 2257 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2258 else 2259 pkti->ipi6_ifindex = 0; 2260 if (ipp->ipp_fields & IPPF_ADDR) 2261 pkti->ipi6_addr = ipp->ipp_addr; 2262 else 2263 pkti->ipi6_addr = ipv6_all_zeros; 2264 return (sizeof (struct in6_pktinfo)); 2265 } 2266 case IPV6_TCLASS: 2267 if (ipp->ipp_fields & IPPF_TCLASS) 2268 *i1 = ipp->ipp_tclass; 2269 else 2270 *i1 = IPV6_FLOW_TCLASS( 2271 IPV6_DEFAULT_VERS_AND_FLOW); 2272 break; /* goto sizeof (int) option return */ 2273 case IPV6_NEXTHOP: { 2274 sin6_t *sin6 = (sin6_t *)ptr; 2275 2276 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2277 return (0); 2278 *sin6 = sin6_null; 2279 sin6->sin6_family = AF_INET6; 2280 sin6->sin6_addr = ipp->ipp_nexthop; 2281 return (sizeof (sin6_t)); 2282 } 2283 case IPV6_HOPOPTS: 2284 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2285 return (0); 2286 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 2287 return (0); 2288 /* 2289 * The cipso/label option is added by kernel. 2290 * User is not usually aware of this option. 2291 * We copy out the hbh opt after the label option. 2292 */ 2293 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 2294 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2295 if (udp->udp_label_len_v6 > 0) { 2296 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2297 ptr[1] = (ipp->ipp_hopoptslen - 2298 udp->udp_label_len_v6 + 7) / 8 - 1; 2299 } 2300 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2301 case IPV6_RTHDRDSTOPTS: 2302 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2303 return (0); 2304 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2305 return (ipp->ipp_rtdstoptslen); 2306 case IPV6_RTHDR: 2307 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2308 return (0); 2309 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2310 return (ipp->ipp_rthdrlen); 2311 case IPV6_DSTOPTS: 2312 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 2313 return (0); 2314 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2315 return (ipp->ipp_dstoptslen); 2316 case IPV6_PATHMTU: 2317 return (ip_fill_mtuinfo(&udp->udp_v6dst, 2318 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 2319 us->us_netstack)); 2320 default: 2321 return (-1); 2322 } 2323 break; 2324 case IPPROTO_UDP: 2325 switch (name) { 2326 case UDP_ANONPRIVBIND: 2327 *i1 = udp->udp_anon_priv_bind; 2328 break; 2329 case UDP_EXCLBIND: 2330 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 2331 break; 2332 case UDP_RCVHDR: 2333 *i1 = udp->udp_rcvhdr ? 1 : 0; 2334 break; 2335 case UDP_NAT_T_ENDPOINT: 2336 *i1 = udp->udp_nat_t_endpoint; 2337 break; 2338 default: 2339 return (-1); 2340 } 2341 break; 2342 default: 2343 return (-1); 2344 } 2345 return (sizeof (int)); 2346 } 2347 2348 int 2349 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2350 { 2351 udp_t *udp; 2352 int err; 2353 2354 udp = Q_TO_UDP(q); 2355 2356 rw_enter(&udp->udp_rwlock, RW_READER); 2357 err = udp_opt_get(Q_TO_CONN(q), level, name, ptr); 2358 rw_exit(&udp->udp_rwlock); 2359 return (err); 2360 } 2361 2362 /* 2363 * This routine sets socket options. 2364 */ 2365 /* ARGSUSED */ 2366 static int 2367 udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2368 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2369 void *thisdg_attrs, boolean_t checkonly) 2370 { 2371 udpattrs_t *attrs = thisdg_attrs; 2372 int *i1 = (int *)invalp; 2373 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2374 udp_t *udp = connp->conn_udp; 2375 udp_stack_t *us = udp->udp_us; 2376 int error; 2377 uint_t newlen; 2378 size_t sth_wroff; 2379 2380 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 2381 /* 2382 * For fixed length options, no sanity check 2383 * of passed in length is done. It is assumed *_optcom_req() 2384 * routines do the right thing. 2385 */ 2386 switch (level) { 2387 case SOL_SOCKET: 2388 switch (name) { 2389 case SO_REUSEADDR: 2390 if (!checkonly) { 2391 udp->udp_reuseaddr = onoff; 2392 PASS_OPT_TO_IP(connp); 2393 } 2394 break; 2395 case SO_DEBUG: 2396 if (!checkonly) 2397 udp->udp_debug = onoff; 2398 break; 2399 /* 2400 * The following three items are available here, 2401 * but are only meaningful to IP. 2402 */ 2403 case SO_DONTROUTE: 2404 if (!checkonly) { 2405 udp->udp_dontroute = onoff; 2406 PASS_OPT_TO_IP(connp); 2407 } 2408 break; 2409 case SO_USELOOPBACK: 2410 if (!checkonly) { 2411 udp->udp_useloopback = onoff; 2412 PASS_OPT_TO_IP(connp); 2413 } 2414 break; 2415 case SO_BROADCAST: 2416 if (!checkonly) { 2417 udp->udp_broadcast = onoff; 2418 PASS_OPT_TO_IP(connp); 2419 } 2420 break; 2421 2422 case SO_SNDBUF: 2423 if (*i1 > us->us_max_buf) { 2424 *outlenp = 0; 2425 return (ENOBUFS); 2426 } 2427 if (!checkonly) { 2428 udp->udp_xmit_hiwat = *i1; 2429 connp->conn_wq->q_hiwat = *i1; 2430 } 2431 break; 2432 case SO_RCVBUF: 2433 if (*i1 > us->us_max_buf) { 2434 *outlenp = 0; 2435 return (ENOBUFS); 2436 } 2437 if (!checkonly) { 2438 int size; 2439 2440 udp->udp_rcv_disply_hiwat = *i1; 2441 size = udp_set_rcv_hiwat(udp, *i1); 2442 rw_exit(&udp->udp_rwlock); 2443 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2444 size); 2445 rw_enter(&udp->udp_rwlock, RW_WRITER); 2446 } 2447 break; 2448 case SO_DGRAM_ERRIND: 2449 if (!checkonly) 2450 udp->udp_dgram_errind = onoff; 2451 break; 2452 case SO_RECVUCRED: 2453 if (!checkonly) 2454 udp->udp_recvucred = onoff; 2455 break; 2456 case SO_ALLZONES: 2457 /* 2458 * "soft" error (negative) 2459 * option not handled at this level 2460 * Do not modify *outlenp. 2461 */ 2462 return (-EINVAL); 2463 case SO_TIMESTAMP: 2464 if (!checkonly) 2465 udp->udp_timestamp = onoff; 2466 break; 2467 case SO_ANON_MLP: 2468 if (!checkonly) { 2469 connp->conn_anon_mlp = onoff; 2470 PASS_OPT_TO_IP(connp); 2471 } 2472 break; 2473 case SO_MAC_EXEMPT: 2474 if (secpolicy_net_mac_aware(cr) != 0 || 2475 udp->udp_state != TS_UNBND) 2476 return (EACCES); 2477 if (!checkonly) { 2478 connp->conn_mac_exempt = onoff; 2479 PASS_OPT_TO_IP(connp); 2480 } 2481 break; 2482 case SCM_UCRED: { 2483 struct ucred_s *ucr; 2484 cred_t *cr, *newcr; 2485 ts_label_t *tsl; 2486 2487 /* 2488 * Only sockets that have proper privileges and are 2489 * bound to MLPs will have any other value here, so 2490 * this implicitly tests for privilege to set label. 2491 */ 2492 if (connp->conn_mlp_type == mlptSingle) 2493 break; 2494 ucr = (struct ucred_s *)invalp; 2495 if (inlen != ucredsize || 2496 ucr->uc_labeloff < sizeof (*ucr) || 2497 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 2498 return (EINVAL); 2499 if (!checkonly) { 2500 mblk_t *mb; 2501 pid_t cpid; 2502 2503 if (attrs == NULL || 2504 (mb = attrs->udpattr_mb) == NULL) 2505 return (EINVAL); 2506 if ((cr = msg_getcred(mb, &cpid)) == NULL) 2507 cr = udp->udp_connp->conn_cred; 2508 ASSERT(cr != NULL); 2509 if ((tsl = crgetlabel(cr)) == NULL) 2510 return (EINVAL); 2511 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 2512 tsl->tsl_doi, KM_NOSLEEP); 2513 if (newcr == NULL) 2514 return (ENOSR); 2515 mblk_setcred(mb, newcr, cpid); 2516 attrs->udpattr_credset = B_TRUE; 2517 crfree(newcr); 2518 } 2519 break; 2520 } 2521 case SO_EXCLBIND: 2522 if (!checkonly) 2523 udp->udp_exclbind = onoff; 2524 break; 2525 case SO_RCVTIMEO: 2526 case SO_SNDTIMEO: 2527 /* 2528 * Pass these two options in order for third part 2529 * protocol usage. Here just return directly. 2530 */ 2531 return (0); 2532 default: 2533 *outlenp = 0; 2534 return (EINVAL); 2535 } 2536 break; 2537 case IPPROTO_IP: 2538 if (udp->udp_family != AF_INET) { 2539 *outlenp = 0; 2540 return (ENOPROTOOPT); 2541 } 2542 switch (name) { 2543 case IP_OPTIONS: 2544 case T_IP_OPTIONS: 2545 /* Save options for use by IP. */ 2546 newlen = inlen + udp->udp_label_len; 2547 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 2548 *outlenp = 0; 2549 return (EINVAL); 2550 } 2551 if (checkonly) 2552 break; 2553 2554 /* 2555 * Update the stored options taking into account 2556 * any CIPSO option which we should not overwrite. 2557 */ 2558 if (!tsol_option_set(&udp->udp_ip_snd_options, 2559 &udp->udp_ip_snd_options_len, 2560 udp->udp_label_len, invalp, inlen)) { 2561 *outlenp = 0; 2562 return (ENOMEM); 2563 } 2564 2565 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2566 UDPH_SIZE + udp->udp_ip_snd_options_len; 2567 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 2568 rw_exit(&udp->udp_rwlock); 2569 (void) proto_set_tx_wroff(connp->conn_rq, connp, 2570 sth_wroff); 2571 rw_enter(&udp->udp_rwlock, RW_WRITER); 2572 break; 2573 2574 case IP_TTL: 2575 if (!checkonly) { 2576 udp->udp_ttl = (uchar_t)*i1; 2577 } 2578 break; 2579 case IP_TOS: 2580 case T_IP_TOS: 2581 if (!checkonly) { 2582 udp->udp_type_of_service = (uchar_t)*i1; 2583 } 2584 break; 2585 case IP_MULTICAST_IF: { 2586 /* 2587 * TODO should check OPTMGMT reply and undo this if 2588 * there is an error. 2589 */ 2590 struct in_addr *inap = (struct in_addr *)invalp; 2591 if (!checkonly) { 2592 udp->udp_multicast_if_addr = 2593 inap->s_addr; 2594 PASS_OPT_TO_IP(connp); 2595 } 2596 break; 2597 } 2598 case IP_MULTICAST_TTL: 2599 if (!checkonly) 2600 udp->udp_multicast_ttl = *invalp; 2601 break; 2602 case IP_MULTICAST_LOOP: 2603 if (!checkonly) { 2604 connp->conn_multicast_loop = *invalp; 2605 PASS_OPT_TO_IP(connp); 2606 } 2607 break; 2608 case IP_RECVOPTS: 2609 if (!checkonly) 2610 udp->udp_recvopts = onoff; 2611 break; 2612 case IP_RECVDSTADDR: 2613 if (!checkonly) 2614 udp->udp_recvdstaddr = onoff; 2615 break; 2616 case IP_RECVIF: 2617 if (!checkonly) { 2618 udp->udp_recvif = onoff; 2619 PASS_OPT_TO_IP(connp); 2620 } 2621 break; 2622 case IP_RECVSLLA: 2623 if (!checkonly) { 2624 udp->udp_recvslla = onoff; 2625 PASS_OPT_TO_IP(connp); 2626 } 2627 break; 2628 case IP_RECVTTL: 2629 if (!checkonly) 2630 udp->udp_recvttl = onoff; 2631 break; 2632 case IP_PKTINFO: { 2633 /* 2634 * This also handles IP_RECVPKTINFO. 2635 * IP_PKTINFO and IP_RECVPKTINFO have same value. 2636 * Differentiation is based on the size of the 2637 * argument passed in. 2638 */ 2639 struct in_pktinfo *pktinfop; 2640 ip4_pkt_t *attr_pktinfop; 2641 2642 if (checkonly) 2643 break; 2644 2645 if (inlen == sizeof (int)) { 2646 /* 2647 * This is IP_RECVPKTINFO option. 2648 * Keep a local copy of whether this option is 2649 * set or not and pass it down to IP for 2650 * processing. 2651 */ 2652 2653 udp->udp_ip_recvpktinfo = onoff; 2654 return (-EINVAL); 2655 } 2656 2657 if (attrs == NULL || 2658 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 2659 /* 2660 * sticky option or no buffer to return 2661 * the results. 2662 */ 2663 return (EINVAL); 2664 } 2665 2666 if (inlen != sizeof (struct in_pktinfo)) 2667 return (EINVAL); 2668 2669 pktinfop = (struct in_pktinfo *)invalp; 2670 2671 /* 2672 * At least one of the values should be specified 2673 */ 2674 if (pktinfop->ipi_ifindex == 0 && 2675 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2676 return (EINVAL); 2677 } 2678 2679 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2680 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2681 2682 break; 2683 } 2684 case IP_ADD_MEMBERSHIP: 2685 case IP_DROP_MEMBERSHIP: 2686 case IP_BLOCK_SOURCE: 2687 case IP_UNBLOCK_SOURCE: 2688 case IP_ADD_SOURCE_MEMBERSHIP: 2689 case IP_DROP_SOURCE_MEMBERSHIP: 2690 case MCAST_JOIN_GROUP: 2691 case MCAST_LEAVE_GROUP: 2692 case MCAST_BLOCK_SOURCE: 2693 case MCAST_UNBLOCK_SOURCE: 2694 case MCAST_JOIN_SOURCE_GROUP: 2695 case MCAST_LEAVE_SOURCE_GROUP: 2696 case IP_SEC_OPT: 2697 case IP_NEXTHOP: 2698 case IP_DHCPINIT_IF: 2699 /* 2700 * "soft" error (negative) 2701 * option not handled at this level 2702 * Do not modify *outlenp. 2703 */ 2704 return (-EINVAL); 2705 case IP_BOUND_IF: 2706 if (!checkonly) { 2707 udp->udp_bound_if = *i1; 2708 PASS_OPT_TO_IP(connp); 2709 } 2710 break; 2711 case IP_UNSPEC_SRC: 2712 if (!checkonly) { 2713 udp->udp_unspec_source = onoff; 2714 PASS_OPT_TO_IP(connp); 2715 } 2716 break; 2717 case IP_BROADCAST_TTL: 2718 if (!checkonly) 2719 connp->conn_broadcast_ttl = *invalp; 2720 break; 2721 default: 2722 *outlenp = 0; 2723 return (EINVAL); 2724 } 2725 break; 2726 case IPPROTO_IPV6: { 2727 ip6_pkt_t *ipp; 2728 boolean_t sticky; 2729 2730 if (udp->udp_family != AF_INET6) { 2731 *outlenp = 0; 2732 return (ENOPROTOOPT); 2733 } 2734 /* 2735 * Deal with both sticky options and ancillary data 2736 */ 2737 sticky = B_FALSE; 2738 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 2739 NULL) { 2740 /* sticky options, or none */ 2741 ipp = &udp->udp_sticky_ipp; 2742 sticky = B_TRUE; 2743 } 2744 2745 switch (name) { 2746 case IPV6_MULTICAST_IF: 2747 if (!checkonly) { 2748 udp->udp_multicast_if_index = *i1; 2749 PASS_OPT_TO_IP(connp); 2750 } 2751 break; 2752 case IPV6_UNICAST_HOPS: 2753 /* -1 means use default */ 2754 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2755 *outlenp = 0; 2756 return (EINVAL); 2757 } 2758 if (!checkonly) { 2759 if (*i1 == -1) { 2760 udp->udp_ttl = ipp->ipp_unicast_hops = 2761 us->us_ipv6_hoplimit; 2762 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2763 /* Pass modified value to IP. */ 2764 *i1 = udp->udp_ttl; 2765 } else { 2766 udp->udp_ttl = ipp->ipp_unicast_hops = 2767 (uint8_t)*i1; 2768 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2769 } 2770 /* Rebuild the header template */ 2771 error = udp_build_hdrs(udp); 2772 if (error != 0) { 2773 *outlenp = 0; 2774 return (error); 2775 } 2776 } 2777 break; 2778 case IPV6_MULTICAST_HOPS: 2779 /* -1 means use default */ 2780 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2781 *outlenp = 0; 2782 return (EINVAL); 2783 } 2784 if (!checkonly) { 2785 if (*i1 == -1) { 2786 udp->udp_multicast_ttl = 2787 ipp->ipp_multicast_hops = 2788 IP_DEFAULT_MULTICAST_TTL; 2789 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2790 /* Pass modified value to IP. */ 2791 *i1 = udp->udp_multicast_ttl; 2792 } else { 2793 udp->udp_multicast_ttl = 2794 ipp->ipp_multicast_hops = 2795 (uint8_t)*i1; 2796 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2797 } 2798 } 2799 break; 2800 case IPV6_MULTICAST_LOOP: 2801 if (*i1 != 0 && *i1 != 1) { 2802 *outlenp = 0; 2803 return (EINVAL); 2804 } 2805 if (!checkonly) { 2806 connp->conn_multicast_loop = *i1; 2807 PASS_OPT_TO_IP(connp); 2808 } 2809 break; 2810 case IPV6_JOIN_GROUP: 2811 case IPV6_LEAVE_GROUP: 2812 case MCAST_JOIN_GROUP: 2813 case MCAST_LEAVE_GROUP: 2814 case MCAST_BLOCK_SOURCE: 2815 case MCAST_UNBLOCK_SOURCE: 2816 case MCAST_JOIN_SOURCE_GROUP: 2817 case MCAST_LEAVE_SOURCE_GROUP: 2818 /* 2819 * "soft" error (negative) 2820 * option not handled at this level 2821 * Note: Do not modify *outlenp 2822 */ 2823 return (-EINVAL); 2824 case IPV6_BOUND_IF: 2825 if (!checkonly) { 2826 udp->udp_bound_if = *i1; 2827 PASS_OPT_TO_IP(connp); 2828 } 2829 break; 2830 case IPV6_UNSPEC_SRC: 2831 if (!checkonly) { 2832 udp->udp_unspec_source = onoff; 2833 PASS_OPT_TO_IP(connp); 2834 } 2835 break; 2836 /* 2837 * Set boolean switches for ancillary data delivery 2838 */ 2839 case IPV6_RECVPKTINFO: 2840 if (!checkonly) { 2841 udp->udp_ip_recvpktinfo = onoff; 2842 PASS_OPT_TO_IP(connp); 2843 } 2844 break; 2845 case IPV6_RECVTCLASS: 2846 if (!checkonly) { 2847 udp->udp_ipv6_recvtclass = onoff; 2848 PASS_OPT_TO_IP(connp); 2849 } 2850 break; 2851 case IPV6_RECVPATHMTU: 2852 if (!checkonly) { 2853 udp->udp_ipv6_recvpathmtu = onoff; 2854 PASS_OPT_TO_IP(connp); 2855 } 2856 break; 2857 case IPV6_RECVHOPLIMIT: 2858 if (!checkonly) { 2859 udp->udp_ipv6_recvhoplimit = onoff; 2860 PASS_OPT_TO_IP(connp); 2861 } 2862 break; 2863 case IPV6_RECVHOPOPTS: 2864 if (!checkonly) { 2865 udp->udp_ipv6_recvhopopts = onoff; 2866 PASS_OPT_TO_IP(connp); 2867 } 2868 break; 2869 case IPV6_RECVDSTOPTS: 2870 if (!checkonly) { 2871 udp->udp_ipv6_recvdstopts = onoff; 2872 PASS_OPT_TO_IP(connp); 2873 } 2874 break; 2875 case _OLD_IPV6_RECVDSTOPTS: 2876 if (!checkonly) 2877 udp->udp_old_ipv6_recvdstopts = onoff; 2878 break; 2879 case IPV6_RECVRTHDRDSTOPTS: 2880 if (!checkonly) { 2881 udp->udp_ipv6_recvrthdrdstopts = onoff; 2882 PASS_OPT_TO_IP(connp); 2883 } 2884 break; 2885 case IPV6_RECVRTHDR: 2886 if (!checkonly) { 2887 udp->udp_ipv6_recvrthdr = onoff; 2888 PASS_OPT_TO_IP(connp); 2889 } 2890 break; 2891 /* 2892 * Set sticky options or ancillary data. 2893 * If sticky options, (re)build any extension headers 2894 * that might be needed as a result. 2895 */ 2896 case IPV6_PKTINFO: 2897 /* 2898 * The source address and ifindex are verified 2899 * in ip_opt_set(). For ancillary data the 2900 * source address is checked in ip_wput_v6. 2901 */ 2902 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2903 return (EINVAL); 2904 if (checkonly) 2905 break; 2906 2907 if (inlen == 0) { 2908 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2909 ipp->ipp_sticky_ignored |= 2910 (IPPF_IFINDEX|IPPF_ADDR); 2911 } else { 2912 struct in6_pktinfo *pkti; 2913 2914 pkti = (struct in6_pktinfo *)invalp; 2915 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2916 ipp->ipp_addr = pkti->ipi6_addr; 2917 if (ipp->ipp_ifindex != 0) 2918 ipp->ipp_fields |= IPPF_IFINDEX; 2919 else 2920 ipp->ipp_fields &= ~IPPF_IFINDEX; 2921 if (!IN6_IS_ADDR_UNSPECIFIED( 2922 &ipp->ipp_addr)) 2923 ipp->ipp_fields |= IPPF_ADDR; 2924 else 2925 ipp->ipp_fields &= ~IPPF_ADDR; 2926 } 2927 if (sticky) { 2928 error = udp_build_hdrs(udp); 2929 if (error != 0) 2930 return (error); 2931 PASS_OPT_TO_IP(connp); 2932 } 2933 break; 2934 case IPV6_HOPLIMIT: 2935 if (sticky) 2936 return (EINVAL); 2937 if (inlen != 0 && inlen != sizeof (int)) 2938 return (EINVAL); 2939 if (checkonly) 2940 break; 2941 2942 if (inlen == 0) { 2943 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2944 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2945 } else { 2946 if (*i1 > 255 || *i1 < -1) 2947 return (EINVAL); 2948 if (*i1 == -1) 2949 ipp->ipp_hoplimit = 2950 us->us_ipv6_hoplimit; 2951 else 2952 ipp->ipp_hoplimit = *i1; 2953 ipp->ipp_fields |= IPPF_HOPLIMIT; 2954 } 2955 break; 2956 case IPV6_TCLASS: 2957 if (inlen != 0 && inlen != sizeof (int)) 2958 return (EINVAL); 2959 if (checkonly) 2960 break; 2961 2962 if (inlen == 0) { 2963 ipp->ipp_fields &= ~IPPF_TCLASS; 2964 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2965 } else { 2966 if (*i1 > 255 || *i1 < -1) 2967 return (EINVAL); 2968 if (*i1 == -1) 2969 ipp->ipp_tclass = 0; 2970 else 2971 ipp->ipp_tclass = *i1; 2972 ipp->ipp_fields |= IPPF_TCLASS; 2973 } 2974 if (sticky) { 2975 error = udp_build_hdrs(udp); 2976 if (error != 0) 2977 return (error); 2978 } 2979 break; 2980 case IPV6_NEXTHOP: 2981 /* 2982 * IP will verify that the nexthop is reachable 2983 * and fail for sticky options. 2984 */ 2985 if (inlen != 0 && inlen != sizeof (sin6_t)) 2986 return (EINVAL); 2987 if (checkonly) 2988 break; 2989 2990 if (inlen == 0) { 2991 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2992 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2993 } else { 2994 sin6_t *sin6 = (sin6_t *)invalp; 2995 2996 if (sin6->sin6_family != AF_INET6) { 2997 return (EAFNOSUPPORT); 2998 } 2999 if (IN6_IS_ADDR_V4MAPPED( 3000 &sin6->sin6_addr)) 3001 return (EADDRNOTAVAIL); 3002 ipp->ipp_nexthop = sin6->sin6_addr; 3003 if (!IN6_IS_ADDR_UNSPECIFIED( 3004 &ipp->ipp_nexthop)) 3005 ipp->ipp_fields |= IPPF_NEXTHOP; 3006 else 3007 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3008 } 3009 if (sticky) { 3010 error = udp_build_hdrs(udp); 3011 if (error != 0) 3012 return (error); 3013 PASS_OPT_TO_IP(connp); 3014 } 3015 break; 3016 case IPV6_HOPOPTS: { 3017 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 3018 /* 3019 * Sanity checks - minimum size, size a multiple of 3020 * eight bytes, and matching size passed in. 3021 */ 3022 if (inlen != 0 && 3023 inlen != (8 * (hopts->ip6h_len + 1))) 3024 return (EINVAL); 3025 3026 if (checkonly) 3027 break; 3028 3029 error = optcom_pkt_set(invalp, inlen, sticky, 3030 (uchar_t **)&ipp->ipp_hopopts, 3031 &ipp->ipp_hopoptslen, 3032 sticky ? udp->udp_label_len_v6 : 0); 3033 if (error != 0) 3034 return (error); 3035 if (ipp->ipp_hopoptslen == 0) { 3036 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3037 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3038 } else { 3039 ipp->ipp_fields |= IPPF_HOPOPTS; 3040 } 3041 if (sticky) { 3042 error = udp_build_hdrs(udp); 3043 if (error != 0) 3044 return (error); 3045 } 3046 break; 3047 } 3048 case IPV6_RTHDRDSTOPTS: { 3049 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3050 3051 /* 3052 * Sanity checks - minimum size, size a multiple of 3053 * eight bytes, and matching size passed in. 3054 */ 3055 if (inlen != 0 && 3056 inlen != (8 * (dopts->ip6d_len + 1))) 3057 return (EINVAL); 3058 3059 if (checkonly) 3060 break; 3061 3062 if (inlen == 0) { 3063 if (sticky && 3064 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 3065 kmem_free(ipp->ipp_rtdstopts, 3066 ipp->ipp_rtdstoptslen); 3067 ipp->ipp_rtdstopts = NULL; 3068 ipp->ipp_rtdstoptslen = 0; 3069 } 3070 3071 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 3072 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 3073 } else { 3074 error = optcom_pkt_set(invalp, inlen, sticky, 3075 (uchar_t **)&ipp->ipp_rtdstopts, 3076 &ipp->ipp_rtdstoptslen, 0); 3077 if (error != 0) 3078 return (error); 3079 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3080 } 3081 if (sticky) { 3082 error = udp_build_hdrs(udp); 3083 if (error != 0) 3084 return (error); 3085 } 3086 break; 3087 } 3088 case IPV6_DSTOPTS: { 3089 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3090 3091 /* 3092 * Sanity checks - minimum size, size a multiple of 3093 * eight bytes, and matching size passed in. 3094 */ 3095 if (inlen != 0 && 3096 inlen != (8 * (dopts->ip6d_len + 1))) 3097 return (EINVAL); 3098 3099 if (checkonly) 3100 break; 3101 3102 if (inlen == 0) { 3103 if (sticky && 3104 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3105 kmem_free(ipp->ipp_dstopts, 3106 ipp->ipp_dstoptslen); 3107 ipp->ipp_dstopts = NULL; 3108 ipp->ipp_dstoptslen = 0; 3109 } 3110 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3111 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3112 } else { 3113 error = optcom_pkt_set(invalp, inlen, sticky, 3114 (uchar_t **)&ipp->ipp_dstopts, 3115 &ipp->ipp_dstoptslen, 0); 3116 if (error != 0) 3117 return (error); 3118 ipp->ipp_fields |= IPPF_DSTOPTS; 3119 } 3120 if (sticky) { 3121 error = udp_build_hdrs(udp); 3122 if (error != 0) 3123 return (error); 3124 } 3125 break; 3126 } 3127 case IPV6_RTHDR: { 3128 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3129 3130 /* 3131 * Sanity checks - minimum size, size a multiple of 3132 * eight bytes, and matching size passed in. 3133 */ 3134 if (inlen != 0 && 3135 inlen != (8 * (rt->ip6r_len + 1))) 3136 return (EINVAL); 3137 3138 if (checkonly) 3139 break; 3140 3141 if (inlen == 0) { 3142 if (sticky && 3143 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3144 kmem_free(ipp->ipp_rthdr, 3145 ipp->ipp_rthdrlen); 3146 ipp->ipp_rthdr = NULL; 3147 ipp->ipp_rthdrlen = 0; 3148 } 3149 ipp->ipp_fields &= ~IPPF_RTHDR; 3150 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3151 } else { 3152 error = optcom_pkt_set(invalp, inlen, sticky, 3153 (uchar_t **)&ipp->ipp_rthdr, 3154 &ipp->ipp_rthdrlen, 0); 3155 if (error != 0) 3156 return (error); 3157 ipp->ipp_fields |= IPPF_RTHDR; 3158 } 3159 if (sticky) { 3160 error = udp_build_hdrs(udp); 3161 if (error != 0) 3162 return (error); 3163 } 3164 break; 3165 } 3166 3167 case IPV6_DONTFRAG: 3168 if (checkonly) 3169 break; 3170 3171 if (onoff) { 3172 ipp->ipp_fields |= IPPF_DONTFRAG; 3173 } else { 3174 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3175 } 3176 break; 3177 3178 case IPV6_USE_MIN_MTU: 3179 if (inlen != sizeof (int)) 3180 return (EINVAL); 3181 3182 if (*i1 < -1 || *i1 > 1) 3183 return (EINVAL); 3184 3185 if (checkonly) 3186 break; 3187 3188 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3189 ipp->ipp_use_min_mtu = *i1; 3190 break; 3191 3192 case IPV6_SEC_OPT: 3193 case IPV6_SRC_PREFERENCES: 3194 case IPV6_V6ONLY: 3195 /* Handled at the IP level */ 3196 return (-EINVAL); 3197 default: 3198 *outlenp = 0; 3199 return (EINVAL); 3200 } 3201 break; 3202 } /* end IPPROTO_IPV6 */ 3203 case IPPROTO_UDP: 3204 switch (name) { 3205 case UDP_ANONPRIVBIND: 3206 if ((error = secpolicy_net_privaddr(cr, 0, 3207 IPPROTO_UDP)) != 0) { 3208 *outlenp = 0; 3209 return (error); 3210 } 3211 if (!checkonly) { 3212 udp->udp_anon_priv_bind = onoff; 3213 } 3214 break; 3215 case UDP_EXCLBIND: 3216 if (!checkonly) 3217 udp->udp_exclbind = onoff; 3218 break; 3219 case UDP_RCVHDR: 3220 if (!checkonly) 3221 udp->udp_rcvhdr = onoff; 3222 break; 3223 case UDP_NAT_T_ENDPOINT: 3224 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 3225 *outlenp = 0; 3226 return (error); 3227 } 3228 3229 /* 3230 * Use udp_family instead so we can avoid ambiguitites 3231 * with AF_INET6 sockets that may switch from IPv4 3232 * to IPv6. 3233 */ 3234 if (udp->udp_family != AF_INET) { 3235 *outlenp = 0; 3236 return (EAFNOSUPPORT); 3237 } 3238 3239 if (!checkonly) { 3240 int size; 3241 3242 udp->udp_nat_t_endpoint = onoff; 3243 3244 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3245 UDPH_SIZE + udp->udp_ip_snd_options_len; 3246 3247 /* Also, adjust wroff */ 3248 if (onoff) { 3249 udp->udp_max_hdr_len += 3250 sizeof (uint32_t); 3251 } 3252 size = udp->udp_max_hdr_len + 3253 us->us_wroff_extra; 3254 (void) proto_set_tx_wroff(connp->conn_rq, connp, 3255 size); 3256 } 3257 break; 3258 default: 3259 *outlenp = 0; 3260 return (EINVAL); 3261 } 3262 break; 3263 default: 3264 *outlenp = 0; 3265 return (EINVAL); 3266 } 3267 /* 3268 * Common case of OK return with outval same as inval. 3269 */ 3270 if (invalp != outvalp) { 3271 /* don't trust bcopy for identical src/dst */ 3272 (void) bcopy(invalp, outvalp, inlen); 3273 } 3274 *outlenp = inlen; 3275 return (0); 3276 } 3277 3278 int 3279 udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3280 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3281 void *thisdg_attrs, cred_t *cr) 3282 { 3283 int error; 3284 boolean_t checkonly; 3285 3286 error = 0; 3287 switch (optset_context) { 3288 case SETFN_OPTCOM_CHECKONLY: 3289 checkonly = B_TRUE; 3290 /* 3291 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3292 * inlen != 0 implies value supplied and 3293 * we have to "pretend" to set it. 3294 * inlen == 0 implies that there is no 3295 * value part in T_CHECK request and just validation 3296 * done elsewhere should be enough, we just return here. 3297 */ 3298 if (inlen == 0) { 3299 *outlenp = 0; 3300 goto done; 3301 } 3302 break; 3303 case SETFN_OPTCOM_NEGOTIATE: 3304 checkonly = B_FALSE; 3305 break; 3306 case SETFN_UD_NEGOTIATE: 3307 case SETFN_CONN_NEGOTIATE: 3308 checkonly = B_FALSE; 3309 /* 3310 * Negotiating local and "association-related" options 3311 * through T_UNITDATA_REQ. 3312 * 3313 * Following routine can filter out ones we do not 3314 * want to be "set" this way. 3315 */ 3316 if (!udp_opt_allow_udr_set(level, name)) { 3317 *outlenp = 0; 3318 error = EINVAL; 3319 goto done; 3320 } 3321 break; 3322 default: 3323 /* 3324 * We should never get here 3325 */ 3326 *outlenp = 0; 3327 error = EINVAL; 3328 goto done; 3329 } 3330 3331 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3332 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3333 3334 error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3335 outvalp, cr, thisdg_attrs, checkonly); 3336 done: 3337 return (error); 3338 } 3339 3340 /* ARGSUSED */ 3341 int 3342 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3343 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3344 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3345 { 3346 conn_t *connp = Q_TO_CONN(q); 3347 int error; 3348 udp_t *udp = connp->conn_udp; 3349 3350 rw_enter(&udp->udp_rwlock, RW_WRITER); 3351 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 3352 outlenp, outvalp, thisdg_attrs, cr); 3353 rw_exit(&udp->udp_rwlock); 3354 return (error); 3355 } 3356 3357 /* 3358 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3359 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3360 * headers, and the udp header. 3361 * Returns failure if can't allocate memory. 3362 */ 3363 static int 3364 udp_build_hdrs(udp_t *udp) 3365 { 3366 udp_stack_t *us = udp->udp_us; 3367 uchar_t *hdrs; 3368 uint_t hdrs_len; 3369 ip6_t *ip6h; 3370 ip6i_t *ip6i; 3371 udpha_t *udpha; 3372 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3373 size_t sth_wroff; 3374 conn_t *connp = udp->udp_connp; 3375 3376 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 3377 ASSERT(connp != NULL); 3378 3379 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3380 ASSERT(hdrs_len != 0); 3381 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3382 /* Need to reallocate */ 3383 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3384 if (hdrs == NULL) 3385 return (ENOMEM); 3386 3387 if (udp->udp_sticky_hdrs_len != 0) { 3388 kmem_free(udp->udp_sticky_hdrs, 3389 udp->udp_sticky_hdrs_len); 3390 } 3391 udp->udp_sticky_hdrs = hdrs; 3392 udp->udp_sticky_hdrs_len = hdrs_len; 3393 } 3394 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3395 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3396 3397 /* Set header fields not in ipp */ 3398 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3399 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3400 ip6h = (ip6_t *)&ip6i[1]; 3401 } else { 3402 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3403 } 3404 3405 if (!(ipp->ipp_fields & IPPF_ADDR)) 3406 ip6h->ip6_src = udp->udp_v6src; 3407 3408 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3409 udpha->uha_src_port = udp->udp_port; 3410 3411 /* Try to get everything in a single mblk */ 3412 if (hdrs_len > udp->udp_max_hdr_len) { 3413 udp->udp_max_hdr_len = hdrs_len; 3414 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3415 rw_exit(&udp->udp_rwlock); 3416 (void) proto_set_tx_wroff(udp->udp_connp->conn_rq, 3417 udp->udp_connp, sth_wroff); 3418 rw_enter(&udp->udp_rwlock, RW_WRITER); 3419 } 3420 return (0); 3421 } 3422 3423 /* 3424 * This routine retrieves the value of an ND variable in a udpparam_t 3425 * structure. It is called through nd_getset when a user reads the 3426 * variable. 3427 */ 3428 /* ARGSUSED */ 3429 static int 3430 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3431 { 3432 udpparam_t *udppa = (udpparam_t *)cp; 3433 3434 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 3435 return (0); 3436 } 3437 3438 /* 3439 * Walk through the param array specified registering each element with the 3440 * named dispatch (ND) handler. 3441 */ 3442 static boolean_t 3443 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3444 { 3445 for (; cnt-- > 0; udppa++) { 3446 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3447 if (!nd_load(ndp, udppa->udp_param_name, 3448 udp_param_get, udp_param_set, 3449 (caddr_t)udppa)) { 3450 nd_free(ndp); 3451 return (B_FALSE); 3452 } 3453 } 3454 } 3455 if (!nd_load(ndp, "udp_extra_priv_ports", 3456 udp_extra_priv_ports_get, NULL, NULL)) { 3457 nd_free(ndp); 3458 return (B_FALSE); 3459 } 3460 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3461 NULL, udp_extra_priv_ports_add, NULL)) { 3462 nd_free(ndp); 3463 return (B_FALSE); 3464 } 3465 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3466 NULL, udp_extra_priv_ports_del, NULL)) { 3467 nd_free(ndp); 3468 return (B_FALSE); 3469 } 3470 if (!nd_load(ndp, "udp_status", udp_status_report, NULL, 3471 NULL)) { 3472 nd_free(ndp); 3473 return (B_FALSE); 3474 } 3475 if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL, 3476 NULL)) { 3477 nd_free(ndp); 3478 return (B_FALSE); 3479 } 3480 return (B_TRUE); 3481 } 3482 3483 /* This routine sets an ND variable in a udpparam_t structure. */ 3484 /* ARGSUSED */ 3485 static int 3486 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3487 { 3488 long new_value; 3489 udpparam_t *udppa = (udpparam_t *)cp; 3490 3491 /* 3492 * Fail the request if the new value does not lie within the 3493 * required bounds. 3494 */ 3495 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3496 new_value < udppa->udp_param_min || 3497 new_value > udppa->udp_param_max) { 3498 return (EINVAL); 3499 } 3500 3501 /* Set the new value */ 3502 udppa->udp_param_value = new_value; 3503 return (0); 3504 } 3505 3506 /* 3507 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 3508 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to 3509 * just count the length needed for allocation. If 'dbuf' is non-NULL, 3510 * then it's assumed to be allocated to be large enough. 3511 * 3512 * Returns zero if trimming of the security option causes all options to go 3513 * away. 3514 */ 3515 static size_t 3516 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 3517 { 3518 struct T_opthdr *toh; 3519 size_t hol = ipp->ipp_hopoptslen; 3520 ip6_hbh_t *dstopt = NULL; 3521 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 3522 size_t tlen, olen, plen; 3523 boolean_t deleting; 3524 const struct ip6_opt *sopt, *lastpad; 3525 struct ip6_opt *dopt; 3526 3527 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 3528 toh->level = IPPROTO_IPV6; 3529 toh->name = IPV6_HOPOPTS; 3530 toh->status = 0; 3531 dstopt = (ip6_hbh_t *)(toh + 1); 3532 } 3533 3534 /* 3535 * If labeling is enabled, then skip the label option 3536 * but get other options if there are any. 3537 */ 3538 if (is_system_labeled()) { 3539 dopt = NULL; 3540 if (dstopt != NULL) { 3541 /* will fill in ip6h_len later */ 3542 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 3543 dopt = (struct ip6_opt *)(dstopt + 1); 3544 } 3545 sopt = (const struct ip6_opt *)(srcopt + 1); 3546 hol -= sizeof (*srcopt); 3547 tlen = sizeof (*dstopt); 3548 lastpad = NULL; 3549 deleting = B_FALSE; 3550 /* 3551 * This loop finds the first (lastpad pointer) of any number of 3552 * pads that preceeds the security option, then treats the 3553 * security option as though it were a pad, and then finds the 3554 * next non-pad option (or end of list). 3555 * 3556 * It then treats the entire block as one big pad. To preserve 3557 * alignment of any options that follow, or just the end of the 3558 * list, it computes a minimal new padding size that keeps the 3559 * same alignment for the next option. 3560 * 3561 * If it encounters just a sequence of pads with no security 3562 * option, those are copied as-is rather than collapsed. 3563 * 3564 * Note that to handle the end of list case, the code makes one 3565 * loop with 'hol' set to zero. 3566 */ 3567 for (;;) { 3568 if (hol > 0) { 3569 if (sopt->ip6o_type == IP6OPT_PAD1) { 3570 if (lastpad == NULL) 3571 lastpad = sopt; 3572 sopt = (const struct ip6_opt *) 3573 &sopt->ip6o_len; 3574 hol--; 3575 continue; 3576 } 3577 olen = sopt->ip6o_len + sizeof (*sopt); 3578 if (olen > hol) 3579 olen = hol; 3580 if (sopt->ip6o_type == IP6OPT_PADN || 3581 sopt->ip6o_type == ip6opt_ls) { 3582 if (sopt->ip6o_type == ip6opt_ls) 3583 deleting = B_TRUE; 3584 if (lastpad == NULL) 3585 lastpad = sopt; 3586 sopt = (const struct ip6_opt *) 3587 ((const char *)sopt + olen); 3588 hol -= olen; 3589 continue; 3590 } 3591 } else { 3592 /* if nothing was copied at all, then delete */ 3593 if (tlen == sizeof (*dstopt)) 3594 return (0); 3595 /* last pass; pick up any trailing padding */ 3596 olen = 0; 3597 } 3598 if (deleting) { 3599 /* 3600 * compute aligning effect of deleted material 3601 * to reproduce with pad. 3602 */ 3603 plen = ((const char *)sopt - 3604 (const char *)lastpad) & 7; 3605 tlen += plen; 3606 if (dopt != NULL) { 3607 if (plen == 1) { 3608 dopt->ip6o_type = IP6OPT_PAD1; 3609 } else if (plen > 1) { 3610 plen -= sizeof (*dopt); 3611 dopt->ip6o_type = IP6OPT_PADN; 3612 dopt->ip6o_len = plen; 3613 if (plen > 0) 3614 bzero(dopt + 1, plen); 3615 } 3616 dopt = (struct ip6_opt *) 3617 ((char *)dopt + plen); 3618 } 3619 deleting = B_FALSE; 3620 lastpad = NULL; 3621 } 3622 /* if there's uncopied padding, then copy that now */ 3623 if (lastpad != NULL) { 3624 olen += (const char *)sopt - 3625 (const char *)lastpad; 3626 sopt = lastpad; 3627 lastpad = NULL; 3628 } 3629 if (dopt != NULL && olen > 0) { 3630 bcopy(sopt, dopt, olen); 3631 dopt = (struct ip6_opt *)((char *)dopt + olen); 3632 } 3633 if (hol == 0) 3634 break; 3635 tlen += olen; 3636 sopt = (const struct ip6_opt *) 3637 ((const char *)sopt + olen); 3638 hol -= olen; 3639 } 3640 /* go back and patch up the length value, rounded upward */ 3641 if (dstopt != NULL) 3642 dstopt->ip6h_len = (tlen - 1) >> 3; 3643 } else { 3644 tlen = hol; 3645 if (dstopt != NULL) 3646 bcopy(srcopt, dstopt, hol); 3647 } 3648 3649 tlen += sizeof (*toh); 3650 if (toh != NULL) 3651 toh->len = tlen; 3652 3653 return (tlen); 3654 } 3655 3656 /* 3657 * Update udp_rcv_opt_len from the packet. 3658 * Called when options received, and when no options received but 3659 * udp_ip_recv_opt_len has previously recorded options. 3660 */ 3661 static void 3662 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) 3663 { 3664 /* Save the options if any */ 3665 if (opt_len > 0) { 3666 if (opt_len > udp->udp_ip_rcv_options_len) { 3667 /* Need to allocate larger buffer */ 3668 if (udp->udp_ip_rcv_options_len != 0) 3669 mi_free((char *)udp->udp_ip_rcv_options); 3670 udp->udp_ip_rcv_options_len = 0; 3671 udp->udp_ip_rcv_options = 3672 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 3673 if (udp->udp_ip_rcv_options != NULL) 3674 udp->udp_ip_rcv_options_len = opt_len; 3675 } 3676 if (udp->udp_ip_rcv_options_len != 0) { 3677 bcopy(opt, udp->udp_ip_rcv_options, opt_len); 3678 /* Adjust length if we are resusing the space */ 3679 udp->udp_ip_rcv_options_len = opt_len; 3680 } 3681 } else if (udp->udp_ip_rcv_options_len != 0) { 3682 /* Clear out previously recorded options */ 3683 mi_free((char *)udp->udp_ip_rcv_options); 3684 udp->udp_ip_rcv_options = NULL; 3685 udp->udp_ip_rcv_options_len = 0; 3686 } 3687 } 3688 3689 static void 3690 udp_queue_fallback(udp_t *udp, mblk_t *mp) 3691 { 3692 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 3693 if (IPCL_IS_NONSTR(udp->udp_connp)) { 3694 /* 3695 * fallback has started but messages have not been moved yet 3696 */ 3697 if (udp->udp_fallback_queue_head == NULL) { 3698 ASSERT(udp->udp_fallback_queue_tail == NULL); 3699 udp->udp_fallback_queue_head = mp; 3700 udp->udp_fallback_queue_tail = mp; 3701 } else { 3702 ASSERT(udp->udp_fallback_queue_tail != NULL); 3703 udp->udp_fallback_queue_tail->b_next = mp; 3704 udp->udp_fallback_queue_tail = mp; 3705 } 3706 mutex_exit(&udp->udp_recv_lock); 3707 } else { 3708 /* 3709 * no more fallbacks possible, ok to drop lock. 3710 */ 3711 mutex_exit(&udp->udp_recv_lock); 3712 putnext(udp->udp_connp->conn_rq, mp); 3713 } 3714 } 3715 3716 /* ARGSUSED2 */ 3717 static void 3718 udp_input(void *arg1, mblk_t *mp, void *arg2) 3719 { 3720 conn_t *connp = (conn_t *)arg1; 3721 struct T_unitdata_ind *tudi; 3722 uchar_t *rptr; /* Pointer to IP header */ 3723 int hdr_length; /* Length of IP+UDP headers */ 3724 int opt_len; 3725 int udi_size; /* Size of T_unitdata_ind */ 3726 int mp_len; 3727 udp_t *udp; 3728 udpha_t *udpha; 3729 int ipversion; 3730 ip6_pkt_t ipp; 3731 ip6_t *ip6h; 3732 ip6i_t *ip6i; 3733 mblk_t *mp1; 3734 mblk_t *options_mp = NULL; 3735 ip_pktinfo_t *pinfo = NULL; 3736 cred_t *cr = NULL; 3737 pid_t cpid; 3738 uint32_t udp_ip_rcv_options_len; 3739 udp_bits_t udp_bits; 3740 cred_t *rcr = connp->conn_cred; 3741 udp_stack_t *us; 3742 3743 ASSERT(connp->conn_flags & IPCL_UDPCONN); 3744 3745 udp = connp->conn_udp; 3746 us = udp->udp_us; 3747 rptr = mp->b_rptr; 3748 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3749 ASSERT(OK_32PTR(rptr)); 3750 3751 /* 3752 * IP should have prepended the options data in an M_CTL 3753 * Check M_CTL "type" to make sure are not here bcos of 3754 * a valid ICMP message 3755 */ 3756 if (DB_TYPE(mp) == M_CTL) { 3757 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3758 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3759 IN_PKTINFO) { 3760 /* 3761 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3762 * has been prepended to the packet by IP. We need to 3763 * extract the mblk and adjust the rptr 3764 */ 3765 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3766 options_mp = mp; 3767 mp = mp->b_cont; 3768 rptr = mp->b_rptr; 3769 UDP_STAT(us, udp_in_pktinfo); 3770 } else { 3771 /* 3772 * ICMP messages. 3773 */ 3774 udp_icmp_error(connp, mp); 3775 return; 3776 } 3777 } 3778 3779 mp_len = msgdsize(mp); 3780 /* 3781 * This is the inbound data path. 3782 * First, we check to make sure the IP version number is correct, 3783 * and then pull the IP and UDP headers into the first mblk. 3784 */ 3785 3786 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 3787 ipp.ipp_fields = 0; 3788 3789 ipversion = IPH_HDR_VERSION(rptr); 3790 3791 rw_enter(&udp->udp_rwlock, RW_READER); 3792 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 3793 udp_bits = udp->udp_bits; 3794 rw_exit(&udp->udp_rwlock); 3795 3796 switch (ipversion) { 3797 case IPV4_VERSION: 3798 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 3799 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 3800 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 3801 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 3802 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 3803 udp->udp_family == AF_INET) { 3804 /* 3805 * Record/update udp_ip_rcv_options with the lock 3806 * held. Not needed for AF_INET6 sockets 3807 * since they don't support a getsockopt of IP_OPTIONS. 3808 */ 3809 rw_enter(&udp->udp_rwlock, RW_WRITER); 3810 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 3811 opt_len); 3812 rw_exit(&udp->udp_rwlock); 3813 } 3814 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. */ 3815 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 3816 udp->udp_ip_recvpktinfo) { 3817 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 3818 ipp.ipp_fields |= IPPF_IFINDEX; 3819 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 3820 } 3821 } 3822 break; 3823 case IPV6_VERSION: 3824 /* 3825 * IPv6 packets can only be received by applications 3826 * that are prepared to receive IPv6 addresses. 3827 * The IP fanout must ensure this. 3828 */ 3829 ASSERT(udp->udp_family == AF_INET6); 3830 3831 ip6h = (ip6_t *)rptr; 3832 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 3833 3834 if (ip6h->ip6_nxt != IPPROTO_UDP) { 3835 uint8_t nexthdrp; 3836 /* Look for ifindex information */ 3837 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3838 ip6i = (ip6i_t *)ip6h; 3839 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 3840 goto tossit; 3841 3842 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3843 ASSERT(ip6i->ip6i_ifindex != 0); 3844 ipp.ipp_fields |= IPPF_IFINDEX; 3845 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3846 } 3847 rptr = (uchar_t *)&ip6i[1]; 3848 mp->b_rptr = rptr; 3849 if (rptr == mp->b_wptr) { 3850 mp1 = mp->b_cont; 3851 freeb(mp); 3852 mp = mp1; 3853 rptr = mp->b_rptr; 3854 } 3855 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 3856 goto tossit; 3857 ip6h = (ip6_t *)rptr; 3858 mp_len = msgdsize(mp); 3859 } 3860 /* 3861 * Find any potentially interesting extension headers 3862 * as well as the length of the IPv6 + extension 3863 * headers. 3864 */ 3865 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 3866 UDPH_SIZE; 3867 ASSERT(nexthdrp == IPPROTO_UDP); 3868 } else { 3869 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 3870 ip6i = NULL; 3871 } 3872 break; 3873 default: 3874 ASSERT(0); 3875 } 3876 3877 /* 3878 * IP inspected the UDP header thus all of it must be in the mblk. 3879 * UDP length check is performed for IPv6 packets and IPv4 packets 3880 * to check if the size of the packet as specified 3881 * by the header is the same as the physical size of the packet. 3882 * FIXME? Didn't IP already check this? 3883 */ 3884 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 3885 if ((MBLKL(mp) < hdr_length) || 3886 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 3887 goto tossit; 3888 } 3889 3890 3891 /* Walk past the headers unless UDP_RCVHDR was set. */ 3892 if (!udp_bits.udpb_rcvhdr) { 3893 mp->b_rptr = rptr + hdr_length; 3894 mp_len -= hdr_length; 3895 } 3896 3897 /* 3898 * This is the inbound data path. Packets are passed upstream as 3899 * T_UNITDATA_IND messages with full IP headers still attached. 3900 */ 3901 if (udp->udp_family == AF_INET) { 3902 sin_t *sin; 3903 3904 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 3905 3906 /* 3907 * Normally only send up the source address. 3908 * If IP_RECVDSTADDR is set we include the destination IP 3909 * address as an option. With IP_RECVOPTS we include all 3910 * the IP options. 3911 */ 3912 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3913 if (udp_bits.udpb_recvdstaddr) { 3914 udi_size += sizeof (struct T_opthdr) + 3915 sizeof (struct in_addr); 3916 UDP_STAT(us, udp_in_recvdstaddr); 3917 } 3918 3919 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 3920 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3921 udi_size += sizeof (struct T_opthdr) + 3922 sizeof (struct in_pktinfo); 3923 UDP_STAT(us, udp_ip_rcvpktinfo); 3924 } 3925 3926 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 3927 udi_size += sizeof (struct T_opthdr) + opt_len; 3928 UDP_STAT(us, udp_in_recvopts); 3929 } 3930 3931 /* 3932 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 3933 * space accordingly 3934 */ 3935 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 3936 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3937 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 3938 UDP_STAT(us, udp_in_recvif); 3939 } 3940 3941 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 3942 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 3943 udi_size += sizeof (struct T_opthdr) + 3944 sizeof (struct sockaddr_dl); 3945 UDP_STAT(us, udp_in_recvslla); 3946 } 3947 3948 if ((udp_bits.udpb_recvucred) && 3949 (cr = msg_getcred(mp, &cpid)) != NULL) { 3950 udi_size += sizeof (struct T_opthdr) + ucredsize; 3951 UDP_STAT(us, udp_in_recvucred); 3952 } 3953 3954 /* 3955 * If SO_TIMESTAMP is set allocate the appropriate sized 3956 * buffer. Since gethrestime() expects a pointer aligned 3957 * argument, we allocate space necessary for extra 3958 * alignment (even though it might not be used). 3959 */ 3960 if (udp_bits.udpb_timestamp) { 3961 udi_size += sizeof (struct T_opthdr) + 3962 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3963 UDP_STAT(us, udp_in_timestamp); 3964 } 3965 3966 /* 3967 * If IP_RECVTTL is set allocate the appropriate sized buffer 3968 */ 3969 if (udp_bits.udpb_recvttl) { 3970 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 3971 UDP_STAT(us, udp_in_recvttl); 3972 } 3973 3974 /* Allocate a message block for the T_UNITDATA_IND structure. */ 3975 mp1 = allocb(udi_size, BPRI_MED); 3976 if (mp1 == NULL) { 3977 freemsg(mp); 3978 if (options_mp != NULL) 3979 freeb(options_mp); 3980 BUMP_MIB(&us->us_udp_mib, udpInErrors); 3981 return; 3982 } 3983 mp1->b_cont = mp; 3984 mp = mp1; 3985 mp->b_datap->db_type = M_PROTO; 3986 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3987 mp->b_wptr = (uchar_t *)tudi + udi_size; 3988 tudi->PRIM_type = T_UNITDATA_IND; 3989 tudi->SRC_length = sizeof (sin_t); 3990 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3991 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3992 sizeof (sin_t); 3993 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3994 tudi->OPT_length = udi_size; 3995 sin = (sin_t *)&tudi[1]; 3996 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 3997 sin->sin_port = udpha->uha_src_port; 3998 sin->sin_family = udp->udp_family; 3999 *(uint32_t *)&sin->sin_zero[0] = 0; 4000 *(uint32_t *)&sin->sin_zero[4] = 0; 4001 4002 /* 4003 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4004 * IP_RECVTTL has been set. 4005 */ 4006 if (udi_size != 0) { 4007 /* 4008 * Copy in destination address before options to avoid 4009 * any padding issues. 4010 */ 4011 char *dstopt; 4012 4013 dstopt = (char *)&sin[1]; 4014 if (udp_bits.udpb_recvdstaddr) { 4015 struct T_opthdr *toh; 4016 ipaddr_t *dstptr; 4017 4018 toh = (struct T_opthdr *)dstopt; 4019 toh->level = IPPROTO_IP; 4020 toh->name = IP_RECVDSTADDR; 4021 toh->len = sizeof (struct T_opthdr) + 4022 sizeof (ipaddr_t); 4023 toh->status = 0; 4024 dstopt += sizeof (struct T_opthdr); 4025 dstptr = (ipaddr_t *)dstopt; 4026 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4027 dstopt += sizeof (ipaddr_t); 4028 udi_size -= toh->len; 4029 } 4030 4031 if (udp_bits.udpb_recvopts && opt_len > 0) { 4032 struct T_opthdr *toh; 4033 4034 toh = (struct T_opthdr *)dstopt; 4035 toh->level = IPPROTO_IP; 4036 toh->name = IP_RECVOPTS; 4037 toh->len = sizeof (struct T_opthdr) + opt_len; 4038 toh->status = 0; 4039 dstopt += sizeof (struct T_opthdr); 4040 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 4041 opt_len); 4042 dstopt += opt_len; 4043 udi_size -= toh->len; 4044 } 4045 4046 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 4047 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4048 struct T_opthdr *toh; 4049 struct in_pktinfo *pktinfop; 4050 4051 toh = (struct T_opthdr *)dstopt; 4052 toh->level = IPPROTO_IP; 4053 toh->name = IP_PKTINFO; 4054 toh->len = sizeof (struct T_opthdr) + 4055 sizeof (*pktinfop); 4056 toh->status = 0; 4057 dstopt += sizeof (struct T_opthdr); 4058 pktinfop = (struct in_pktinfo *)dstopt; 4059 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 4060 pktinfop->ipi_spec_dst = 4061 pinfo->ip_pkt_match_addr; 4062 pktinfop->ipi_addr.s_addr = 4063 ((ipha_t *)rptr)->ipha_dst; 4064 4065 dstopt += sizeof (struct in_pktinfo); 4066 udi_size -= toh->len; 4067 } 4068 4069 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4070 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4071 4072 struct T_opthdr *toh; 4073 struct sockaddr_dl *dstptr; 4074 4075 toh = (struct T_opthdr *)dstopt; 4076 toh->level = IPPROTO_IP; 4077 toh->name = IP_RECVSLLA; 4078 toh->len = sizeof (struct T_opthdr) + 4079 sizeof (struct sockaddr_dl); 4080 toh->status = 0; 4081 dstopt += sizeof (struct T_opthdr); 4082 dstptr = (struct sockaddr_dl *)dstopt; 4083 bcopy(&pinfo->ip_pkt_slla, dstptr, 4084 sizeof (struct sockaddr_dl)); 4085 dstopt += sizeof (struct sockaddr_dl); 4086 udi_size -= toh->len; 4087 } 4088 4089 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4090 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4091 4092 struct T_opthdr *toh; 4093 uint_t *dstptr; 4094 4095 toh = (struct T_opthdr *)dstopt; 4096 toh->level = IPPROTO_IP; 4097 toh->name = IP_RECVIF; 4098 toh->len = sizeof (struct T_opthdr) + 4099 sizeof (uint_t); 4100 toh->status = 0; 4101 dstopt += sizeof (struct T_opthdr); 4102 dstptr = (uint_t *)dstopt; 4103 *dstptr = pinfo->ip_pkt_ifindex; 4104 dstopt += sizeof (uint_t); 4105 udi_size -= toh->len; 4106 } 4107 4108 if (cr != NULL) { 4109 struct T_opthdr *toh; 4110 4111 toh = (struct T_opthdr *)dstopt; 4112 toh->level = SOL_SOCKET; 4113 toh->name = SCM_UCRED; 4114 toh->len = sizeof (struct T_opthdr) + ucredsize; 4115 toh->status = 0; 4116 dstopt += sizeof (struct T_opthdr); 4117 (void) cred2ucred(cr, cpid, dstopt, rcr); 4118 dstopt += ucredsize; 4119 udi_size -= toh->len; 4120 } 4121 4122 if (udp_bits.udpb_timestamp) { 4123 struct T_opthdr *toh; 4124 4125 toh = (struct T_opthdr *)dstopt; 4126 toh->level = SOL_SOCKET; 4127 toh->name = SCM_TIMESTAMP; 4128 toh->len = sizeof (struct T_opthdr) + 4129 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4130 toh->status = 0; 4131 dstopt += sizeof (struct T_opthdr); 4132 /* Align for gethrestime() */ 4133 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4134 sizeof (intptr_t)); 4135 gethrestime((timestruc_t *)dstopt); 4136 dstopt = (char *)toh + toh->len; 4137 udi_size -= toh->len; 4138 } 4139 4140 /* 4141 * CAUTION: 4142 * Due to aligment issues 4143 * Processing of IP_RECVTTL option 4144 * should always be the last. Adding 4145 * any option processing after this will 4146 * cause alignment panic. 4147 */ 4148 if (udp_bits.udpb_recvttl) { 4149 struct T_opthdr *toh; 4150 uint8_t *dstptr; 4151 4152 toh = (struct T_opthdr *)dstopt; 4153 toh->level = IPPROTO_IP; 4154 toh->name = IP_RECVTTL; 4155 toh->len = sizeof (struct T_opthdr) + 4156 sizeof (uint8_t); 4157 toh->status = 0; 4158 dstopt += sizeof (struct T_opthdr); 4159 dstptr = (uint8_t *)dstopt; 4160 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4161 dstopt += sizeof (uint8_t); 4162 udi_size -= toh->len; 4163 } 4164 4165 /* Consumed all of allocated space */ 4166 ASSERT(udi_size == 0); 4167 } 4168 } else { 4169 sin6_t *sin6; 4170 4171 /* 4172 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4173 * 4174 * Normally we only send up the address. If receiving of any 4175 * optional receive side information is enabled, we also send 4176 * that up as options. 4177 */ 4178 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4179 4180 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4181 IPPF_RTHDR|IPPF_IFINDEX)) { 4182 if ((udp_bits.udpb_ipv6_recvhopopts) && 4183 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4184 size_t hlen; 4185 4186 UDP_STAT(us, udp_in_recvhopopts); 4187 hlen = copy_hop_opts(&ipp, NULL); 4188 if (hlen == 0) 4189 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4190 udi_size += hlen; 4191 } 4192 if (((udp_bits.udpb_ipv6_recvdstopts) || 4193 udp_bits.udpb_old_ipv6_recvdstopts) && 4194 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4195 udi_size += sizeof (struct T_opthdr) + 4196 ipp.ipp_dstoptslen; 4197 UDP_STAT(us, udp_in_recvdstopts); 4198 } 4199 if ((((udp_bits.udpb_ipv6_recvdstopts) && 4200 udp_bits.udpb_ipv6_recvrthdr && 4201 (ipp.ipp_fields & IPPF_RTHDR)) || 4202 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 4203 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4204 udi_size += sizeof (struct T_opthdr) + 4205 ipp.ipp_rtdstoptslen; 4206 UDP_STAT(us, udp_in_recvrtdstopts); 4207 } 4208 if ((udp_bits.udpb_ipv6_recvrthdr) && 4209 (ipp.ipp_fields & IPPF_RTHDR)) { 4210 udi_size += sizeof (struct T_opthdr) + 4211 ipp.ipp_rthdrlen; 4212 UDP_STAT(us, udp_in_recvrthdr); 4213 } 4214 if ((udp_bits.udpb_ip_recvpktinfo) && 4215 (ipp.ipp_fields & IPPF_IFINDEX)) { 4216 udi_size += sizeof (struct T_opthdr) + 4217 sizeof (struct in6_pktinfo); 4218 UDP_STAT(us, udp_in_recvpktinfo); 4219 } 4220 4221 } 4222 if ((udp_bits.udpb_recvucred) && 4223 (cr = msg_getcred(mp, &cpid)) != NULL) { 4224 udi_size += sizeof (struct T_opthdr) + ucredsize; 4225 UDP_STAT(us, udp_in_recvucred); 4226 } 4227 4228 /* 4229 * If SO_TIMESTAMP is set allocate the appropriate sized 4230 * buffer. Since gethrestime() expects a pointer aligned 4231 * argument, we allocate space necessary for extra 4232 * alignment (even though it might not be used). 4233 */ 4234 if (udp_bits.udpb_timestamp) { 4235 udi_size += sizeof (struct T_opthdr) + 4236 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4237 UDP_STAT(us, udp_in_timestamp); 4238 } 4239 4240 if (udp_bits.udpb_ipv6_recvhoplimit) { 4241 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4242 UDP_STAT(us, udp_in_recvhoplimit); 4243 } 4244 4245 if (udp_bits.udpb_ipv6_recvtclass) { 4246 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4247 UDP_STAT(us, udp_in_recvtclass); 4248 } 4249 4250 mp1 = allocb(udi_size, BPRI_MED); 4251 if (mp1 == NULL) { 4252 freemsg(mp); 4253 if (options_mp != NULL) 4254 freeb(options_mp); 4255 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4256 return; 4257 } 4258 mp1->b_cont = mp; 4259 mp = mp1; 4260 mp->b_datap->db_type = M_PROTO; 4261 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4262 mp->b_wptr = (uchar_t *)tudi + udi_size; 4263 tudi->PRIM_type = T_UNITDATA_IND; 4264 tudi->SRC_length = sizeof (sin6_t); 4265 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4266 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4267 sizeof (sin6_t); 4268 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4269 tudi->OPT_length = udi_size; 4270 sin6 = (sin6_t *)&tudi[1]; 4271 if (ipversion == IPV4_VERSION) { 4272 in6_addr_t v6dst; 4273 4274 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4275 &sin6->sin6_addr); 4276 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4277 &v6dst); 4278 sin6->sin6_flowinfo = 0; 4279 sin6->sin6_scope_id = 0; 4280 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4281 connp->conn_zoneid, us->us_netstack); 4282 } else { 4283 sin6->sin6_addr = ip6h->ip6_src; 4284 /* No sin6_flowinfo per API */ 4285 sin6->sin6_flowinfo = 0; 4286 /* For link-scope source pass up scope id */ 4287 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4288 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4289 sin6->sin6_scope_id = ipp.ipp_ifindex; 4290 else 4291 sin6->sin6_scope_id = 0; 4292 sin6->__sin6_src_id = ip_srcid_find_addr( 4293 &ip6h->ip6_dst, connp->conn_zoneid, 4294 us->us_netstack); 4295 } 4296 sin6->sin6_port = udpha->uha_src_port; 4297 sin6->sin6_family = udp->udp_family; 4298 4299 if (udi_size != 0) { 4300 uchar_t *dstopt; 4301 4302 dstopt = (uchar_t *)&sin6[1]; 4303 if ((udp_bits.udpb_ip_recvpktinfo) && 4304 (ipp.ipp_fields & IPPF_IFINDEX)) { 4305 struct T_opthdr *toh; 4306 struct in6_pktinfo *pkti; 4307 4308 toh = (struct T_opthdr *)dstopt; 4309 toh->level = IPPROTO_IPV6; 4310 toh->name = IPV6_PKTINFO; 4311 toh->len = sizeof (struct T_opthdr) + 4312 sizeof (*pkti); 4313 toh->status = 0; 4314 dstopt += sizeof (struct T_opthdr); 4315 pkti = (struct in6_pktinfo *)dstopt; 4316 if (ipversion == IPV6_VERSION) 4317 pkti->ipi6_addr = ip6h->ip6_dst; 4318 else 4319 IN6_IPADDR_TO_V4MAPPED( 4320 ((ipha_t *)rptr)->ipha_dst, 4321 &pkti->ipi6_addr); 4322 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4323 dstopt += sizeof (*pkti); 4324 udi_size -= toh->len; 4325 } 4326 if (udp_bits.udpb_ipv6_recvhoplimit) { 4327 struct T_opthdr *toh; 4328 4329 toh = (struct T_opthdr *)dstopt; 4330 toh->level = IPPROTO_IPV6; 4331 toh->name = IPV6_HOPLIMIT; 4332 toh->len = sizeof (struct T_opthdr) + 4333 sizeof (uint_t); 4334 toh->status = 0; 4335 dstopt += sizeof (struct T_opthdr); 4336 if (ipversion == IPV6_VERSION) 4337 *(uint_t *)dstopt = ip6h->ip6_hops; 4338 else 4339 *(uint_t *)dstopt = 4340 ((ipha_t *)rptr)->ipha_ttl; 4341 dstopt += sizeof (uint_t); 4342 udi_size -= toh->len; 4343 } 4344 if (udp_bits.udpb_ipv6_recvtclass) { 4345 struct T_opthdr *toh; 4346 4347 toh = (struct T_opthdr *)dstopt; 4348 toh->level = IPPROTO_IPV6; 4349 toh->name = IPV6_TCLASS; 4350 toh->len = sizeof (struct T_opthdr) + 4351 sizeof (uint_t); 4352 toh->status = 0; 4353 dstopt += sizeof (struct T_opthdr); 4354 if (ipversion == IPV6_VERSION) { 4355 *(uint_t *)dstopt = 4356 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4357 } else { 4358 ipha_t *ipha = (ipha_t *)rptr; 4359 *(uint_t *)dstopt = 4360 ipha->ipha_type_of_service; 4361 } 4362 dstopt += sizeof (uint_t); 4363 udi_size -= toh->len; 4364 } 4365 if ((udp_bits.udpb_ipv6_recvhopopts) && 4366 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4367 size_t hlen; 4368 4369 hlen = copy_hop_opts(&ipp, dstopt); 4370 dstopt += hlen; 4371 udi_size -= hlen; 4372 } 4373 if ((udp_bits.udpb_ipv6_recvdstopts) && 4374 (udp_bits.udpb_ipv6_recvrthdr) && 4375 (ipp.ipp_fields & IPPF_RTHDR) && 4376 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4377 struct T_opthdr *toh; 4378 4379 toh = (struct T_opthdr *)dstopt; 4380 toh->level = IPPROTO_IPV6; 4381 toh->name = IPV6_DSTOPTS; 4382 toh->len = sizeof (struct T_opthdr) + 4383 ipp.ipp_rtdstoptslen; 4384 toh->status = 0; 4385 dstopt += sizeof (struct T_opthdr); 4386 bcopy(ipp.ipp_rtdstopts, dstopt, 4387 ipp.ipp_rtdstoptslen); 4388 dstopt += ipp.ipp_rtdstoptslen; 4389 udi_size -= toh->len; 4390 } 4391 if ((udp_bits.udpb_ipv6_recvrthdr) && 4392 (ipp.ipp_fields & IPPF_RTHDR)) { 4393 struct T_opthdr *toh; 4394 4395 toh = (struct T_opthdr *)dstopt; 4396 toh->level = IPPROTO_IPV6; 4397 toh->name = IPV6_RTHDR; 4398 toh->len = sizeof (struct T_opthdr) + 4399 ipp.ipp_rthdrlen; 4400 toh->status = 0; 4401 dstopt += sizeof (struct T_opthdr); 4402 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4403 dstopt += ipp.ipp_rthdrlen; 4404 udi_size -= toh->len; 4405 } 4406 if ((udp_bits.udpb_ipv6_recvdstopts) && 4407 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4408 struct T_opthdr *toh; 4409 4410 toh = (struct T_opthdr *)dstopt; 4411 toh->level = IPPROTO_IPV6; 4412 toh->name = IPV6_DSTOPTS; 4413 toh->len = sizeof (struct T_opthdr) + 4414 ipp.ipp_dstoptslen; 4415 toh->status = 0; 4416 dstopt += sizeof (struct T_opthdr); 4417 bcopy(ipp.ipp_dstopts, dstopt, 4418 ipp.ipp_dstoptslen); 4419 dstopt += ipp.ipp_dstoptslen; 4420 udi_size -= toh->len; 4421 } 4422 if (cr != NULL) { 4423 struct T_opthdr *toh; 4424 4425 toh = (struct T_opthdr *)dstopt; 4426 toh->level = SOL_SOCKET; 4427 toh->name = SCM_UCRED; 4428 toh->len = sizeof (struct T_opthdr) + ucredsize; 4429 toh->status = 0; 4430 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4431 dstopt += toh->len; 4432 udi_size -= toh->len; 4433 } 4434 if (udp_bits.udpb_timestamp) { 4435 struct T_opthdr *toh; 4436 4437 toh = (struct T_opthdr *)dstopt; 4438 toh->level = SOL_SOCKET; 4439 toh->name = SCM_TIMESTAMP; 4440 toh->len = sizeof (struct T_opthdr) + 4441 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4442 toh->status = 0; 4443 dstopt += sizeof (struct T_opthdr); 4444 /* Align for gethrestime() */ 4445 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4446 sizeof (intptr_t)); 4447 gethrestime((timestruc_t *)dstopt); 4448 dstopt = (uchar_t *)toh + toh->len; 4449 udi_size -= toh->len; 4450 } 4451 4452 /* Consumed all of allocated space */ 4453 ASSERT(udi_size == 0); 4454 } 4455 #undef sin6 4456 /* No IP_RECVDSTADDR for IPv6. */ 4457 } 4458 4459 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4460 if (options_mp != NULL) 4461 freeb(options_mp); 4462 4463 if (IPCL_IS_NONSTR(connp)) { 4464 int error; 4465 4466 if ((*connp->conn_upcalls->su_recv) 4467 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 4468 NULL) < 0) { 4469 mutex_enter(&udp->udp_recv_lock); 4470 if (error == ENOSPC) { 4471 /* 4472 * let's confirm while holding the lock 4473 */ 4474 if ((*connp->conn_upcalls->su_recv) 4475 (connp->conn_upper_handle, NULL, 0, 0, 4476 &error, NULL) < 0) { 4477 if (error == ENOSPC) { 4478 connp->conn_flow_cntrld = 4479 B_TRUE; 4480 } else { 4481 ASSERT(error == EOPNOTSUPP); 4482 } 4483 } 4484 mutex_exit(&udp->udp_recv_lock); 4485 } else { 4486 ASSERT(error == EOPNOTSUPP); 4487 udp_queue_fallback(udp, mp); 4488 } 4489 } 4490 } else { 4491 putnext(connp->conn_rq, mp); 4492 } 4493 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 4494 return; 4495 4496 tossit: 4497 freemsg(mp); 4498 if (options_mp != NULL) 4499 freeb(options_mp); 4500 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4501 } 4502 4503 /* 4504 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 4505 * information that can be changing beneath us. 4506 */ 4507 mblk_t * 4508 udp_snmp_get(queue_t *q, mblk_t *mpctl) 4509 { 4510 mblk_t *mpdata; 4511 mblk_t *mp_conn_ctl; 4512 mblk_t *mp_attr_ctl; 4513 mblk_t *mp6_conn_ctl; 4514 mblk_t *mp6_attr_ctl; 4515 mblk_t *mp_conn_tail; 4516 mblk_t *mp_attr_tail; 4517 mblk_t *mp6_conn_tail; 4518 mblk_t *mp6_attr_tail; 4519 struct opthdr *optp; 4520 mib2_udpEntry_t ude; 4521 mib2_udp6Entry_t ude6; 4522 mib2_transportMLPEntry_t mlp; 4523 int state; 4524 zoneid_t zoneid; 4525 int i; 4526 connf_t *connfp; 4527 conn_t *connp = Q_TO_CONN(q); 4528 int v4_conn_idx; 4529 int v6_conn_idx; 4530 boolean_t needattr; 4531 udp_t *udp; 4532 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4533 udp_stack_t *us = connp->conn_netstack->netstack_udp; 4534 mblk_t *mp2ctl; 4535 4536 /* 4537 * make a copy of the original message 4538 */ 4539 mp2ctl = copymsg(mpctl); 4540 4541 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 4542 if (mpctl == NULL || 4543 (mpdata = mpctl->b_cont) == NULL || 4544 (mp_conn_ctl = copymsg(mpctl)) == NULL || 4545 (mp_attr_ctl = copymsg(mpctl)) == NULL || 4546 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 4547 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 4548 freemsg(mp_conn_ctl); 4549 freemsg(mp_attr_ctl); 4550 freemsg(mp6_conn_ctl); 4551 freemsg(mpctl); 4552 freemsg(mp2ctl); 4553 return (0); 4554 } 4555 4556 zoneid = connp->conn_zoneid; 4557 4558 /* fixed length structure for IPv4 and IPv6 counters */ 4559 SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 4560 SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 4561 /* synchronize 64- and 32-bit counters */ 4562 SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams); 4563 SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams); 4564 4565 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4566 optp->level = MIB2_UDP; 4567 optp->name = 0; 4568 (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib, 4569 sizeof (us->us_udp_mib)); 4570 optp->len = msgdsize(mpdata); 4571 qreply(q, mpctl); 4572 4573 mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 4574 v4_conn_idx = v6_conn_idx = 0; 4575 4576 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4577 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4578 connp = NULL; 4579 4580 while ((connp = ipcl_get_next_conn(connfp, connp, 4581 IPCL_UDPCONN))) { 4582 udp = connp->conn_udp; 4583 if (zoneid != connp->conn_zoneid) 4584 continue; 4585 4586 /* 4587 * Note that the port numbers are sent in 4588 * host byte order 4589 */ 4590 4591 if (udp->udp_state == TS_UNBND) 4592 state = MIB2_UDP_unbound; 4593 else if (udp->udp_state == TS_IDLE) 4594 state = MIB2_UDP_idle; 4595 else if (udp->udp_state == TS_DATA_XFER) 4596 state = MIB2_UDP_connected; 4597 else 4598 state = MIB2_UDP_unknown; 4599 4600 needattr = B_FALSE; 4601 bzero(&mlp, sizeof (mlp)); 4602 if (connp->conn_mlp_type != mlptSingle) { 4603 if (connp->conn_mlp_type == mlptShared || 4604 connp->conn_mlp_type == mlptBoth) 4605 mlp.tme_flags |= MIB2_TMEF_SHARED; 4606 if (connp->conn_mlp_type == mlptPrivate || 4607 connp->conn_mlp_type == mlptBoth) 4608 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 4609 needattr = B_TRUE; 4610 } 4611 4612 /* 4613 * Create an IPv4 table entry for IPv4 entries and also 4614 * any IPv6 entries which are bound to in6addr_any 4615 * (i.e. anything a IPv4 peer could connect/send to). 4616 */ 4617 if (udp->udp_ipversion == IPV4_VERSION || 4618 (udp->udp_state <= TS_IDLE && 4619 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 4620 ude.udpEntryInfo.ue_state = state; 4621 /* 4622 * If in6addr_any this will set it to 4623 * INADDR_ANY 4624 */ 4625 ude.udpLocalAddress = 4626 V4_PART_OF_V6(udp->udp_v6src); 4627 ude.udpLocalPort = ntohs(udp->udp_port); 4628 if (udp->udp_state == TS_DATA_XFER) { 4629 /* 4630 * Can potentially get here for 4631 * v6 socket if another process 4632 * (say, ping) has just done a 4633 * sendto(), changing the state 4634 * from the TS_IDLE above to 4635 * TS_DATA_XFER by the time we hit 4636 * this part of the code. 4637 */ 4638 ude.udpEntryInfo.ue_RemoteAddress = 4639 V4_PART_OF_V6(udp->udp_v6dst); 4640 ude.udpEntryInfo.ue_RemotePort = 4641 ntohs(udp->udp_dstport); 4642 } else { 4643 ude.udpEntryInfo.ue_RemoteAddress = 0; 4644 ude.udpEntryInfo.ue_RemotePort = 0; 4645 } 4646 4647 /* 4648 * We make the assumption that all udp_t 4649 * structs will be created within an address 4650 * region no larger than 32-bits. 4651 */ 4652 ude.udpInstance = (uint32_t)(uintptr_t)udp; 4653 ude.udpCreationProcess = 4654 (udp->udp_open_pid < 0) ? 4655 MIB2_UNKNOWN_PROCESS : 4656 udp->udp_open_pid; 4657 ude.udpCreationTime = udp->udp_open_time; 4658 4659 (void) snmp_append_data2(mp_conn_ctl->b_cont, 4660 &mp_conn_tail, (char *)&ude, sizeof (ude)); 4661 mlp.tme_connidx = v4_conn_idx++; 4662 if (needattr) 4663 (void) snmp_append_data2( 4664 mp_attr_ctl->b_cont, &mp_attr_tail, 4665 (char *)&mlp, sizeof (mlp)); 4666 } 4667 if (udp->udp_ipversion == IPV6_VERSION) { 4668 ude6.udp6EntryInfo.ue_state = state; 4669 ude6.udp6LocalAddress = udp->udp_v6src; 4670 ude6.udp6LocalPort = ntohs(udp->udp_port); 4671 ude6.udp6IfIndex = udp->udp_bound_if; 4672 if (udp->udp_state == TS_DATA_XFER) { 4673 ude6.udp6EntryInfo.ue_RemoteAddress = 4674 udp->udp_v6dst; 4675 ude6.udp6EntryInfo.ue_RemotePort = 4676 ntohs(udp->udp_dstport); 4677 } else { 4678 ude6.udp6EntryInfo.ue_RemoteAddress = 4679 sin6_null.sin6_addr; 4680 ude6.udp6EntryInfo.ue_RemotePort = 0; 4681 } 4682 /* 4683 * We make the assumption that all udp_t 4684 * structs will be created within an address 4685 * region no larger than 32-bits. 4686 */ 4687 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 4688 ude6.udp6CreationProcess = 4689 (udp->udp_open_pid < 0) ? 4690 MIB2_UNKNOWN_PROCESS : 4691 udp->udp_open_pid; 4692 ude6.udp6CreationTime = udp->udp_open_time; 4693 4694 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 4695 &mp6_conn_tail, (char *)&ude6, 4696 sizeof (ude6)); 4697 mlp.tme_connidx = v6_conn_idx++; 4698 if (needattr) 4699 (void) snmp_append_data2( 4700 mp6_attr_ctl->b_cont, 4701 &mp6_attr_tail, (char *)&mlp, 4702 sizeof (mlp)); 4703 } 4704 } 4705 } 4706 4707 /* IPv4 UDP endpoints */ 4708 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 4709 sizeof (struct T_optmgmt_ack)]; 4710 optp->level = MIB2_UDP; 4711 optp->name = MIB2_UDP_ENTRY; 4712 optp->len = msgdsize(mp_conn_ctl->b_cont); 4713 qreply(q, mp_conn_ctl); 4714 4715 /* table of MLP attributes... */ 4716 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 4717 sizeof (struct T_optmgmt_ack)]; 4718 optp->level = MIB2_UDP; 4719 optp->name = EXPER_XPORT_MLP; 4720 optp->len = msgdsize(mp_attr_ctl->b_cont); 4721 if (optp->len == 0) 4722 freemsg(mp_attr_ctl); 4723 else 4724 qreply(q, mp_attr_ctl); 4725 4726 /* IPv6 UDP endpoints */ 4727 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 4728 sizeof (struct T_optmgmt_ack)]; 4729 optp->level = MIB2_UDP6; 4730 optp->name = MIB2_UDP6_ENTRY; 4731 optp->len = msgdsize(mp6_conn_ctl->b_cont); 4732 qreply(q, mp6_conn_ctl); 4733 4734 /* table of MLP attributes... */ 4735 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 4736 sizeof (struct T_optmgmt_ack)]; 4737 optp->level = MIB2_UDP6; 4738 optp->name = EXPER_XPORT_MLP; 4739 optp->len = msgdsize(mp6_attr_ctl->b_cont); 4740 if (optp->len == 0) 4741 freemsg(mp6_attr_ctl); 4742 else 4743 qreply(q, mp6_attr_ctl); 4744 4745 return (mp2ctl); 4746 } 4747 4748 /* 4749 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 4750 * NOTE: Per MIB-II, UDP has no writable data. 4751 * TODO: If this ever actually tries to set anything, it needs to be 4752 * to do the appropriate locking. 4753 */ 4754 /* ARGSUSED */ 4755 int 4756 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4757 uchar_t *ptr, int len) 4758 { 4759 switch (level) { 4760 case MIB2_UDP: 4761 return (0); 4762 default: 4763 return (1); 4764 } 4765 } 4766 4767 static void 4768 udp_report_item(mblk_t *mp, udp_t *udp) 4769 { 4770 char *state; 4771 char addrbuf1[INET6_ADDRSTRLEN]; 4772 char addrbuf2[INET6_ADDRSTRLEN]; 4773 uint_t print_len, buf_len; 4774 4775 buf_len = mp->b_datap->db_lim - mp->b_wptr; 4776 ASSERT(buf_len >= 0); 4777 if (buf_len == 0) 4778 return; 4779 4780 if (udp->udp_state == TS_UNBND) 4781 state = "UNBOUND"; 4782 else if (udp->udp_state == TS_IDLE) 4783 state = "IDLE"; 4784 else if (udp->udp_state == TS_DATA_XFER) 4785 state = "CONNECTED"; 4786 else 4787 state = "UnkState"; 4788 print_len = snprintf((char *)mp->b_wptr, buf_len, 4789 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 4790 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 4791 inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)), 4792 inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)), 4793 ntohs(udp->udp_dstport), state); 4794 if (print_len < buf_len) { 4795 mp->b_wptr += print_len; 4796 } else { 4797 mp->b_wptr += buf_len; 4798 } 4799 } 4800 4801 /* Report for ndd "udp_status" */ 4802 /* ARGSUSED */ 4803 static int 4804 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4805 { 4806 zoneid_t zoneid; 4807 connf_t *connfp; 4808 conn_t *connp = Q_TO_CONN(q); 4809 udp_t *udp = connp->conn_udp; 4810 int i; 4811 udp_stack_t *us = udp->udp_us; 4812 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4813 4814 /* 4815 * Because of the ndd constraint, at most we can have 64K buffer 4816 * to put in all UDP info. So to be more efficient, just 4817 * allocate a 64K buffer here, assuming we need that large buffer. 4818 * This may be a problem as any user can read udp_status. Therefore 4819 * we limit the rate of doing this using us_ndd_get_info_interval. 4820 * This should be OK as normal users should not do this too often. 4821 */ 4822 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 4823 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 4824 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 4825 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 4826 return (0); 4827 } 4828 } 4829 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 4830 /* The following may work even if we cannot get a large buf. */ 4831 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 4832 return (0); 4833 } 4834 (void) mi_mpprintf(mp, 4835 "UDP " MI_COL_HDRPAD_STR 4836 /* 12345678[89ABCDEF] */ 4837 " zone lport src addr dest addr port state"); 4838 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 4839 4840 zoneid = connp->conn_zoneid; 4841 4842 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4843 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4844 connp = NULL; 4845 4846 while ((connp = ipcl_get_next_conn(connfp, connp, 4847 IPCL_UDPCONN))) { 4848 udp = connp->conn_udp; 4849 if (zoneid != GLOBAL_ZONEID && 4850 zoneid != connp->conn_zoneid) 4851 continue; 4852 4853 udp_report_item(mp->b_cont, udp); 4854 } 4855 } 4856 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 4857 return (0); 4858 } 4859 4860 /* 4861 * This routine creates a T_UDERROR_IND message and passes it upstream. 4862 * The address and options are copied from the T_UNITDATA_REQ message 4863 * passed in mp. This message is freed. 4864 */ 4865 static void 4866 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 4867 t_scalar_t err) 4868 { 4869 struct T_unitdata_req *tudr; 4870 mblk_t *mp1; 4871 uchar_t *optaddr; 4872 t_scalar_t optlen; 4873 4874 if (DB_TYPE(mp) == M_DATA) { 4875 ASSERT(destaddr != NULL && destlen != 0); 4876 optaddr = NULL; 4877 optlen = 0; 4878 } else { 4879 if ((mp->b_wptr < mp->b_rptr) || 4880 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 4881 goto done; 4882 } 4883 tudr = (struct T_unitdata_req *)mp->b_rptr; 4884 destaddr = mp->b_rptr + tudr->DEST_offset; 4885 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 4886 destaddr + tudr->DEST_length < mp->b_rptr || 4887 destaddr + tudr->DEST_length > mp->b_wptr) { 4888 goto done; 4889 } 4890 optaddr = mp->b_rptr + tudr->OPT_offset; 4891 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 4892 optaddr + tudr->OPT_length < mp->b_rptr || 4893 optaddr + tudr->OPT_length > mp->b_wptr) { 4894 goto done; 4895 } 4896 destlen = tudr->DEST_length; 4897 optlen = tudr->OPT_length; 4898 } 4899 4900 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 4901 (char *)optaddr, optlen, err); 4902 if (mp1 != NULL) 4903 qreply(q, mp1); 4904 4905 done: 4906 freemsg(mp); 4907 } 4908 4909 /* 4910 * This routine removes a port number association from a stream. It 4911 * is called by udp_wput to handle T_UNBIND_REQ messages. 4912 */ 4913 static void 4914 udp_tpi_unbind(queue_t *q, mblk_t *mp) 4915 { 4916 conn_t *connp = Q_TO_CONN(q); 4917 int error; 4918 4919 error = udp_do_unbind(connp); 4920 if (error) { 4921 if (error < 0) 4922 udp_err_ack(q, mp, -error, 0); 4923 else 4924 udp_err_ack(q, mp, TSYSERR, error); 4925 return; 4926 } 4927 4928 mp = mi_tpi_ok_ack_alloc(mp); 4929 ASSERT(mp != NULL); 4930 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4931 qreply(q, mp); 4932 } 4933 4934 /* 4935 * Don't let port fall into the privileged range. 4936 * Since the extra privileged ports can be arbitrary we also 4937 * ensure that we exclude those from consideration. 4938 * us->us_epriv_ports is not sorted thus we loop over it until 4939 * there are no changes. 4940 */ 4941 static in_port_t 4942 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 4943 { 4944 int i; 4945 in_port_t nextport; 4946 boolean_t restart = B_FALSE; 4947 udp_stack_t *us = udp->udp_us; 4948 4949 if (random && udp_random_anon_port != 0) { 4950 (void) random_get_pseudo_bytes((uint8_t *)&port, 4951 sizeof (in_port_t)); 4952 /* 4953 * Unless changed by a sys admin, the smallest anon port 4954 * is 32768 and the largest anon port is 65535. It is 4955 * very likely (50%) for the random port to be smaller 4956 * than the smallest anon port. When that happens, 4957 * add port % (anon port range) to the smallest anon 4958 * port to get the random port. It should fall into the 4959 * valid anon port range. 4960 */ 4961 if (port < us->us_smallest_anon_port) { 4962 port = us->us_smallest_anon_port + 4963 port % (us->us_largest_anon_port - 4964 us->us_smallest_anon_port); 4965 } 4966 } 4967 4968 retry: 4969 if (port < us->us_smallest_anon_port) 4970 port = us->us_smallest_anon_port; 4971 4972 if (port > us->us_largest_anon_port) { 4973 port = us->us_smallest_anon_port; 4974 if (restart) 4975 return (0); 4976 restart = B_TRUE; 4977 } 4978 4979 if (port < us->us_smallest_nonpriv_port) 4980 port = us->us_smallest_nonpriv_port; 4981 4982 for (i = 0; i < us->us_num_epriv_ports; i++) { 4983 if (port == us->us_epriv_ports[i]) { 4984 port++; 4985 /* 4986 * Make sure that the port is in the 4987 * valid range. 4988 */ 4989 goto retry; 4990 } 4991 } 4992 4993 if (is_system_labeled() && 4994 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 4995 port, IPPROTO_UDP, B_TRUE)) != 0) { 4996 port = nextport; 4997 goto retry; 4998 } 4999 5000 return (port); 5001 } 5002 5003 static int 5004 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst, 5005 boolean_t *update_lastdst) 5006 { 5007 int err; 5008 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 5009 udp_t *udp = Q_TO_UDP(wq); 5010 udp_stack_t *us = udp->udp_us; 5011 cred_t *cr; 5012 5013 /* 5014 * All Solaris components should pass a db_credp 5015 * for this message, hence we ASSERT. 5016 * On production kernels we return an error to be robust against 5017 * random streams modules sitting on top of us. 5018 */ 5019 cr = msg_getcred(mp, NULL); 5020 ASSERT(cr != NULL); 5021 if (cr == NULL) 5022 return (EINVAL); 5023 5024 /* Note that we use the cred/label from the message to handle MLP */ 5025 err = tsol_compute_label(cr, dst, 5026 opt_storage, udp->udp_connp->conn_mac_exempt, 5027 us->us_netstack->netstack_ip); 5028 if (err == 0) { 5029 err = tsol_update_options(&udp->udp_ip_snd_options, 5030 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 5031 opt_storage); 5032 } 5033 if (err != 0) { 5034 DTRACE_PROBE4( 5035 tx__ip__log__info__updatelabel__udp, 5036 char *, "queue(1) failed to update options(2) on mp(3)", 5037 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5038 } else { 5039 *update_lastdst = B_TRUE; 5040 } 5041 return (err); 5042 } 5043 5044 static mblk_t * 5045 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 5046 uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg, 5047 cred_t *cr, pid_t pid) 5048 { 5049 udp_t *udp = connp->conn_udp; 5050 mblk_t *mp1 = mp; 5051 mblk_t *mp2; 5052 ipha_t *ipha; 5053 int ip_hdr_length; 5054 uint32_t ip_len; 5055 udpha_t *udpha; 5056 boolean_t lock_held = B_FALSE; 5057 in_port_t uha_src_port; 5058 udpattrs_t attrs; 5059 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 5060 uint32_t ip_snd_opt_len = 0; 5061 ip4_pkt_t pktinfo; 5062 ip4_pkt_t *pktinfop = &pktinfo; 5063 ip_opt_info_t optinfo; 5064 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5065 udp_stack_t *us = udp->udp_us; 5066 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 5067 queue_t *q = connp->conn_wq; 5068 ire_t *ire; 5069 in6_addr_t v6dst; 5070 boolean_t update_lastdst = B_FALSE; 5071 5072 *error = 0; 5073 pktinfop->ip4_ill_index = 0; 5074 pktinfop->ip4_addr = INADDR_ANY; 5075 optinfo.ip_opt_flags = 0; 5076 optinfo.ip_opt_ill_index = 0; 5077 5078 if (v4dst == INADDR_ANY) 5079 v4dst = htonl(INADDR_LOOPBACK); 5080 5081 /* 5082 * If options passed in, feed it for verification and handling 5083 */ 5084 attrs.udpattr_credset = B_FALSE; 5085 if (IPCL_IS_NONSTR(connp)) { 5086 if (msg->msg_controllen != 0) { 5087 attrs.udpattr_ipp4 = pktinfop; 5088 attrs.udpattr_mb = mp; 5089 5090 rw_enter(&udp->udp_rwlock, RW_WRITER); 5091 *error = process_auxiliary_options(connp, 5092 msg->msg_control, msg->msg_controllen, 5093 &attrs, &udp_opt_obj, udp_opt_set, cr); 5094 rw_exit(&udp->udp_rwlock); 5095 if (*error) 5096 goto done; 5097 } 5098 } else { 5099 if (DB_TYPE(mp) != M_DATA) { 5100 mp1 = mp->b_cont; 5101 if (((struct T_unitdata_req *) 5102 mp->b_rptr)->OPT_length != 0) { 5103 attrs.udpattr_ipp4 = pktinfop; 5104 attrs.udpattr_mb = mp; 5105 if (udp_unitdata_opt_process(q, mp, error, 5106 &attrs) < 0) 5107 goto done; 5108 /* 5109 * Note: success in processing options. 5110 * mp option buffer represented by 5111 * OPT_length/offset now potentially modified 5112 * and contain option setting results 5113 */ 5114 ASSERT(*error == 0); 5115 } 5116 } 5117 } 5118 5119 /* mp1 points to the M_DATA mblk carrying the packet */ 5120 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 5121 5122 /* 5123 * Determine whether we need to mark the mblk with the user's 5124 * credentials. 5125 * If labeled then sockfs would have already done this. 5126 */ 5127 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 5128 5129 ire = connp->conn_ire_cache; 5130 if (CLASSD(v4dst) || (ire == NULL) || (ire->ire_addr != v4dst) || 5131 (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) { 5132 if (cr != NULL && msg_getcred(mp, NULL) == NULL) 5133 mblk_setcred(mp, cr, pid); 5134 } 5135 5136 rw_enter(&udp->udp_rwlock, RW_READER); 5137 lock_held = B_TRUE; 5138 5139 /* 5140 * Cluster and TSOL note: 5141 * udp.udp_v6lastdst is shared by Cluster and TSOL 5142 * udp.udp_lastdstport is used by Cluster 5143 * 5144 * Both Cluster and TSOL need to update the dest addr and/or port. 5145 * Updating is done after both Cluster and TSOL checks, protected 5146 * by conn_lock. 5147 */ 5148 mutex_enter(&connp->conn_lock); 5149 5150 if (cl_inet_connect2 != NULL && 5151 (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5152 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5153 udp->udp_lastdstport != port)) { 5154 mutex_exit(&connp->conn_lock); 5155 *error = 0; 5156 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 5157 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error); 5158 if (*error != 0) { 5159 *error = EHOSTUNREACH; 5160 goto done; 5161 } 5162 update_lastdst = B_TRUE; 5163 mutex_enter(&connp->conn_lock); 5164 } 5165 5166 /* 5167 * Check if our saved options are valid; update if not. 5168 * TSOL Note: Since we are not in WRITER mode, UDP packets 5169 * to different destination may require different labels, 5170 * or worse, UDP packets to same IP address may require 5171 * different labels due to use of shared all-zones address. 5172 * We use conn_lock to ensure that lastdst, ip_snd_options, 5173 * and ip_snd_options_len are consistent for the current 5174 * destination and are updated atomically. 5175 */ 5176 if (is_system_labeled()) { 5177 /* Using UDP MLP requires SCM_UCRED from user */ 5178 if (connp->conn_mlp_type != mlptSingle && 5179 !attrs.udpattr_credset) { 5180 mutex_exit(&connp->conn_lock); 5181 DTRACE_PROBE4( 5182 tx__ip__log__info__output__udp, 5183 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 5184 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 5185 *error = ECONNREFUSED; 5186 goto done; 5187 } 5188 /* 5189 * update label option for this UDP socket if 5190 * - the destination has changed, or 5191 * - the UDP socket is MLP 5192 */ 5193 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5194 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5195 connp->conn_mlp_type != mlptSingle) && 5196 (*error = udp_update_label(q, mp, v4dst, &update_lastdst)) 5197 != 0) { 5198 mutex_exit(&connp->conn_lock); 5199 goto done; 5200 } 5201 } 5202 if (update_lastdst) { 5203 IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst); 5204 udp->udp_lastdstport = port; 5205 } 5206 if (udp->udp_ip_snd_options_len > 0) { 5207 ip_snd_opt_len = udp->udp_ip_snd_options_len; 5208 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 5209 } 5210 mutex_exit(&connp->conn_lock); 5211 5212 /* Add an IP header */ 5213 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + 5214 (insert_spi ? sizeof (uint32_t) : 0); 5215 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5216 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5217 !OK_32PTR(ipha)) { 5218 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 5219 if (mp2 == NULL) { 5220 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5221 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5222 *error = ENOMEM; 5223 goto done; 5224 } 5225 mp2->b_wptr = DB_LIM(mp2); 5226 mp2->b_cont = mp1; 5227 mp1 = mp2; 5228 if (DB_TYPE(mp) != M_DATA) 5229 mp->b_cont = mp1; 5230 else 5231 mp = mp1; 5232 5233 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5234 } 5235 ip_hdr_length -= (UDPH_SIZE + (insert_spi ? sizeof (uint32_t) : 0)); 5236 #ifdef _BIG_ENDIAN 5237 /* Set version, header length, and tos */ 5238 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5239 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5240 udp->udp_type_of_service); 5241 /* Set ttl and protocol */ 5242 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5243 #else 5244 /* Set version, header length, and tos */ 5245 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5246 ((udp->udp_type_of_service << 8) | 5247 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5248 /* Set ttl and protocol */ 5249 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5250 #endif 5251 if (pktinfop->ip4_addr != INADDR_ANY) { 5252 ipha->ipha_src = pktinfop->ip4_addr; 5253 optinfo.ip_opt_flags = IP_VERIFY_SRC; 5254 } else { 5255 /* 5256 * Copy our address into the packet. If this is zero, 5257 * first look at __sin6_src_id for a hint. If we leave the 5258 * source as INADDR_ANY then ip will fill in the real source 5259 * address. 5260 */ 5261 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5262 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5263 in6_addr_t v6src; 5264 5265 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 5266 us->us_netstack); 5267 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5268 } 5269 } 5270 uha_src_port = udp->udp_port; 5271 if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) { 5272 rw_exit(&udp->udp_rwlock); 5273 lock_held = B_FALSE; 5274 } 5275 5276 if (pktinfop->ip4_ill_index != 0) { 5277 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 5278 } 5279 5280 ipha->ipha_fragment_offset_and_flags = 0; 5281 ipha->ipha_ident = 0; 5282 5283 mp1->b_rptr = (uchar_t *)ipha; 5284 5285 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5286 (uintptr_t)UINT_MAX); 5287 5288 /* Determine length of packet */ 5289 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5290 if ((mp2 = mp1->b_cont) != NULL) { 5291 do { 5292 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5293 ip_len += (uint32_t)MBLKL(mp2); 5294 } while ((mp2 = mp2->b_cont) != NULL); 5295 } 5296 /* 5297 * If the size of the packet is greater than the maximum allowed by 5298 * ip, return an error. Passing this down could cause panics because 5299 * the size will have wrapped and be inconsistent with the msg size. 5300 */ 5301 if (ip_len > IP_MAXPACKET) { 5302 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5303 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5304 *error = EMSGSIZE; 5305 goto done; 5306 } 5307 ipha->ipha_length = htons((uint16_t)ip_len); 5308 ip_len -= ip_hdr_length; 5309 ip_len = htons((uint16_t)ip_len); 5310 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5311 5312 /* Insert all-0s SPI now. */ 5313 if (insert_spi) 5314 *((uint32_t *)(udpha + 1)) = 0; 5315 5316 /* 5317 * Copy in the destination address 5318 */ 5319 ipha->ipha_dst = v4dst; 5320 5321 /* 5322 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 5323 */ 5324 if (CLASSD(v4dst)) 5325 ipha->ipha_ttl = udp->udp_multicast_ttl; 5326 5327 udpha->uha_dst_port = port; 5328 udpha->uha_src_port = uha_src_port; 5329 5330 if (ip_snd_opt_len > 0) { 5331 uint32_t cksum; 5332 5333 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 5334 lock_held = B_FALSE; 5335 rw_exit(&udp->udp_rwlock); 5336 /* 5337 * Massage source route putting first source route in ipha_dst. 5338 * Ignore the destination in T_unitdata_req. 5339 * Create a checksum adjustment for a source route, if any. 5340 */ 5341 cksum = ip_massage_options(ipha, us->us_netstack); 5342 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5343 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5344 (ipha->ipha_dst & 0xFFFF); 5345 if ((int)cksum < 0) 5346 cksum--; 5347 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5348 /* 5349 * IP does the checksum if uha_checksum is non-zero, 5350 * We make it easy for IP to include our pseudo header 5351 * by putting our length in uha_checksum. 5352 */ 5353 cksum += ip_len; 5354 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5355 /* There might be a carry. */ 5356 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5357 #ifdef _LITTLE_ENDIAN 5358 if (us->us_do_checksum) 5359 ip_len = (cksum << 16) | ip_len; 5360 #else 5361 if (us->us_do_checksum) 5362 ip_len = (ip_len << 16) | cksum; 5363 else 5364 ip_len <<= 16; 5365 #endif 5366 } else { 5367 /* 5368 * IP does the checksum if uha_checksum is non-zero, 5369 * We make it easy for IP to include our pseudo header 5370 * by putting our length in uha_checksum. 5371 */ 5372 if (us->us_do_checksum) 5373 ip_len |= (ip_len << 16); 5374 #ifndef _LITTLE_ENDIAN 5375 else 5376 ip_len <<= 16; 5377 #endif 5378 } 5379 ASSERT(!lock_held); 5380 /* Set UDP length and checksum */ 5381 *((uint32_t *)&udpha->uha_length) = ip_len; 5382 5383 if (DB_TYPE(mp) != M_DATA) { 5384 cred_t *cr; 5385 pid_t cpid; 5386 5387 /* Move any cred from the T_UNITDATA_REQ to the packet */ 5388 cr = msg_extractcred(mp, &cpid); 5389 if (cr != NULL) { 5390 if (mp1->b_datap->db_credp != NULL) 5391 crfree(mp1->b_datap->db_credp); 5392 mp1->b_datap->db_credp = cr; 5393 mp1->b_datap->db_cpid = cpid; 5394 } 5395 ASSERT(mp != mp1); 5396 freeb(mp); 5397 } 5398 5399 /* mp has been consumed and we'll return success */ 5400 ASSERT(*error == 0); 5401 mp = NULL; 5402 5403 /* We're done. Pass the packet to ip. */ 5404 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 5405 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5406 "udp_wput_end: q %p (%S)", q, "end"); 5407 5408 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 5409 CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || 5410 connp->conn_dontroute || 5411 connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || 5412 optinfo.ip_opt_ill_index != 0 || 5413 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 5414 IPP_ENABLED(IPP_LOCAL_OUT, ipst) || 5415 ipst->ips_ip_g_mrouter != NULL) { 5416 UDP_STAT(us, udp_ip_send); 5417 ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, 5418 &optinfo); 5419 } else { 5420 udp_send_data(udp, connp->conn_wq, mp1, ipha); 5421 } 5422 5423 done: 5424 if (lock_held) 5425 rw_exit(&udp->udp_rwlock); 5426 if (*error != 0) { 5427 ASSERT(mp != NULL); 5428 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5429 } 5430 return (mp); 5431 } 5432 5433 static void 5434 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 5435 { 5436 conn_t *connp = udp->udp_connp; 5437 ipaddr_t src, dst; 5438 ire_t *ire; 5439 ipif_t *ipif = NULL; 5440 mblk_t *ire_fp_mp; 5441 boolean_t retry_caching; 5442 udp_stack_t *us = udp->udp_us; 5443 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5444 5445 dst = ipha->ipha_dst; 5446 src = ipha->ipha_src; 5447 ASSERT(ipha->ipha_ident == 0); 5448 5449 if (CLASSD(dst)) { 5450 int err; 5451 5452 ipif = conn_get_held_ipif(connp, 5453 &connp->conn_multicast_ipif, &err); 5454 5455 if (ipif == NULL || ipif->ipif_isv6 || 5456 (ipif->ipif_ill->ill_phyint->phyint_flags & 5457 PHYI_LOOPBACK)) { 5458 if (ipif != NULL) 5459 ipif_refrele(ipif); 5460 UDP_STAT(us, udp_ip_send); 5461 ip_output(connp, mp, q, IP_WPUT); 5462 return; 5463 } 5464 } 5465 5466 retry_caching = B_FALSE; 5467 mutex_enter(&connp->conn_lock); 5468 ire = connp->conn_ire_cache; 5469 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 5470 5471 if (ire == NULL || ire->ire_addr != dst || 5472 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 5473 retry_caching = B_TRUE; 5474 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 5475 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 5476 5477 ASSERT(ipif != NULL); 5478 if (!IS_ON_SAME_LAN(stq_ill, ipif->ipif_ill)) 5479 retry_caching = B_TRUE; 5480 } 5481 5482 if (!retry_caching) { 5483 ASSERT(ire != NULL); 5484 IRE_REFHOLD(ire); 5485 mutex_exit(&connp->conn_lock); 5486 } else { 5487 boolean_t cached = B_FALSE; 5488 5489 connp->conn_ire_cache = NULL; 5490 mutex_exit(&connp->conn_lock); 5491 5492 /* Release the old ire */ 5493 if (ire != NULL) { 5494 IRE_REFRELE_NOTR(ire); 5495 ire = NULL; 5496 } 5497 5498 if (CLASSD(dst)) { 5499 ASSERT(ipif != NULL); 5500 ire = ire_ctable_lookup(dst, 0, 0, ipif, 5501 connp->conn_zoneid, msg_getlabel(mp), 5502 MATCH_IRE_ILL, ipst); 5503 } else { 5504 ASSERT(ipif == NULL); 5505 ire = ire_cache_lookup(dst, connp->conn_zoneid, 5506 msg_getlabel(mp), ipst); 5507 } 5508 5509 if (ire == NULL) { 5510 if (ipif != NULL) 5511 ipif_refrele(ipif); 5512 UDP_STAT(us, udp_ire_null); 5513 ip_output(connp, mp, q, IP_WPUT); 5514 return; 5515 } 5516 IRE_REFHOLD_NOTR(ire); 5517 5518 mutex_enter(&connp->conn_lock); 5519 if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL && 5520 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 5521 irb_t *irb = ire->ire_bucket; 5522 5523 /* 5524 * IRE's created for non-connection oriented transports 5525 * are normally initialized with IRE_MARK_TEMPORARY set 5526 * in the ire_marks. These IRE's are preferentially 5527 * reaped when the hash chain length in the cache 5528 * bucket exceeds the maximum value specified in 5529 * ip[6]_ire_max_bucket_cnt. This can severely affect 5530 * UDP performance if IRE cache entries that we need 5531 * to reuse are continually removed. To remedy this, 5532 * when we cache the IRE in the conn_t, we remove the 5533 * IRE_MARK_TEMPORARY bit from the ire_marks if it was 5534 * set. 5535 */ 5536 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5537 rw_enter(&irb->irb_lock, RW_WRITER); 5538 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5539 ire->ire_marks &= ~IRE_MARK_TEMPORARY; 5540 irb->irb_tmp_ire_cnt--; 5541 } 5542 rw_exit(&irb->irb_lock); 5543 } 5544 connp->conn_ire_cache = ire; 5545 cached = B_TRUE; 5546 } 5547 mutex_exit(&connp->conn_lock); 5548 5549 /* 5550 * We can continue to use the ire but since it was not 5551 * cached, we should drop the extra reference. 5552 */ 5553 if (!cached) 5554 IRE_REFRELE_NOTR(ire); 5555 } 5556 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 5557 ASSERT(!CLASSD(dst) || ipif != NULL); 5558 5559 /* 5560 * Check if we can take the fast-path. 5561 * Note that "incomplete" ire's (where the link-layer for next hop 5562 * is not resolved, or where the fast-path header in nce_fp_mp is not 5563 * available yet) are sent down the legacy (slow) path 5564 */ 5565 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 5566 (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || 5567 (ire->ire_max_frag < ntohs(ipha->ipha_length)) || 5568 ((ire->ire_nce == NULL) || 5569 ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) || 5570 connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) { 5571 if (ipif != NULL) 5572 ipif_refrele(ipif); 5573 UDP_STAT(us, udp_ip_ire_send); 5574 IRE_REFRELE(ire); 5575 ip_output(connp, mp, q, IP_WPUT); 5576 return; 5577 } 5578 5579 if (src == INADDR_ANY && !connp->conn_unspec_src) { 5580 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 5581 ipha->ipha_src = ipif->ipif_src_addr; 5582 else 5583 ipha->ipha_src = ire->ire_src_addr; 5584 } 5585 5586 if (ipif != NULL) 5587 ipif_refrele(ipif); 5588 5589 udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid); 5590 } 5591 5592 static void 5593 udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) 5594 { 5595 ipaddr_t src, dst; 5596 ill_t *ill; 5597 mblk_t *ire_fp_mp; 5598 uint_t ire_fp_mp_len; 5599 uint16_t *up; 5600 uint32_t cksum, hcksum_txflags; 5601 queue_t *dev_q; 5602 udp_t *udp = connp->conn_udp; 5603 ipha_t *ipha = (ipha_t *)mp->b_rptr; 5604 udp_stack_t *us = udp->udp_us; 5605 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5606 boolean_t ll_multicast = B_FALSE; 5607 boolean_t direct_send; 5608 5609 dev_q = ire->ire_stq->q_next; 5610 ASSERT(dev_q != NULL); 5611 5612 ill = ire_to_ill(ire); 5613 ASSERT(ill != NULL); 5614 5615 /* 5616 * For the direct send case, if resetting of conn_direct_blocked 5617 * was missed, it is still ok because the putq() would enable 5618 * the queue and write service will drain it out. 5619 */ 5620 direct_send = ILL_DIRECT_CAPABLE(ill); 5621 5622 /* is queue flow controlled? */ 5623 if ((!direct_send) && (q->q_first != NULL || connp->conn_draining || 5624 DEV_Q_FLOW_BLOCKED(dev_q))) { 5625 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 5626 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 5627 if (ipst->ips_ip_output_queue) { 5628 DTRACE_PROBE1(udp__xmit__putq, conn_t *, connp); 5629 (void) putq(connp->conn_wq, mp); 5630 } else { 5631 freemsg(mp); 5632 } 5633 ire_refrele(ire); 5634 return; 5635 } 5636 5637 ire_fp_mp = ire->ire_nce->nce_fp_mp; 5638 ire_fp_mp_len = MBLKL(ire_fp_mp); 5639 ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len); 5640 5641 dst = ipha->ipha_dst; 5642 src = ipha->ipha_src; 5643 5644 5645 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 5646 5647 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 5648 #ifndef _BIG_ENDIAN 5649 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 5650 #endif 5651 5652 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 5653 ASSERT(ill->ill_hcksum_capab != NULL); 5654 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 5655 } else { 5656 hcksum_txflags = 0; 5657 } 5658 5659 /* pseudo-header checksum (do it in parts for IP header checksum) */ 5660 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 5661 5662 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 5663 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 5664 if (*up != 0) { 5665 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 5666 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 5667 ntohs(ipha->ipha_length), cksum); 5668 5669 /* Software checksum? */ 5670 if (DB_CKSUMFLAGS(mp) == 0) { 5671 UDP_STAT(us, udp_out_sw_cksum); 5672 UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, 5673 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 5674 } 5675 } 5676 5677 if (!CLASSD(dst)) { 5678 ipha->ipha_fragment_offset_and_flags |= 5679 (uint32_t)htons(ire->ire_frag_flag); 5680 } 5681 5682 /* Calculate IP header checksum if hardware isn't capable */ 5683 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 5684 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 5685 ((uint16_t *)ipha)[4]); 5686 } 5687 5688 if (CLASSD(dst)) { 5689 if (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL) { 5690 ip_multicast_loopback(q, ill, mp, 5691 connp->conn_multicast_loop ? 0 : 5692 IP_FF_NO_MCAST_LOOP, zoneid); 5693 } 5694 5695 /* If multicast TTL is 0 then we are done */ 5696 if (ipha->ipha_ttl == 0) { 5697 freemsg(mp); 5698 ire_refrele(ire); 5699 return; 5700 } 5701 ll_multicast = B_TRUE; 5702 } 5703 5704 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 5705 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 5706 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 5707 5708 UPDATE_OB_PKT_COUNT(ire); 5709 ire->ire_last_used_time = lbolt; 5710 5711 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); 5712 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 5713 ntohs(ipha->ipha_length)); 5714 5715 DTRACE_PROBE4(ip4__physical__out__start, 5716 ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); 5717 FW_HOOKS(ipst->ips_ip4_physical_out_event, 5718 ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp, 5719 ll_multicast, ipst); 5720 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 5721 if (ipst->ips_ipobs_enabled && mp != NULL) { 5722 zoneid_t szone; 5723 5724 szone = ip_get_zoneid_v4(ipha->ipha_src, mp, 5725 ipst, ALL_ZONES); 5726 ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, 5727 ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst); 5728 } 5729 5730 if (mp == NULL) 5731 goto bail; 5732 5733 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 5734 void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill, 5735 ipha_t *, ipha, ip6_t *, NULL, int, 0); 5736 5737 if (direct_send) { 5738 uintptr_t cookie; 5739 ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct; 5740 5741 cookie = idd->idd_tx_df(idd->idd_tx_dh, mp, 5742 (uintptr_t)connp, 0); 5743 if (cookie != NULL) { 5744 idl_tx_list_t *idl_txl; 5745 5746 /* 5747 * Flow controlled. 5748 */ 5749 DTRACE_PROBE2(non__null__cookie, uintptr_t, 5750 cookie, conn_t *, connp); 5751 idl_txl = &ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)]; 5752 mutex_enter(&idl_txl->txl_lock); 5753 /* 5754 * Check again after holding txl_lock to see if Tx 5755 * ring is still blocked and only then insert the 5756 * connp into the drain list. 5757 */ 5758 if (connp->conn_direct_blocked || 5759 (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, 5760 cookie) == 0)) { 5761 mutex_exit(&idl_txl->txl_lock); 5762 goto bail; 5763 } 5764 if (idl_txl->txl_cookie != NULL && 5765 idl_txl->txl_cookie != cookie) { 5766 DTRACE_PROBE2(udp__xmit__collision, 5767 uintptr_t, cookie, 5768 uintptr_t, idl_txl->txl_cookie); 5769 UDP_STAT(us, udp_cookie_coll); 5770 } else { 5771 connp->conn_direct_blocked = B_TRUE; 5772 idl_txl->txl_cookie = cookie; 5773 conn_drain_insert(connp, idl_txl); 5774 DTRACE_PROBE1(udp__xmit__insert, 5775 conn_t *, connp); 5776 } 5777 mutex_exit(&idl_txl->txl_lock); 5778 } 5779 } else { 5780 DTRACE_PROBE1(udp__xmit__putnext, mblk_t *, mp); 5781 putnext(ire->ire_stq, mp); 5782 } 5783 bail: 5784 IRE_REFRELE(ire); 5785 } 5786 5787 static boolean_t 5788 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst, 5789 boolean_t *update_lastdst) 5790 { 5791 udp_t *udp = Q_TO_UDP(wq); 5792 int err; 5793 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 5794 udp_stack_t *us = udp->udp_us; 5795 cred_t *cr; 5796 5797 /* 5798 * All Solaris components should pass a db_credp 5799 * for this message, hence we ASSERT. 5800 * On production kernels we return an error to be robust against 5801 * random streams modules sitting on top of us. 5802 */ 5803 cr = msg_getcred(mp, NULL); 5804 ASSERT(cr != NULL); 5805 if (cr == NULL) 5806 return (EINVAL); 5807 5808 /* Note that we use the cred/label from the message to handle MLP */ 5809 err = tsol_compute_label_v6(cr, 5810 dst, opt_storage, udp->udp_connp->conn_mac_exempt, 5811 us->us_netstack->netstack_ip); 5812 if (err == 0) { 5813 err = tsol_update_sticky(&udp->udp_sticky_ipp, 5814 &udp->udp_label_len_v6, opt_storage); 5815 } 5816 if (err != 0) { 5817 DTRACE_PROBE4( 5818 tx__ip__log__drop__updatelabel__udp6, 5819 char *, "queue(1) failed to update options(2) on mp(3)", 5820 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5821 } else { 5822 *update_lastdst = B_TRUE; 5823 } 5824 return (err); 5825 } 5826 5827 static int 5828 udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr, 5829 pid_t pid) 5830 { 5831 udp_t *udp = connp->conn_udp; 5832 udp_stack_t *us = udp->udp_us; 5833 ipaddr_t v4dst; 5834 in_port_t dstport; 5835 boolean_t mapped_addr; 5836 struct sockaddr_storage ss; 5837 sin_t *sin; 5838 sin6_t *sin6; 5839 struct sockaddr *addr; 5840 socklen_t addrlen; 5841 int error; 5842 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5843 5844 /* M_DATA for connected socket */ 5845 5846 ASSERT(udp->udp_issocket || IPCL_IS_NONSTR(connp)); 5847 UDP_DBGSTAT(us, udp_data_conn); 5848 5849 mutex_enter(&connp->conn_lock); 5850 if (udp->udp_state != TS_DATA_XFER) { 5851 mutex_exit(&connp->conn_lock); 5852 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5853 UDP_STAT(us, udp_out_err_notconn); 5854 freemsg(mp); 5855 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5856 "udp_wput_end: connp %p (%S)", connp, 5857 "not-connected; address required"); 5858 return (EDESTADDRREQ); 5859 } 5860 5861 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 5862 if (mapped_addr) 5863 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 5864 5865 /* Initialize addr and addrlen as if they're passed in */ 5866 if (udp->udp_family == AF_INET) { 5867 sin = (sin_t *)&ss; 5868 sin->sin_family = AF_INET; 5869 dstport = sin->sin_port = udp->udp_dstport; 5870 ASSERT(mapped_addr); 5871 sin->sin_addr.s_addr = v4dst; 5872 addr = (struct sockaddr *)sin; 5873 addrlen = sizeof (*sin); 5874 } else { 5875 sin6 = (sin6_t *)&ss; 5876 sin6->sin6_family = AF_INET6; 5877 dstport = sin6->sin6_port = udp->udp_dstport; 5878 sin6->sin6_flowinfo = udp->udp_flowinfo; 5879 sin6->sin6_addr = udp->udp_v6dst; 5880 sin6->sin6_scope_id = 0; 5881 sin6->__sin6_src_id = 0; 5882 addr = (struct sockaddr *)sin6; 5883 addrlen = sizeof (*sin6); 5884 } 5885 mutex_exit(&connp->conn_lock); 5886 5887 if (mapped_addr) { 5888 /* 5889 * Handle both AF_INET and AF_INET6; the latter 5890 * for IPV4 mapped destination addresses. Note 5891 * here that both addr and addrlen point to the 5892 * corresponding struct depending on the address 5893 * family of the socket. 5894 */ 5895 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 5896 insert_spi, msg, cr, pid); 5897 } else { 5898 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid); 5899 } 5900 if (error == 0) { 5901 ASSERT(mp == NULL); 5902 return (0); 5903 } 5904 5905 UDP_STAT(us, udp_out_err_output); 5906 ASSERT(mp != NULL); 5907 if (IPCL_IS_NONSTR(connp)) { 5908 freemsg(mp); 5909 return (error); 5910 } else { 5911 /* mp is freed by the following routine */ 5912 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, 5913 (t_scalar_t)addrlen, (t_scalar_t)error); 5914 return (0); 5915 } 5916 } 5917 5918 /* ARGSUSED */ 5919 static int 5920 udp_send_not_connected(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 5921 socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid) 5922 { 5923 5924 udp_t *udp = connp->conn_udp; 5925 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5926 int error = 0; 5927 sin6_t *sin6; 5928 sin_t *sin; 5929 uint_t srcid; 5930 uint16_t port; 5931 ipaddr_t v4dst; 5932 5933 5934 ASSERT(addr != NULL); 5935 5936 switch (udp->udp_family) { 5937 case AF_INET6: 5938 sin6 = (sin6_t *)addr; 5939 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 5940 /* 5941 * Destination is a non-IPv4-compatible IPv6 address. 5942 * Send out an IPv6 format packet. 5943 */ 5944 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, 5945 pid); 5946 if (error != 0) 5947 goto ud_error; 5948 5949 return (0); 5950 } 5951 /* 5952 * If the local address is not zero or a mapped address 5953 * return an error. It would be possible to send an IPv4 5954 * packet but the response would never make it back to the 5955 * application since it is bound to a non-mapped address. 5956 */ 5957 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 5958 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 5959 error = EADDRNOTAVAIL; 5960 goto ud_error; 5961 } 5962 /* Send IPv4 packet without modifying udp_ipversion */ 5963 /* Extract port and ipaddr */ 5964 port = sin6->sin6_port; 5965 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 5966 srcid = sin6->__sin6_src_id; 5967 break; 5968 5969 case AF_INET: 5970 sin = (sin_t *)addr; 5971 /* Extract port and ipaddr */ 5972 port = sin->sin_port; 5973 v4dst = sin->sin_addr.s_addr; 5974 srcid = 0; 5975 break; 5976 } 5977 5978 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi, 5979 msg, cr, pid); 5980 5981 if (error == 0) { 5982 ASSERT(mp == NULL); 5983 return (0); 5984 } 5985 5986 ud_error: 5987 ASSERT(mp != NULL); 5988 5989 return (error); 5990 } 5991 5992 /* 5993 * This routine handles all messages passed downstream. It either 5994 * consumes the message or passes it downstream; it never queues a 5995 * a message. 5996 * 5997 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 5998 * is valid when we are directly beneath the stream head, and thus sockfs 5999 * is able to bypass STREAMS and directly call us, passing along the sockaddr 6000 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 6001 * connected endpoints. 6002 */ 6003 void 6004 udp_wput(queue_t *q, mblk_t *mp) 6005 { 6006 conn_t *connp = Q_TO_CONN(q); 6007 udp_t *udp = connp->conn_udp; 6008 int error = 0; 6009 struct sockaddr *addr; 6010 socklen_t addrlen; 6011 udp_stack_t *us = udp->udp_us; 6012 6013 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6014 "udp_wput_start: queue %p mp %p", q, mp); 6015 6016 /* 6017 * We directly handle several cases here: T_UNITDATA_REQ message 6018 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 6019 * socket. 6020 */ 6021 switch (DB_TYPE(mp)) { 6022 case M_DATA: 6023 /* 6024 * Quick check for error cases. Checks will be done again 6025 * under the lock later on 6026 */ 6027 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 6028 /* Not connected; address is required */ 6029 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6030 UDP_STAT(us, udp_out_err_notconn); 6031 freemsg(mp); 6032 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6033 "udp_wput_end: connp %p (%S)", connp, 6034 "not-connected; address required"); 6035 return; 6036 } 6037 (void) udp_send_connected(connp, mp, NULL, NULL, -1); 6038 return; 6039 6040 case M_PROTO: 6041 case M_PCPROTO: { 6042 struct T_unitdata_req *tudr; 6043 6044 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 6045 tudr = (struct T_unitdata_req *)mp->b_rptr; 6046 6047 /* Handle valid T_UNITDATA_REQ here */ 6048 if (MBLKL(mp) >= sizeof (*tudr) && 6049 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 6050 if (mp->b_cont == NULL) { 6051 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6052 "udp_wput_end: q %p (%S)", q, "badaddr"); 6053 error = EPROTO; 6054 goto ud_error; 6055 } 6056 6057 if (!MBLKIN(mp, 0, tudr->DEST_offset + 6058 tudr->DEST_length)) { 6059 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6060 "udp_wput_end: q %p (%S)", q, "badaddr"); 6061 error = EADDRNOTAVAIL; 6062 goto ud_error; 6063 } 6064 /* 6065 * If a port has not been bound to the stream, fail. 6066 * This is not a problem when sockfs is directly 6067 * above us, because it will ensure that the socket 6068 * is first bound before allowing data to be sent. 6069 */ 6070 if (udp->udp_state == TS_UNBND) { 6071 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6072 "udp_wput_end: q %p (%S)", q, "outstate"); 6073 error = EPROTO; 6074 goto ud_error; 6075 } 6076 addr = (struct sockaddr *) 6077 &mp->b_rptr[tudr->DEST_offset]; 6078 addrlen = tudr->DEST_length; 6079 if (tudr->OPT_length != 0) 6080 UDP_STAT(us, udp_out_opt); 6081 break; 6082 } 6083 /* FALLTHRU */ 6084 } 6085 default: 6086 udp_wput_other(q, mp); 6087 return; 6088 } 6089 ASSERT(addr != NULL); 6090 6091 error = udp_send_not_connected(connp, mp, addr, addrlen, NULL, NULL, 6092 -1); 6093 if (error != 0) { 6094 ud_error: 6095 UDP_STAT(us, udp_out_err_output); 6096 ASSERT(mp != NULL); 6097 /* mp is freed by the following routine */ 6098 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6099 (t_scalar_t)error); 6100 } 6101 } 6102 6103 /* ARGSUSED */ 6104 static void 6105 udp_wput_fallback(queue_t *wq, mblk_t *mp) 6106 { 6107 #ifdef DEBUG 6108 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 6109 #endif 6110 freemsg(mp); 6111 } 6112 6113 6114 /* 6115 * udp_output_v6(): 6116 * Assumes that udp_wput did some sanity checking on the destination 6117 * address. 6118 */ 6119 static mblk_t * 6120 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, 6121 struct nmsghdr *msg, cred_t *cr, pid_t pid) 6122 { 6123 ip6_t *ip6h; 6124 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6125 mblk_t *mp1 = mp; 6126 mblk_t *mp2; 6127 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6128 size_t ip_len; 6129 udpha_t *udph; 6130 udp_t *udp = connp->conn_udp; 6131 udp_stack_t *us = udp->udp_us; 6132 queue_t *q = connp->conn_wq; 6133 ip6_pkt_t ipp_s; /* For ancillary data options */ 6134 ip6_pkt_t *ipp = &ipp_s; 6135 ip6_pkt_t *tipp; /* temporary ipp */ 6136 uint32_t csum = 0; 6137 uint_t ignore = 0; 6138 uint_t option_exists = 0, is_sticky = 0; 6139 uint8_t *cp; 6140 uint8_t *nxthdr_ptr; 6141 in6_addr_t ip6_dst; 6142 in_port_t port; 6143 udpattrs_t attrs; 6144 boolean_t opt_present; 6145 ip6_hbh_t *hopoptsptr = NULL; 6146 uint_t hopoptslen = 0; 6147 boolean_t is_ancillary = B_FALSE; 6148 size_t sth_wroff = 0; 6149 ire_t *ire; 6150 boolean_t update_lastdst = B_FALSE; 6151 6152 *error = 0; 6153 6154 /* 6155 * If the local address is a mapped address return 6156 * an error. 6157 * It would be possible to send an IPv6 packet but the 6158 * response would never make it back to the application 6159 * since it is bound to a mapped address. 6160 */ 6161 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6162 *error = EADDRNOTAVAIL; 6163 goto done; 6164 } 6165 6166 ipp->ipp_fields = 0; 6167 ipp->ipp_sticky_ignored = 0; 6168 6169 /* 6170 * If TPI options passed in, feed it for verification and handling 6171 */ 6172 attrs.udpattr_credset = B_FALSE; 6173 opt_present = B_FALSE; 6174 if (IPCL_IS_NONSTR(connp)) { 6175 if (msg->msg_controllen != 0) { 6176 attrs.udpattr_ipp6 = ipp; 6177 attrs.udpattr_mb = mp; 6178 6179 rw_enter(&udp->udp_rwlock, RW_WRITER); 6180 *error = process_auxiliary_options(connp, 6181 msg->msg_control, msg->msg_controllen, 6182 &attrs, &udp_opt_obj, udp_opt_set, cr); 6183 rw_exit(&udp->udp_rwlock); 6184 if (*error) 6185 goto done; 6186 ASSERT(*error == 0); 6187 opt_present = B_TRUE; 6188 } 6189 } else { 6190 if (DB_TYPE(mp) != M_DATA) { 6191 mp1 = mp->b_cont; 6192 if (((struct T_unitdata_req *) 6193 mp->b_rptr)->OPT_length != 0) { 6194 attrs.udpattr_ipp6 = ipp; 6195 attrs.udpattr_mb = mp; 6196 if (udp_unitdata_opt_process(q, mp, error, 6197 &attrs) < 0) { 6198 goto done; 6199 } 6200 ASSERT(*error == 0); 6201 opt_present = B_TRUE; 6202 } 6203 } 6204 } 6205 6206 /* 6207 * Determine whether we need to mark the mblk with the user's 6208 * credentials. 6209 * If labeled then sockfs would have already done this. 6210 */ 6211 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 6212 ire = connp->conn_ire_cache; 6213 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || (ire == NULL) || 6214 (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) || 6215 (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) { 6216 if (cr != NULL && msg_getcred(mp, NULL) == NULL) 6217 mblk_setcred(mp, cr, pid); 6218 } 6219 6220 rw_enter(&udp->udp_rwlock, RW_READER); 6221 ignore = ipp->ipp_sticky_ignored; 6222 6223 /* mp1 points to the M_DATA mblk carrying the packet */ 6224 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6225 6226 if (sin6->sin6_scope_id != 0 && 6227 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6228 /* 6229 * IPPF_SCOPE_ID is special. It's neither a sticky 6230 * option nor ancillary data. It needs to be 6231 * explicitly set in options_exists. 6232 */ 6233 option_exists |= IPPF_SCOPE_ID; 6234 } 6235 6236 /* 6237 * Compute the destination address 6238 */ 6239 ip6_dst = sin6->sin6_addr; 6240 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6241 ip6_dst = ipv6_loopback; 6242 6243 port = sin6->sin6_port; 6244 6245 /* 6246 * Cluster and TSOL notes, Cluster check: 6247 * see comments in udp_output_v4(). 6248 */ 6249 mutex_enter(&connp->conn_lock); 6250 6251 if (cl_inet_connect2 != NULL && 6252 (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) || 6253 port != udp->udp_lastdstport)) { 6254 mutex_exit(&connp->conn_lock); 6255 *error = 0; 6256 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error); 6257 if (*error != 0) { 6258 *error = EHOSTUNREACH; 6259 rw_exit(&udp->udp_rwlock); 6260 goto done; 6261 } 6262 update_lastdst = B_TRUE; 6263 mutex_enter(&connp->conn_lock); 6264 } 6265 6266 /* 6267 * If we're not going to the same destination as last time, then 6268 * recompute the label required. This is done in a separate routine to 6269 * avoid blowing up our stack here. 6270 * 6271 * TSOL Note: Since we are not in WRITER mode, UDP packets 6272 * to different destination may require different labels, 6273 * or worse, UDP packets to same IP address may require 6274 * different labels due to use of shared all-zones address. 6275 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6276 * and sticky ipp_hopoptslen are consistent for the current 6277 * destination and are updated atomically. 6278 */ 6279 if (is_system_labeled()) { 6280 /* Using UDP MLP requires SCM_UCRED from user */ 6281 if (connp->conn_mlp_type != mlptSingle && 6282 !attrs.udpattr_credset) { 6283 DTRACE_PROBE4( 6284 tx__ip__log__info__output__udp6, 6285 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6286 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6287 *error = ECONNREFUSED; 6288 rw_exit(&udp->udp_rwlock); 6289 mutex_exit(&connp->conn_lock); 6290 goto done; 6291 } 6292 /* 6293 * update label option for this UDP socket if 6294 * - the destination has changed, or 6295 * - the UDP socket is MLP 6296 */ 6297 if ((opt_present || 6298 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6299 connp->conn_mlp_type != mlptSingle) && 6300 (*error = udp_update_label_v6(q, mp, &ip6_dst, 6301 &update_lastdst)) != 0) { 6302 rw_exit(&udp->udp_rwlock); 6303 mutex_exit(&connp->conn_lock); 6304 goto done; 6305 } 6306 } 6307 6308 if (update_lastdst) { 6309 udp->udp_v6lastdst = ip6_dst; 6310 udp->udp_lastdstport = port; 6311 } 6312 6313 /* 6314 * If there's a security label here, then we ignore any options the 6315 * user may try to set. We keep the peer's label as a hidden sticky 6316 * option. We make a private copy of this label before releasing the 6317 * lock so that label is kept consistent with the destination addr. 6318 */ 6319 if (udp->udp_label_len_v6 > 0) { 6320 ignore &= ~IPPF_HOPOPTS; 6321 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6322 } 6323 6324 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6325 /* No sticky options nor ancillary data. */ 6326 mutex_exit(&connp->conn_lock); 6327 goto no_options; 6328 } 6329 6330 /* 6331 * Go through the options figuring out where each is going to 6332 * come from and build two masks. The first mask indicates if 6333 * the option exists at all. The second mask indicates if the 6334 * option is sticky or ancillary. 6335 */ 6336 if (!(ignore & IPPF_HOPOPTS)) { 6337 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6338 option_exists |= IPPF_HOPOPTS; 6339 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6340 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6341 option_exists |= IPPF_HOPOPTS; 6342 is_sticky |= IPPF_HOPOPTS; 6343 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6344 hopoptsptr = kmem_alloc( 6345 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6346 if (hopoptsptr == NULL) { 6347 *error = ENOMEM; 6348 mutex_exit(&connp->conn_lock); 6349 goto done; 6350 } 6351 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6352 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6353 hopoptslen); 6354 udp_ip_hdr_len += hopoptslen; 6355 } 6356 } 6357 mutex_exit(&connp->conn_lock); 6358 6359 if (!(ignore & IPPF_RTHDR)) { 6360 if (ipp->ipp_fields & IPPF_RTHDR) { 6361 option_exists |= IPPF_RTHDR; 6362 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6363 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6364 option_exists |= IPPF_RTHDR; 6365 is_sticky |= IPPF_RTHDR; 6366 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6367 } 6368 } 6369 6370 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6371 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6372 option_exists |= IPPF_RTDSTOPTS; 6373 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6374 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6375 option_exists |= IPPF_RTDSTOPTS; 6376 is_sticky |= IPPF_RTDSTOPTS; 6377 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6378 } 6379 } 6380 6381 if (!(ignore & IPPF_DSTOPTS)) { 6382 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6383 option_exists |= IPPF_DSTOPTS; 6384 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6385 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6386 option_exists |= IPPF_DSTOPTS; 6387 is_sticky |= IPPF_DSTOPTS; 6388 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6389 } 6390 } 6391 6392 if (!(ignore & IPPF_IFINDEX)) { 6393 if (ipp->ipp_fields & IPPF_IFINDEX) { 6394 option_exists |= IPPF_IFINDEX; 6395 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6396 option_exists |= IPPF_IFINDEX; 6397 is_sticky |= IPPF_IFINDEX; 6398 } 6399 } 6400 6401 if (!(ignore & IPPF_ADDR)) { 6402 if (ipp->ipp_fields & IPPF_ADDR) { 6403 option_exists |= IPPF_ADDR; 6404 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6405 option_exists |= IPPF_ADDR; 6406 is_sticky |= IPPF_ADDR; 6407 } 6408 } 6409 6410 if (!(ignore & IPPF_DONTFRAG)) { 6411 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6412 option_exists |= IPPF_DONTFRAG; 6413 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6414 option_exists |= IPPF_DONTFRAG; 6415 is_sticky |= IPPF_DONTFRAG; 6416 } 6417 } 6418 6419 if (!(ignore & IPPF_USE_MIN_MTU)) { 6420 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6421 option_exists |= IPPF_USE_MIN_MTU; 6422 } else if (udp->udp_sticky_ipp.ipp_fields & 6423 IPPF_USE_MIN_MTU) { 6424 option_exists |= IPPF_USE_MIN_MTU; 6425 is_sticky |= IPPF_USE_MIN_MTU; 6426 } 6427 } 6428 6429 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6430 option_exists |= IPPF_HOPLIMIT; 6431 /* IPV6_HOPLIMIT can never be sticky */ 6432 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6433 6434 if (!(ignore & IPPF_UNICAST_HOPS) && 6435 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6436 option_exists |= IPPF_UNICAST_HOPS; 6437 is_sticky |= IPPF_UNICAST_HOPS; 6438 } 6439 6440 if (!(ignore & IPPF_MULTICAST_HOPS) && 6441 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6442 option_exists |= IPPF_MULTICAST_HOPS; 6443 is_sticky |= IPPF_MULTICAST_HOPS; 6444 } 6445 6446 if (!(ignore & IPPF_TCLASS)) { 6447 if (ipp->ipp_fields & IPPF_TCLASS) { 6448 option_exists |= IPPF_TCLASS; 6449 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6450 option_exists |= IPPF_TCLASS; 6451 is_sticky |= IPPF_TCLASS; 6452 } 6453 } 6454 6455 if (!(ignore & IPPF_NEXTHOP) && 6456 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 6457 option_exists |= IPPF_NEXTHOP; 6458 is_sticky |= IPPF_NEXTHOP; 6459 } 6460 6461 no_options: 6462 6463 /* 6464 * If any options carried in the ip6i_t were specified, we 6465 * need to account for the ip6i_t in the data we'll be sending 6466 * down. 6467 */ 6468 if (option_exists & IPPF_HAS_IP6I) 6469 udp_ip_hdr_len += sizeof (ip6i_t); 6470 6471 /* check/fix buffer config, setup pointers into it */ 6472 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6473 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6474 !OK_32PTR(ip6h)) { 6475 6476 /* Try to get everything in a single mblk next time */ 6477 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6478 udp->udp_max_hdr_len = udp_ip_hdr_len; 6479 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 6480 } 6481 6482 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 6483 if (mp2 == NULL) { 6484 *error = ENOMEM; 6485 rw_exit(&udp->udp_rwlock); 6486 goto done; 6487 } 6488 mp2->b_wptr = DB_LIM(mp2); 6489 mp2->b_cont = mp1; 6490 mp1 = mp2; 6491 if (DB_TYPE(mp) != M_DATA) 6492 mp->b_cont = mp1; 6493 else 6494 mp = mp1; 6495 6496 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6497 } 6498 mp1->b_rptr = (unsigned char *)ip6h; 6499 ip6i = (ip6i_t *)ip6h; 6500 6501 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp) 6502 if (option_exists & IPPF_HAS_IP6I) { 6503 ip6h = (ip6_t *)&ip6i[1]; 6504 ip6i->ip6i_flags = 0; 6505 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6506 6507 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6508 if (option_exists & IPPF_SCOPE_ID) { 6509 ip6i->ip6i_flags |= IP6I_IFINDEX; 6510 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6511 } else if (option_exists & IPPF_IFINDEX) { 6512 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6513 ASSERT(tipp->ipp_ifindex != 0); 6514 ip6i->ip6i_flags |= IP6I_IFINDEX; 6515 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6516 } 6517 6518 if (option_exists & IPPF_ADDR) { 6519 /* 6520 * Enable per-packet source address verification if 6521 * IPV6_PKTINFO specified the source address. 6522 * ip6_src is set in the transport's _wput function. 6523 */ 6524 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6525 } 6526 6527 if (option_exists & IPPF_DONTFRAG) { 6528 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6529 } 6530 6531 if (option_exists & IPPF_USE_MIN_MTU) { 6532 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6533 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6534 } 6535 6536 if (option_exists & IPPF_NEXTHOP) { 6537 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6538 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6539 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6540 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6541 } 6542 6543 /* 6544 * tell IP this is an ip6i_t private header 6545 */ 6546 ip6i->ip6i_nxt = IPPROTO_RAW; 6547 } 6548 6549 /* Initialize IPv6 header */ 6550 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6551 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6552 6553 /* Set the hoplimit of the outgoing packet. */ 6554 if (option_exists & IPPF_HOPLIMIT) { 6555 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 6556 ip6h->ip6_hops = ipp->ipp_hoplimit; 6557 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6558 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6559 ip6h->ip6_hops = udp->udp_multicast_ttl; 6560 if (option_exists & IPPF_MULTICAST_HOPS) 6561 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6562 } else { 6563 ip6h->ip6_hops = udp->udp_ttl; 6564 if (option_exists & IPPF_UNICAST_HOPS) 6565 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6566 } 6567 6568 if (option_exists & IPPF_ADDR) { 6569 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6570 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6571 ip6h->ip6_src = tipp->ipp_addr; 6572 } else { 6573 /* 6574 * The source address was not set using IPV6_PKTINFO. 6575 * First look at the bound source. 6576 * If unspecified fallback to __sin6_src_id. 6577 */ 6578 ip6h->ip6_src = udp->udp_v6src; 6579 if (sin6->__sin6_src_id != 0 && 6580 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6581 ip_srcid_find_id(sin6->__sin6_src_id, 6582 &ip6h->ip6_src, connp->conn_zoneid, 6583 us->us_netstack); 6584 } 6585 } 6586 6587 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6588 cp = (uint8_t *)&ip6h[1]; 6589 6590 /* 6591 * Here's where we have to start stringing together 6592 * any extension headers in the right order: 6593 * Hop-by-hop, destination, routing, and final destination opts. 6594 */ 6595 if (option_exists & IPPF_HOPOPTS) { 6596 /* Hop-by-hop options */ 6597 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6598 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6599 if (hopoptslen == 0) { 6600 hopoptsptr = tipp->ipp_hopopts; 6601 hopoptslen = tipp->ipp_hopoptslen; 6602 is_ancillary = B_TRUE; 6603 } 6604 6605 *nxthdr_ptr = IPPROTO_HOPOPTS; 6606 nxthdr_ptr = &hbh->ip6h_nxt; 6607 6608 bcopy(hopoptsptr, cp, hopoptslen); 6609 cp += hopoptslen; 6610 6611 if (hopoptsptr != NULL && !is_ancillary) { 6612 kmem_free(hopoptsptr, hopoptslen); 6613 hopoptsptr = NULL; 6614 hopoptslen = 0; 6615 } 6616 } 6617 /* 6618 * En-route destination options 6619 * Only do them if there's a routing header as well 6620 */ 6621 if (option_exists & IPPF_RTDSTOPTS) { 6622 ip6_dest_t *dst = (ip6_dest_t *)cp; 6623 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6624 6625 *nxthdr_ptr = IPPROTO_DSTOPTS; 6626 nxthdr_ptr = &dst->ip6d_nxt; 6627 6628 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6629 cp += tipp->ipp_rtdstoptslen; 6630 } 6631 /* 6632 * Routing header next 6633 */ 6634 if (option_exists & IPPF_RTHDR) { 6635 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6636 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6637 6638 *nxthdr_ptr = IPPROTO_ROUTING; 6639 nxthdr_ptr = &rt->ip6r_nxt; 6640 6641 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6642 cp += tipp->ipp_rthdrlen; 6643 } 6644 /* 6645 * Do ultimate destination options 6646 */ 6647 if (option_exists & IPPF_DSTOPTS) { 6648 ip6_dest_t *dest = (ip6_dest_t *)cp; 6649 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6650 6651 *nxthdr_ptr = IPPROTO_DSTOPTS; 6652 nxthdr_ptr = &dest->ip6d_nxt; 6653 6654 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6655 cp += tipp->ipp_dstoptslen; 6656 } 6657 /* 6658 * Now set the last header pointer to the proto passed in 6659 */ 6660 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6661 *nxthdr_ptr = IPPROTO_UDP; 6662 6663 /* Update UDP header */ 6664 udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); 6665 udph->uha_dst_port = sin6->sin6_port; 6666 udph->uha_src_port = udp->udp_port; 6667 6668 /* 6669 * Copy in the destination address 6670 */ 6671 ip6h->ip6_dst = ip6_dst; 6672 6673 ip6h->ip6_vcf = 6674 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 6675 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6676 6677 if (option_exists & IPPF_TCLASS) { 6678 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6679 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6680 tipp->ipp_tclass); 6681 } 6682 rw_exit(&udp->udp_rwlock); 6683 6684 if (option_exists & IPPF_RTHDR) { 6685 ip6_rthdr_t *rth; 6686 6687 /* 6688 * Perform any processing needed for source routing. 6689 * We know that all extension headers will be in the same mblk 6690 * as the IPv6 header. 6691 */ 6692 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6693 if (rth != NULL && rth->ip6r_segleft != 0) { 6694 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6695 /* 6696 * Drop packet - only support Type 0 routing. 6697 * Notify the application as well. 6698 */ 6699 *error = EPROTO; 6700 goto done; 6701 } 6702 6703 /* 6704 * rth->ip6r_len is twice the number of 6705 * addresses in the header. Thus it must be even. 6706 */ 6707 if (rth->ip6r_len & 0x1) { 6708 *error = EPROTO; 6709 goto done; 6710 } 6711 /* 6712 * Shuffle the routing header and ip6_dst 6713 * addresses, and get the checksum difference 6714 * between the first hop (in ip6_dst) and 6715 * the destination (in the last routing hdr entry). 6716 */ 6717 csum = ip_massage_options_v6(ip6h, rth, 6718 us->us_netstack); 6719 /* 6720 * Verify that the first hop isn't a mapped address. 6721 * Routers along the path need to do this verification 6722 * for subsequent hops. 6723 */ 6724 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6725 *error = EADDRNOTAVAIL; 6726 goto done; 6727 } 6728 6729 cp += (rth->ip6r_len + 1)*8; 6730 } 6731 } 6732 6733 /* count up length of UDP packet */ 6734 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6735 if ((mp2 = mp1->b_cont) != NULL) { 6736 do { 6737 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6738 ip_len += (uint32_t)MBLKL(mp2); 6739 } while ((mp2 = mp2->b_cont) != NULL); 6740 } 6741 6742 /* 6743 * If the size of the packet is greater than the maximum allowed by 6744 * ip, return an error. Passing this down could cause panics because 6745 * the size will have wrapped and be inconsistent with the msg size. 6746 */ 6747 if (ip_len > IP_MAXPACKET) { 6748 *error = EMSGSIZE; 6749 goto done; 6750 } 6751 6752 /* Store the UDP length. Subtract length of extension hdrs */ 6753 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 6754 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 6755 6756 /* 6757 * We make it easy for IP to include our pseudo header 6758 * by putting our length in uh_checksum, modified (if 6759 * we have a routing header) by the checksum difference 6760 * between the ultimate destination and first hop addresses. 6761 * Note: UDP over IPv6 must always checksum the packet. 6762 */ 6763 csum += udph->uha_length; 6764 csum = (csum & 0xFFFF) + (csum >> 16); 6765 udph->uha_checksum = (uint16_t)csum; 6766 6767 #ifdef _LITTLE_ENDIAN 6768 ip_len = htons(ip_len); 6769 #endif 6770 ip6h->ip6_plen = ip_len; 6771 6772 if (DB_TYPE(mp) != M_DATA) { 6773 cred_t *cr; 6774 pid_t cpid; 6775 6776 /* Move any cred from the T_UNITDATA_REQ to the packet */ 6777 cr = msg_extractcred(mp, &cpid); 6778 if (cr != NULL) { 6779 if (mp1->b_datap->db_credp != NULL) 6780 crfree(mp1->b_datap->db_credp); 6781 mp1->b_datap->db_credp = cr; 6782 mp1->b_datap->db_cpid = cpid; 6783 } 6784 6785 ASSERT(mp != mp1); 6786 freeb(mp); 6787 } 6788 6789 /* mp has been consumed and we'll return success */ 6790 ASSERT(*error == 0); 6791 mp = NULL; 6792 6793 /* We're done. Pass the packet to IP */ 6794 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 6795 ip_output_v6(connp, mp1, q, IP_WPUT); 6796 6797 done: 6798 if (sth_wroff != 0) { 6799 (void) proto_set_tx_wroff(RD(q), connp, 6800 udp->udp_max_hdr_len + us->us_wroff_extra); 6801 } 6802 if (hopoptsptr != NULL && !is_ancillary) { 6803 kmem_free(hopoptsptr, hopoptslen); 6804 hopoptsptr = NULL; 6805 } 6806 if (*error != 0) { 6807 ASSERT(mp != NULL); 6808 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6809 } 6810 return (mp); 6811 } 6812 6813 6814 static int 6815 i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6816 { 6817 sin_t *sin = (sin_t *)sa; 6818 sin6_t *sin6 = (sin6_t *)sa; 6819 6820 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6821 6822 if (udp->udp_state != TS_DATA_XFER) 6823 return (ENOTCONN); 6824 6825 switch (udp->udp_family) { 6826 case AF_INET: 6827 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6828 6829 if (*salenp < sizeof (sin_t)) 6830 return (EINVAL); 6831 6832 *salenp = sizeof (sin_t); 6833 *sin = sin_null; 6834 sin->sin_family = AF_INET; 6835 sin->sin_port = udp->udp_dstport; 6836 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 6837 break; 6838 6839 case AF_INET6: 6840 if (*salenp < sizeof (sin6_t)) 6841 return (EINVAL); 6842 6843 *salenp = sizeof (sin6_t); 6844 *sin6 = sin6_null; 6845 sin6->sin6_family = AF_INET6; 6846 sin6->sin6_port = udp->udp_dstport; 6847 sin6->sin6_addr = udp->udp_v6dst; 6848 sin6->sin6_flowinfo = udp->udp_flowinfo; 6849 break; 6850 } 6851 6852 return (0); 6853 } 6854 6855 static int 6856 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6857 { 6858 sin_t *sin = (sin_t *)sa; 6859 sin6_t *sin6 = (sin6_t *)sa; 6860 6861 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6862 6863 switch (udp->udp_family) { 6864 case AF_INET: 6865 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6866 6867 if (*salenp < sizeof (sin_t)) 6868 return (EINVAL); 6869 6870 *salenp = sizeof (sin_t); 6871 *sin = sin_null; 6872 sin->sin_family = AF_INET; 6873 sin->sin_port = udp->udp_port; 6874 6875 /* 6876 * If udp_v6src is unspecified, we might be bound to broadcast 6877 * / multicast. Use udp_bound_v6src as local address instead 6878 * (that could also still be unspecified). 6879 */ 6880 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 6881 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6882 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 6883 } else { 6884 sin->sin_addr.s_addr = 6885 V4_PART_OF_V6(udp->udp_bound_v6src); 6886 } 6887 break; 6888 6889 case AF_INET6: 6890 if (*salenp < sizeof (sin6_t)) 6891 return (EINVAL); 6892 6893 *salenp = sizeof (sin6_t); 6894 *sin6 = sin6_null; 6895 sin6->sin6_family = AF_INET6; 6896 sin6->sin6_port = udp->udp_port; 6897 sin6->sin6_flowinfo = udp->udp_flowinfo; 6898 6899 /* 6900 * If udp_v6src is unspecified, we might be bound to broadcast 6901 * / multicast. Use udp_bound_v6src as local address instead 6902 * (that could also still be unspecified). 6903 */ 6904 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) 6905 sin6->sin6_addr = udp->udp_v6src; 6906 else 6907 sin6->sin6_addr = udp->udp_bound_v6src; 6908 break; 6909 } 6910 6911 return (0); 6912 } 6913 6914 /* 6915 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 6916 */ 6917 static void 6918 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 6919 { 6920 void *data; 6921 mblk_t *datamp = mp->b_cont; 6922 udp_t *udp = Q_TO_UDP(q); 6923 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 6924 6925 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 6926 cmdp->cb_error = EPROTO; 6927 qreply(q, mp); 6928 return; 6929 } 6930 data = datamp->b_rptr; 6931 6932 rw_enter(&udp->udp_rwlock, RW_READER); 6933 switch (cmdp->cb_cmd) { 6934 case TI_GETPEERNAME: 6935 cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len); 6936 break; 6937 case TI_GETMYNAME: 6938 cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len); 6939 break; 6940 default: 6941 cmdp->cb_error = EINVAL; 6942 break; 6943 } 6944 rw_exit(&udp->udp_rwlock); 6945 6946 qreply(q, mp); 6947 } 6948 6949 static void 6950 udp_disable_direct_sockfs(udp_t *udp) 6951 { 6952 udp->udp_issocket = B_FALSE; 6953 if (udp->udp_direct_sockfs) { 6954 /* 6955 * Disable read-side synchronous stream interface and 6956 * drain any queued data. 6957 */ 6958 udp_rcv_drain(udp->udp_connp->conn_rq, udp, B_FALSE); 6959 ASSERT(!udp->udp_direct_sockfs); 6960 UDP_STAT(udp->udp_us, udp_sock_fallback); 6961 } 6962 } 6963 6964 static void 6965 udp_wput_other(queue_t *q, mblk_t *mp) 6966 { 6967 uchar_t *rptr = mp->b_rptr; 6968 struct datab *db; 6969 struct iocblk *iocp; 6970 cred_t *cr; 6971 conn_t *connp = Q_TO_CONN(q); 6972 udp_t *udp = connp->conn_udp; 6973 udp_stack_t *us; 6974 6975 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 6976 "udp_wput_other_start: q %p", q); 6977 6978 us = udp->udp_us; 6979 db = mp->b_datap; 6980 6981 switch (db->db_type) { 6982 case M_CMD: 6983 udp_wput_cmdblk(q, mp); 6984 return; 6985 6986 case M_PROTO: 6987 case M_PCPROTO: 6988 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 6989 freemsg(mp); 6990 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6991 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 6992 return; 6993 } 6994 switch (((t_primp_t)rptr)->type) { 6995 case T_ADDR_REQ: 6996 udp_addr_req(q, mp); 6997 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6998 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 6999 return; 7000 case O_T_BIND_REQ: 7001 case T_BIND_REQ: 7002 udp_tpi_bind(q, mp); 7003 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7004 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 7005 return; 7006 case T_CONN_REQ: 7007 udp_tpi_connect(q, mp); 7008 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7009 "udp_wput_other_end: q %p (%S)", q, "connreq"); 7010 return; 7011 case T_CAPABILITY_REQ: 7012 udp_capability_req(q, mp); 7013 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7014 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 7015 return; 7016 case T_INFO_REQ: 7017 udp_info_req(q, mp); 7018 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7019 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 7020 return; 7021 case T_UNITDATA_REQ: 7022 /* 7023 * If a T_UNITDATA_REQ gets here, the address must 7024 * be bad. Valid T_UNITDATA_REQs are handled 7025 * in udp_wput. 7026 */ 7027 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 7028 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7029 "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); 7030 return; 7031 case T_UNBIND_REQ: 7032 udp_tpi_unbind(q, mp); 7033 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7034 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 7035 return; 7036 case T_SVR4_OPTMGMT_REQ: 7037 /* 7038 * All Solaris components should pass a db_credp 7039 * for this TPI message, hence we ASSERT. 7040 * But in case there is some other M_PROTO that looks 7041 * like a TPI message sent by some other kernel 7042 * component, we check and return an error. 7043 */ 7044 cr = msg_getcred(mp, NULL); 7045 ASSERT(cr != NULL); 7046 if (cr == NULL) { 7047 udp_err_ack(q, mp, TSYSERR, EINVAL); 7048 return; 7049 } 7050 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 7051 cr)) { 7052 (void) svr4_optcom_req(q, 7053 mp, cr, &udp_opt_obj, B_TRUE); 7054 } 7055 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7056 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 7057 return; 7058 7059 case T_OPTMGMT_REQ: 7060 /* 7061 * All Solaris components should pass a db_credp 7062 * for this TPI message, hence we ASSERT. 7063 * But in case there is some other M_PROTO that looks 7064 * like a TPI message sent by some other kernel 7065 * component, we check and return an error. 7066 */ 7067 cr = msg_getcred(mp, NULL); 7068 ASSERT(cr != NULL); 7069 if (cr == NULL) { 7070 udp_err_ack(q, mp, TSYSERR, EINVAL); 7071 return; 7072 } 7073 (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); 7074 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7075 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 7076 return; 7077 7078 case T_DISCON_REQ: 7079 udp_tpi_disconnect(q, mp); 7080 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7081 "udp_wput_other_end: q %p (%S)", q, "disconreq"); 7082 return; 7083 7084 /* The following TPI message is not supported by udp. */ 7085 case O_T_CONN_RES: 7086 case T_CONN_RES: 7087 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7088 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7089 "udp_wput_other_end: q %p (%S)", q, 7090 "connres/disconreq"); 7091 return; 7092 7093 /* The following 3 TPI messages are illegal for udp. */ 7094 case T_DATA_REQ: 7095 case T_EXDATA_REQ: 7096 case T_ORDREL_REQ: 7097 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7098 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7099 "udp_wput_other_end: q %p (%S)", q, 7100 "data/exdata/ordrel"); 7101 return; 7102 default: 7103 break; 7104 } 7105 break; 7106 case M_FLUSH: 7107 if (*rptr & FLUSHW) 7108 flushq(q, FLUSHDATA); 7109 break; 7110 case M_IOCTL: 7111 iocp = (struct iocblk *)mp->b_rptr; 7112 switch (iocp->ioc_cmd) { 7113 case TI_GETPEERNAME: 7114 if (udp->udp_state != TS_DATA_XFER) { 7115 /* 7116 * If a default destination address has not 7117 * been associated with the stream, then we 7118 * don't know the peer's name. 7119 */ 7120 iocp->ioc_error = ENOTCONN; 7121 iocp->ioc_count = 0; 7122 mp->b_datap->db_type = M_IOCACK; 7123 qreply(q, mp); 7124 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7125 "udp_wput_other_end: q %p (%S)", q, 7126 "getpeername"); 7127 return; 7128 } 7129 /* FALLTHRU */ 7130 case TI_GETMYNAME: { 7131 /* 7132 * For TI_GETPEERNAME and TI_GETMYNAME, we first 7133 * need to copyin the user's strbuf structure. 7134 * Processing will continue in the M_IOCDATA case 7135 * below. 7136 */ 7137 mi_copyin(q, mp, NULL, 7138 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 7139 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7140 "udp_wput_other_end: q %p (%S)", q, "getmyname"); 7141 return; 7142 } 7143 case ND_SET: 7144 /* nd_getset performs the necessary checking */ 7145 case ND_GET: 7146 if (nd_getset(q, us->us_nd, mp)) { 7147 qreply(q, mp); 7148 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7149 "udp_wput_other_end: q %p (%S)", q, "get"); 7150 return; 7151 } 7152 break; 7153 case _SIOCSOCKFALLBACK: 7154 /* 7155 * Either sockmod is about to be popped and the 7156 * socket would now be treated as a plain stream, 7157 * or a module is about to be pushed so we could 7158 * no longer use read-side synchronous stream. 7159 * Drain any queued data and disable direct sockfs 7160 * interface from now on. 7161 */ 7162 if (!udp->udp_issocket) { 7163 DB_TYPE(mp) = M_IOCNAK; 7164 iocp->ioc_error = EINVAL; 7165 } else { 7166 udp_disable_direct_sockfs(udp); 7167 7168 DB_TYPE(mp) = M_IOCACK; 7169 iocp->ioc_error = 0; 7170 } 7171 iocp->ioc_count = 0; 7172 iocp->ioc_rval = 0; 7173 qreply(q, mp); 7174 return; 7175 default: 7176 break; 7177 } 7178 break; 7179 case M_IOCDATA: 7180 udp_wput_iocdata(q, mp); 7181 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7182 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7183 return; 7184 default: 7185 /* Unrecognized messages are passed through without change. */ 7186 break; 7187 } 7188 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7189 "udp_wput_other_end: q %p (%S)", q, "end"); 7190 ip_output(connp, mp, q, IP_WPUT); 7191 } 7192 7193 /* 7194 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7195 * messages. 7196 */ 7197 static void 7198 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7199 { 7200 mblk_t *mp1; 7201 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 7202 STRUCT_HANDLE(strbuf, sb); 7203 udp_t *udp = Q_TO_UDP(q); 7204 int error; 7205 uint_t addrlen; 7206 7207 /* Make sure it is one of ours. */ 7208 switch (iocp->ioc_cmd) { 7209 case TI_GETMYNAME: 7210 case TI_GETPEERNAME: 7211 break; 7212 default: 7213 ip_output(udp->udp_connp, mp, q, IP_WPUT); 7214 return; 7215 } 7216 7217 switch (mi_copy_state(q, mp, &mp1)) { 7218 case -1: 7219 return; 7220 case MI_COPY_CASE(MI_COPY_IN, 1): 7221 break; 7222 case MI_COPY_CASE(MI_COPY_OUT, 1): 7223 /* 7224 * The address has been copied out, so now 7225 * copyout the strbuf. 7226 */ 7227 mi_copyout(q, mp); 7228 return; 7229 case MI_COPY_CASE(MI_COPY_OUT, 2): 7230 /* 7231 * The address and strbuf have been copied out. 7232 * We're done, so just acknowledge the original 7233 * M_IOCTL. 7234 */ 7235 mi_copy_done(q, mp, 0); 7236 return; 7237 default: 7238 /* 7239 * Something strange has happened, so acknowledge 7240 * the original M_IOCTL with an EPROTO error. 7241 */ 7242 mi_copy_done(q, mp, EPROTO); 7243 return; 7244 } 7245 7246 /* 7247 * Now we have the strbuf structure for TI_GETMYNAME 7248 * and TI_GETPEERNAME. Next we copyout the requested 7249 * address and then we'll copyout the strbuf. 7250 */ 7251 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 7252 addrlen = udp->udp_family == AF_INET ? sizeof (sin_t) : sizeof (sin6_t); 7253 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7254 mi_copy_done(q, mp, EINVAL); 7255 return; 7256 } 7257 7258 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7259 7260 if (mp1 == NULL) 7261 return; 7262 7263 rw_enter(&udp->udp_rwlock, RW_READER); 7264 switch (iocp->ioc_cmd) { 7265 case TI_GETMYNAME: 7266 error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen); 7267 break; 7268 case TI_GETPEERNAME: 7269 error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen); 7270 break; 7271 } 7272 rw_exit(&udp->udp_rwlock); 7273 7274 if (error != 0) { 7275 mi_copy_done(q, mp, error); 7276 } else { 7277 mp1->b_wptr += addrlen; 7278 STRUCT_FSET(sb, len, addrlen); 7279 7280 /* Copy out the address */ 7281 mi_copyout(q, mp); 7282 } 7283 } 7284 7285 static int 7286 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7287 udpattrs_t *udpattrs) 7288 { 7289 struct T_unitdata_req *udreqp; 7290 int is_absreq_failure; 7291 cred_t *cr; 7292 7293 ASSERT(((t_primp_t)mp->b_rptr)->type); 7294 7295 /* 7296 * All Solaris components should pass a db_credp 7297 * for this TPI message, hence we should ASSERT. 7298 * However, RPC (svc_clts_ksend) does this odd thing where it 7299 * passes the options from a T_UNITDATA_IND unchanged in a 7300 * T_UNITDATA_REQ. While that is the right thing to do for 7301 * some options, SCM_UCRED being the key one, this also makes it 7302 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 7303 */ 7304 cr = msg_getcred(mp, NULL); 7305 if (cr == NULL) { 7306 cr = Q_TO_CONN(q)->conn_cred; 7307 } 7308 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7309 7310 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 7311 udreqp->OPT_offset, cr, &udp_opt_obj, 7312 udpattrs, &is_absreq_failure); 7313 7314 if (*errorp != 0) { 7315 /* 7316 * Note: No special action needed in this 7317 * module for "is_absreq_failure" 7318 */ 7319 return (-1); /* failure */ 7320 } 7321 ASSERT(is_absreq_failure == 0); 7322 return (0); /* success */ 7323 } 7324 7325 void 7326 udp_ddi_g_init(void) 7327 { 7328 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7329 udp_opt_obj.odb_opt_arr_cnt); 7330 7331 /* 7332 * We want to be informed each time a stack is created or 7333 * destroyed in the kernel, so we can maintain the 7334 * set of udp_stack_t's. 7335 */ 7336 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 7337 } 7338 7339 void 7340 udp_ddi_g_destroy(void) 7341 { 7342 netstack_unregister(NS_UDP); 7343 } 7344 7345 #define INET_NAME "ip" 7346 7347 /* 7348 * Initialize the UDP stack instance. 7349 */ 7350 static void * 7351 udp_stack_init(netstackid_t stackid, netstack_t *ns) 7352 { 7353 udp_stack_t *us; 7354 udpparam_t *pa; 7355 int i; 7356 int error = 0; 7357 major_t major; 7358 7359 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 7360 us->us_netstack = ns; 7361 7362 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 7363 us->us_epriv_ports[0] = 2049; 7364 us->us_epriv_ports[1] = 4045; 7365 7366 /* 7367 * The smallest anonymous port in the priviledged port range which UDP 7368 * looks for free port. Use in the option UDP_ANONPRIVBIND. 7369 */ 7370 us->us_min_anonpriv_port = 512; 7371 7372 us->us_bind_fanout_size = udp_bind_fanout_size; 7373 7374 /* Roundup variable that might have been modified in /etc/system */ 7375 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 7376 /* Not a power of two. Round up to nearest power of two */ 7377 for (i = 0; i < 31; i++) { 7378 if (us->us_bind_fanout_size < (1 << i)) 7379 break; 7380 } 7381 us->us_bind_fanout_size = 1 << i; 7382 } 7383 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 7384 sizeof (udp_fanout_t), KM_SLEEP); 7385 for (i = 0; i < us->us_bind_fanout_size; i++) { 7386 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7387 NULL); 7388 } 7389 7390 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 7391 7392 us->us_param_arr = pa; 7393 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 7394 7395 (void) udp_param_register(&us->us_nd, 7396 us->us_param_arr, A_CNT(udp_param_arr)); 7397 7398 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 7399 us->us_mibkp = udp_kstat_init(stackid); 7400 7401 major = mod_name_to_major(INET_NAME); 7402 error = ldi_ident_from_major(major, &us->us_ldi_ident); 7403 ASSERT(error == 0); 7404 return (us); 7405 } 7406 7407 /* 7408 * Free the UDP stack instance. 7409 */ 7410 static void 7411 udp_stack_fini(netstackid_t stackid, void *arg) 7412 { 7413 udp_stack_t *us = (udp_stack_t *)arg; 7414 int i; 7415 7416 for (i = 0; i < us->us_bind_fanout_size; i++) { 7417 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 7418 } 7419 7420 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 7421 sizeof (udp_fanout_t)); 7422 7423 us->us_bind_fanout = NULL; 7424 7425 nd_free(&us->us_nd); 7426 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 7427 us->us_param_arr = NULL; 7428 7429 udp_kstat_fini(stackid, us->us_mibkp); 7430 us->us_mibkp = NULL; 7431 7432 udp_kstat2_fini(stackid, us->us_kstat); 7433 us->us_kstat = NULL; 7434 bzero(&us->us_statistics, sizeof (us->us_statistics)); 7435 7436 ldi_ident_release(us->us_ldi_ident); 7437 kmem_free(us, sizeof (*us)); 7438 } 7439 7440 static void * 7441 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 7442 { 7443 kstat_t *ksp; 7444 7445 udp_stat_t template = { 7446 { "udp_ip_send", KSTAT_DATA_UINT64 }, 7447 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 7448 { "udp_ire_null", KSTAT_DATA_UINT64 }, 7449 { "udp_drain", KSTAT_DATA_UINT64 }, 7450 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 7451 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 7452 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 7453 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7454 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7455 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7456 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7457 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7458 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7459 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7460 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7461 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7462 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7463 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7464 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7465 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7466 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7467 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 7468 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7469 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7470 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 7471 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7472 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7473 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7474 #ifdef DEBUG 7475 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7476 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7477 #endif 7478 }; 7479 7480 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7481 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 7482 KSTAT_FLAG_VIRTUAL, stackid); 7483 7484 if (ksp == NULL) 7485 return (NULL); 7486 7487 bcopy(&template, us_statisticsp, sizeof (template)); 7488 ksp->ks_data = (void *)us_statisticsp; 7489 ksp->ks_private = (void *)(uintptr_t)stackid; 7490 7491 kstat_install(ksp); 7492 return (ksp); 7493 } 7494 7495 static void 7496 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 7497 { 7498 if (ksp != NULL) { 7499 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7500 kstat_delete_netstack(ksp, stackid); 7501 } 7502 } 7503 7504 static void * 7505 udp_kstat_init(netstackid_t stackid) 7506 { 7507 kstat_t *ksp; 7508 7509 udp_named_kstat_t template = { 7510 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 7511 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7512 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 7513 { "entrySize", KSTAT_DATA_INT32, 0 }, 7514 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7515 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7516 }; 7517 7518 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 7519 KSTAT_TYPE_NAMED, 7520 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 7521 7522 if (ksp == NULL || ksp->ks_data == NULL) 7523 return (NULL); 7524 7525 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7526 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7527 7528 bcopy(&template, ksp->ks_data, sizeof (template)); 7529 ksp->ks_update = udp_kstat_update; 7530 ksp->ks_private = (void *)(uintptr_t)stackid; 7531 7532 kstat_install(ksp); 7533 return (ksp); 7534 } 7535 7536 static void 7537 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 7538 { 7539 if (ksp != NULL) { 7540 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7541 kstat_delete_netstack(ksp, stackid); 7542 } 7543 } 7544 7545 static int 7546 udp_kstat_update(kstat_t *kp, int rw) 7547 { 7548 udp_named_kstat_t *udpkp; 7549 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 7550 netstack_t *ns; 7551 udp_stack_t *us; 7552 7553 if ((kp == NULL) || (kp->ks_data == NULL)) 7554 return (EIO); 7555 7556 if (rw == KSTAT_WRITE) 7557 return (EACCES); 7558 7559 ns = netstack_find_by_stackid(stackid); 7560 if (ns == NULL) 7561 return (-1); 7562 us = ns->netstack_udp; 7563 if (us == NULL) { 7564 netstack_rele(ns); 7565 return (-1); 7566 } 7567 udpkp = (udp_named_kstat_t *)kp->ks_data; 7568 7569 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 7570 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 7571 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 7572 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 7573 netstack_rele(ns); 7574 return (0); 7575 } 7576 7577 /* 7578 * Read-side synchronous stream info entry point, called as a 7579 * result of handling certain STREAMS ioctl operations. 7580 */ 7581 static int 7582 udp_rinfop(queue_t *q, infod_t *dp) 7583 { 7584 mblk_t *mp; 7585 uint_t cmd = dp->d_cmd; 7586 int res = 0; 7587 int error = 0; 7588 udp_t *udp = Q_TO_UDP(q); 7589 struct stdata *stp = STREAM(q); 7590 7591 mutex_enter(&udp->udp_drain_lock); 7592 /* If shutdown on read has happened, return nothing */ 7593 mutex_enter(&stp->sd_lock); 7594 if (stp->sd_flag & STREOF) { 7595 mutex_exit(&stp->sd_lock); 7596 goto done; 7597 } 7598 mutex_exit(&stp->sd_lock); 7599 7600 if ((mp = udp->udp_rcv_list_head) == NULL) 7601 goto done; 7602 7603 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7604 7605 if (cmd & INFOD_COUNT) { 7606 /* 7607 * Return the number of messages. 7608 */ 7609 dp->d_count += udp->udp_rcv_msgcnt; 7610 res |= INFOD_COUNT; 7611 } 7612 if (cmd & INFOD_BYTES) { 7613 /* 7614 * Return size of all data messages. 7615 */ 7616 dp->d_bytes += udp->udp_rcv_cnt; 7617 res |= INFOD_BYTES; 7618 } 7619 if (cmd & INFOD_FIRSTBYTES) { 7620 /* 7621 * Return size of first data message. 7622 */ 7623 dp->d_bytes = msgdsize(mp); 7624 res |= INFOD_FIRSTBYTES; 7625 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7626 } 7627 if (cmd & INFOD_COPYOUT) { 7628 mblk_t *mp1 = mp->b_cont; 7629 int n; 7630 /* 7631 * Return data contents of first message. 7632 */ 7633 ASSERT(DB_TYPE(mp1) == M_DATA); 7634 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7635 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7636 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7637 UIO_READ, dp->d_uiop)) != 0) { 7638 goto done; 7639 } 7640 mp1 = mp1->b_cont; 7641 } 7642 res |= INFOD_COPYOUT; 7643 dp->d_cmd &= ~INFOD_COPYOUT; 7644 } 7645 done: 7646 mutex_exit(&udp->udp_drain_lock); 7647 7648 dp->d_res |= res; 7649 7650 return (error); 7651 } 7652 7653 /* 7654 * Read-side synchronous stream entry point. This is called as a result 7655 * of recv/read operation done at sockfs, and is guaranteed to execute 7656 * outside of the interrupt thread context. It returns a single datagram 7657 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7658 */ 7659 static int 7660 udp_rrw(queue_t *q, struiod_t *dp) 7661 { 7662 mblk_t *mp; 7663 udp_t *udp = Q_TO_UDP(q); 7664 udp_stack_t *us = udp->udp_us; 7665 7666 /* 7667 * Dequeue datagram from the head of the list and return 7668 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7669 * set/cleared depending on whether or not there's data 7670 * remaining in the list. 7671 */ 7672 mutex_enter(&udp->udp_drain_lock); 7673 if (!udp->udp_direct_sockfs) { 7674 mutex_exit(&udp->udp_drain_lock); 7675 UDP_STAT(us, udp_rrw_busy); 7676 return (EBUSY); 7677 } 7678 if ((mp = udp->udp_rcv_list_head) != NULL) { 7679 uint_t size = msgdsize(mp); 7680 7681 /* Last datagram in the list? */ 7682 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 7683 udp->udp_rcv_list_tail = NULL; 7684 mp->b_next = NULL; 7685 7686 udp->udp_rcv_cnt -= size; 7687 udp->udp_rcv_msgcnt--; 7688 UDP_STAT(us, udp_rrw_msgcnt); 7689 7690 /* No longer flow-controlling? */ 7691 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 7692 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 7693 udp->udp_drain_qfull = B_FALSE; 7694 } 7695 if (udp->udp_rcv_list_head == NULL) { 7696 /* 7697 * Either we just dequeued the last datagram or 7698 * we get here from sockfs and have nothing to 7699 * return; in this case clear RSLEEP. 7700 */ 7701 ASSERT(udp->udp_rcv_cnt == 0); 7702 ASSERT(udp->udp_rcv_msgcnt == 0); 7703 ASSERT(udp->udp_rcv_list_tail == NULL); 7704 STR_WAKEUP_CLEAR(STREAM(q)); 7705 } else { 7706 /* 7707 * More data follows; we need udp_rrw() to be 7708 * called in future to pick up the rest. 7709 */ 7710 STR_WAKEUP_SET(STREAM(q)); 7711 } 7712 mutex_exit(&udp->udp_drain_lock); 7713 dp->d_mp = mp; 7714 return (0); 7715 } 7716 7717 /* 7718 * Enqueue a completely-built T_UNITDATA_IND message into the receive 7719 * list; this is typically executed within the interrupt thread context 7720 * and so we do things as quickly as possible. 7721 */ 7722 static void 7723 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 7724 { 7725 ASSERT(q == RD(q)); 7726 ASSERT(pkt_len == msgdsize(mp)); 7727 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 7728 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 7729 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 7730 7731 mutex_enter(&udp->udp_drain_lock); 7732 /* 7733 * Wake up and signal the receiving app; it is okay to do this 7734 * before enqueueing the mp because we are holding the drain lock. 7735 * One of the advantages of synchronous stream is the ability for 7736 * us to find out when the application performs a read on the 7737 * socket by way of udp_rrw() entry point being called. We need 7738 * to generate SIGPOLL/SIGIO for each received data in the case 7739 * of asynchronous socket just as in the strrput() case. However, 7740 * we only wake the application up when necessary, i.e. during the 7741 * first enqueue. When udp_rrw() is called, we send up a single 7742 * datagram upstream and call STR_WAKEUP_SET() again when there 7743 * are still data remaining in our receive queue. 7744 */ 7745 STR_WAKEUP_SENDSIG(STREAM(q), udp->udp_rcv_list_head); 7746 if (udp->udp_rcv_list_head == NULL) 7747 udp->udp_rcv_list_head = mp; 7748 else 7749 udp->udp_rcv_list_tail->b_next = mp; 7750 udp->udp_rcv_list_tail = mp; 7751 udp->udp_rcv_cnt += pkt_len; 7752 udp->udp_rcv_msgcnt++; 7753 7754 /* Need to flow-control? */ 7755 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 7756 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 7757 udp->udp_drain_qfull = B_TRUE; 7758 7759 mutex_exit(&udp->udp_drain_lock); 7760 } 7761 7762 /* 7763 * Drain the contents of receive list to the module upstream; we do 7764 * this during close or when we fallback to the slow mode due to 7765 * sockmod being popped or a module being pushed on top of us. 7766 */ 7767 static void 7768 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 7769 { 7770 mblk_t *mp; 7771 udp_stack_t *us = udp->udp_us; 7772 7773 mutex_enter(&udp->udp_drain_lock); 7774 /* 7775 * There is no race with a concurrent udp_input() sending 7776 * up packets using putnext() after we have cleared the 7777 * udp_direct_sockfs flag but before we have completed 7778 * sending up the packets in udp_rcv_list, since we are 7779 * either a writer or we have quiesced the conn. 7780 */ 7781 udp->udp_direct_sockfs = B_FALSE; 7782 mutex_exit(&udp->udp_drain_lock); 7783 7784 if (udp->udp_rcv_list_head != NULL) 7785 UDP_STAT(us, udp_drain); 7786 7787 /* 7788 * Send up everything via putnext(); note here that we 7789 * don't need the udp_drain_lock to protect us since 7790 * nothing can enter udp_rrw() and that we currently 7791 * have exclusive access to this udp. 7792 */ 7793 while ((mp = udp->udp_rcv_list_head) != NULL) { 7794 udp->udp_rcv_list_head = mp->b_next; 7795 mp->b_next = NULL; 7796 udp->udp_rcv_cnt -= msgdsize(mp); 7797 udp->udp_rcv_msgcnt--; 7798 if (closing) { 7799 freemsg(mp); 7800 } else { 7801 ASSERT(q == RD(q)); 7802 putnext(q, mp); 7803 } 7804 } 7805 ASSERT(udp->udp_rcv_cnt == 0); 7806 ASSERT(udp->udp_rcv_msgcnt == 0); 7807 ASSERT(udp->udp_rcv_list_head == NULL); 7808 udp->udp_rcv_list_tail = NULL; 7809 udp->udp_drain_qfull = B_FALSE; 7810 } 7811 7812 static size_t 7813 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7814 { 7815 udp_stack_t *us = udp->udp_us; 7816 7817 /* We add a bit of extra buffering */ 7818 size += size >> 1; 7819 if (size > us->us_max_buf) 7820 size = us->us_max_buf; 7821 7822 udp->udp_rcv_hiwat = size; 7823 return (size); 7824 } 7825 7826 /* 7827 * For the lower queue so that UDP can be a dummy mux. 7828 * Nobody should be sending 7829 * packets up this stream 7830 */ 7831 static void 7832 udp_lrput(queue_t *q, mblk_t *mp) 7833 { 7834 mblk_t *mp1; 7835 7836 switch (mp->b_datap->db_type) { 7837 case M_FLUSH: 7838 /* Turn around */ 7839 if (*mp->b_rptr & FLUSHW) { 7840 *mp->b_rptr &= ~FLUSHR; 7841 qreply(q, mp); 7842 return; 7843 } 7844 break; 7845 } 7846 /* Could receive messages that passed through ar_rput */ 7847 for (mp1 = mp; mp1; mp1 = mp1->b_cont) 7848 mp1->b_prev = mp1->b_next = NULL; 7849 freemsg(mp); 7850 } 7851 7852 /* 7853 * For the lower queue so that UDP can be a dummy mux. 7854 * Nobody should be sending packets down this stream. 7855 */ 7856 /* ARGSUSED */ 7857 void 7858 udp_lwput(queue_t *q, mblk_t *mp) 7859 { 7860 freemsg(mp); 7861 } 7862 7863 /* 7864 * Below routines for UDP socket module. 7865 */ 7866 7867 static conn_t * 7868 udp_do_open(cred_t *credp, boolean_t isv6, int flags) 7869 { 7870 udp_t *udp; 7871 conn_t *connp; 7872 zoneid_t zoneid; 7873 netstack_t *ns; 7874 udp_stack_t *us; 7875 7876 ns = netstack_find_by_cred(credp); 7877 ASSERT(ns != NULL); 7878 us = ns->netstack_udp; 7879 ASSERT(us != NULL); 7880 7881 /* 7882 * For exclusive stacks we set the zoneid to zero 7883 * to make UDP operate as if in the global zone. 7884 */ 7885 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 7886 zoneid = GLOBAL_ZONEID; 7887 else 7888 zoneid = crgetzoneid(credp); 7889 7890 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 7891 7892 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 7893 if (connp == NULL) { 7894 netstack_rele(ns); 7895 return (NULL); 7896 } 7897 udp = connp->conn_udp; 7898 7899 /* 7900 * ipcl_conn_create did a netstack_hold. Undo the hold that was 7901 * done by netstack_find_by_cred() 7902 */ 7903 netstack_rele(ns); 7904 7905 rw_enter(&udp->udp_rwlock, RW_WRITER); 7906 ASSERT(connp->conn_ulp == IPPROTO_UDP); 7907 ASSERT(connp->conn_udp == udp); 7908 ASSERT(udp->udp_connp == connp); 7909 7910 /* Set the initial state of the stream and the privilege status. */ 7911 udp->udp_state = TS_UNBND; 7912 if (isv6) { 7913 udp->udp_family = AF_INET6; 7914 udp->udp_ipversion = IPV6_VERSION; 7915 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 7916 udp->udp_ttl = us->us_ipv6_hoplimit; 7917 connp->conn_af_isv6 = B_TRUE; 7918 connp->conn_flags |= IPCL_ISV6; 7919 } else { 7920 udp->udp_family = AF_INET; 7921 udp->udp_ipversion = IPV4_VERSION; 7922 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 7923 udp->udp_ttl = us->us_ipv4_ttl; 7924 connp->conn_af_isv6 = B_FALSE; 7925 connp->conn_flags &= ~IPCL_ISV6; 7926 } 7927 7928 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 7929 udp->udp_pending_op = -1; 7930 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 7931 connp->conn_zoneid = zoneid; 7932 7933 udp->udp_open_time = lbolt64; 7934 udp->udp_open_pid = curproc->p_pid; 7935 7936 /* 7937 * If the caller has the process-wide flag set, then default to MAC 7938 * exempt mode. This allows read-down to unlabeled hosts. 7939 */ 7940 if (getpflags(NET_MAC_AWARE, credp) != 0) 7941 connp->conn_mac_exempt = B_TRUE; 7942 7943 connp->conn_ulp_labeled = is_system_labeled(); 7944 7945 udp->udp_us = us; 7946 7947 connp->conn_recv = udp_input; 7948 crhold(credp); 7949 connp->conn_cred = credp; 7950 7951 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 7952 7953 rw_exit(&udp->udp_rwlock); 7954 7955 return (connp); 7956 } 7957 7958 /* ARGSUSED */ 7959 sock_lower_handle_t 7960 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 7961 uint_t *smodep, int *errorp, int flags, cred_t *credp) 7962 { 7963 udp_t *udp = NULL; 7964 udp_stack_t *us; 7965 conn_t *connp; 7966 boolean_t isv6; 7967 7968 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 7969 (proto != 0 && proto != IPPROTO_UDP)) { 7970 *errorp = EPROTONOSUPPORT; 7971 return (NULL); 7972 } 7973 7974 if (family == AF_INET6) 7975 isv6 = B_TRUE; 7976 else 7977 isv6 = B_FALSE; 7978 7979 connp = udp_do_open(credp, isv6, flags); 7980 if (connp == NULL) { 7981 *errorp = ENOMEM; 7982 return (NULL); 7983 } 7984 7985 udp = connp->conn_udp; 7986 ASSERT(udp != NULL); 7987 us = udp->udp_us; 7988 ASSERT(us != NULL); 7989 7990 connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET; 7991 7992 /* Set flow control */ 7993 rw_enter(&udp->udp_rwlock, RW_WRITER); 7994 (void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat); 7995 udp->udp_rcv_disply_hiwat = us->us_recv_hiwat; 7996 udp->udp_rcv_lowat = udp_mod_info.mi_lowat; 7997 udp->udp_xmit_hiwat = us->us_xmit_hiwat; 7998 udp->udp_xmit_lowat = us->us_xmit_lowat; 7999 8000 if (udp->udp_family == AF_INET6) { 8001 /* Build initial header template for transmit */ 8002 if ((*errorp = udp_build_hdrs(udp)) != 0) { 8003 rw_exit(&udp->udp_rwlock); 8004 ipcl_conn_destroy(connp); 8005 return (NULL); 8006 } 8007 } 8008 rw_exit(&udp->udp_rwlock); 8009 8010 connp->conn_flow_cntrld = B_FALSE; 8011 8012 ASSERT(us->us_ldi_ident != NULL); 8013 8014 if ((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) { 8015 ip1dbg(("udp_create: create of IP helper stream failed\n")); 8016 udp_do_close(connp); 8017 return (NULL); 8018 } 8019 8020 /* Set the send flow control */ 8021 connp->conn_wq->q_hiwat = us->us_xmit_hiwat; 8022 connp->conn_wq->q_lowat = us->us_xmit_lowat; 8023 8024 mutex_enter(&connp->conn_lock); 8025 connp->conn_state_flags &= ~CONN_INCIPIENT; 8026 mutex_exit(&connp->conn_lock); 8027 8028 *errorp = 0; 8029 *smodep = SM_ATOMIC; 8030 *sock_downcalls = &sock_udp_downcalls; 8031 return ((sock_lower_handle_t)connp); 8032 } 8033 8034 /* ARGSUSED */ 8035 void 8036 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 8037 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 8038 { 8039 conn_t *connp = (conn_t *)proto_handle; 8040 udp_t *udp = connp->conn_udp; 8041 udp_stack_t *us = udp->udp_us; 8042 struct sock_proto_props sopp; 8043 8044 /* All Solaris components should pass a cred for this operation. */ 8045 ASSERT(cr != NULL); 8046 8047 connp->conn_upcalls = sock_upcalls; 8048 connp->conn_upper_handle = sock_handle; 8049 8050 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | 8051 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 8052 sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 8053 sopp.sopp_maxblk = INFPSZ; 8054 sopp.sopp_rxhiwat = udp->udp_rcv_hiwat; 8055 sopp.sopp_maxaddrlen = sizeof (sin6_t); 8056 sopp.sopp_maxpsz = 8057 (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 8058 UDP_MAXPACKET_IPV6; 8059 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 : 8060 udp_mod_info.mi_minpsz; 8061 8062 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 8063 &sopp); 8064 } 8065 8066 static void 8067 udp_do_close(conn_t *connp) 8068 { 8069 udp_t *udp; 8070 8071 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 8072 udp = connp->conn_udp; 8073 8074 udp_quiesce_conn(connp); 8075 ip_quiesce_conn(connp); 8076 8077 if (!IPCL_IS_NONSTR(connp)) { 8078 /* 8079 * Disable read-side synchronous stream 8080 * interface and drain any queued data. 8081 */ 8082 ASSERT(connp->conn_wq != NULL); 8083 udp_rcv_drain(connp->conn_wq, udp, B_TRUE); 8084 ASSERT(!udp->udp_direct_sockfs); 8085 8086 ASSERT(connp->conn_rq != NULL); 8087 qprocsoff(connp->conn_rq); 8088 } 8089 8090 ASSERT(udp->udp_rcv_cnt == 0); 8091 ASSERT(udp->udp_rcv_msgcnt == 0); 8092 ASSERT(udp->udp_rcv_list_head == NULL); 8093 ASSERT(udp->udp_rcv_list_tail == NULL); 8094 8095 udp_close_free(connp); 8096 8097 /* 8098 * Now we are truly single threaded on this stream, and can 8099 * delete the things hanging off the connp, and finally the connp. 8100 * We removed this connp from the fanout list, it cannot be 8101 * accessed thru the fanouts, and we already waited for the 8102 * conn_ref to drop to 0. We are already in close, so 8103 * there cannot be any other thread from the top. qprocsoff 8104 * has completed, and service has completed or won't run in 8105 * future. 8106 */ 8107 ASSERT(connp->conn_ref == 1); 8108 if (!IPCL_IS_NONSTR(connp)) { 8109 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 8110 } else { 8111 ip_free_helper_stream(connp); 8112 } 8113 8114 connp->conn_ref--; 8115 ipcl_conn_destroy(connp); 8116 } 8117 8118 /* ARGSUSED */ 8119 int 8120 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 8121 { 8122 conn_t *connp = (conn_t *)proto_handle; 8123 8124 /* All Solaris components should pass a cred for this operation. */ 8125 ASSERT(cr != NULL); 8126 8127 udp_do_close(connp); 8128 return (0); 8129 } 8130 8131 static int 8132 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 8133 boolean_t bind_to_req_port_only) 8134 { 8135 sin_t *sin; 8136 sin6_t *sin6; 8137 sin6_t sin6addr; 8138 in_port_t port; /* Host byte order */ 8139 in_port_t requested_port; /* Host byte order */ 8140 int count; 8141 in6_addr_t v6src; 8142 int loopmax; 8143 udp_fanout_t *udpf; 8144 in_port_t lport; /* Network byte order */ 8145 zoneid_t zoneid; 8146 udp_t *udp; 8147 boolean_t is_inaddr_any; 8148 mlp_type_t addrtype, mlptype; 8149 udp_stack_t *us; 8150 int error = 0; 8151 mblk_t *mp = NULL; 8152 8153 udp = connp->conn_udp; 8154 us = udp->udp_us; 8155 8156 if (udp->udp_state != TS_UNBND) { 8157 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8158 "udp_bind: bad state, %u", udp->udp_state); 8159 return (-TOUTSTATE); 8160 } 8161 8162 switch (len) { 8163 case 0: 8164 if (udp->udp_family == AF_INET) { 8165 sin = (sin_t *)&sin6addr; 8166 *sin = sin_null; 8167 sin->sin_family = AF_INET; 8168 sin->sin_addr.s_addr = INADDR_ANY; 8169 udp->udp_ipversion = IPV4_VERSION; 8170 } else { 8171 ASSERT(udp->udp_family == AF_INET6); 8172 sin6 = (sin6_t *)&sin6addr; 8173 *sin6 = sin6_null; 8174 sin6->sin6_family = AF_INET6; 8175 V6_SET_ZERO(sin6->sin6_addr); 8176 udp->udp_ipversion = IPV6_VERSION; 8177 } 8178 port = 0; 8179 break; 8180 8181 case sizeof (sin_t): /* Complete IPv4 address */ 8182 sin = (sin_t *)sa; 8183 8184 if (sin == NULL || !OK_32PTR((char *)sin)) 8185 return (EINVAL); 8186 8187 if (udp->udp_family != AF_INET || 8188 sin->sin_family != AF_INET) { 8189 return (EAFNOSUPPORT); 8190 } 8191 port = ntohs(sin->sin_port); 8192 break; 8193 8194 case sizeof (sin6_t): /* complete IPv6 address */ 8195 sin6 = (sin6_t *)sa; 8196 8197 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 8198 return (EINVAL); 8199 8200 if (udp->udp_family != AF_INET6 || 8201 sin6->sin6_family != AF_INET6) { 8202 return (EAFNOSUPPORT); 8203 } 8204 port = ntohs(sin6->sin6_port); 8205 break; 8206 8207 default: /* Invalid request */ 8208 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8209 "udp_bind: bad ADDR_length length %u", len); 8210 return (-TBADADDR); 8211 } 8212 8213 requested_port = port; 8214 8215 if (requested_port == 0 || !bind_to_req_port_only) 8216 bind_to_req_port_only = B_FALSE; 8217 else /* T_BIND_REQ and requested_port != 0 */ 8218 bind_to_req_port_only = B_TRUE; 8219 8220 if (requested_port == 0) { 8221 /* 8222 * If the application passed in zero for the port number, it 8223 * doesn't care which port number we bind to. Get one in the 8224 * valid range. 8225 */ 8226 if (udp->udp_anon_priv_bind) { 8227 port = udp_get_next_priv_port(udp); 8228 } else { 8229 port = udp_update_next_port(udp, 8230 us->us_next_port_to_try, B_TRUE); 8231 } 8232 } else { 8233 /* 8234 * If the port is in the well-known privileged range, 8235 * make sure the caller was privileged. 8236 */ 8237 int i; 8238 boolean_t priv = B_FALSE; 8239 8240 if (port < us->us_smallest_nonpriv_port) { 8241 priv = B_TRUE; 8242 } else { 8243 for (i = 0; i < us->us_num_epriv_ports; i++) { 8244 if (port == us->us_epriv_ports[i]) { 8245 priv = B_TRUE; 8246 break; 8247 } 8248 } 8249 } 8250 8251 if (priv) { 8252 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 8253 return (-TACCES); 8254 } 8255 } 8256 8257 if (port == 0) 8258 return (-TNOADDR); 8259 8260 /* 8261 * The state must be TS_UNBND. TPI mandates that users must send 8262 * TPI primitives only 1 at a time and wait for the response before 8263 * sending the next primitive. 8264 */ 8265 rw_enter(&udp->udp_rwlock, RW_WRITER); 8266 if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) { 8267 rw_exit(&udp->udp_rwlock); 8268 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8269 "udp_bind: bad state, %u", udp->udp_state); 8270 return (-TOUTSTATE); 8271 } 8272 /* XXX how to remove the T_BIND_REQ? Should set it before calling */ 8273 udp->udp_pending_op = T_BIND_REQ; 8274 /* 8275 * Copy the source address into our udp structure. This address 8276 * may still be zero; if so, IP will fill in the correct address 8277 * each time an outbound packet is passed to it. Since the udp is 8278 * not yet in the bind hash list, we don't grab the uf_lock to 8279 * change udp_ipversion 8280 */ 8281 if (udp->udp_family == AF_INET) { 8282 ASSERT(sin != NULL); 8283 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8284 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8285 udp->udp_ip_snd_options_len; 8286 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 8287 } else { 8288 ASSERT(sin6 != NULL); 8289 v6src = sin6->sin6_addr; 8290 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 8291 /* 8292 * no need to hold the uf_lock to set the udp_ipversion 8293 * since we are not yet in the fanout list 8294 */ 8295 udp->udp_ipversion = IPV4_VERSION; 8296 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 8297 UDPH_SIZE + udp->udp_ip_snd_options_len; 8298 } else { 8299 udp->udp_ipversion = IPV6_VERSION; 8300 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8301 } 8302 } 8303 8304 /* 8305 * If udp_reuseaddr is not set, then we have to make sure that 8306 * the IP address and port number the application requested 8307 * (or we selected for the application) is not being used by 8308 * another stream. If another stream is already using the 8309 * requested IP address and port, the behavior depends on 8310 * "bind_to_req_port_only". If set the bind fails; otherwise we 8311 * search for any an unused port to bind to the the stream. 8312 * 8313 * As per the BSD semantics, as modified by the Deering multicast 8314 * changes, if udp_reuseaddr is set, then we allow multiple binds 8315 * to the same port independent of the local IP address. 8316 * 8317 * This is slightly different than in SunOS 4.X which did not 8318 * support IP multicast. Note that the change implemented by the 8319 * Deering multicast code effects all binds - not only binding 8320 * to IP multicast addresses. 8321 * 8322 * Note that when binding to port zero we ignore SO_REUSEADDR in 8323 * order to guarantee a unique port. 8324 */ 8325 8326 count = 0; 8327 if (udp->udp_anon_priv_bind) { 8328 /* 8329 * loopmax = (IPPORT_RESERVED-1) - 8330 * us->us_min_anonpriv_port + 1 8331 */ 8332 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 8333 } else { 8334 loopmax = us->us_largest_anon_port - 8335 us->us_smallest_anon_port + 1; 8336 } 8337 8338 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 8339 zoneid = connp->conn_zoneid; 8340 8341 for (;;) { 8342 udp_t *udp1; 8343 boolean_t found_exclbind = B_FALSE; 8344 8345 /* 8346 * Walk through the list of udp streams bound to 8347 * requested port with the same IP address. 8348 */ 8349 lport = htons(port); 8350 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 8351 us->us_bind_fanout_size)]; 8352 mutex_enter(&udpf->uf_lock); 8353 for (udp1 = udpf->uf_udp; udp1 != NULL; 8354 udp1 = udp1->udp_bind_hash) { 8355 if (lport != udp1->udp_port) 8356 continue; 8357 8358 /* 8359 * On a labeled system, we must treat bindings to ports 8360 * on shared IP addresses by sockets with MAC exemption 8361 * privilege as being in all zones, as there's 8362 * otherwise no way to identify the right receiver. 8363 */ 8364 if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) || 8365 IPCL_ZONE_MATCH(connp, 8366 udp1->udp_connp->conn_zoneid)) && 8367 !connp->conn_mac_exempt && \ 8368 !udp1->udp_connp->conn_mac_exempt) 8369 continue; 8370 8371 /* 8372 * If UDP_EXCLBIND is set for either the bound or 8373 * binding endpoint, the semantics of bind 8374 * is changed according to the following chart. 8375 * 8376 * spec = specified address (v4 or v6) 8377 * unspec = unspecified address (v4 or v6) 8378 * A = specified addresses are different for endpoints 8379 * 8380 * bound bind to allowed? 8381 * ------------------------------------- 8382 * unspec unspec no 8383 * unspec spec no 8384 * spec unspec no 8385 * spec spec yes if A 8386 * 8387 * For labeled systems, SO_MAC_EXEMPT behaves the same 8388 * as UDP_EXCLBIND, except that zoneid is ignored. 8389 */ 8390 if (udp1->udp_exclbind || udp->udp_exclbind || 8391 udp1->udp_connp->conn_mac_exempt || 8392 connp->conn_mac_exempt) { 8393 if (V6_OR_V4_INADDR_ANY( 8394 udp1->udp_bound_v6src) || 8395 is_inaddr_any || 8396 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 8397 &v6src)) { 8398 found_exclbind = B_TRUE; 8399 break; 8400 } 8401 continue; 8402 } 8403 8404 /* 8405 * Check ipversion to allow IPv4 and IPv6 sockets to 8406 * have disjoint port number spaces. 8407 */ 8408 if (udp->udp_ipversion != udp1->udp_ipversion) { 8409 8410 /* 8411 * On the first time through the loop, if the 8412 * the user intentionally specified a 8413 * particular port number, then ignore any 8414 * bindings of the other protocol that may 8415 * conflict. This allows the user to bind IPv6 8416 * alone and get both v4 and v6, or bind both 8417 * both and get each seperately. On subsequent 8418 * times through the loop, we're checking a 8419 * port that we chose (not the user) and thus 8420 * we do not allow casual duplicate bindings. 8421 */ 8422 if (count == 0 && requested_port != 0) 8423 continue; 8424 } 8425 8426 /* 8427 * No difference depending on SO_REUSEADDR. 8428 * 8429 * If existing port is bound to a 8430 * non-wildcard IP address and 8431 * the requesting stream is bound to 8432 * a distinct different IP addresses 8433 * (non-wildcard, also), keep going. 8434 */ 8435 if (!is_inaddr_any && 8436 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 8437 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 8438 &v6src)) { 8439 continue; 8440 } 8441 break; 8442 } 8443 8444 if (!found_exclbind && 8445 (udp->udp_reuseaddr && requested_port != 0)) { 8446 break; 8447 } 8448 8449 if (udp1 == NULL) { 8450 /* 8451 * No other stream has this IP address 8452 * and port number. We can use it. 8453 */ 8454 break; 8455 } 8456 mutex_exit(&udpf->uf_lock); 8457 if (bind_to_req_port_only) { 8458 /* 8459 * We get here only when requested port 8460 * is bound (and only first of the for() 8461 * loop iteration). 8462 * 8463 * The semantics of this bind request 8464 * require it to fail so we return from 8465 * the routine (and exit the loop). 8466 * 8467 */ 8468 udp->udp_pending_op = -1; 8469 rw_exit(&udp->udp_rwlock); 8470 return (-TADDRBUSY); 8471 } 8472 8473 if (udp->udp_anon_priv_bind) { 8474 port = udp_get_next_priv_port(udp); 8475 } else { 8476 if ((count == 0) && (requested_port != 0)) { 8477 /* 8478 * If the application wants us to find 8479 * a port, get one to start with. Set 8480 * requested_port to 0, so that we will 8481 * update us->us_next_port_to_try below. 8482 */ 8483 port = udp_update_next_port(udp, 8484 us->us_next_port_to_try, B_TRUE); 8485 requested_port = 0; 8486 } else { 8487 port = udp_update_next_port(udp, port + 1, 8488 B_FALSE); 8489 } 8490 } 8491 8492 if (port == 0 || ++count >= loopmax) { 8493 /* 8494 * We've tried every possible port number and 8495 * there are none available, so send an error 8496 * to the user. 8497 */ 8498 udp->udp_pending_op = -1; 8499 rw_exit(&udp->udp_rwlock); 8500 return (-TNOADDR); 8501 } 8502 } 8503 8504 /* 8505 * Copy the source address into our udp structure. This address 8506 * may still be zero; if so, ip will fill in the correct address 8507 * each time an outbound packet is passed to it. 8508 * If we are binding to a broadcast or multicast address then 8509 * udp_post_ip_bind_connect will clear the source address 8510 * when udp_do_bind success. 8511 */ 8512 udp->udp_v6src = udp->udp_bound_v6src = v6src; 8513 udp->udp_port = lport; 8514 /* 8515 * Now reset the the next anonymous port if the application requested 8516 * an anonymous port, or we handed out the next anonymous port. 8517 */ 8518 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 8519 us->us_next_port_to_try = port + 1; 8520 } 8521 8522 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */ 8523 if (udp->udp_family == AF_INET) { 8524 sin->sin_port = udp->udp_port; 8525 } else { 8526 sin6->sin6_port = udp->udp_port; 8527 /* Rebuild the header template */ 8528 error = udp_build_hdrs(udp); 8529 if (error != 0) { 8530 udp->udp_pending_op = -1; 8531 rw_exit(&udp->udp_rwlock); 8532 mutex_exit(&udpf->uf_lock); 8533 return (error); 8534 } 8535 } 8536 udp->udp_state = TS_IDLE; 8537 udp_bind_hash_insert(udpf, udp); 8538 mutex_exit(&udpf->uf_lock); 8539 rw_exit(&udp->udp_rwlock); 8540 8541 if (cl_inet_bind) { 8542 /* 8543 * Running in cluster mode - register bind information 8544 */ 8545 if (udp->udp_ipversion == IPV4_VERSION) { 8546 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8547 IPPROTO_UDP, AF_INET, 8548 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8549 (in_port_t)udp->udp_port, NULL); 8550 } else { 8551 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8552 IPPROTO_UDP, AF_INET6, 8553 (uint8_t *)&(udp->udp_v6src), 8554 (in_port_t)udp->udp_port, NULL); 8555 } 8556 } 8557 8558 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 8559 if (is_system_labeled() && (!connp->conn_anon_port || 8560 connp->conn_anon_mlp)) { 8561 uint16_t mlpport; 8562 zone_t *zone; 8563 8564 zone = crgetzone(cr); 8565 connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth : 8566 mlptSingle; 8567 addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION, 8568 &v6src, us->us_netstack->netstack_ip); 8569 if (addrtype == mlptSingle) { 8570 rw_enter(&udp->udp_rwlock, RW_WRITER); 8571 udp->udp_pending_op = -1; 8572 rw_exit(&udp->udp_rwlock); 8573 connp->conn_anon_port = B_FALSE; 8574 connp->conn_mlp_type = mlptSingle; 8575 return (-TNOADDR); 8576 } 8577 mlpport = connp->conn_anon_port ? PMAPPORT : port; 8578 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 8579 addrtype); 8580 if (mlptype != mlptSingle && 8581 (connp->conn_mlp_type == mlptSingle || 8582 secpolicy_net_bindmlp(cr) != 0)) { 8583 if (udp->udp_debug) { 8584 (void) strlog(UDP_MOD_ID, 0, 1, 8585 SL_ERROR|SL_TRACE, 8586 "udp_bind: no priv for multilevel port %d", 8587 mlpport); 8588 } 8589 rw_enter(&udp->udp_rwlock, RW_WRITER); 8590 udp->udp_pending_op = -1; 8591 rw_exit(&udp->udp_rwlock); 8592 connp->conn_anon_port = B_FALSE; 8593 connp->conn_mlp_type = mlptSingle; 8594 return (-TACCES); 8595 } 8596 8597 /* 8598 * If we're specifically binding a shared IP address and the 8599 * port is MLP on shared addresses, then check to see if this 8600 * zone actually owns the MLP. Reject if not. 8601 */ 8602 if (mlptype == mlptShared && addrtype == mlptShared) { 8603 /* 8604 * No need to handle exclusive-stack zones since 8605 * ALL_ZONES only applies to the shared stack. 8606 */ 8607 zoneid_t mlpzone; 8608 8609 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 8610 htons(mlpport)); 8611 if (connp->conn_zoneid != mlpzone) { 8612 if (udp->udp_debug) { 8613 (void) strlog(UDP_MOD_ID, 0, 1, 8614 SL_ERROR|SL_TRACE, 8615 "udp_bind: attempt to bind port " 8616 "%d on shared addr in zone %d " 8617 "(should be %d)", 8618 mlpport, connp->conn_zoneid, 8619 mlpzone); 8620 } 8621 rw_enter(&udp->udp_rwlock, RW_WRITER); 8622 udp->udp_pending_op = -1; 8623 rw_exit(&udp->udp_rwlock); 8624 connp->conn_anon_port = B_FALSE; 8625 connp->conn_mlp_type = mlptSingle; 8626 return (-TACCES); 8627 } 8628 } 8629 if (connp->conn_anon_port) { 8630 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 8631 port, B_TRUE); 8632 if (error != 0) { 8633 if (udp->udp_debug) { 8634 (void) strlog(UDP_MOD_ID, 0, 1, 8635 SL_ERROR|SL_TRACE, 8636 "udp_bind: cannot establish anon " 8637 "MLP for port %d", port); 8638 } 8639 rw_enter(&udp->udp_rwlock, RW_WRITER); 8640 udp->udp_pending_op = -1; 8641 rw_exit(&udp->udp_rwlock); 8642 connp->conn_anon_port = B_FALSE; 8643 connp->conn_mlp_type = mlptSingle; 8644 return (-TACCES); 8645 } 8646 } 8647 connp->conn_mlp_type = mlptype; 8648 } 8649 8650 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8651 /* 8652 * Append a request for an IRE if udp_v6src not 8653 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 8654 */ 8655 mp = allocb(sizeof (ire_t), BPRI_HI); 8656 if (!mp) { 8657 rw_enter(&udp->udp_rwlock, RW_WRITER); 8658 udp->udp_pending_op = -1; 8659 rw_exit(&udp->udp_rwlock); 8660 return (ENOMEM); 8661 } 8662 mp->b_wptr += sizeof (ire_t); 8663 mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8664 } 8665 if (udp->udp_family == AF_INET6) { 8666 ASSERT(udp->udp_connp->conn_af_isv6); 8667 error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP, 8668 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 8669 } else { 8670 ASSERT(!udp->udp_connp->conn_af_isv6); 8671 error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP, 8672 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, 8673 B_TRUE); 8674 } 8675 8676 (void) udp_post_ip_bind_connect(udp, mp, error); 8677 return (error); 8678 } 8679 8680 int 8681 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 8682 socklen_t len, cred_t *cr) 8683 { 8684 int error; 8685 conn_t *connp; 8686 8687 /* All Solaris components should pass a cred for this operation. */ 8688 ASSERT(cr != NULL); 8689 8690 connp = (conn_t *)proto_handle; 8691 8692 if (sa == NULL) 8693 error = udp_do_unbind(connp); 8694 else 8695 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 8696 8697 if (error < 0) { 8698 if (error == -TOUTSTATE) 8699 error = EINVAL; 8700 else 8701 error = proto_tlitosyserr(-error); 8702 } 8703 8704 return (error); 8705 } 8706 8707 static int 8708 udp_implicit_bind(conn_t *connp, cred_t *cr) 8709 { 8710 int error; 8711 8712 /* All Solaris components should pass a cred for this operation. */ 8713 ASSERT(cr != NULL); 8714 8715 error = udp_do_bind(connp, NULL, 0, cr, B_FALSE); 8716 return ((error < 0) ? proto_tlitosyserr(-error) : error); 8717 } 8718 8719 /* 8720 * This routine removes a port number association from a stream. It 8721 * is called by udp_unbind and udp_tpi_unbind. 8722 */ 8723 static int 8724 udp_do_unbind(conn_t *connp) 8725 { 8726 udp_t *udp = connp->conn_udp; 8727 udp_fanout_t *udpf; 8728 udp_stack_t *us = udp->udp_us; 8729 8730 if (cl_inet_unbind != NULL) { 8731 /* 8732 * Running in cluster mode - register unbind information 8733 */ 8734 if (udp->udp_ipversion == IPV4_VERSION) { 8735 (*cl_inet_unbind)( 8736 connp->conn_netstack->netstack_stackid, 8737 IPPROTO_UDP, AF_INET, 8738 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8739 (in_port_t)udp->udp_port, NULL); 8740 } else { 8741 (*cl_inet_unbind)( 8742 connp->conn_netstack->netstack_stackid, 8743 IPPROTO_UDP, AF_INET6, 8744 (uint8_t *)&(udp->udp_v6src), 8745 (in_port_t)udp->udp_port, NULL); 8746 } 8747 } 8748 8749 rw_enter(&udp->udp_rwlock, RW_WRITER); 8750 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8751 rw_exit(&udp->udp_rwlock); 8752 return (-TOUTSTATE); 8753 } 8754 udp->udp_pending_op = T_UNBIND_REQ; 8755 rw_exit(&udp->udp_rwlock); 8756 8757 /* 8758 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 8759 * and therefore ip_unbind must never return NULL. 8760 */ 8761 ip_unbind(connp); 8762 8763 /* 8764 * Once we're unbound from IP, the pending operation may be cleared 8765 * here. 8766 */ 8767 rw_enter(&udp->udp_rwlock, RW_WRITER); 8768 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8769 us->us_bind_fanout_size)]; 8770 8771 mutex_enter(&udpf->uf_lock); 8772 udp_bind_hash_remove(udp, B_TRUE); 8773 V6_SET_ZERO(udp->udp_v6src); 8774 V6_SET_ZERO(udp->udp_bound_v6src); 8775 udp->udp_port = 0; 8776 mutex_exit(&udpf->uf_lock); 8777 8778 udp->udp_pending_op = -1; 8779 udp->udp_state = TS_UNBND; 8780 if (udp->udp_family == AF_INET6) 8781 (void) udp_build_hdrs(udp); 8782 rw_exit(&udp->udp_rwlock); 8783 8784 return (0); 8785 } 8786 8787 static int 8788 udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error) 8789 { 8790 ire_t *ire; 8791 udp_fanout_t *udpf; 8792 udp_stack_t *us = udp->udp_us; 8793 8794 ASSERT(udp->udp_pending_op != -1); 8795 rw_enter(&udp->udp_rwlock, RW_WRITER); 8796 if (error == 0) { 8797 /* For udp_do_connect() success */ 8798 /* udp_do_bind() success will do nothing in here */ 8799 /* 8800 * If a broadcast/multicast address was bound, set 8801 * the source address to 0. 8802 * This ensures no datagrams with broadcast address 8803 * as source address are emitted (which would violate 8804 * RFC1122 - Hosts requirements) 8805 * 8806 * Note that when connecting the returned IRE is 8807 * for the destination address and we only perform 8808 * the broadcast check for the source address (it 8809 * is OK to connect to a broadcast/multicast address.) 8810 */ 8811 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 8812 ire = (ire_t *)ire_mp->b_rptr; 8813 8814 /* 8815 * Note: we get IRE_BROADCAST for IPv6 to "mark" a 8816 * multicast local address. 8817 */ 8818 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8819 us->us_bind_fanout_size)]; 8820 if (ire->ire_type == IRE_BROADCAST && 8821 udp->udp_state != TS_DATA_XFER) { 8822 ASSERT(udp->udp_pending_op == T_BIND_REQ || 8823 udp->udp_pending_op == O_T_BIND_REQ); 8824 /* 8825 * This was just a local bind to a broadcast 8826 * addr. 8827 */ 8828 mutex_enter(&udpf->uf_lock); 8829 V6_SET_ZERO(udp->udp_v6src); 8830 mutex_exit(&udpf->uf_lock); 8831 if (udp->udp_family == AF_INET6) 8832 (void) udp_build_hdrs(udp); 8833 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8834 if (udp->udp_family == AF_INET6) 8835 (void) udp_build_hdrs(udp); 8836 } 8837 } 8838 } else { 8839 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8840 us->us_bind_fanout_size)]; 8841 mutex_enter(&udpf->uf_lock); 8842 8843 if (udp->udp_state == TS_DATA_XFER) { 8844 /* Connect failed */ 8845 /* Revert back to the bound source */ 8846 udp->udp_v6src = udp->udp_bound_v6src; 8847 udp->udp_state = TS_IDLE; 8848 } else { 8849 /* For udp_do_bind() failed */ 8850 V6_SET_ZERO(udp->udp_v6src); 8851 V6_SET_ZERO(udp->udp_bound_v6src); 8852 udp->udp_state = TS_UNBND; 8853 udp_bind_hash_remove(udp, B_TRUE); 8854 udp->udp_port = 0; 8855 } 8856 mutex_exit(&udpf->uf_lock); 8857 if (udp->udp_family == AF_INET6) 8858 (void) udp_build_hdrs(udp); 8859 } 8860 udp->udp_pending_op = -1; 8861 rw_exit(&udp->udp_rwlock); 8862 if (ire_mp != NULL) 8863 freeb(ire_mp); 8864 return (error); 8865 } 8866 8867 /* 8868 * It associates a default destination address with the stream. 8869 */ 8870 static int 8871 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 8872 cred_t *cr) 8873 { 8874 sin6_t *sin6; 8875 sin_t *sin; 8876 in6_addr_t v6dst; 8877 ipaddr_t v4dst; 8878 uint16_t dstport; 8879 uint32_t flowinfo; 8880 mblk_t *ire_mp; 8881 udp_fanout_t *udpf; 8882 udp_t *udp, *udp1; 8883 ushort_t ipversion; 8884 udp_stack_t *us; 8885 int error; 8886 8887 udp = connp->conn_udp; 8888 us = udp->udp_us; 8889 8890 /* 8891 * Address has been verified by the caller 8892 */ 8893 switch (len) { 8894 default: 8895 /* 8896 * Should never happen 8897 */ 8898 return (EINVAL); 8899 8900 case sizeof (sin_t): 8901 sin = (sin_t *)sa; 8902 v4dst = sin->sin_addr.s_addr; 8903 dstport = sin->sin_port; 8904 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8905 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8906 ipversion = IPV4_VERSION; 8907 break; 8908 8909 case sizeof (sin6_t): 8910 sin6 = (sin6_t *)sa; 8911 v6dst = sin6->sin6_addr; 8912 dstport = sin6->sin6_port; 8913 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 8914 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 8915 ipversion = IPV4_VERSION; 8916 flowinfo = 0; 8917 } else { 8918 ipversion = IPV6_VERSION; 8919 flowinfo = sin6->sin6_flowinfo; 8920 } 8921 break; 8922 } 8923 8924 if (dstport == 0) 8925 return (-TBADADDR); 8926 8927 rw_enter(&udp->udp_rwlock, RW_WRITER); 8928 8929 /* 8930 * This UDP must have bound to a port already before doing a connect. 8931 * TPI mandates that users must send TPI primitives only 1 at a time 8932 * and wait for the response before sending the next primitive. 8933 */ 8934 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8935 rw_exit(&udp->udp_rwlock); 8936 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8937 "udp_connect: bad state, %u", udp->udp_state); 8938 return (-TOUTSTATE); 8939 } 8940 udp->udp_pending_op = T_CONN_REQ; 8941 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 8942 8943 if (ipversion == IPV4_VERSION) { 8944 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8945 udp->udp_ip_snd_options_len; 8946 } else { 8947 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8948 } 8949 8950 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8951 us->us_bind_fanout_size)]; 8952 8953 mutex_enter(&udpf->uf_lock); 8954 if (udp->udp_state == TS_DATA_XFER) { 8955 /* Already connected - clear out state */ 8956 udp->udp_v6src = udp->udp_bound_v6src; 8957 udp->udp_state = TS_IDLE; 8958 } 8959 8960 /* 8961 * Create a default IP header with no IP options. 8962 */ 8963 udp->udp_dstport = dstport; 8964 udp->udp_ipversion = ipversion; 8965 if (ipversion == IPV4_VERSION) { 8966 /* 8967 * Interpret a zero destination to mean loopback. 8968 * Update the T_CONN_REQ (sin/sin6) since it is used to 8969 * generate the T_CONN_CON. 8970 */ 8971 if (v4dst == INADDR_ANY) { 8972 v4dst = htonl(INADDR_LOOPBACK); 8973 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8974 if (udp->udp_family == AF_INET) { 8975 sin->sin_addr.s_addr = v4dst; 8976 } else { 8977 sin6->sin6_addr = v6dst; 8978 } 8979 } 8980 udp->udp_v6dst = v6dst; 8981 udp->udp_flowinfo = 0; 8982 8983 /* 8984 * If the destination address is multicast and 8985 * an outgoing multicast interface has been set, 8986 * use the address of that interface as our 8987 * source address if no source address has been set. 8988 */ 8989 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 8990 CLASSD(v4dst) && 8991 udp->udp_multicast_if_addr != INADDR_ANY) { 8992 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 8993 &udp->udp_v6src); 8994 } 8995 } else { 8996 ASSERT(udp->udp_ipversion == IPV6_VERSION); 8997 /* 8998 * Interpret a zero destination to mean loopback. 8999 * Update the T_CONN_REQ (sin/sin6) since it is used to 9000 * generate the T_CONN_CON. 9001 */ 9002 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 9003 v6dst = ipv6_loopback; 9004 sin6->sin6_addr = v6dst; 9005 } 9006 udp->udp_v6dst = v6dst; 9007 udp->udp_flowinfo = flowinfo; 9008 /* 9009 * If the destination address is multicast and 9010 * an outgoing multicast interface has been set, 9011 * then the ip bind logic will pick the correct source 9012 * address (i.e. matching the outgoing multicast interface). 9013 */ 9014 } 9015 9016 /* 9017 * Verify that the src/port/dst/port is unique for all 9018 * connections in TS_DATA_XFER 9019 */ 9020 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 9021 if (udp1->udp_state != TS_DATA_XFER) 9022 continue; 9023 if (udp->udp_port != udp1->udp_port || 9024 udp->udp_ipversion != udp1->udp_ipversion || 9025 dstport != udp1->udp_dstport || 9026 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 9027 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) || 9028 !(IPCL_ZONE_MATCH(udp->udp_connp, 9029 udp1->udp_connp->conn_zoneid) || 9030 IPCL_ZONE_MATCH(udp1->udp_connp, 9031 udp->udp_connp->conn_zoneid))) 9032 continue; 9033 mutex_exit(&udpf->uf_lock); 9034 udp->udp_pending_op = -1; 9035 rw_exit(&udp->udp_rwlock); 9036 return (-TBADADDR); 9037 } 9038 9039 if (cl_inet_connect2 != NULL) { 9040 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error); 9041 if (error != 0) { 9042 mutex_exit(&udpf->uf_lock); 9043 udp->udp_pending_op = -1; 9044 rw_exit(&udp->udp_rwlock); 9045 return (-TBADADDR); 9046 } 9047 } 9048 9049 udp->udp_state = TS_DATA_XFER; 9050 mutex_exit(&udpf->uf_lock); 9051 9052 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 9053 if (ire_mp == NULL) { 9054 mutex_enter(&udpf->uf_lock); 9055 udp->udp_state = TS_IDLE; 9056 udp->udp_pending_op = -1; 9057 mutex_exit(&udpf->uf_lock); 9058 rw_exit(&udp->udp_rwlock); 9059 return (ENOMEM); 9060 } 9061 9062 rw_exit(&udp->udp_rwlock); 9063 9064 ire_mp->b_wptr += sizeof (ire_t); 9065 ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE; 9066 9067 if (udp->udp_family == AF_INET) { 9068 error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP, 9069 &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port, 9070 V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport, 9071 B_TRUE, B_TRUE, cr); 9072 } else { 9073 error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP, 9074 &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst, 9075 &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE, cr); 9076 } 9077 9078 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 9079 } 9080 9081 /* ARGSUSED */ 9082 static int 9083 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 9084 socklen_t len, sock_connid_t *id, cred_t *cr) 9085 { 9086 conn_t *connp = (conn_t *)proto_handle; 9087 udp_t *udp = connp->conn_udp; 9088 int error; 9089 boolean_t did_bind = B_FALSE; 9090 9091 /* All Solaris components should pass a cred for this operation. */ 9092 ASSERT(cr != NULL); 9093 9094 if (sa == NULL) { 9095 /* 9096 * Disconnect 9097 * Make sure we are connected 9098 */ 9099 if (udp->udp_state != TS_DATA_XFER) 9100 return (EINVAL); 9101 9102 error = udp_disconnect(connp); 9103 return (error); 9104 } 9105 9106 error = proto_verify_ip_addr(udp->udp_family, sa, len); 9107 if (error != 0) 9108 goto done; 9109 9110 /* do an implicit bind if necessary */ 9111 if (udp->udp_state == TS_UNBND) { 9112 error = udp_implicit_bind(connp, cr); 9113 /* 9114 * We could be racing with an actual bind, in which case 9115 * we would see EPROTO. We cross our fingers and try 9116 * to connect. 9117 */ 9118 if (!(error == 0 || error == EPROTO)) 9119 goto done; 9120 did_bind = B_TRUE; 9121 } 9122 /* 9123 * set SO_DGRAM_ERRIND 9124 */ 9125 udp->udp_dgram_errind = B_TRUE; 9126 9127 error = udp_do_connect(connp, sa, len, cr); 9128 9129 if (error != 0 && did_bind) { 9130 int unbind_err; 9131 9132 unbind_err = udp_do_unbind(connp); 9133 ASSERT(unbind_err == 0); 9134 } 9135 9136 if (error == 0) { 9137 *id = 0; 9138 (*connp->conn_upcalls->su_connected) 9139 (connp->conn_upper_handle, 0, NULL, -1); 9140 } else if (error < 0) { 9141 error = proto_tlitosyserr(-error); 9142 } 9143 9144 done: 9145 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 9146 /* 9147 * No need to hold locks to set state 9148 * after connect failure socket state is undefined 9149 * We set the state only to imitate old sockfs behavior 9150 */ 9151 udp->udp_state = TS_IDLE; 9152 } 9153 return (error); 9154 } 9155 9156 /* ARGSUSED */ 9157 int 9158 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 9159 cred_t *cr) 9160 { 9161 conn_t *connp = (conn_t *)proto_handle; 9162 udp_t *udp = connp->conn_udp; 9163 udp_stack_t *us = udp->udp_us; 9164 int error = 0; 9165 9166 ASSERT(DB_TYPE(mp) == M_DATA); 9167 9168 /* All Solaris components should pass a cred for this operation. */ 9169 ASSERT(cr != NULL); 9170 9171 /* If labeled then sockfs should have already set db_credp */ 9172 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 9173 9174 /* 9175 * If the socket is connected and no change in destination 9176 */ 9177 if (msg->msg_namelen == 0) { 9178 error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid); 9179 if (error == EDESTADDRREQ) 9180 return (error); 9181 else 9182 return (udp->udp_dgram_errind ? error : 0); 9183 } 9184 9185 /* 9186 * Do an implicit bind if necessary. 9187 */ 9188 if (udp->udp_state == TS_UNBND) { 9189 error = udp_implicit_bind(connp, cr); 9190 /* 9191 * We could be racing with an actual bind, in which case 9192 * we would see EPROTO. We cross our fingers and try 9193 * to send. 9194 */ 9195 if (!(error == 0 || error == EPROTO)) { 9196 freemsg(mp); 9197 return (error); 9198 } 9199 } 9200 9201 rw_enter(&udp->udp_rwlock, RW_WRITER); 9202 9203 if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) { 9204 rw_exit(&udp->udp_rwlock); 9205 freemsg(mp); 9206 return (EISCONN); 9207 } 9208 9209 9210 if (udp->udp_delayed_error != 0) { 9211 boolean_t match; 9212 9213 error = udp->udp_delayed_error; 9214 match = B_FALSE; 9215 udp->udp_delayed_error = 0; 9216 switch (udp->udp_family) { 9217 case AF_INET: { 9218 /* Compare just IP address and port */ 9219 sin_t *sin1 = (sin_t *)msg->msg_name; 9220 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 9221 9222 if (msg->msg_namelen == sizeof (sin_t) && 9223 sin1->sin_port == sin2->sin_port && 9224 sin1->sin_addr.s_addr == sin2->sin_addr.s_addr) 9225 match = B_TRUE; 9226 9227 break; 9228 } 9229 case AF_INET6: { 9230 sin6_t *sin1 = (sin6_t *)msg->msg_name; 9231 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 9232 9233 if (msg->msg_namelen == sizeof (sin6_t) && 9234 sin1->sin6_port == sin2->sin6_port && 9235 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 9236 &sin2->sin6_addr)) 9237 match = B_TRUE; 9238 break; 9239 } 9240 default: 9241 ASSERT(0); 9242 } 9243 9244 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 9245 9246 if (match) { 9247 rw_exit(&udp->udp_rwlock); 9248 freemsg(mp); 9249 return (error); 9250 } 9251 } 9252 9253 error = proto_verify_ip_addr(udp->udp_family, 9254 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 9255 rw_exit(&udp->udp_rwlock); 9256 9257 if (error != 0) { 9258 freemsg(mp); 9259 return (error); 9260 } 9261 9262 error = udp_send_not_connected(connp, mp, 9263 (struct sockaddr *)msg->msg_name, msg->msg_namelen, msg, cr, 9264 curproc->p_pid); 9265 if (error != 0) { 9266 UDP_STAT(us, udp_out_err_output); 9267 freemsg(mp); 9268 } 9269 return (udp->udp_dgram_errind ? error : 0); 9270 } 9271 9272 void 9273 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 9274 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb) 9275 { 9276 conn_t *connp = (conn_t *)proto_handle; 9277 udp_t *udp; 9278 struct T_capability_ack tca; 9279 struct sockaddr_in6 laddr, faddr; 9280 socklen_t laddrlen, faddrlen; 9281 short opts; 9282 struct stroptions *stropt; 9283 mblk_t *stropt_mp; 9284 int error; 9285 9286 udp = connp->conn_udp; 9287 9288 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 9289 9290 /* 9291 * setup the fallback stream that was allocated 9292 */ 9293 connp->conn_dev = (dev_t)RD(q)->q_ptr; 9294 connp->conn_minor_arena = WR(q)->q_ptr; 9295 9296 RD(q)->q_ptr = WR(q)->q_ptr = connp; 9297 9298 WR(q)->q_qinfo = &udp_winit; 9299 9300 connp->conn_rq = RD(q); 9301 connp->conn_wq = WR(q); 9302 9303 /* Notify stream head about options before sending up data */ 9304 stropt_mp->b_datap->db_type = M_SETOPTS; 9305 stropt_mp->b_wptr += sizeof (*stropt); 9306 stropt = (struct stroptions *)stropt_mp->b_rptr; 9307 stropt->so_flags = SO_WROFF | SO_HIWAT; 9308 stropt->so_wroff = 9309 (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra); 9310 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 9311 putnext(RD(q), stropt_mp); 9312 9313 /* 9314 * Free the helper stream 9315 */ 9316 ip_free_helper_stream(connp); 9317 9318 if (!direct_sockfs) 9319 udp_disable_direct_sockfs(udp); 9320 9321 /* 9322 * Collect the information needed to sync with the sonode 9323 */ 9324 udp_do_capability_ack(udp, &tca, TC1_INFO); 9325 9326 laddrlen = faddrlen = sizeof (sin6_t); 9327 (void) udp_getsockname((sock_lower_handle_t)connp, 9328 (struct sockaddr *)&laddr, &laddrlen, CRED()); 9329 error = udp_getpeername((sock_lower_handle_t)connp, 9330 (struct sockaddr *)&faddr, &faddrlen, CRED()); 9331 if (error != 0) 9332 faddrlen = 0; 9333 9334 opts = 0; 9335 if (udp->udp_dgram_errind) 9336 opts |= SO_DGRAM_ERRIND; 9337 if (udp->udp_dontroute) 9338 opts |= SO_DONTROUTE; 9339 9340 /* 9341 * Once we grab the drain lock, no data will be send up 9342 * to the socket. So we notify the socket that the endpoint 9343 * is quiescent and it's therefore safe move data from 9344 * the socket to the stream head. 9345 */ 9346 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 9347 (struct sockaddr *)&laddr, laddrlen, 9348 (struct sockaddr *)&faddr, faddrlen, opts); 9349 9350 /* 9351 * push up any packets that were queued in udp_t 9352 */ 9353 9354 mutex_enter(&udp->udp_recv_lock); 9355 while (udp->udp_fallback_queue_head != NULL) { 9356 mblk_t *mp; 9357 mp = udp->udp_fallback_queue_head; 9358 udp->udp_fallback_queue_head = mp->b_next; 9359 mutex_exit(&udp->udp_recv_lock); 9360 mp->b_next = NULL; 9361 putnext(RD(q), mp); 9362 mutex_enter(&udp->udp_recv_lock); 9363 } 9364 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 9365 /* 9366 * No longer a streams less socket 9367 */ 9368 connp->conn_flags &= ~IPCL_NONSTR; 9369 mutex_exit(&udp->udp_recv_lock); 9370 9371 ASSERT(connp->conn_ref >= 1); 9372 } 9373 9374 static int 9375 udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9376 { 9377 sin_t *sin = (sin_t *)sa; 9378 sin6_t *sin6 = (sin6_t *)sa; 9379 9380 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9381 ASSERT(udp != NULL); 9382 9383 if (udp->udp_state != TS_DATA_XFER) 9384 return (ENOTCONN); 9385 9386 switch (udp->udp_family) { 9387 case AF_INET: 9388 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9389 9390 if (*salenp < sizeof (sin_t)) 9391 return (EINVAL); 9392 9393 *salenp = sizeof (sin_t); 9394 *sin = sin_null; 9395 sin->sin_family = AF_INET; 9396 sin->sin_port = udp->udp_dstport; 9397 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 9398 break; 9399 case AF_INET6: 9400 if (*salenp < sizeof (sin6_t)) 9401 return (EINVAL); 9402 9403 *salenp = sizeof (sin6_t); 9404 *sin6 = sin6_null; 9405 sin6->sin6_family = AF_INET6; 9406 sin6->sin6_port = udp->udp_dstport; 9407 sin6->sin6_addr = udp->udp_v6dst; 9408 sin6->sin6_flowinfo = udp->udp_flowinfo; 9409 break; 9410 } 9411 9412 return (0); 9413 } 9414 9415 /* ARGSUSED */ 9416 int 9417 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9418 socklen_t *salenp, cred_t *cr) 9419 { 9420 conn_t *connp = (conn_t *)proto_handle; 9421 udp_t *udp = connp->conn_udp; 9422 int error; 9423 9424 /* All Solaris components should pass a cred for this operation. */ 9425 ASSERT(cr != NULL); 9426 9427 ASSERT(udp != NULL); 9428 9429 rw_enter(&udp->udp_rwlock, RW_READER); 9430 9431 error = udp_do_getpeername(udp, sa, salenp); 9432 9433 rw_exit(&udp->udp_rwlock); 9434 9435 return (error); 9436 } 9437 9438 static int 9439 udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9440 { 9441 sin_t *sin = (sin_t *)sa; 9442 sin6_t *sin6 = (sin6_t *)sa; 9443 9444 ASSERT(udp != NULL); 9445 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9446 9447 switch (udp->udp_family) { 9448 case AF_INET: 9449 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9450 9451 if (*salenp < sizeof (sin_t)) 9452 return (EINVAL); 9453 9454 *salenp = sizeof (sin_t); 9455 *sin = sin_null; 9456 sin->sin_family = AF_INET; 9457 if (udp->udp_state == TS_UNBND) { 9458 break; 9459 } 9460 sin->sin_port = udp->udp_port; 9461 9462 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 9463 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9464 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 9465 } else { 9466 /* 9467 * INADDR_ANY 9468 * udp_v6src is not set, we might be bound to 9469 * broadcast/multicast. Use udp_bound_v6src as 9470 * local address instead (that could 9471 * also still be INADDR_ANY) 9472 */ 9473 sin->sin_addr.s_addr = 9474 V4_PART_OF_V6(udp->udp_bound_v6src); 9475 } 9476 break; 9477 9478 case AF_INET6: 9479 if (*salenp < sizeof (sin6_t)) 9480 return (EINVAL); 9481 9482 *salenp = sizeof (sin6_t); 9483 *sin6 = sin6_null; 9484 sin6->sin6_family = AF_INET6; 9485 if (udp->udp_state == TS_UNBND) { 9486 break; 9487 } 9488 sin6->sin6_port = udp->udp_port; 9489 9490 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9491 sin6->sin6_addr = udp->udp_v6src; 9492 } else { 9493 /* 9494 * UNSPECIFIED 9495 * udp_v6src is not set, we might be bound to 9496 * broadcast/multicast. Use udp_bound_v6src as 9497 * local address instead (that could 9498 * also still be UNSPECIFIED) 9499 */ 9500 sin6->sin6_addr = udp->udp_bound_v6src; 9501 } 9502 } 9503 return (0); 9504 } 9505 9506 /* ARGSUSED */ 9507 int 9508 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9509 socklen_t *salenp, cred_t *cr) 9510 { 9511 conn_t *connp = (conn_t *)proto_handle; 9512 udp_t *udp = connp->conn_udp; 9513 int error; 9514 9515 /* All Solaris components should pass a cred for this operation. */ 9516 ASSERT(cr != NULL); 9517 9518 ASSERT(udp != NULL); 9519 rw_enter(&udp->udp_rwlock, RW_READER); 9520 9521 error = udp_do_getsockname(udp, sa, salenp); 9522 9523 rw_exit(&udp->udp_rwlock); 9524 9525 return (error); 9526 } 9527 9528 int 9529 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9530 void *optvalp, socklen_t *optlen, cred_t *cr) 9531 { 9532 conn_t *connp = (conn_t *)proto_handle; 9533 udp_t *udp = connp->conn_udp; 9534 int error; 9535 t_uscalar_t max_optbuf_len; 9536 void *optvalp_buf; 9537 int len; 9538 9539 /* All Solaris components should pass a cred for this operation. */ 9540 ASSERT(cr != NULL); 9541 9542 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 9543 udp_opt_obj.odb_opt_des_arr, 9544 udp_opt_obj.odb_opt_arr_cnt, 9545 udp_opt_obj.odb_topmost_tpiprovider, 9546 B_FALSE, B_TRUE, cr); 9547 if (error != 0) { 9548 if (error < 0) 9549 error = proto_tlitosyserr(-error); 9550 return (error); 9551 } 9552 9553 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 9554 rw_enter(&udp->udp_rwlock, RW_READER); 9555 len = udp_opt_get(connp, level, option_name, optvalp_buf); 9556 rw_exit(&udp->udp_rwlock); 9557 9558 if (len < 0) { 9559 /* 9560 * Pass on to IP 9561 */ 9562 kmem_free(optvalp_buf, max_optbuf_len); 9563 return (ip_get_options(connp, level, option_name, 9564 optvalp, optlen, cr)); 9565 } else { 9566 /* 9567 * update optlen and copy option value 9568 */ 9569 t_uscalar_t size = MIN(len, *optlen); 9570 bcopy(optvalp_buf, optvalp, size); 9571 bcopy(&size, optlen, sizeof (size)); 9572 9573 kmem_free(optvalp_buf, max_optbuf_len); 9574 return (0); 9575 } 9576 } 9577 9578 int 9579 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9580 const void *optvalp, socklen_t optlen, cred_t *cr) 9581 { 9582 conn_t *connp = (conn_t *)proto_handle; 9583 udp_t *udp = connp->conn_udp; 9584 int error; 9585 9586 /* All Solaris components should pass a cred for this operation. */ 9587 ASSERT(cr != NULL); 9588 9589 error = proto_opt_check(level, option_name, optlen, NULL, 9590 udp_opt_obj.odb_opt_des_arr, 9591 udp_opt_obj.odb_opt_arr_cnt, 9592 udp_opt_obj.odb_topmost_tpiprovider, 9593 B_TRUE, B_FALSE, cr); 9594 9595 if (error != 0) { 9596 if (error < 0) 9597 error = proto_tlitosyserr(-error); 9598 return (error); 9599 } 9600 9601 rw_enter(&udp->udp_rwlock, RW_WRITER); 9602 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 9603 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 9604 NULL, cr); 9605 rw_exit(&udp->udp_rwlock); 9606 9607 if (error < 0) { 9608 /* 9609 * Pass on to ip 9610 */ 9611 error = ip_set_options(connp, level, option_name, optvalp, 9612 optlen, cr); 9613 } 9614 9615 return (error); 9616 } 9617 9618 void 9619 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 9620 { 9621 conn_t *connp = (conn_t *)proto_handle; 9622 udp_t *udp = connp->conn_udp; 9623 9624 mutex_enter(&udp->udp_recv_lock); 9625 connp->conn_flow_cntrld = B_FALSE; 9626 mutex_exit(&udp->udp_recv_lock); 9627 } 9628 9629 /* ARGSUSED */ 9630 int 9631 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 9632 { 9633 conn_t *connp = (conn_t *)proto_handle; 9634 9635 /* All Solaris components should pass a cred for this operation. */ 9636 ASSERT(cr != NULL); 9637 9638 /* shut down the send side */ 9639 if (how != SHUT_RD) 9640 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9641 SOCK_OPCTL_SHUT_SEND, 0); 9642 /* shut down the recv side */ 9643 if (how != SHUT_WR) 9644 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9645 SOCK_OPCTL_SHUT_RECV, 0); 9646 return (0); 9647 } 9648 9649 int 9650 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 9651 int mode, int32_t *rvalp, cred_t *cr) 9652 { 9653 conn_t *connp = (conn_t *)proto_handle; 9654 int error; 9655 9656 /* All Solaris components should pass a cred for this operation. */ 9657 ASSERT(cr != NULL); 9658 9659 switch (cmd) { 9660 case ND_SET: 9661 case ND_GET: 9662 case _SIOCSOCKFALLBACK: 9663 case TI_GETPEERNAME: 9664 case TI_GETMYNAME: 9665 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 9666 cmd)); 9667 error = EINVAL; 9668 break; 9669 default: 9670 /* 9671 * Pass on to IP using helper stream 9672 */ 9673 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 9674 cmd, arg, mode, cr, rvalp); 9675 break; 9676 } 9677 return (error); 9678 } 9679 9680 /* ARGSUSED */ 9681 int 9682 udp_accept(sock_lower_handle_t lproto_handle, 9683 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 9684 cred_t *cr) 9685 { 9686 return (EOPNOTSUPP); 9687 } 9688 9689 /* ARGSUSED */ 9690 int 9691 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 9692 { 9693 return (EOPNOTSUPP); 9694 } 9695 9696 sock_downcalls_t sock_udp_downcalls = { 9697 udp_activate, /* sd_activate */ 9698 udp_accept, /* sd_accept */ 9699 udp_bind, /* sd_bind */ 9700 udp_listen, /* sd_listen */ 9701 udp_connect, /* sd_connect */ 9702 udp_getpeername, /* sd_getpeername */ 9703 udp_getsockname, /* sd_getsockname */ 9704 udp_getsockopt, /* sd_getsockopt */ 9705 udp_setsockopt, /* sd_setsockopt */ 9706 udp_send, /* sd_send */ 9707 NULL, /* sd_send_uio */ 9708 NULL, /* sd_recv_uio */ 9709 NULL, /* sd_poll */ 9710 udp_shutdown, /* sd_shutdown */ 9711 udp_clr_flowctrl, /* sd_setflowctrl */ 9712 udp_ioctl, /* sd_ioctl */ 9713 udp_close /* sd_close */ 9714 }; 9715