1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/pattr.h> 31 #include <sys/stropts.h> 32 #include <sys/strlog.h> 33 #include <sys/strsun.h> 34 #include <sys/time.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/suntpi.h> 42 #include <sys/xti_inet.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/ucred.h> 46 #include <sys/zone.h> 47 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/sockio.h> 51 #include <sys/vtrace.h> 52 #include <sys/sdt.h> 53 #include <sys/debug.h> 54 #include <sys/isa_defs.h> 55 #include <sys/random.h> 56 #include <netinet/in.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 #include <netinet/udp.h> 60 #include <net/if.h> 61 #include <net/route.h> 62 63 #include <inet/common.h> 64 #include <inet/ip.h> 65 #include <inet/ip_impl.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_ire.h> 68 #include <inet/ip_if.h> 69 #include <inet/ip_multi.h> 70 #include <inet/ip_ndp.h> 71 #include <inet/proto_set.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/optcom.h> 75 #include <inet/snmpcom.h> 76 #include <inet/kstatcom.h> 77 #include <inet/udp_impl.h> 78 #include <inet/ipclassifier.h> 79 #include <inet/ipsec_impl.h> 80 #include <inet/ipp_common.h> 81 #include <sys/squeue_impl.h> 82 #include <inet/ipnet.h> 83 #include <sys/ethernet.h> 84 85 /* 86 * The ipsec_info.h header file is here since it has the definition for the 87 * M_CTL message types used by IP to convey information to the ULP. The 88 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 
89 */ 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 93 #include <sys/tsol/label.h> 94 #include <sys/tsol/tnet.h> 95 #include <rpc/pmap_prot.h> 96 97 /* 98 * Synchronization notes: 99 * 100 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 101 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock. 102 * We also use conn_lock when updating things that affect the IP classifier 103 * lookup. 104 * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock. 105 * 106 * The fanout lock uf_lock: 107 * When a UDP endpoint is bound to a local port, it is inserted into 108 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 109 * The size of the array is controlled by the udp_bind_fanout_size variable. 110 * This variable can be changed in /etc/system if the default value is 111 * not large enough. Each bind hash bucket is protected by a per bucket 112 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 113 * structure and a few other fields in the udp_t. A UDP endpoint is removed 114 * from the bind hash list only when it is being unbound or being closed. 115 * The per bucket lock also protects a UDP endpoint's state changes. 116 * 117 * The udp_rwlock: 118 * This protects most of the other fields in the udp_t. The exact list of 119 * fields which are protected by each of the above locks is documented in 120 * the udp_t structure definition. 121 * 122 * Plumbing notes: 123 * UDP is always a device driver. For compatibility with mibopen() code 124 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 125 * dummy module. 126 * 127 * The above implies that we don't support any intermediate module to 128 * reside in between /dev/ip and udp -- in fact, we never supported such 129 * scenario in the past as the inter-layer communication semantics have 130 * always been private. 
131 */ 132 133 /* For /etc/system control */ 134 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 135 136 #define NDD_TOO_QUICK_MSG \ 137 "ndd get info rate too high for non-privileged users, try again " \ 138 "later.\n" 139 #define NDD_OUT_OF_BUF_MSG "<< Out of buffer >>\n" 140 141 /* Option processing attrs */ 142 typedef struct udpattrs_s { 143 union { 144 ip6_pkt_t *udpattr_ipp6; /* For V6 */ 145 ip4_pkt_t *udpattr_ipp4; /* For V4 */ 146 } udpattr_ippu; 147 #define udpattr_ipp6 udpattr_ippu.udpattr_ipp6 148 #define udpattr_ipp4 udpattr_ippu.udpattr_ipp4 149 mblk_t *udpattr_mb; 150 boolean_t udpattr_credset; 151 } udpattrs_t; 152 153 static void udp_addr_req(queue_t *q, mblk_t *mp); 154 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 155 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 156 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 157 static int udp_build_hdrs(udp_t *udp); 158 static void udp_capability_req(queue_t *q, mblk_t *mp); 159 static int udp_tpi_close(queue_t *q, int flags); 160 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 161 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 162 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 163 int sys_error); 164 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 165 t_scalar_t tlierr, int unixerr); 166 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 167 cred_t *cr); 168 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 169 char *value, caddr_t cp, cred_t *cr); 170 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 171 char *value, caddr_t cp, cred_t *cr); 172 static void udp_icmp_error(conn_t *, mblk_t *); 173 static void udp_icmp_error_ipv6(conn_t *, mblk_t *); 174 static void udp_info_req(queue_t *q, mblk_t *mp); 175 static void udp_input(void *, mblk_t *, void *); 176 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 177 t_scalar_t addr_length); 178 
static void udp_lrput(queue_t *, mblk_t *); 179 static void udp_lwput(queue_t *, mblk_t *); 180 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 181 cred_t *credp, boolean_t isv6); 182 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 183 cred_t *credp); 184 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 185 cred_t *credp); 186 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 187 int *errorp, udpattrs_t *udpattrs); 188 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 189 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 190 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 191 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 192 cred_t *cr); 193 static void udp_report_item(mblk_t *mp, udp_t *udp); 194 static int udp_rinfop(queue_t *q, infod_t *dp); 195 static int udp_rrw(queue_t *q, struiod_t *dp); 196 static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 197 cred_t *cr); 198 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, 199 ipha_t *ipha); 200 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 201 t_scalar_t destlen, t_scalar_t err); 202 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 203 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 204 boolean_t random); 205 static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, 206 int *, boolean_t, struct nmsghdr *, cred_t *, pid_t); 207 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 208 int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid); 209 static void udp_wput_other(queue_t *q, mblk_t *mp); 210 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 211 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 212 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 213 214 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 215 
static void udp_stack_fini(netstackid_t stackid, void *arg); 216 217 static void *udp_kstat_init(netstackid_t stackid); 218 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 219 static void *udp_kstat2_init(netstackid_t, udp_stat_t *); 220 static void udp_kstat2_fini(netstackid_t, kstat_t *); 221 static int udp_kstat_update(kstat_t *kp, int rw); 222 223 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 224 uint_t pkt_len); 225 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 226 static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t); 227 228 static int udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *, 229 cred_t *, pid_t); 230 static void udp_ulp_recv(conn_t *, mblk_t *); 231 232 /* Common routine for TPI and socket module */ 233 static conn_t *udp_do_open(cred_t *, boolean_t, int); 234 static void udp_do_close(conn_t *); 235 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 236 boolean_t); 237 static int udp_do_unbind(conn_t *); 238 static int udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *); 239 static int udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *); 240 241 int udp_getsockname(sock_lower_handle_t, 242 struct sockaddr *, socklen_t *, cred_t *); 243 int udp_getpeername(sock_lower_handle_t, 244 struct sockaddr *, socklen_t *, cred_t *); 245 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 246 cred_t *cr); 247 static int udp_post_ip_bind_connect(udp_t *, mblk_t *, int); 248 249 #define UDP_RECV_HIWATER (56 * 1024) 250 #define UDP_RECV_LOWATER 128 251 #define UDP_XMIT_HIWATER (56 * 1024) 252 #define UDP_XMIT_LOWATER 1024 253 254 /* 255 * The following is defined in tcp.c 256 */ 257 extern int (*cl_inet_connect2)(netstackid_t stack_id, 258 uint8_t protocol, boolean_t is_outgoing, 259 sa_family_t addr_family, 260 uint8_t *laddrp, in_port_t lport, 261 uint8_t *faddrp, in_port_t fport, void *args); 262 263 /* 264 * Checks if the 
given destination addr/port is allowed out. 265 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 266 * Called for each connect() and for sendto()/sendmsg() to a different 267 * destination. 268 * For connect(), called in udp_connect(). 269 * For sendto()/sendmsg(), called in udp_output_v{4,6}(). 270 * 271 * This macro assumes that the cl_inet_connect2 hook is not NULL. 272 * Please check this before calling this macro. 273 * 274 * void 275 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 276 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 277 */ 278 #define CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) { \ 279 (err) = 0; \ 280 /* \ 281 * Running in cluster mode - check and register active \ 282 * "connection" information \ 283 */ \ 284 if ((udp)->udp_ipversion == IPV4_VERSION) \ 285 (err) = (*cl_inet_connect2)( \ 286 (cp)->conn_netstack->netstack_stackid, \ 287 IPPROTO_UDP, is_outgoing, AF_INET, \ 288 (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]), \ 289 (udp)->udp_port, \ 290 (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]), \ 291 (in_port_t)(fport), NULL); \ 292 else \ 293 (err) = (*cl_inet_connect2)( \ 294 (cp)->conn_netstack->netstack_stackid, \ 295 IPPROTO_UDP, is_outgoing, AF_INET6, \ 296 (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port, \ 297 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 298 } 299 300 static struct module_info udp_mod_info = { 301 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 302 }; 303 304 /* 305 * Entry points for UDP as a device. 306 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 
307 */ 308 static struct qinit udp_rinitv4 = { 309 NULL, NULL, udp_openv4, udp_tpi_close, NULL, 310 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 311 }; 312 313 static struct qinit udp_rinitv6 = { 314 NULL, NULL, udp_openv6, udp_tpi_close, NULL, 315 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 316 }; 317 318 static struct qinit udp_winit = { 319 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, 320 &udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE 321 }; 322 323 /* UDP entry point during fallback */ 324 struct qinit udp_fallback_sock_winit = { 325 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 326 }; 327 328 /* 329 * UDP needs to handle I_LINK and I_PLINK since ifconfig 330 * likes to use it as a place to hang the various streams. 331 */ 332 static struct qinit udp_lrinit = { 333 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, 334 &udp_mod_info 335 }; 336 337 static struct qinit udp_lwinit = { 338 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, 339 &udp_mod_info 340 }; 341 342 /* For AF_INET aka /dev/udp */ 343 struct streamtab udpinfov4 = { 344 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 345 }; 346 347 /* For AF_INET6 aka /dev/udp6 */ 348 struct streamtab udpinfov6 = { 349 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 350 }; 351 352 static sin_t sin_null; /* Zero address for quick clears */ 353 static sin6_t sin6_null; /* Zero address for quick clears */ 354 355 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 356 357 /* Default structure copied into T_INFO_ACK messages */ 358 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 359 T_INFO_ACK, 360 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 361 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 362 T_INVALID, /* CDATA_size. udp does not support connect data. */ 363 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 364 sizeof (sin_t), /* ADDR_size. 
*/ 365 0, /* OPT_size - not initialized here */ 366 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 367 T_CLTS, /* SERV_type. udp supports connection-less. */ 368 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 369 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 370 }; 371 372 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 373 374 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 375 T_INFO_ACK, 376 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 377 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 378 T_INVALID, /* CDATA_size. udp does not support connect data. */ 379 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 380 sizeof (sin6_t), /* ADDR_size. */ 381 0, /* OPT_size - not initialized here */ 382 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 383 T_CLTS, /* SERV_type. udp supports connection-less. */ 384 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 385 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 386 }; 387 388 /* largest UDP port number */ 389 #define UDP_MAX_PORT 65535 390 391 /* 392 * Table of ND variables supported by udp. These are loaded into us_nd 393 * in udp_open. 394 * All of these are alterable, within the min/max values given, at run time. 
395 */ 396 /* BEGIN CSTYLED */ 397 udpparam_t udp_param_arr[] = { 398 /*min max value name */ 399 { 0L, 256, 32, "udp_wroff_extra" }, 400 { 1L, 255, 255, "udp_ipv4_ttl" }, 401 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 402 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 403 { 0, 1, 1, "udp_do_checksum" }, 404 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 405 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 406 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 407 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 408 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 409 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 410 { 100, 60000, 1000, "udp_ndd_get_info_interval"}, 411 }; 412 /* END CSTYLED */ 413 414 /* Setable in /etc/system */ 415 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 416 uint32_t udp_random_anon_port = 1; 417 418 /* 419 * Hook functions to enable cluster networking. 420 * On non-clustered systems these vectors must always be NULL 421 */ 422 423 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol, 424 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 425 void *args) = NULL; 426 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, 427 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 428 void *args) = NULL; 429 430 typedef union T_primitives *t_primp_t; 431 432 /* 433 * Return the next anonymous port in the privileged port range for 434 * bind checking. 435 * 436 * Trusted Extension (TX) notes: TX allows administrator to mark or 437 * reserve ports as Multilevel ports (MLP). MLP has special function 438 * on TX systems. Once a port is made MLP, it's not available as 439 * ordinary port. This creates "holes" in the port name space. It 440 * may be necessary to skip the "holes" find a suitable anon port. 
441 */ 442 static in_port_t 443 udp_get_next_priv_port(udp_t *udp) 444 { 445 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 446 in_port_t nextport; 447 boolean_t restart = B_FALSE; 448 udp_stack_t *us = udp->udp_us; 449 450 retry: 451 if (next_priv_port < us->us_min_anonpriv_port || 452 next_priv_port >= IPPORT_RESERVED) { 453 next_priv_port = IPPORT_RESERVED - 1; 454 if (restart) 455 return (0); 456 restart = B_TRUE; 457 } 458 459 if (is_system_labeled() && 460 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 461 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 462 next_priv_port = nextport; 463 goto retry; 464 } 465 466 return (next_priv_port--); 467 } 468 469 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 470 /* ARGSUSED */ 471 static int 472 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 473 { 474 udp_fanout_t *udpf; 475 int i; 476 zoneid_t zoneid; 477 conn_t *connp; 478 udp_t *udp; 479 udp_stack_t *us; 480 481 connp = Q_TO_CONN(q); 482 udp = connp->conn_udp; 483 us = udp->udp_us; 484 485 /* Refer to comments in udp_status_report(). */ 486 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 487 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 488 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 489 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 490 return (0); 491 } 492 } 493 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 494 /* The following may work even if we cannot get a large buf. */ 495 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 496 return (0); 497 } 498 499 (void) mi_mpprintf(mp, 500 "UDP " MI_COL_HDRPAD_STR 501 /* 12345678[89ABCDEF] */ 502 " zone lport src addr dest addr port state"); 503 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 504 505 zoneid = connp->conn_zoneid; 506 507 for (i = 0; i < us->us_bind_fanout_size; i++) { 508 udpf = &us->us_bind_fanout[i]; 509 mutex_enter(&udpf->uf_lock); 510 511 /* Print the hash index. 
*/ 512 udp = udpf->uf_udp; 513 if (zoneid != GLOBAL_ZONEID) { 514 /* skip to first entry in this zone; might be none */ 515 while (udp != NULL && 516 udp->udp_connp->conn_zoneid != zoneid) 517 udp = udp->udp_bind_hash; 518 } 519 if (udp != NULL) { 520 uint_t print_len, buf_len; 521 522 buf_len = mp->b_cont->b_datap->db_lim - 523 mp->b_cont->b_wptr; 524 print_len = snprintf((char *)mp->b_cont->b_wptr, 525 buf_len, "%d\n", i); 526 if (print_len < buf_len) { 527 mp->b_cont->b_wptr += print_len; 528 } else { 529 mp->b_cont->b_wptr += buf_len; 530 } 531 for (; udp != NULL; udp = udp->udp_bind_hash) { 532 if (zoneid == GLOBAL_ZONEID || 533 zoneid == udp->udp_connp->conn_zoneid) 534 udp_report_item(mp->b_cont, udp); 535 } 536 } 537 mutex_exit(&udpf->uf_lock); 538 } 539 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 540 return (0); 541 } 542 543 /* 544 * Hash list removal routine for udp_t structures. 545 */ 546 static void 547 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 548 { 549 udp_t *udpnext; 550 kmutex_t *lockp; 551 udp_stack_t *us = udp->udp_us; 552 553 if (udp->udp_ptpbhn == NULL) 554 return; 555 556 /* 557 * Extract the lock pointer in case there are concurrent 558 * hash_remove's for this instance. 
559 */ 560 ASSERT(udp->udp_port != 0); 561 if (!caller_holds_lock) { 562 lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 563 us->us_bind_fanout_size)].uf_lock; 564 ASSERT(lockp != NULL); 565 mutex_enter(lockp); 566 } 567 if (udp->udp_ptpbhn != NULL) { 568 udpnext = udp->udp_bind_hash; 569 if (udpnext != NULL) { 570 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 571 udp->udp_bind_hash = NULL; 572 } 573 *udp->udp_ptpbhn = udpnext; 574 udp->udp_ptpbhn = NULL; 575 } 576 if (!caller_holds_lock) { 577 mutex_exit(lockp); 578 } 579 } 580 581 static void 582 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 583 { 584 udp_t **udpp; 585 udp_t *udpnext; 586 587 ASSERT(MUTEX_HELD(&uf->uf_lock)); 588 ASSERT(udp->udp_ptpbhn == NULL); 589 udpp = &uf->uf_udp; 590 udpnext = udpp[0]; 591 if (udpnext != NULL) { 592 /* 593 * If the new udp bound to the INADDR_ANY address 594 * and the first one in the list is not bound to 595 * INADDR_ANY we skip all entries until we find the 596 * first one bound to INADDR_ANY. 597 * This makes sure that applications binding to a 598 * specific address get preference over those binding to 599 * INADDR_ANY. 600 */ 601 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 602 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 603 while ((udpnext = udpp[0]) != NULL && 604 !V6_OR_V4_INADDR_ANY( 605 udpnext->udp_bound_v6src)) { 606 udpp = &(udpnext->udp_bind_hash); 607 } 608 if (udpnext != NULL) 609 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 610 } else { 611 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 612 } 613 } 614 udp->udp_bind_hash = udpnext; 615 udp->udp_ptpbhn = udpp; 616 udpp[0] = udp; 617 } 618 619 /* 620 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 621 * passed to udp_wput. 622 * It associates a port number and local address with the stream. 623 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 624 * protocol type (IPPROTO_UDP) placed in the message following the address. 
625 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 626 * (Called as writer.) 627 * 628 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 629 * without setting SO_REUSEADDR. This is needed so that they 630 * can be viewed as two independent transport protocols. 631 * However, anonymouns ports are allocated from the same range to avoid 632 * duplicating the us->us_next_port_to_try. 633 */ 634 static void 635 udp_tpi_bind(queue_t *q, mblk_t *mp) 636 { 637 sin_t *sin; 638 sin6_t *sin6; 639 mblk_t *mp1; 640 struct T_bind_req *tbr; 641 conn_t *connp; 642 udp_t *udp; 643 int error; 644 struct sockaddr *sa; 645 cred_t *cr; 646 647 /* 648 * All Solaris components should pass a db_credp 649 * for this TPI message, hence we ASSERT. 650 * But in case there is some other M_PROTO that looks 651 * like a TPI message sent by some other kernel 652 * component, we check and return an error. 653 */ 654 cr = msg_getcred(mp, NULL); 655 ASSERT(cr != NULL); 656 if (cr == NULL) { 657 udp_err_ack(q, mp, TSYSERR, EINVAL); 658 return; 659 } 660 661 connp = Q_TO_CONN(q); 662 udp = connp->conn_udp; 663 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 664 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 665 "udp_bind: bad req, len %u", 666 (uint_t)(mp->b_wptr - mp->b_rptr)); 667 udp_err_ack(q, mp, TPROTO, 0); 668 return; 669 } 670 if (udp->udp_state != TS_UNBND) { 671 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 672 "udp_bind: bad state, %u", udp->udp_state); 673 udp_err_ack(q, mp, TOUTSTATE, 0); 674 return; 675 } 676 /* 677 * Reallocate the message to make sure we have enough room for an 678 * address and the protocol type. 679 */ 680 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 681 if (!mp1) { 682 udp_err_ack(q, mp, TSYSERR, ENOMEM); 683 return; 684 } 685 686 mp = mp1; 687 688 /* Reset the message type in preparation for shipping it back. 
*/ 689 DB_TYPE(mp) = M_PCPROTO; 690 691 tbr = (struct T_bind_req *)mp->b_rptr; 692 switch (tbr->ADDR_length) { 693 case 0: /* Request for a generic port */ 694 tbr->ADDR_offset = sizeof (struct T_bind_req); 695 if (udp->udp_family == AF_INET) { 696 tbr->ADDR_length = sizeof (sin_t); 697 sin = (sin_t *)&tbr[1]; 698 *sin = sin_null; 699 sin->sin_family = AF_INET; 700 mp->b_wptr = (uchar_t *)&sin[1]; 701 sa = (struct sockaddr *)sin; 702 } else { 703 ASSERT(udp->udp_family == AF_INET6); 704 tbr->ADDR_length = sizeof (sin6_t); 705 sin6 = (sin6_t *)&tbr[1]; 706 *sin6 = sin6_null; 707 sin6->sin6_family = AF_INET6; 708 mp->b_wptr = (uchar_t *)&sin6[1]; 709 sa = (struct sockaddr *)sin6; 710 } 711 break; 712 713 case sizeof (sin_t): /* Complete IPv4 address */ 714 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 715 sizeof (sin_t)); 716 if (sa == NULL || !OK_32PTR((char *)sa)) { 717 udp_err_ack(q, mp, TSYSERR, EINVAL); 718 return; 719 } 720 if (udp->udp_family != AF_INET || 721 sa->sa_family != AF_INET) { 722 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 723 return; 724 } 725 break; 726 727 case sizeof (sin6_t): /* complete IPv6 address */ 728 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 729 sizeof (sin6_t)); 730 if (sa == NULL || !OK_32PTR((char *)sa)) { 731 udp_err_ack(q, mp, TSYSERR, EINVAL); 732 return; 733 } 734 if (udp->udp_family != AF_INET6 || 735 sa->sa_family != AF_INET6) { 736 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 737 return; 738 } 739 break; 740 741 default: /* Invalid request */ 742 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 743 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 744 udp_err_ack(q, mp, TBADADDR, 0); 745 return; 746 } 747 748 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 749 tbr->PRIM_type != O_T_BIND_REQ); 750 751 if (error != 0) { 752 if (error > 0) { 753 udp_err_ack(q, mp, TSYSERR, error); 754 } else { 755 udp_err_ack(q, mp, -error, 0); 756 } 757 } else { 758 tbr->PRIM_type = T_BIND_ACK; 759 
qreply(q, mp); 760 } 761 } 762 763 /* 764 * This routine handles each T_CONN_REQ message passed to udp. It 765 * associates a default destination address with the stream. 766 * 767 * This routine sends down a T_BIND_REQ to IP with the following mblks: 768 * T_BIND_REQ - specifying local and remote address/port 769 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 770 * T_OK_ACK - for the T_CONN_REQ 771 * T_CONN_CON - to keep the TPI user happy 772 * 773 * The connect completes in udp_do_connect. 774 * When a T_BIND_ACK is received information is extracted from the IRE 775 * and the two appended messages are sent to the TPI user. 776 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 777 * convert it to an error ack for the appropriate primitive. 778 */ 779 static void 780 udp_tpi_connect(queue_t *q, mblk_t *mp) 781 { 782 mblk_t *mp1; 783 udp_t *udp; 784 conn_t *connp = Q_TO_CONN(q); 785 int error; 786 socklen_t len; 787 struct sockaddr *sa; 788 struct T_conn_req *tcr; 789 cred_t *cr; 790 791 /* 792 * All Solaris components should pass a db_credp 793 * for this TPI message, hence we ASSERT. 794 * But in case there is some other M_PROTO that looks 795 * like a TPI message sent by some other kernel 796 * component, we check and return an error. 797 */ 798 cr = msg_getcred(mp, NULL); 799 ASSERT(cr != NULL); 800 if (cr == NULL) { 801 udp_err_ack(q, mp, TSYSERR, EINVAL); 802 return; 803 } 804 805 udp = connp->conn_udp; 806 tcr = (struct T_conn_req *)mp->b_rptr; 807 808 /* A bit of sanity checking */ 809 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 810 udp_err_ack(q, mp, TPROTO, 0); 811 return; 812 } 813 814 if (tcr->OPT_length != 0) { 815 udp_err_ack(q, mp, TBADOPT, 0); 816 return; 817 } 818 819 /* 820 * Determine packet type based on type of address passed in 821 * the request should contain an IPv4 or IPv6 address. 
822 * Make sure that address family matches the type of 823 * family of the the address passed down 824 */ 825 len = tcr->DEST_length; 826 switch (tcr->DEST_length) { 827 default: 828 udp_err_ack(q, mp, TBADADDR, 0); 829 return; 830 831 case sizeof (sin_t): 832 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 833 sizeof (sin_t)); 834 break; 835 836 case sizeof (sin6_t): 837 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 838 sizeof (sin6_t)); 839 break; 840 } 841 842 error = proto_verify_ip_addr(udp->udp_family, sa, len); 843 if (error != 0) { 844 udp_err_ack(q, mp, TSYSERR, error); 845 return; 846 } 847 848 /* 849 * We have to send a connection confirmation to 850 * keep TLI happy. 851 */ 852 if (udp->udp_family == AF_INET) { 853 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 854 sizeof (sin_t), NULL, 0); 855 } else { 856 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 857 sizeof (sin6_t), NULL, 0); 858 } 859 if (mp1 == NULL) { 860 udp_err_ack(q, mp, TSYSERR, ENOMEM); 861 return; 862 } 863 864 /* 865 * Allocate the largest primitive we need to send back 866 * T_error_ack is > than T_ok_ack 867 */ 868 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 869 if (mp == NULL) { 870 /* Unable to reuse the T_CONN_REQ for the ack. 
*/ 871 freemsg(mp1); 872 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 873 return; 874 } 875 876 error = udp_do_connect(connp, sa, len, cr); 877 if (error != 0) { 878 freeb(mp1); 879 if (error < 0) 880 udp_err_ack(q, mp, -error, 0); 881 else 882 udp_err_ack(q, mp, TSYSERR, error); 883 } else { 884 mp = mi_tpi_ok_ack_alloc(mp); 885 ASSERT(mp != NULL); 886 putnext(connp->conn_rq, mp); 887 putnext(connp->conn_rq, mp1); 888 } 889 } 890 891 static int 892 udp_tpi_close(queue_t *q, int flags) 893 { 894 conn_t *connp; 895 896 if (flags & SO_FALLBACK) { 897 /* 898 * stream is being closed while in fallback 899 * simply free the resources that were allocated 900 */ 901 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 902 qprocsoff(q); 903 goto done; 904 } 905 906 connp = Q_TO_CONN(q); 907 udp_do_close(connp); 908 done: 909 q->q_ptr = WR(q)->q_ptr = NULL; 910 return (0); 911 } 912 913 /* 914 * Called in the close path to quiesce the conn 915 */ 916 void 917 udp_quiesce_conn(conn_t *connp) 918 { 919 udp_t *udp = connp->conn_udp; 920 921 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 922 /* 923 * Running in cluster mode - register unbind information 924 */ 925 if (udp->udp_ipversion == IPV4_VERSION) { 926 (*cl_inet_unbind)( 927 connp->conn_netstack->netstack_stackid, 928 IPPROTO_UDP, AF_INET, 929 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 930 (in_port_t)udp->udp_port, NULL); 931 } else { 932 (*cl_inet_unbind)( 933 connp->conn_netstack->netstack_stackid, 934 IPPROTO_UDP, AF_INET6, 935 (uint8_t *)(&(udp->udp_v6src)), 936 (in_port_t)udp->udp_port, NULL); 937 } 938 } 939 940 udp_bind_hash_remove(udp, B_FALSE); 941 942 } 943 944 void 945 udp_close_free(conn_t *connp) 946 { 947 udp_t *udp = connp->conn_udp; 948 949 /* If there are any options associated with the stream, free them. 
*/ 950 if (udp->udp_ip_snd_options != NULL) { 951 mi_free((char *)udp->udp_ip_snd_options); 952 udp->udp_ip_snd_options = NULL; 953 udp->udp_ip_snd_options_len = 0; 954 } 955 956 if (udp->udp_ip_rcv_options != NULL) { 957 mi_free((char *)udp->udp_ip_rcv_options); 958 udp->udp_ip_rcv_options = NULL; 959 udp->udp_ip_rcv_options_len = 0; 960 } 961 962 /* Free memory associated with sticky options */ 963 if (udp->udp_sticky_hdrs_len != 0) { 964 kmem_free(udp->udp_sticky_hdrs, 965 udp->udp_sticky_hdrs_len); 966 udp->udp_sticky_hdrs = NULL; 967 udp->udp_sticky_hdrs_len = 0; 968 } 969 970 ip6_pkt_free(&udp->udp_sticky_ipp); 971 972 /* 973 * Clear any fields which the kmem_cache constructor clears. 974 * Only udp_connp needs to be preserved. 975 * TBD: We should make this more efficient to avoid clearing 976 * everything. 977 */ 978 ASSERT(udp->udp_connp == connp); 979 bzero(udp, sizeof (udp_t)); 980 udp->udp_connp = connp; 981 } 982 983 static int 984 udp_do_disconnect(conn_t *connp) 985 { 986 udp_t *udp; 987 mblk_t *ire_mp; 988 udp_fanout_t *udpf; 989 udp_stack_t *us; 990 int error; 991 992 udp = connp->conn_udp; 993 us = udp->udp_us; 994 rw_enter(&udp->udp_rwlock, RW_WRITER); 995 if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) { 996 rw_exit(&udp->udp_rwlock); 997 return (-TOUTSTATE); 998 } 999 udp->udp_pending_op = T_DISCON_REQ; 1000 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 1001 us->us_bind_fanout_size)]; 1002 mutex_enter(&udpf->uf_lock); 1003 udp->udp_v6src = udp->udp_bound_v6src; 1004 udp->udp_state = TS_IDLE; 1005 mutex_exit(&udpf->uf_lock); 1006 1007 if (udp->udp_family == AF_INET6) { 1008 /* Rebuild the header template */ 1009 error = udp_build_hdrs(udp); 1010 if (error != 0) { 1011 udp->udp_pending_op = -1; 1012 rw_exit(&udp->udp_rwlock); 1013 return (error); 1014 } 1015 } 1016 1017 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 1018 if (ire_mp == NULL) { 1019 mutex_enter(&udpf->uf_lock); 1020 udp->udp_pending_op = -1; 1021 
mutex_exit(&udpf->uf_lock); 1022 rw_exit(&udp->udp_rwlock); 1023 return (ENOMEM); 1024 } 1025 1026 rw_exit(&udp->udp_rwlock); 1027 1028 if (udp->udp_family == AF_INET6) { 1029 error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP, 1030 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 1031 } else { 1032 error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP, 1033 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE); 1034 } 1035 1036 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 1037 } 1038 1039 1040 static void 1041 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 1042 { 1043 conn_t *connp = Q_TO_CONN(q); 1044 int error; 1045 1046 /* 1047 * Allocate the largest primitive we need to send back 1048 * T_error_ack is > than T_ok_ack 1049 */ 1050 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 1051 if (mp == NULL) { 1052 /* Unable to reuse the T_DISCON_REQ for the ack. */ 1053 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 1054 return; 1055 } 1056 1057 error = udp_do_disconnect(connp); 1058 1059 if (error != 0) { 1060 if (error < 0) { 1061 udp_err_ack(q, mp, -error, 0); 1062 } else { 1063 udp_err_ack(q, mp, TSYSERR, error); 1064 } 1065 } else { 1066 mp = mi_tpi_ok_ack_alloc(mp); 1067 ASSERT(mp != NULL); 1068 qreply(q, mp); 1069 } 1070 } 1071 1072 int 1073 udp_disconnect(conn_t *connp) 1074 { 1075 int error; 1076 udp_t *udp = connp->conn_udp; 1077 1078 udp->udp_dgram_errind = B_FALSE; 1079 1080 error = udp_do_disconnect(connp); 1081 1082 if (error < 0) 1083 error = proto_tlitosyserr(-error); 1084 1085 return (error); 1086 } 1087 1088 /* This routine creates a T_ERROR_ACK message and passes it upstream. 
*/ 1089 static void 1090 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1091 { 1092 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1093 qreply(q, mp); 1094 } 1095 1096 /* Shorthand to generate and send TPI error acks to our client */ 1097 static void 1098 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 1099 int sys_error) 1100 { 1101 struct T_error_ack *teackp; 1102 1103 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1104 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1105 teackp = (struct T_error_ack *)mp->b_rptr; 1106 teackp->ERROR_prim = primitive; 1107 teackp->TLI_error = t_error; 1108 teackp->UNIX_error = sys_error; 1109 qreply(q, mp); 1110 } 1111 } 1112 1113 /*ARGSUSED*/ 1114 static int 1115 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 1116 { 1117 int i; 1118 udp_t *udp = Q_TO_UDP(q); 1119 udp_stack_t *us = udp->udp_us; 1120 1121 for (i = 0; i < us->us_num_epriv_ports; i++) { 1122 if (us->us_epriv_ports[i] != 0) 1123 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 1124 } 1125 return (0); 1126 } 1127 1128 /* ARGSUSED */ 1129 static int 1130 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1131 cred_t *cr) 1132 { 1133 long new_value; 1134 int i; 1135 udp_t *udp = Q_TO_UDP(q); 1136 udp_stack_t *us = udp->udp_us; 1137 1138 /* 1139 * Fail the request if the new value does not lie within the 1140 * port number limits. 
1141 */ 1142 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1143 new_value <= 0 || new_value >= 65536) { 1144 return (EINVAL); 1145 } 1146 1147 /* Check if the value is already in the list */ 1148 for (i = 0; i < us->us_num_epriv_ports; i++) { 1149 if (new_value == us->us_epriv_ports[i]) { 1150 return (EEXIST); 1151 } 1152 } 1153 /* Find an empty slot */ 1154 for (i = 0; i < us->us_num_epriv_ports; i++) { 1155 if (us->us_epriv_ports[i] == 0) 1156 break; 1157 } 1158 if (i == us->us_num_epriv_ports) { 1159 return (EOVERFLOW); 1160 } 1161 1162 /* Set the new value */ 1163 us->us_epriv_ports[i] = (in_port_t)new_value; 1164 return (0); 1165 } 1166 1167 /* ARGSUSED */ 1168 static int 1169 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1170 cred_t *cr) 1171 { 1172 long new_value; 1173 int i; 1174 udp_t *udp = Q_TO_UDP(q); 1175 udp_stack_t *us = udp->udp_us; 1176 1177 /* 1178 * Fail the request if the new value does not lie within the 1179 * port number limits. 1180 */ 1181 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1182 new_value <= 0 || new_value >= 65536) { 1183 return (EINVAL); 1184 } 1185 1186 /* Check that the value is already in the list */ 1187 for (i = 0; i < us->us_num_epriv_ports; i++) { 1188 if (us->us_epriv_ports[i] == new_value) 1189 break; 1190 } 1191 if (i == us->us_num_epriv_ports) { 1192 return (ESRCH); 1193 } 1194 1195 /* Clear the value */ 1196 us->us_epriv_ports[i] = 0; 1197 return (0); 1198 } 1199 1200 /* At minimum we need 4 bytes of UDP header */ 1201 #define ICMP_MIN_UDP_HDR 4 1202 1203 /* 1204 * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP. 1205 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1206 * Assumes that IP has pulled up everything up to and including the ICMP header. 
1207 */ 1208 static void 1209 udp_icmp_error(conn_t *connp, mblk_t *mp) 1210 { 1211 icmph_t *icmph; 1212 ipha_t *ipha; 1213 int iph_hdr_length; 1214 udpha_t *udpha; 1215 sin_t sin; 1216 sin6_t sin6; 1217 mblk_t *mp1; 1218 int error = 0; 1219 udp_t *udp = connp->conn_udp; 1220 1221 mp1 = NULL; 1222 ipha = (ipha_t *)mp->b_rptr; 1223 1224 ASSERT(OK_32PTR(mp->b_rptr)); 1225 1226 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1227 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1228 udp_icmp_error_ipv6(connp, mp); 1229 return; 1230 } 1231 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 1232 1233 /* Skip past the outer IP and ICMP headers */ 1234 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1235 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1236 ipha = (ipha_t *)&icmph[1]; 1237 1238 /* Skip past the inner IP and find the ULP header */ 1239 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1240 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1241 1242 switch (icmph->icmph_type) { 1243 case ICMP_DEST_UNREACHABLE: 1244 switch (icmph->icmph_code) { 1245 case ICMP_FRAGMENTATION_NEEDED: 1246 /* 1247 * IP has already adjusted the path MTU. 1248 */ 1249 break; 1250 case ICMP_PORT_UNREACHABLE: 1251 case ICMP_PROTOCOL_UNREACHABLE: 1252 error = ECONNREFUSED; 1253 break; 1254 default: 1255 /* Transient errors */ 1256 break; 1257 } 1258 break; 1259 default: 1260 /* Transient errors */ 1261 break; 1262 } 1263 if (error == 0) { 1264 freemsg(mp); 1265 return; 1266 } 1267 1268 /* 1269 * Deliver T_UDERROR_IND when the application has asked for it. 1270 * The socket layer enables this automatically when connected. 
1271 */ 1272 if (!udp->udp_dgram_errind) { 1273 freemsg(mp); 1274 return; 1275 } 1276 1277 1278 switch (udp->udp_family) { 1279 case AF_INET: 1280 sin = sin_null; 1281 sin.sin_family = AF_INET; 1282 sin.sin_addr.s_addr = ipha->ipha_dst; 1283 sin.sin_port = udpha->uha_dst_port; 1284 if (IPCL_IS_NONSTR(connp)) { 1285 rw_enter(&udp->udp_rwlock, RW_WRITER); 1286 if (udp->udp_state == TS_DATA_XFER) { 1287 if (sin.sin_port == udp->udp_dstport && 1288 sin.sin_addr.s_addr == 1289 V4_PART_OF_V6(udp->udp_v6dst)) { 1290 rw_exit(&udp->udp_rwlock); 1291 (*connp->conn_upcalls->su_set_error) 1292 (connp->conn_upper_handle, error); 1293 goto done; 1294 } 1295 } else { 1296 udp->udp_delayed_error = error; 1297 *((sin_t *)&udp->udp_delayed_addr) = sin; 1298 } 1299 rw_exit(&udp->udp_rwlock); 1300 } else { 1301 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1302 NULL, 0, error); 1303 } 1304 break; 1305 case AF_INET6: 1306 sin6 = sin6_null; 1307 sin6.sin6_family = AF_INET6; 1308 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1309 sin6.sin6_port = udpha->uha_dst_port; 1310 if (IPCL_IS_NONSTR(connp)) { 1311 rw_enter(&udp->udp_rwlock, RW_WRITER); 1312 if (udp->udp_state == TS_DATA_XFER) { 1313 if (sin6.sin6_port == udp->udp_dstport && 1314 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1315 &udp->udp_v6dst)) { 1316 rw_exit(&udp->udp_rwlock); 1317 (*connp->conn_upcalls->su_set_error) 1318 (connp->conn_upper_handle, error); 1319 goto done; 1320 } 1321 } else { 1322 udp->udp_delayed_error = error; 1323 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1324 } 1325 rw_exit(&udp->udp_rwlock); 1326 } else { 1327 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1328 NULL, 0, error); 1329 } 1330 break; 1331 } 1332 if (mp1 != NULL) 1333 putnext(connp->conn_rq, mp1); 1334 done: 1335 ASSERT(!RW_ISWRITER(&udp->udp_rwlock)); 1336 freemsg(mp); 1337 } 1338 1339 /* 1340 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 
1341 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1342 * Assumes that IP has pulled up all the extension headers as well as the 1343 * ICMPv6 header. 1344 */ 1345 static void 1346 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1347 { 1348 icmp6_t *icmp6; 1349 ip6_t *ip6h, *outer_ip6h; 1350 uint16_t iph_hdr_length; 1351 uint8_t *nexthdrp; 1352 udpha_t *udpha; 1353 sin6_t sin6; 1354 mblk_t *mp1; 1355 int error = 0; 1356 udp_t *udp = connp->conn_udp; 1357 udp_stack_t *us = udp->udp_us; 1358 1359 outer_ip6h = (ip6_t *)mp->b_rptr; 1360 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1361 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1362 else 1363 iph_hdr_length = IPV6_HDR_LEN; 1364 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1365 ip6h = (ip6_t *)&icmp6[1]; 1366 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1367 freemsg(mp); 1368 return; 1369 } 1370 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1371 1372 switch (icmp6->icmp6_type) { 1373 case ICMP6_DST_UNREACH: 1374 switch (icmp6->icmp6_code) { 1375 case ICMP6_DST_UNREACH_NOPORT: 1376 error = ECONNREFUSED; 1377 break; 1378 case ICMP6_DST_UNREACH_ADMIN: 1379 case ICMP6_DST_UNREACH_NOROUTE: 1380 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1381 case ICMP6_DST_UNREACH_ADDR: 1382 /* Transient errors */ 1383 break; 1384 default: 1385 break; 1386 } 1387 break; 1388 case ICMP6_PACKET_TOO_BIG: { 1389 struct T_unitdata_ind *tudi; 1390 struct T_opthdr *toh; 1391 size_t udi_size; 1392 mblk_t *newmp; 1393 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1394 sizeof (struct ip6_mtuinfo); 1395 sin6_t *sin6; 1396 struct ip6_mtuinfo *mtuinfo; 1397 1398 /* 1399 * If the application has requested to receive path mtu 1400 * information, send up an empty message containing an 1401 * IPV6_PATHMTU ancillary data item. 
1402 */ 1403 if (!udp->udp_ipv6_recvpathmtu) 1404 break; 1405 1406 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1407 opt_length; 1408 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1409 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1410 break; 1411 } 1412 1413 /* 1414 * newmp->b_cont is left to NULL on purpose. This is an 1415 * empty message containing only ancillary data. 1416 */ 1417 newmp->b_datap->db_type = M_PROTO; 1418 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1419 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1420 tudi->PRIM_type = T_UNITDATA_IND; 1421 tudi->SRC_length = sizeof (sin6_t); 1422 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1423 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1424 tudi->OPT_length = opt_length; 1425 1426 sin6 = (sin6_t *)&tudi[1]; 1427 bzero(sin6, sizeof (sin6_t)); 1428 sin6->sin6_family = AF_INET6; 1429 sin6->sin6_addr = udp->udp_v6dst; 1430 1431 toh = (struct T_opthdr *)&sin6[1]; 1432 toh->level = IPPROTO_IPV6; 1433 toh->name = IPV6_PATHMTU; 1434 toh->len = opt_length; 1435 toh->status = 0; 1436 1437 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1438 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1439 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1440 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1441 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1442 /* 1443 * We've consumed everything we need from the original 1444 * message. Free it, then send our empty message. 
1445 */ 1446 freemsg(mp); 1447 udp_ulp_recv(connp, newmp); 1448 1449 return; 1450 } 1451 case ICMP6_TIME_EXCEEDED: 1452 /* Transient errors */ 1453 break; 1454 case ICMP6_PARAM_PROB: 1455 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1456 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1457 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1458 (uchar_t *)nexthdrp) { 1459 error = ECONNREFUSED; 1460 break; 1461 } 1462 break; 1463 } 1464 if (error == 0) { 1465 freemsg(mp); 1466 return; 1467 } 1468 1469 /* 1470 * Deliver T_UDERROR_IND when the application has asked for it. 1471 * The socket layer enables this automatically when connected. 1472 */ 1473 if (!udp->udp_dgram_errind) { 1474 freemsg(mp); 1475 return; 1476 } 1477 1478 sin6 = sin6_null; 1479 sin6.sin6_family = AF_INET6; 1480 sin6.sin6_addr = ip6h->ip6_dst; 1481 sin6.sin6_port = udpha->uha_dst_port; 1482 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1483 1484 if (IPCL_IS_NONSTR(connp)) { 1485 rw_enter(&udp->udp_rwlock, RW_WRITER); 1486 if (udp->udp_state == TS_DATA_XFER) { 1487 if (sin6.sin6_port == udp->udp_dstport && 1488 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1489 &udp->udp_v6dst)) { 1490 rw_exit(&udp->udp_rwlock); 1491 (*connp->conn_upcalls->su_set_error) 1492 (connp->conn_upper_handle, error); 1493 goto done; 1494 } 1495 } else { 1496 udp->udp_delayed_error = error; 1497 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1498 } 1499 rw_exit(&udp->udp_rwlock); 1500 } else { 1501 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1502 NULL, 0, error); 1503 if (mp1 != NULL) 1504 putnext(connp->conn_rq, mp1); 1505 } 1506 done: 1507 ASSERT(!RW_ISWRITER(&udp->udp_rwlock)); 1508 freemsg(mp); 1509 } 1510 1511 /* 1512 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1513 * The local address is filled in if endpoint is bound. 
The remote address 1514 * is filled in if remote address has been precified ("connected endpoint") 1515 * (The concept of connected CLTS sockets is alien to published TPI 1516 * but we support it anyway). 1517 */ 1518 static void 1519 udp_addr_req(queue_t *q, mblk_t *mp) 1520 { 1521 sin_t *sin; 1522 sin6_t *sin6; 1523 mblk_t *ackmp; 1524 struct T_addr_ack *taa; 1525 udp_t *udp = Q_TO_UDP(q); 1526 1527 /* Make it large enough for worst case */ 1528 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1529 2 * sizeof (sin6_t), 1); 1530 if (ackmp == NULL) { 1531 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1532 return; 1533 } 1534 taa = (struct T_addr_ack *)ackmp->b_rptr; 1535 1536 bzero(taa, sizeof (struct T_addr_ack)); 1537 ackmp->b_wptr = (uchar_t *)&taa[1]; 1538 1539 taa->PRIM_type = T_ADDR_ACK; 1540 ackmp->b_datap->db_type = M_PCPROTO; 1541 rw_enter(&udp->udp_rwlock, RW_READER); 1542 /* 1543 * Note: Following code assumes 32 bit alignment of basic 1544 * data structures like sin_t and struct T_addr_ack. 1545 */ 1546 if (udp->udp_state != TS_UNBND) { 1547 /* 1548 * Fill in local address first 1549 */ 1550 taa->LOCADDR_offset = sizeof (*taa); 1551 if (udp->udp_family == AF_INET) { 1552 taa->LOCADDR_length = sizeof (sin_t); 1553 sin = (sin_t *)&taa[1]; 1554 /* Fill zeroes and then initialize non-zero fields */ 1555 *sin = sin_null; 1556 sin->sin_family = AF_INET; 1557 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 1558 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1559 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 1560 sin->sin_addr.s_addr); 1561 } else { 1562 /* 1563 * INADDR_ANY 1564 * udp_v6src is not set, we might be bound to 1565 * broadcast/multicast. 
Use udp_bound_v6src as 1566 * local address instead (that could 1567 * also still be INADDR_ANY) 1568 */ 1569 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 1570 sin->sin_addr.s_addr); 1571 } 1572 sin->sin_port = udp->udp_port; 1573 ackmp->b_wptr = (uchar_t *)&sin[1]; 1574 if (udp->udp_state == TS_DATA_XFER) { 1575 /* 1576 * connected, fill remote address too 1577 */ 1578 taa->REMADDR_length = sizeof (sin_t); 1579 /* assumed 32-bit alignment */ 1580 taa->REMADDR_offset = taa->LOCADDR_offset + 1581 taa->LOCADDR_length; 1582 1583 sin = (sin_t *)(ackmp->b_rptr + 1584 taa->REMADDR_offset); 1585 /* initialize */ 1586 *sin = sin_null; 1587 sin->sin_family = AF_INET; 1588 sin->sin_addr.s_addr = 1589 V4_PART_OF_V6(udp->udp_v6dst); 1590 sin->sin_port = udp->udp_dstport; 1591 ackmp->b_wptr = (uchar_t *)&sin[1]; 1592 } 1593 } else { 1594 taa->LOCADDR_length = sizeof (sin6_t); 1595 sin6 = (sin6_t *)&taa[1]; 1596 /* Fill zeroes and then initialize non-zero fields */ 1597 *sin6 = sin6_null; 1598 sin6->sin6_family = AF_INET6; 1599 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1600 sin6->sin6_addr = udp->udp_v6src; 1601 } else { 1602 /* 1603 * UNSPECIFIED 1604 * udp_v6src is not set, we might be bound to 1605 * broadcast/multicast. 
Use udp_bound_v6src as 1606 * local address instead (that could 1607 * also still be UNSPECIFIED) 1608 */ 1609 sin6->sin6_addr = 1610 udp->udp_bound_v6src; 1611 } 1612 sin6->sin6_port = udp->udp_port; 1613 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1614 if (udp->udp_state == TS_DATA_XFER) { 1615 /* 1616 * connected, fill remote address too 1617 */ 1618 taa->REMADDR_length = sizeof (sin6_t); 1619 /* assumed 32-bit alignment */ 1620 taa->REMADDR_offset = taa->LOCADDR_offset + 1621 taa->LOCADDR_length; 1622 1623 sin6 = (sin6_t *)(ackmp->b_rptr + 1624 taa->REMADDR_offset); 1625 /* initialize */ 1626 *sin6 = sin6_null; 1627 sin6->sin6_family = AF_INET6; 1628 sin6->sin6_addr = udp->udp_v6dst; 1629 sin6->sin6_port = udp->udp_dstport; 1630 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1631 } 1632 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1633 } 1634 } 1635 rw_exit(&udp->udp_rwlock); 1636 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1637 qreply(q, ackmp); 1638 } 1639 1640 static void 1641 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1642 { 1643 if (udp->udp_family == AF_INET) { 1644 *tap = udp_g_t_info_ack_ipv4; 1645 } else { 1646 *tap = udp_g_t_info_ack_ipv6; 1647 } 1648 tap->CURRENT_state = udp->udp_state; 1649 tap->OPT_size = udp_max_optsize; 1650 } 1651 1652 static void 1653 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1654 t_uscalar_t cap_bits1) 1655 { 1656 tcap->CAP_bits1 = 0; 1657 1658 if (cap_bits1 & TC1_INFO) { 1659 udp_copy_info(&tcap->INFO_ack, udp); 1660 tcap->CAP_bits1 |= TC1_INFO; 1661 } 1662 } 1663 1664 /* 1665 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1666 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1667 * udp_g_t_info_ack. The current state of the stream is copied from 1668 * udp_state. 
1669 */ 1670 static void 1671 udp_capability_req(queue_t *q, mblk_t *mp) 1672 { 1673 t_uscalar_t cap_bits1; 1674 struct T_capability_ack *tcap; 1675 udp_t *udp = Q_TO_UDP(q); 1676 1677 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1678 1679 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1680 mp->b_datap->db_type, T_CAPABILITY_ACK); 1681 if (!mp) 1682 return; 1683 1684 tcap = (struct T_capability_ack *)mp->b_rptr; 1685 udp_do_capability_ack(udp, tcap, cap_bits1); 1686 1687 qreply(q, mp); 1688 } 1689 1690 /* 1691 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1692 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1693 * The current state of the stream is copied from udp_state. 1694 */ 1695 static void 1696 udp_info_req(queue_t *q, mblk_t *mp) 1697 { 1698 udp_t *udp = Q_TO_UDP(q); 1699 1700 /* Create a T_INFO_ACK message. */ 1701 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1702 T_INFO_ACK); 1703 if (!mp) 1704 return; 1705 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1706 qreply(q, mp); 1707 } 1708 1709 /* 1710 * IP recognizes seven kinds of bind requests: 1711 * 1712 * - A zero-length address binds only to the protocol number. 1713 * 1714 * - A 4-byte address is treated as a request to 1715 * validate that the address is a valid local IPv4 1716 * address, appropriate for an application to bind to. 1717 * IP does the verification, but does not make any note 1718 * of the address at this time. 1719 * 1720 * - A 16-byte address contains is treated as a request 1721 * to validate a local IPv6 address, as the 4-byte 1722 * address case above. 1723 * 1724 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1725 * use it for the inbound fanout of packets. 1726 * 1727 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1728 * use it for the inbound fanout of packets. 
1729 * 1730 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1731 * information consisting of local and remote addresses 1732 * and ports. In this case, the addresses are both 1733 * validated as appropriate for this operation, and, if 1734 * so, the information is retained for use in the 1735 * inbound fanout. 1736 * 1737 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1738 * fanout information, like the 12-byte case above. 1739 * 1740 * IP will also fill in the IRE request mblk with information 1741 * regarding our peer. In all cases, we notify IP of our protocol 1742 * type by appending a single protocol byte to the bind request. 1743 */ 1744 static mblk_t * 1745 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 1746 { 1747 char *cp; 1748 mblk_t *mp; 1749 struct T_bind_req *tbr; 1750 ipa_conn_t *ac; 1751 ipa6_conn_t *ac6; 1752 sin_t *sin; 1753 sin6_t *sin6; 1754 1755 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1756 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 1757 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1758 if (!mp) 1759 return (mp); 1760 mp->b_datap->db_type = M_PROTO; 1761 tbr = (struct T_bind_req *)mp->b_rptr; 1762 tbr->PRIM_type = bind_prim; 1763 tbr->ADDR_offset = sizeof (*tbr); 1764 tbr->CONIND_number = 0; 1765 tbr->ADDR_length = addr_length; 1766 cp = (char *)&tbr[1]; 1767 switch (addr_length) { 1768 case sizeof (ipa_conn_t): 1769 ASSERT(udp->udp_family == AF_INET); 1770 /* Append a request for an IRE */ 1771 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1772 if (!mp->b_cont) { 1773 freemsg(mp); 1774 return (NULL); 1775 } 1776 mp->b_cont->b_wptr += sizeof (ire_t); 1777 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1778 1779 /* cp known to be 32 bit aligned */ 1780 ac = (ipa_conn_t *)cp; 1781 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 1782 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 1783 ac->ac_fport = udp->udp_dstport; 1784 ac->ac_lport = udp->udp_port; 1785 
break; 1786 1787 case sizeof (ipa6_conn_t): 1788 ASSERT(udp->udp_family == AF_INET6); 1789 /* Append a request for an IRE */ 1790 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1791 if (!mp->b_cont) { 1792 freemsg(mp); 1793 return (NULL); 1794 } 1795 mp->b_cont->b_wptr += sizeof (ire_t); 1796 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1797 1798 /* cp known to be 32 bit aligned */ 1799 ac6 = (ipa6_conn_t *)cp; 1800 ac6->ac6_laddr = udp->udp_v6src; 1801 ac6->ac6_faddr = udp->udp_v6dst; 1802 ac6->ac6_fport = udp->udp_dstport; 1803 ac6->ac6_lport = udp->udp_port; 1804 break; 1805 1806 case sizeof (sin_t): 1807 ASSERT(udp->udp_family == AF_INET); 1808 /* Append a request for an IRE */ 1809 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1810 if (!mp->b_cont) { 1811 freemsg(mp); 1812 return (NULL); 1813 } 1814 mp->b_cont->b_wptr += sizeof (ire_t); 1815 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1816 1817 sin = (sin_t *)cp; 1818 *sin = sin_null; 1819 sin->sin_family = AF_INET; 1820 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 1821 sin->sin_port = udp->udp_port; 1822 break; 1823 1824 case sizeof (sin6_t): 1825 ASSERT(udp->udp_family == AF_INET6); 1826 /* Append a request for an IRE */ 1827 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1828 if (!mp->b_cont) { 1829 freemsg(mp); 1830 return (NULL); 1831 } 1832 mp->b_cont->b_wptr += sizeof (ire_t); 1833 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1834 1835 sin6 = (sin6_t *)cp; 1836 *sin6 = sin6_null; 1837 sin6->sin6_family = AF_INET6; 1838 sin6->sin6_addr = udp->udp_bound_v6src; 1839 sin6->sin6_port = udp->udp_port; 1840 break; 1841 } 1842 /* Add protocol number to end */ 1843 cp[addr_length] = (char)IPPROTO_UDP; 1844 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1845 return (mp); 1846 } 1847 1848 /* For /dev/udp aka AF_INET open */ 1849 static int 1850 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1851 { 1852 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1853 } 
1854 1855 /* For /dev/udp6 aka AF_INET6 open */ 1856 static int 1857 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1858 { 1859 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1860 } 1861 1862 /* 1863 * This is the open routine for udp. It allocates a udp_t structure for 1864 * the stream and, on the first open of the module, creates an ND table. 1865 */ 1866 /*ARGSUSED2*/ 1867 static int 1868 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1869 boolean_t isv6) 1870 { 1871 int error; 1872 udp_t *udp; 1873 conn_t *connp; 1874 dev_t conn_dev; 1875 udp_stack_t *us; 1876 vmem_t *minor_arena; 1877 1878 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 1879 1880 /* If the stream is already open, return immediately. */ 1881 if (q->q_ptr != NULL) 1882 return (0); 1883 1884 if (sflag == MODOPEN) 1885 return (EINVAL); 1886 1887 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1888 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1889 minor_arena = ip_minor_arena_la; 1890 } else { 1891 /* 1892 * Either minor numbers in the large arena were exhausted 1893 * or a non socket application is doing the open. 1894 * Try to allocate from the small arena. 
1895 */ 1896 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1897 return (EBUSY); 1898 1899 minor_arena = ip_minor_arena_sa; 1900 } 1901 1902 if (flag & SO_FALLBACK) { 1903 /* 1904 * Non streams socket needs a stream to fallback to 1905 */ 1906 RD(q)->q_ptr = (void *)conn_dev; 1907 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1908 WR(q)->q_ptr = (void *)minor_arena; 1909 qprocson(q); 1910 return (0); 1911 } 1912 1913 connp = udp_do_open(credp, isv6, KM_SLEEP); 1914 if (connp == NULL) { 1915 inet_minor_free(minor_arena, conn_dev); 1916 return (ENOMEM); 1917 } 1918 udp = connp->conn_udp; 1919 us = udp->udp_us; 1920 1921 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1922 connp->conn_dev = conn_dev; 1923 connp->conn_minor_arena = minor_arena; 1924 1925 /* 1926 * Initialize the udp_t structure for this stream. 1927 */ 1928 q->q_ptr = connp; 1929 WR(q)->q_ptr = connp; 1930 connp->conn_rq = q; 1931 connp->conn_wq = WR(q); 1932 1933 rw_enter(&udp->udp_rwlock, RW_WRITER); 1934 ASSERT(connp->conn_ulp == IPPROTO_UDP); 1935 ASSERT(connp->conn_udp == udp); 1936 ASSERT(udp->udp_connp == connp); 1937 1938 if (flag & SO_SOCKSTR) { 1939 connp->conn_flags |= IPCL_SOCKET; 1940 udp->udp_issocket = B_TRUE; 1941 udp->udp_direct_sockfs = B_TRUE; 1942 } 1943 1944 q->q_hiwat = us->us_recv_hiwat; 1945 WR(q)->q_hiwat = us->us_xmit_hiwat; 1946 WR(q)->q_lowat = us->us_xmit_lowat; 1947 1948 qprocson(q); 1949 1950 if (udp->udp_family == AF_INET6) { 1951 /* Build initial header template for transmit */ 1952 if ((error = udp_build_hdrs(udp)) != 0) { 1953 rw_exit(&udp->udp_rwlock); 1954 qprocsoff(q); 1955 inet_minor_free(minor_arena, conn_dev); 1956 ipcl_conn_destroy(connp); 1957 return (error); 1958 } 1959 } 1960 rw_exit(&udp->udp_rwlock); 1961 1962 /* Set the Stream head write offset and high watermark. */ 1963 (void) proto_set_tx_wroff(q, connp, 1964 udp->udp_max_hdr_len + us->us_wroff_extra); 1965 /* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? 
*/ 1966 (void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat)); 1967 1968 mutex_enter(&connp->conn_lock); 1969 connp->conn_state_flags &= ~CONN_INCIPIENT; 1970 mutex_exit(&connp->conn_lock); 1971 return (0); 1972 } 1973 1974 /* 1975 * Which UDP options OK to set through T_UNITDATA_REQ... 1976 */ 1977 /* ARGSUSED */ 1978 static boolean_t 1979 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1980 { 1981 return (B_TRUE); 1982 } 1983 1984 /* 1985 * This routine gets default values of certain options whose default 1986 * values are maintained by protcol specific code 1987 */ 1988 /* ARGSUSED */ 1989 int 1990 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1991 { 1992 udp_t *udp = Q_TO_UDP(q); 1993 udp_stack_t *us = udp->udp_us; 1994 int *i1 = (int *)ptr; 1995 1996 switch (level) { 1997 case IPPROTO_IP: 1998 switch (name) { 1999 case IP_MULTICAST_TTL: 2000 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 2001 return (sizeof (uchar_t)); 2002 case IP_MULTICAST_LOOP: 2003 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 2004 return (sizeof (uchar_t)); 2005 } 2006 break; 2007 case IPPROTO_IPV6: 2008 switch (name) { 2009 case IPV6_MULTICAST_HOPS: 2010 *i1 = IP_DEFAULT_MULTICAST_TTL; 2011 return (sizeof (int)); 2012 case IPV6_MULTICAST_LOOP: 2013 *i1 = IP_DEFAULT_MULTICAST_LOOP; 2014 return (sizeof (int)); 2015 case IPV6_UNICAST_HOPS: 2016 *i1 = us->us_ipv6_hoplimit; 2017 return (sizeof (int)); 2018 } 2019 break; 2020 } 2021 return (-1); 2022 } 2023 2024 /* 2025 * This routine retrieves the current status of socket options. 2026 * It returns the size of the option retrieved. 
2027 */ 2028 static int 2029 udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 2030 { 2031 udp_t *udp = connp->conn_udp; 2032 udp_stack_t *us = udp->udp_us; 2033 int *i1 = (int *)ptr; 2034 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 2035 int len; 2036 2037 ASSERT(RW_READ_HELD(&udp->udp_rwlock)); 2038 switch (level) { 2039 case SOL_SOCKET: 2040 switch (name) { 2041 case SO_DEBUG: 2042 *i1 = udp->udp_debug; 2043 break; /* goto sizeof (int) option return */ 2044 case SO_REUSEADDR: 2045 *i1 = udp->udp_reuseaddr; 2046 break; /* goto sizeof (int) option return */ 2047 case SO_TYPE: 2048 *i1 = SOCK_DGRAM; 2049 break; /* goto sizeof (int) option return */ 2050 2051 /* 2052 * The following three items are available here, 2053 * but are only meaningful to IP. 2054 */ 2055 case SO_DONTROUTE: 2056 *i1 = udp->udp_dontroute; 2057 break; /* goto sizeof (int) option return */ 2058 case SO_USELOOPBACK: 2059 *i1 = udp->udp_useloopback; 2060 break; /* goto sizeof (int) option return */ 2061 case SO_BROADCAST: 2062 *i1 = udp->udp_broadcast; 2063 break; /* goto sizeof (int) option return */ 2064 2065 case SO_SNDBUF: 2066 *i1 = udp->udp_xmit_hiwat; 2067 break; /* goto sizeof (int) option return */ 2068 case SO_RCVBUF: 2069 *i1 = udp->udp_rcv_disply_hiwat; 2070 break; /* goto sizeof (int) option return */ 2071 case SO_DGRAM_ERRIND: 2072 *i1 = udp->udp_dgram_errind; 2073 break; /* goto sizeof (int) option return */ 2074 case SO_RECVUCRED: 2075 *i1 = udp->udp_recvucred; 2076 break; /* goto sizeof (int) option return */ 2077 case SO_TIMESTAMP: 2078 *i1 = udp->udp_timestamp; 2079 break; /* goto sizeof (int) option return */ 2080 case SO_ANON_MLP: 2081 *i1 = connp->conn_anon_mlp; 2082 break; /* goto sizeof (int) option return */ 2083 case SO_MAC_EXEMPT: 2084 *i1 = connp->conn_mac_exempt; 2085 break; /* goto sizeof (int) option return */ 2086 case SO_ALLZONES: 2087 *i1 = connp->conn_allzones; 2088 break; /* goto sizeof (int) option return */ 2089 case SO_EXCLBIND: 2090 *i1 = 
udp->udp_exclbind ? SO_EXCLBIND : 0; 2091 break; 2092 case SO_PROTOTYPE: 2093 *i1 = IPPROTO_UDP; 2094 break; 2095 case SO_DOMAIN: 2096 *i1 = udp->udp_family; 2097 break; 2098 default: 2099 return (-1); 2100 } 2101 break; 2102 case IPPROTO_IP: 2103 if (udp->udp_family != AF_INET) 2104 return (-1); 2105 switch (name) { 2106 case IP_OPTIONS: 2107 case T_IP_OPTIONS: 2108 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 2109 if (len > 0) { 2110 bcopy(udp->udp_ip_rcv_options + 2111 udp->udp_label_len, ptr, len); 2112 } 2113 return (len); 2114 case IP_TOS: 2115 case T_IP_TOS: 2116 *i1 = (int)udp->udp_type_of_service; 2117 break; /* goto sizeof (int) option return */ 2118 case IP_TTL: 2119 *i1 = (int)udp->udp_ttl; 2120 break; /* goto sizeof (int) option return */ 2121 case IP_DHCPINIT_IF: 2122 return (-EINVAL); 2123 case IP_NEXTHOP: 2124 case IP_RECVPKTINFO: 2125 /* 2126 * This also handles IP_PKTINFO. 2127 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2128 * Differentiation is based on the size of the argument 2129 * passed in. 2130 * This option is handled in IP which will return an 2131 * error for IP_PKTINFO as it's not supported as a 2132 * sticky option. 
2133 */ 2134 return (-EINVAL); 2135 case IP_MULTICAST_IF: 2136 /* 0 address if not set */ 2137 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 2138 return (sizeof (ipaddr_t)); 2139 case IP_MULTICAST_TTL: 2140 *(uchar_t *)ptr = udp->udp_multicast_ttl; 2141 return (sizeof (uchar_t)); 2142 case IP_MULTICAST_LOOP: 2143 *ptr = connp->conn_multicast_loop; 2144 return (sizeof (uint8_t)); 2145 case IP_RECVOPTS: 2146 *i1 = udp->udp_recvopts; 2147 break; /* goto sizeof (int) option return */ 2148 case IP_RECVDSTADDR: 2149 *i1 = udp->udp_recvdstaddr; 2150 break; /* goto sizeof (int) option return */ 2151 case IP_RECVIF: 2152 *i1 = udp->udp_recvif; 2153 break; /* goto sizeof (int) option return */ 2154 case IP_RECVSLLA: 2155 *i1 = udp->udp_recvslla; 2156 break; /* goto sizeof (int) option return */ 2157 case IP_RECVTTL: 2158 *i1 = udp->udp_recvttl; 2159 break; /* goto sizeof (int) option return */ 2160 case IP_ADD_MEMBERSHIP: 2161 case IP_DROP_MEMBERSHIP: 2162 case IP_BLOCK_SOURCE: 2163 case IP_UNBLOCK_SOURCE: 2164 case IP_ADD_SOURCE_MEMBERSHIP: 2165 case IP_DROP_SOURCE_MEMBERSHIP: 2166 case MCAST_JOIN_GROUP: 2167 case MCAST_LEAVE_GROUP: 2168 case MCAST_BLOCK_SOURCE: 2169 case MCAST_UNBLOCK_SOURCE: 2170 case MCAST_JOIN_SOURCE_GROUP: 2171 case MCAST_LEAVE_SOURCE_GROUP: 2172 /* cannot "get" the value for these */ 2173 return (-1); 2174 case IP_BOUND_IF: 2175 /* Zero if not set */ 2176 *i1 = udp->udp_bound_if; 2177 break; /* goto sizeof (int) option return */ 2178 case IP_UNSPEC_SRC: 2179 *i1 = udp->udp_unspec_source; 2180 break; /* goto sizeof (int) option return */ 2181 case IP_BROADCAST_TTL: 2182 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 2183 return (sizeof (uchar_t)); 2184 default: 2185 return (-1); 2186 } 2187 break; 2188 case IPPROTO_IPV6: 2189 if (udp->udp_family != AF_INET6) 2190 return (-1); 2191 switch (name) { 2192 case IPV6_UNICAST_HOPS: 2193 *i1 = (unsigned int)udp->udp_ttl; 2194 break; /* goto sizeof (int) option return */ 2195 case IPV6_MULTICAST_IF: 2196 /* 0 
index if not set */ 2197 *i1 = udp->udp_multicast_if_index; 2198 break; /* goto sizeof (int) option return */ 2199 case IPV6_MULTICAST_HOPS: 2200 *i1 = udp->udp_multicast_ttl; 2201 break; /* goto sizeof (int) option return */ 2202 case IPV6_MULTICAST_LOOP: 2203 *i1 = connp->conn_multicast_loop; 2204 break; /* goto sizeof (int) option return */ 2205 case IPV6_JOIN_GROUP: 2206 case IPV6_LEAVE_GROUP: 2207 case MCAST_JOIN_GROUP: 2208 case MCAST_LEAVE_GROUP: 2209 case MCAST_BLOCK_SOURCE: 2210 case MCAST_UNBLOCK_SOURCE: 2211 case MCAST_JOIN_SOURCE_GROUP: 2212 case MCAST_LEAVE_SOURCE_GROUP: 2213 /* cannot "get" the value for these */ 2214 return (-1); 2215 case IPV6_BOUND_IF: 2216 /* Zero if not set */ 2217 *i1 = udp->udp_bound_if; 2218 break; /* goto sizeof (int) option return */ 2219 case IPV6_UNSPEC_SRC: 2220 *i1 = udp->udp_unspec_source; 2221 break; /* goto sizeof (int) option return */ 2222 case IPV6_RECVPKTINFO: 2223 *i1 = udp->udp_ip_recvpktinfo; 2224 break; /* goto sizeof (int) option return */ 2225 case IPV6_RECVTCLASS: 2226 *i1 = udp->udp_ipv6_recvtclass; 2227 break; /* goto sizeof (int) option return */ 2228 case IPV6_RECVPATHMTU: 2229 *i1 = udp->udp_ipv6_recvpathmtu; 2230 break; /* goto sizeof (int) option return */ 2231 case IPV6_RECVHOPLIMIT: 2232 *i1 = udp->udp_ipv6_recvhoplimit; 2233 break; /* goto sizeof (int) option return */ 2234 case IPV6_RECVHOPOPTS: 2235 *i1 = udp->udp_ipv6_recvhopopts; 2236 break; /* goto sizeof (int) option return */ 2237 case IPV6_RECVDSTOPTS: 2238 *i1 = udp->udp_ipv6_recvdstopts; 2239 break; /* goto sizeof (int) option return */ 2240 case _OLD_IPV6_RECVDSTOPTS: 2241 *i1 = udp->udp_old_ipv6_recvdstopts; 2242 break; /* goto sizeof (int) option return */ 2243 case IPV6_RECVRTHDRDSTOPTS: 2244 *i1 = udp->udp_ipv6_recvrthdrdstopts; 2245 break; /* goto sizeof (int) option return */ 2246 case IPV6_RECVRTHDR: 2247 *i1 = udp->udp_ipv6_recvrthdr; 2248 break; /* goto sizeof (int) option return */ 2249 case IPV6_PKTINFO: { 2250 /* XXX assumes 
that caller has room for max size! */ 2251 struct in6_pktinfo *pkti; 2252 2253 pkti = (struct in6_pktinfo *)ptr; 2254 if (ipp->ipp_fields & IPPF_IFINDEX) 2255 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2256 else 2257 pkti->ipi6_ifindex = 0; 2258 if (ipp->ipp_fields & IPPF_ADDR) 2259 pkti->ipi6_addr = ipp->ipp_addr; 2260 else 2261 pkti->ipi6_addr = ipv6_all_zeros; 2262 return (sizeof (struct in6_pktinfo)); 2263 } 2264 case IPV6_TCLASS: 2265 if (ipp->ipp_fields & IPPF_TCLASS) 2266 *i1 = ipp->ipp_tclass; 2267 else 2268 *i1 = IPV6_FLOW_TCLASS( 2269 IPV6_DEFAULT_VERS_AND_FLOW); 2270 break; /* goto sizeof (int) option return */ 2271 case IPV6_NEXTHOP: { 2272 sin6_t *sin6 = (sin6_t *)ptr; 2273 2274 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2275 return (0); 2276 *sin6 = sin6_null; 2277 sin6->sin6_family = AF_INET6; 2278 sin6->sin6_addr = ipp->ipp_nexthop; 2279 return (sizeof (sin6_t)); 2280 } 2281 case IPV6_HOPOPTS: 2282 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2283 return (0); 2284 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 2285 return (0); 2286 /* 2287 * The cipso/label option is added by kernel. 2288 * User is not usually aware of this option. 2289 * We copy out the hbh opt after the label option. 
2290 */ 2291 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 2292 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2293 if (udp->udp_label_len_v6 > 0) { 2294 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2295 ptr[1] = (ipp->ipp_hopoptslen - 2296 udp->udp_label_len_v6 + 7) / 8 - 1; 2297 } 2298 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2299 case IPV6_RTHDRDSTOPTS: 2300 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2301 return (0); 2302 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2303 return (ipp->ipp_rtdstoptslen); 2304 case IPV6_RTHDR: 2305 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2306 return (0); 2307 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2308 return (ipp->ipp_rthdrlen); 2309 case IPV6_DSTOPTS: 2310 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 2311 return (0); 2312 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2313 return (ipp->ipp_dstoptslen); 2314 case IPV6_PATHMTU: 2315 return (ip_fill_mtuinfo(&udp->udp_v6dst, 2316 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 2317 us->us_netstack)); 2318 default: 2319 return (-1); 2320 } 2321 break; 2322 case IPPROTO_UDP: 2323 switch (name) { 2324 case UDP_ANONPRIVBIND: 2325 *i1 = udp->udp_anon_priv_bind; 2326 break; 2327 case UDP_EXCLBIND: 2328 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 2329 break; 2330 case UDP_RCVHDR: 2331 *i1 = udp->udp_rcvhdr ? 1 : 0; 2332 break; 2333 case UDP_NAT_T_ENDPOINT: 2334 *i1 = udp->udp_nat_t_endpoint; 2335 break; 2336 default: 2337 return (-1); 2338 } 2339 break; 2340 default: 2341 return (-1); 2342 } 2343 return (sizeof (int)); 2344 } 2345 2346 int 2347 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2348 { 2349 udp_t *udp; 2350 int err; 2351 2352 udp = Q_TO_UDP(q); 2353 2354 rw_enter(&udp->udp_rwlock, RW_READER); 2355 err = udp_opt_get(Q_TO_CONN(q), level, name, ptr); 2356 rw_exit(&udp->udp_rwlock); 2357 return (err); 2358 } 2359 2360 /* 2361 * This routine sets socket options. 
2362 */ 2363 /* ARGSUSED */ 2364 static int 2365 udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2366 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2367 void *thisdg_attrs, boolean_t checkonly) 2368 { 2369 udpattrs_t *attrs = thisdg_attrs; 2370 int *i1 = (int *)invalp; 2371 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2372 udp_t *udp = connp->conn_udp; 2373 udp_stack_t *us = udp->udp_us; 2374 int error; 2375 uint_t newlen; 2376 size_t sth_wroff; 2377 2378 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 2379 /* 2380 * For fixed length options, no sanity check 2381 * of passed in length is done. It is assumed *_optcom_req() 2382 * routines do the right thing. 2383 */ 2384 switch (level) { 2385 case SOL_SOCKET: 2386 switch (name) { 2387 case SO_REUSEADDR: 2388 if (!checkonly) { 2389 udp->udp_reuseaddr = onoff; 2390 PASS_OPT_TO_IP(connp); 2391 } 2392 break; 2393 case SO_DEBUG: 2394 if (!checkonly) 2395 udp->udp_debug = onoff; 2396 break; 2397 /* 2398 * The following three items are available here, 2399 * but are only meaningful to IP. 
2400 */ 2401 case SO_DONTROUTE: 2402 if (!checkonly) { 2403 udp->udp_dontroute = onoff; 2404 PASS_OPT_TO_IP(connp); 2405 } 2406 break; 2407 case SO_USELOOPBACK: 2408 if (!checkonly) { 2409 udp->udp_useloopback = onoff; 2410 PASS_OPT_TO_IP(connp); 2411 } 2412 break; 2413 case SO_BROADCAST: 2414 if (!checkonly) { 2415 udp->udp_broadcast = onoff; 2416 PASS_OPT_TO_IP(connp); 2417 } 2418 break; 2419 2420 case SO_SNDBUF: 2421 if (*i1 > us->us_max_buf) { 2422 *outlenp = 0; 2423 return (ENOBUFS); 2424 } 2425 if (!checkonly) { 2426 udp->udp_xmit_hiwat = *i1; 2427 connp->conn_wq->q_hiwat = *i1; 2428 } 2429 break; 2430 case SO_RCVBUF: 2431 if (*i1 > us->us_max_buf) { 2432 *outlenp = 0; 2433 return (ENOBUFS); 2434 } 2435 if (!checkonly) { 2436 int size; 2437 2438 udp->udp_rcv_disply_hiwat = *i1; 2439 size = udp_set_rcv_hiwat(udp, *i1); 2440 rw_exit(&udp->udp_rwlock); 2441 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2442 size); 2443 rw_enter(&udp->udp_rwlock, RW_WRITER); 2444 } 2445 break; 2446 case SO_DGRAM_ERRIND: 2447 if (!checkonly) 2448 udp->udp_dgram_errind = onoff; 2449 break; 2450 case SO_RECVUCRED: 2451 if (!checkonly) 2452 udp->udp_recvucred = onoff; 2453 break; 2454 case SO_ALLZONES: 2455 /* 2456 * "soft" error (negative) 2457 * option not handled at this level 2458 * Do not modify *outlenp. 
2459 */ 2460 return (-EINVAL); 2461 case SO_TIMESTAMP: 2462 if (!checkonly) 2463 udp->udp_timestamp = onoff; 2464 break; 2465 case SO_ANON_MLP: 2466 if (!checkonly) { 2467 connp->conn_anon_mlp = onoff; 2468 PASS_OPT_TO_IP(connp); 2469 } 2470 break; 2471 case SO_MAC_EXEMPT: 2472 if (secpolicy_net_mac_aware(cr) != 0 || 2473 udp->udp_state != TS_UNBND) 2474 return (EACCES); 2475 if (!checkonly) { 2476 connp->conn_mac_exempt = onoff; 2477 PASS_OPT_TO_IP(connp); 2478 } 2479 break; 2480 case SCM_UCRED: { 2481 struct ucred_s *ucr; 2482 cred_t *cr, *newcr; 2483 ts_label_t *tsl; 2484 2485 /* 2486 * Only sockets that have proper privileges and are 2487 * bound to MLPs will have any other value here, so 2488 * this implicitly tests for privilege to set label. 2489 */ 2490 if (connp->conn_mlp_type == mlptSingle) 2491 break; 2492 ucr = (struct ucred_s *)invalp; 2493 if (inlen != ucredsize || 2494 ucr->uc_labeloff < sizeof (*ucr) || 2495 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 2496 return (EINVAL); 2497 if (!checkonly) { 2498 mblk_t *mb; 2499 pid_t cpid; 2500 2501 if (attrs == NULL || 2502 (mb = attrs->udpattr_mb) == NULL) 2503 return (EINVAL); 2504 if ((cr = msg_getcred(mb, &cpid)) == NULL) 2505 cr = udp->udp_connp->conn_cred; 2506 ASSERT(cr != NULL); 2507 if ((tsl = crgetlabel(cr)) == NULL) 2508 return (EINVAL); 2509 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 2510 tsl->tsl_doi, KM_NOSLEEP); 2511 if (newcr == NULL) 2512 return (ENOSR); 2513 mblk_setcred(mb, newcr, cpid); 2514 attrs->udpattr_credset = B_TRUE; 2515 crfree(newcr); 2516 } 2517 break; 2518 } 2519 case SO_EXCLBIND: 2520 if (!checkonly) 2521 udp->udp_exclbind = onoff; 2522 break; 2523 case SO_RCVTIMEO: 2524 case SO_SNDTIMEO: 2525 /* 2526 * Pass these two options in order for third part 2527 * protocol usage. Here just return directly. 
2528 */ 2529 return (0); 2530 default: 2531 *outlenp = 0; 2532 return (EINVAL); 2533 } 2534 break; 2535 case IPPROTO_IP: 2536 if (udp->udp_family != AF_INET) { 2537 *outlenp = 0; 2538 return (ENOPROTOOPT); 2539 } 2540 switch (name) { 2541 case IP_OPTIONS: 2542 case T_IP_OPTIONS: 2543 /* Save options for use by IP. */ 2544 newlen = inlen + udp->udp_label_len; 2545 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 2546 *outlenp = 0; 2547 return (EINVAL); 2548 } 2549 if (checkonly) 2550 break; 2551 2552 /* 2553 * Update the stored options taking into account 2554 * any CIPSO option which we should not overwrite. 2555 */ 2556 if (!tsol_option_set(&udp->udp_ip_snd_options, 2557 &udp->udp_ip_snd_options_len, 2558 udp->udp_label_len, invalp, inlen)) { 2559 *outlenp = 0; 2560 return (ENOMEM); 2561 } 2562 2563 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2564 UDPH_SIZE + udp->udp_ip_snd_options_len; 2565 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 2566 rw_exit(&udp->udp_rwlock); 2567 (void) proto_set_tx_wroff(connp->conn_rq, connp, 2568 sth_wroff); 2569 rw_enter(&udp->udp_rwlock, RW_WRITER); 2570 break; 2571 2572 case IP_TTL: 2573 if (!checkonly) { 2574 udp->udp_ttl = (uchar_t)*i1; 2575 } 2576 break; 2577 case IP_TOS: 2578 case T_IP_TOS: 2579 if (!checkonly) { 2580 udp->udp_type_of_service = (uchar_t)*i1; 2581 } 2582 break; 2583 case IP_MULTICAST_IF: { 2584 /* 2585 * TODO should check OPTMGMT reply and undo this if 2586 * there is an error. 
2587 */ 2588 struct in_addr *inap = (struct in_addr *)invalp; 2589 if (!checkonly) { 2590 udp->udp_multicast_if_addr = 2591 inap->s_addr; 2592 PASS_OPT_TO_IP(connp); 2593 } 2594 break; 2595 } 2596 case IP_MULTICAST_TTL: 2597 if (!checkonly) 2598 udp->udp_multicast_ttl = *invalp; 2599 break; 2600 case IP_MULTICAST_LOOP: 2601 if (!checkonly) { 2602 connp->conn_multicast_loop = *invalp; 2603 PASS_OPT_TO_IP(connp); 2604 } 2605 break; 2606 case IP_RECVOPTS: 2607 if (!checkonly) 2608 udp->udp_recvopts = onoff; 2609 break; 2610 case IP_RECVDSTADDR: 2611 if (!checkonly) 2612 udp->udp_recvdstaddr = onoff; 2613 break; 2614 case IP_RECVIF: 2615 if (!checkonly) { 2616 udp->udp_recvif = onoff; 2617 PASS_OPT_TO_IP(connp); 2618 } 2619 break; 2620 case IP_RECVSLLA: 2621 if (!checkonly) { 2622 udp->udp_recvslla = onoff; 2623 PASS_OPT_TO_IP(connp); 2624 } 2625 break; 2626 case IP_RECVTTL: 2627 if (!checkonly) 2628 udp->udp_recvttl = onoff; 2629 break; 2630 case IP_PKTINFO: { 2631 /* 2632 * This also handles IP_RECVPKTINFO. 2633 * IP_PKTINFO and IP_RECVPKTINFO have same value. 2634 * Differentiation is based on the size of the 2635 * argument passed in. 2636 */ 2637 struct in_pktinfo *pktinfop; 2638 ip4_pkt_t *attr_pktinfop; 2639 2640 if (checkonly) 2641 break; 2642 2643 if (inlen == sizeof (int)) { 2644 /* 2645 * This is IP_RECVPKTINFO option. 2646 * Keep a local copy of whether this option is 2647 * set or not and pass it down to IP for 2648 * processing. 2649 */ 2650 2651 udp->udp_ip_recvpktinfo = onoff; 2652 return (-EINVAL); 2653 } 2654 2655 if (attrs == NULL || 2656 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 2657 /* 2658 * sticky option or no buffer to return 2659 * the results. 
2660 */ 2661 return (EINVAL); 2662 } 2663 2664 if (inlen != sizeof (struct in_pktinfo)) 2665 return (EINVAL); 2666 2667 pktinfop = (struct in_pktinfo *)invalp; 2668 2669 /* 2670 * At least one of the values should be specified 2671 */ 2672 if (pktinfop->ipi_ifindex == 0 && 2673 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2674 return (EINVAL); 2675 } 2676 2677 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2678 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2679 2680 break; 2681 } 2682 case IP_ADD_MEMBERSHIP: 2683 case IP_DROP_MEMBERSHIP: 2684 case IP_BLOCK_SOURCE: 2685 case IP_UNBLOCK_SOURCE: 2686 case IP_ADD_SOURCE_MEMBERSHIP: 2687 case IP_DROP_SOURCE_MEMBERSHIP: 2688 case MCAST_JOIN_GROUP: 2689 case MCAST_LEAVE_GROUP: 2690 case MCAST_BLOCK_SOURCE: 2691 case MCAST_UNBLOCK_SOURCE: 2692 case MCAST_JOIN_SOURCE_GROUP: 2693 case MCAST_LEAVE_SOURCE_GROUP: 2694 case IP_SEC_OPT: 2695 case IP_NEXTHOP: 2696 case IP_DHCPINIT_IF: 2697 /* 2698 * "soft" error (negative) 2699 * option not handled at this level 2700 * Do not modify *outlenp. 
2701 */ 2702 return (-EINVAL); 2703 case IP_BOUND_IF: 2704 if (!checkonly) { 2705 udp->udp_bound_if = *i1; 2706 PASS_OPT_TO_IP(connp); 2707 } 2708 break; 2709 case IP_UNSPEC_SRC: 2710 if (!checkonly) { 2711 udp->udp_unspec_source = onoff; 2712 PASS_OPT_TO_IP(connp); 2713 } 2714 break; 2715 case IP_BROADCAST_TTL: 2716 if (!checkonly) 2717 connp->conn_broadcast_ttl = *invalp; 2718 break; 2719 default: 2720 *outlenp = 0; 2721 return (EINVAL); 2722 } 2723 break; 2724 case IPPROTO_IPV6: { 2725 ip6_pkt_t *ipp; 2726 boolean_t sticky; 2727 2728 if (udp->udp_family != AF_INET6) { 2729 *outlenp = 0; 2730 return (ENOPROTOOPT); 2731 } 2732 /* 2733 * Deal with both sticky options and ancillary data 2734 */ 2735 sticky = B_FALSE; 2736 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 2737 NULL) { 2738 /* sticky options, or none */ 2739 ipp = &udp->udp_sticky_ipp; 2740 sticky = B_TRUE; 2741 } 2742 2743 switch (name) { 2744 case IPV6_MULTICAST_IF: 2745 if (!checkonly) { 2746 udp->udp_multicast_if_index = *i1; 2747 PASS_OPT_TO_IP(connp); 2748 } 2749 break; 2750 case IPV6_UNICAST_HOPS: 2751 /* -1 means use default */ 2752 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2753 *outlenp = 0; 2754 return (EINVAL); 2755 } 2756 if (!checkonly) { 2757 if (*i1 == -1) { 2758 udp->udp_ttl = ipp->ipp_unicast_hops = 2759 us->us_ipv6_hoplimit; 2760 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2761 /* Pass modified value to IP. 
*/ 2762 *i1 = udp->udp_ttl; 2763 } else { 2764 udp->udp_ttl = ipp->ipp_unicast_hops = 2765 (uint8_t)*i1; 2766 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2767 } 2768 /* Rebuild the header template */ 2769 error = udp_build_hdrs(udp); 2770 if (error != 0) { 2771 *outlenp = 0; 2772 return (error); 2773 } 2774 } 2775 break; 2776 case IPV6_MULTICAST_HOPS: 2777 /* -1 means use default */ 2778 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2779 *outlenp = 0; 2780 return (EINVAL); 2781 } 2782 if (!checkonly) { 2783 if (*i1 == -1) { 2784 udp->udp_multicast_ttl = 2785 ipp->ipp_multicast_hops = 2786 IP_DEFAULT_MULTICAST_TTL; 2787 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2788 /* Pass modified value to IP. */ 2789 *i1 = udp->udp_multicast_ttl; 2790 } else { 2791 udp->udp_multicast_ttl = 2792 ipp->ipp_multicast_hops = 2793 (uint8_t)*i1; 2794 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2795 } 2796 } 2797 break; 2798 case IPV6_MULTICAST_LOOP: 2799 if (*i1 != 0 && *i1 != 1) { 2800 *outlenp = 0; 2801 return (EINVAL); 2802 } 2803 if (!checkonly) { 2804 connp->conn_multicast_loop = *i1; 2805 PASS_OPT_TO_IP(connp); 2806 } 2807 break; 2808 case IPV6_JOIN_GROUP: 2809 case IPV6_LEAVE_GROUP: 2810 case MCAST_JOIN_GROUP: 2811 case MCAST_LEAVE_GROUP: 2812 case MCAST_BLOCK_SOURCE: 2813 case MCAST_UNBLOCK_SOURCE: 2814 case MCAST_JOIN_SOURCE_GROUP: 2815 case MCAST_LEAVE_SOURCE_GROUP: 2816 /* 2817 * "soft" error (negative) 2818 * option not handled at this level 2819 * Note: Do not modify *outlenp 2820 */ 2821 return (-EINVAL); 2822 case IPV6_BOUND_IF: 2823 if (!checkonly) { 2824 udp->udp_bound_if = *i1; 2825 PASS_OPT_TO_IP(connp); 2826 } 2827 break; 2828 case IPV6_UNSPEC_SRC: 2829 if (!checkonly) { 2830 udp->udp_unspec_source = onoff; 2831 PASS_OPT_TO_IP(connp); 2832 } 2833 break; 2834 /* 2835 * Set boolean switches for ancillary data delivery 2836 */ 2837 case IPV6_RECVPKTINFO: 2838 if (!checkonly) { 2839 udp->udp_ip_recvpktinfo = onoff; 2840 PASS_OPT_TO_IP(connp); 2841 } 2842 break; 2843 case IPV6_RECVTCLASS: 
2844 if (!checkonly) { 2845 udp->udp_ipv6_recvtclass = onoff; 2846 PASS_OPT_TO_IP(connp); 2847 } 2848 break; 2849 case IPV6_RECVPATHMTU: 2850 if (!checkonly) { 2851 udp->udp_ipv6_recvpathmtu = onoff; 2852 PASS_OPT_TO_IP(connp); 2853 } 2854 break; 2855 case IPV6_RECVHOPLIMIT: 2856 if (!checkonly) { 2857 udp->udp_ipv6_recvhoplimit = onoff; 2858 PASS_OPT_TO_IP(connp); 2859 } 2860 break; 2861 case IPV6_RECVHOPOPTS: 2862 if (!checkonly) { 2863 udp->udp_ipv6_recvhopopts = onoff; 2864 PASS_OPT_TO_IP(connp); 2865 } 2866 break; 2867 case IPV6_RECVDSTOPTS: 2868 if (!checkonly) { 2869 udp->udp_ipv6_recvdstopts = onoff; 2870 PASS_OPT_TO_IP(connp); 2871 } 2872 break; 2873 case _OLD_IPV6_RECVDSTOPTS: 2874 if (!checkonly) 2875 udp->udp_old_ipv6_recvdstopts = onoff; 2876 break; 2877 case IPV6_RECVRTHDRDSTOPTS: 2878 if (!checkonly) { 2879 udp->udp_ipv6_recvrthdrdstopts = onoff; 2880 PASS_OPT_TO_IP(connp); 2881 } 2882 break; 2883 case IPV6_RECVRTHDR: 2884 if (!checkonly) { 2885 udp->udp_ipv6_recvrthdr = onoff; 2886 PASS_OPT_TO_IP(connp); 2887 } 2888 break; 2889 /* 2890 * Set sticky options or ancillary data. 2891 * If sticky options, (re)build any extension headers 2892 * that might be needed as a result. 2893 */ 2894 case IPV6_PKTINFO: 2895 /* 2896 * The source address and ifindex are verified 2897 * in ip_opt_set(). For ancillary data the 2898 * source address is checked in ip_wput_v6. 
2899 */ 2900 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2901 return (EINVAL); 2902 if (checkonly) 2903 break; 2904 2905 if (inlen == 0) { 2906 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2907 ipp->ipp_sticky_ignored |= 2908 (IPPF_IFINDEX|IPPF_ADDR); 2909 } else { 2910 struct in6_pktinfo *pkti; 2911 2912 pkti = (struct in6_pktinfo *)invalp; 2913 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2914 ipp->ipp_addr = pkti->ipi6_addr; 2915 if (ipp->ipp_ifindex != 0) 2916 ipp->ipp_fields |= IPPF_IFINDEX; 2917 else 2918 ipp->ipp_fields &= ~IPPF_IFINDEX; 2919 if (!IN6_IS_ADDR_UNSPECIFIED( 2920 &ipp->ipp_addr)) 2921 ipp->ipp_fields |= IPPF_ADDR; 2922 else 2923 ipp->ipp_fields &= ~IPPF_ADDR; 2924 } 2925 if (sticky) { 2926 error = udp_build_hdrs(udp); 2927 if (error != 0) 2928 return (error); 2929 PASS_OPT_TO_IP(connp); 2930 } 2931 break; 2932 case IPV6_HOPLIMIT: 2933 if (sticky) 2934 return (EINVAL); 2935 if (inlen != 0 && inlen != sizeof (int)) 2936 return (EINVAL); 2937 if (checkonly) 2938 break; 2939 2940 if (inlen == 0) { 2941 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2942 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2943 } else { 2944 if (*i1 > 255 || *i1 < -1) 2945 return (EINVAL); 2946 if (*i1 == -1) 2947 ipp->ipp_hoplimit = 2948 us->us_ipv6_hoplimit; 2949 else 2950 ipp->ipp_hoplimit = *i1; 2951 ipp->ipp_fields |= IPPF_HOPLIMIT; 2952 } 2953 break; 2954 case IPV6_TCLASS: 2955 if (inlen != 0 && inlen != sizeof (int)) 2956 return (EINVAL); 2957 if (checkonly) 2958 break; 2959 2960 if (inlen == 0) { 2961 ipp->ipp_fields &= ~IPPF_TCLASS; 2962 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2963 } else { 2964 if (*i1 > 255 || *i1 < -1) 2965 return (EINVAL); 2966 if (*i1 == -1) 2967 ipp->ipp_tclass = 0; 2968 else 2969 ipp->ipp_tclass = *i1; 2970 ipp->ipp_fields |= IPPF_TCLASS; 2971 } 2972 if (sticky) { 2973 error = udp_build_hdrs(udp); 2974 if (error != 0) 2975 return (error); 2976 } 2977 break; 2978 case IPV6_NEXTHOP: 2979 /* 2980 * IP will verify that the nexthop is reachable 2981 * 
and fail for sticky options. 2982 */ 2983 if (inlen != 0 && inlen != sizeof (sin6_t)) 2984 return (EINVAL); 2985 if (checkonly) 2986 break; 2987 2988 if (inlen == 0) { 2989 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2990 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2991 } else { 2992 sin6_t *sin6 = (sin6_t *)invalp; 2993 2994 if (sin6->sin6_family != AF_INET6) { 2995 return (EAFNOSUPPORT); 2996 } 2997 if (IN6_IS_ADDR_V4MAPPED( 2998 &sin6->sin6_addr)) 2999 return (EADDRNOTAVAIL); 3000 ipp->ipp_nexthop = sin6->sin6_addr; 3001 if (!IN6_IS_ADDR_UNSPECIFIED( 3002 &ipp->ipp_nexthop)) 3003 ipp->ipp_fields |= IPPF_NEXTHOP; 3004 else 3005 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3006 } 3007 if (sticky) { 3008 error = udp_build_hdrs(udp); 3009 if (error != 0) 3010 return (error); 3011 PASS_OPT_TO_IP(connp); 3012 } 3013 break; 3014 case IPV6_HOPOPTS: { 3015 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 3016 /* 3017 * Sanity checks - minimum size, size a multiple of 3018 * eight bytes, and matching size passed in. 3019 */ 3020 if (inlen != 0 && 3021 inlen != (8 * (hopts->ip6h_len + 1))) 3022 return (EINVAL); 3023 3024 if (checkonly) 3025 break; 3026 3027 error = optcom_pkt_set(invalp, inlen, sticky, 3028 (uchar_t **)&ipp->ipp_hopopts, 3029 &ipp->ipp_hopoptslen, 3030 sticky ? udp->udp_label_len_v6 : 0); 3031 if (error != 0) 3032 return (error); 3033 if (ipp->ipp_hopoptslen == 0) { 3034 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3035 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3036 } else { 3037 ipp->ipp_fields |= IPPF_HOPOPTS; 3038 } 3039 if (sticky) { 3040 error = udp_build_hdrs(udp); 3041 if (error != 0) 3042 return (error); 3043 } 3044 break; 3045 } 3046 case IPV6_RTHDRDSTOPTS: { 3047 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3048 3049 /* 3050 * Sanity checks - minimum size, size a multiple of 3051 * eight bytes, and matching size passed in. 
3052 */ 3053 if (inlen != 0 && 3054 inlen != (8 * (dopts->ip6d_len + 1))) 3055 return (EINVAL); 3056 3057 if (checkonly) 3058 break; 3059 3060 if (inlen == 0) { 3061 if (sticky && 3062 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 3063 kmem_free(ipp->ipp_rtdstopts, 3064 ipp->ipp_rtdstoptslen); 3065 ipp->ipp_rtdstopts = NULL; 3066 ipp->ipp_rtdstoptslen = 0; 3067 } 3068 3069 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 3070 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 3071 } else { 3072 error = optcom_pkt_set(invalp, inlen, sticky, 3073 (uchar_t **)&ipp->ipp_rtdstopts, 3074 &ipp->ipp_rtdstoptslen, 0); 3075 if (error != 0) 3076 return (error); 3077 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3078 } 3079 if (sticky) { 3080 error = udp_build_hdrs(udp); 3081 if (error != 0) 3082 return (error); 3083 } 3084 break; 3085 } 3086 case IPV6_DSTOPTS: { 3087 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3088 3089 /* 3090 * Sanity checks - minimum size, size a multiple of 3091 * eight bytes, and matching size passed in. 3092 */ 3093 if (inlen != 0 && 3094 inlen != (8 * (dopts->ip6d_len + 1))) 3095 return (EINVAL); 3096 3097 if (checkonly) 3098 break; 3099 3100 if (inlen == 0) { 3101 if (sticky && 3102 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3103 kmem_free(ipp->ipp_dstopts, 3104 ipp->ipp_dstoptslen); 3105 ipp->ipp_dstopts = NULL; 3106 ipp->ipp_dstoptslen = 0; 3107 } 3108 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3109 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3110 } else { 3111 error = optcom_pkt_set(invalp, inlen, sticky, 3112 (uchar_t **)&ipp->ipp_dstopts, 3113 &ipp->ipp_dstoptslen, 0); 3114 if (error != 0) 3115 return (error); 3116 ipp->ipp_fields |= IPPF_DSTOPTS; 3117 } 3118 if (sticky) { 3119 error = udp_build_hdrs(udp); 3120 if (error != 0) 3121 return (error); 3122 } 3123 break; 3124 } 3125 case IPV6_RTHDR: { 3126 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3127 3128 /* 3129 * Sanity checks - minimum size, size a multiple of 3130 * eight bytes, and matching size passed in. 
3131 */ 3132 if (inlen != 0 && 3133 inlen != (8 * (rt->ip6r_len + 1))) 3134 return (EINVAL); 3135 3136 if (checkonly) 3137 break; 3138 3139 if (inlen == 0) { 3140 if (sticky && 3141 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3142 kmem_free(ipp->ipp_rthdr, 3143 ipp->ipp_rthdrlen); 3144 ipp->ipp_rthdr = NULL; 3145 ipp->ipp_rthdrlen = 0; 3146 } 3147 ipp->ipp_fields &= ~IPPF_RTHDR; 3148 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3149 } else { 3150 error = optcom_pkt_set(invalp, inlen, sticky, 3151 (uchar_t **)&ipp->ipp_rthdr, 3152 &ipp->ipp_rthdrlen, 0); 3153 if (error != 0) 3154 return (error); 3155 ipp->ipp_fields |= IPPF_RTHDR; 3156 } 3157 if (sticky) { 3158 error = udp_build_hdrs(udp); 3159 if (error != 0) 3160 return (error); 3161 } 3162 break; 3163 } 3164 3165 case IPV6_DONTFRAG: 3166 if (checkonly) 3167 break; 3168 3169 if (onoff) { 3170 ipp->ipp_fields |= IPPF_DONTFRAG; 3171 } else { 3172 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3173 } 3174 break; 3175 3176 case IPV6_USE_MIN_MTU: 3177 if (inlen != sizeof (int)) 3178 return (EINVAL); 3179 3180 if (*i1 < -1 || *i1 > 1) 3181 return (EINVAL); 3182 3183 if (checkonly) 3184 break; 3185 3186 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3187 ipp->ipp_use_min_mtu = *i1; 3188 break; 3189 3190 case IPV6_SEC_OPT: 3191 case IPV6_SRC_PREFERENCES: 3192 case IPV6_V6ONLY: 3193 /* Handled at the IP level */ 3194 return (-EINVAL); 3195 default: 3196 *outlenp = 0; 3197 return (EINVAL); 3198 } 3199 break; 3200 } /* end IPPROTO_IPV6 */ 3201 case IPPROTO_UDP: 3202 switch (name) { 3203 case UDP_ANONPRIVBIND: 3204 if ((error = secpolicy_net_privaddr(cr, 0, 3205 IPPROTO_UDP)) != 0) { 3206 *outlenp = 0; 3207 return (error); 3208 } 3209 if (!checkonly) { 3210 udp->udp_anon_priv_bind = onoff; 3211 } 3212 break; 3213 case UDP_EXCLBIND: 3214 if (!checkonly) 3215 udp->udp_exclbind = onoff; 3216 break; 3217 case UDP_RCVHDR: 3218 if (!checkonly) 3219 udp->udp_rcvhdr = onoff; 3220 break; 3221 case UDP_NAT_T_ENDPOINT: 3222 if ((error = secpolicy_ip_config(cr, 
B_FALSE)) != 0) { 3223 *outlenp = 0; 3224 return (error); 3225 } 3226 3227 /* 3228 * Use udp_family instead so we can avoid ambiguitites 3229 * with AF_INET6 sockets that may switch from IPv4 3230 * to IPv6. 3231 */ 3232 if (udp->udp_family != AF_INET) { 3233 *outlenp = 0; 3234 return (EAFNOSUPPORT); 3235 } 3236 3237 if (!checkonly) { 3238 int size; 3239 3240 udp->udp_nat_t_endpoint = onoff; 3241 3242 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3243 UDPH_SIZE + udp->udp_ip_snd_options_len; 3244 3245 /* Also, adjust wroff */ 3246 if (onoff) { 3247 udp->udp_max_hdr_len += 3248 sizeof (uint32_t); 3249 } 3250 size = udp->udp_max_hdr_len + 3251 us->us_wroff_extra; 3252 (void) proto_set_tx_wroff(connp->conn_rq, connp, 3253 size); 3254 } 3255 break; 3256 default: 3257 *outlenp = 0; 3258 return (EINVAL); 3259 } 3260 break; 3261 default: 3262 *outlenp = 0; 3263 return (EINVAL); 3264 } 3265 /* 3266 * Common case of OK return with outval same as inval. 3267 */ 3268 if (invalp != outvalp) { 3269 /* don't trust bcopy for identical src/dst */ 3270 (void) bcopy(invalp, outvalp, inlen); 3271 } 3272 *outlenp = inlen; 3273 return (0); 3274 } 3275 3276 int 3277 udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3278 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3279 void *thisdg_attrs, cred_t *cr) 3280 { 3281 int error; 3282 boolean_t checkonly; 3283 3284 error = 0; 3285 switch (optset_context) { 3286 case SETFN_OPTCOM_CHECKONLY: 3287 checkonly = B_TRUE; 3288 /* 3289 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3290 * inlen != 0 implies value supplied and 3291 * we have to "pretend" to set it. 3292 * inlen == 0 implies that there is no 3293 * value part in T_CHECK request and just validation 3294 * done elsewhere should be enough, we just return here. 
3295 */ 3296 if (inlen == 0) { 3297 *outlenp = 0; 3298 goto done; 3299 } 3300 break; 3301 case SETFN_OPTCOM_NEGOTIATE: 3302 checkonly = B_FALSE; 3303 break; 3304 case SETFN_UD_NEGOTIATE: 3305 case SETFN_CONN_NEGOTIATE: 3306 checkonly = B_FALSE; 3307 /* 3308 * Negotiating local and "association-related" options 3309 * through T_UNITDATA_REQ. 3310 * 3311 * Following routine can filter out ones we do not 3312 * want to be "set" this way. 3313 */ 3314 if (!udp_opt_allow_udr_set(level, name)) { 3315 *outlenp = 0; 3316 error = EINVAL; 3317 goto done; 3318 } 3319 break; 3320 default: 3321 /* 3322 * We should never get here 3323 */ 3324 *outlenp = 0; 3325 error = EINVAL; 3326 goto done; 3327 } 3328 3329 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3330 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3331 3332 error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3333 outvalp, cr, thisdg_attrs, checkonly); 3334 done: 3335 return (error); 3336 } 3337 3338 /* ARGSUSED */ 3339 int 3340 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3341 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3342 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3343 { 3344 conn_t *connp = Q_TO_CONN(q); 3345 int error; 3346 udp_t *udp = connp->conn_udp; 3347 3348 rw_enter(&udp->udp_rwlock, RW_WRITER); 3349 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 3350 outlenp, outvalp, thisdg_attrs, cr); 3351 rw_exit(&udp->udp_rwlock); 3352 return (error); 3353 } 3354 3355 /* 3356 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3357 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3358 * headers, and the udp header. 3359 * Returns failure if can't allocate memory. 
3360 */ 3361 static int 3362 udp_build_hdrs(udp_t *udp) 3363 { 3364 udp_stack_t *us = udp->udp_us; 3365 uchar_t *hdrs; 3366 uint_t hdrs_len; 3367 ip6_t *ip6h; 3368 ip6i_t *ip6i; 3369 udpha_t *udpha; 3370 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3371 size_t sth_wroff; 3372 conn_t *connp = udp->udp_connp; 3373 3374 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 3375 ASSERT(connp != NULL); 3376 3377 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3378 ASSERT(hdrs_len != 0); 3379 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3380 /* Need to reallocate */ 3381 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3382 if (hdrs == NULL) 3383 return (ENOMEM); 3384 3385 if (udp->udp_sticky_hdrs_len != 0) { 3386 kmem_free(udp->udp_sticky_hdrs, 3387 udp->udp_sticky_hdrs_len); 3388 } 3389 udp->udp_sticky_hdrs = hdrs; 3390 udp->udp_sticky_hdrs_len = hdrs_len; 3391 } 3392 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3393 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3394 3395 /* Set header fields not in ipp */ 3396 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3397 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3398 ip6h = (ip6_t *)&ip6i[1]; 3399 } else { 3400 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3401 } 3402 3403 if (!(ipp->ipp_fields & IPPF_ADDR)) 3404 ip6h->ip6_src = udp->udp_v6src; 3405 3406 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3407 udpha->uha_src_port = udp->udp_port; 3408 3409 /* Try to get everything in a single mblk */ 3410 if (hdrs_len > udp->udp_max_hdr_len) { 3411 udp->udp_max_hdr_len = hdrs_len; 3412 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3413 rw_exit(&udp->udp_rwlock); 3414 (void) proto_set_tx_wroff(udp->udp_connp->conn_rq, 3415 udp->udp_connp, sth_wroff); 3416 rw_enter(&udp->udp_rwlock, RW_WRITER); 3417 } 3418 return (0); 3419 } 3420 3421 /* 3422 * This routine retrieves the value of an ND variable in a udpparam_t 3423 * structure. It is called through nd_getset when a user reads the 3424 * variable. 
3425 */ 3426 /* ARGSUSED */ 3427 static int 3428 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3429 { 3430 udpparam_t *udppa = (udpparam_t *)cp; 3431 3432 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 3433 return (0); 3434 } 3435 3436 /* 3437 * Walk through the param array specified registering each element with the 3438 * named dispatch (ND) handler. 3439 */ 3440 static boolean_t 3441 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3442 { 3443 for (; cnt-- > 0; udppa++) { 3444 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3445 if (!nd_load(ndp, udppa->udp_param_name, 3446 udp_param_get, udp_param_set, 3447 (caddr_t)udppa)) { 3448 nd_free(ndp); 3449 return (B_FALSE); 3450 } 3451 } 3452 } 3453 if (!nd_load(ndp, "udp_extra_priv_ports", 3454 udp_extra_priv_ports_get, NULL, NULL)) { 3455 nd_free(ndp); 3456 return (B_FALSE); 3457 } 3458 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3459 NULL, udp_extra_priv_ports_add, NULL)) { 3460 nd_free(ndp); 3461 return (B_FALSE); 3462 } 3463 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3464 NULL, udp_extra_priv_ports_del, NULL)) { 3465 nd_free(ndp); 3466 return (B_FALSE); 3467 } 3468 if (!nd_load(ndp, "udp_status", udp_status_report, NULL, 3469 NULL)) { 3470 nd_free(ndp); 3471 return (B_FALSE); 3472 } 3473 if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL, 3474 NULL)) { 3475 nd_free(ndp); 3476 return (B_FALSE); 3477 } 3478 return (B_TRUE); 3479 } 3480 3481 /* This routine sets an ND variable in a udpparam_t structure. */ 3482 /* ARGSUSED */ 3483 static int 3484 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3485 { 3486 long new_value; 3487 udpparam_t *udppa = (udpparam_t *)cp; 3488 3489 /* 3490 * Fail the request if the new value does not lie within the 3491 * required bounds. 
3492 */ 3493 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3494 new_value < udppa->udp_param_min || 3495 new_value > udppa->udp_param_max) { 3496 return (EINVAL); 3497 } 3498 3499 /* Set the new value */ 3500 udppa->udp_param_value = new_value; 3501 return (0); 3502 } 3503 3504 /* 3505 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 3506 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to 3507 * just count the length needed for allocation. If 'dbuf' is non-NULL, 3508 * then it's assumed to be allocated to be large enough. 3509 * 3510 * Returns zero if trimming of the security option causes all options to go 3511 * away. 3512 */ 3513 static size_t 3514 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 3515 { 3516 struct T_opthdr *toh; 3517 size_t hol = ipp->ipp_hopoptslen; 3518 ip6_hbh_t *dstopt = NULL; 3519 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 3520 size_t tlen, olen, plen; 3521 boolean_t deleting; 3522 const struct ip6_opt *sopt, *lastpad; 3523 struct ip6_opt *dopt; 3524 3525 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 3526 toh->level = IPPROTO_IPV6; 3527 toh->name = IPV6_HOPOPTS; 3528 toh->status = 0; 3529 dstopt = (ip6_hbh_t *)(toh + 1); 3530 } 3531 3532 /* 3533 * If labeling is enabled, then skip the label option 3534 * but get other options if there are any. 3535 */ 3536 if (is_system_labeled()) { 3537 dopt = NULL; 3538 if (dstopt != NULL) { 3539 /* will fill in ip6h_len later */ 3540 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 3541 dopt = (struct ip6_opt *)(dstopt + 1); 3542 } 3543 sopt = (const struct ip6_opt *)(srcopt + 1); 3544 hol -= sizeof (*srcopt); 3545 tlen = sizeof (*dstopt); 3546 lastpad = NULL; 3547 deleting = B_FALSE; 3548 /* 3549 * This loop finds the first (lastpad pointer) of any number of 3550 * pads that preceeds the security option, then treats the 3551 * security option as though it were a pad, and then finds the 3552 * next non-pad option (or end of list). 
3553 * 3554 * It then treats the entire block as one big pad. To preserve 3555 * alignment of any options that follow, or just the end of the 3556 * list, it computes a minimal new padding size that keeps the 3557 * same alignment for the next option. 3558 * 3559 * If it encounters just a sequence of pads with no security 3560 * option, those are copied as-is rather than collapsed. 3561 * 3562 * Note that to handle the end of list case, the code makes one 3563 * loop with 'hol' set to zero. 3564 */ 3565 for (;;) { 3566 if (hol > 0) { 3567 if (sopt->ip6o_type == IP6OPT_PAD1) { 3568 if (lastpad == NULL) 3569 lastpad = sopt; 3570 sopt = (const struct ip6_opt *) 3571 &sopt->ip6o_len; 3572 hol--; 3573 continue; 3574 } 3575 olen = sopt->ip6o_len + sizeof (*sopt); 3576 if (olen > hol) 3577 olen = hol; 3578 if (sopt->ip6o_type == IP6OPT_PADN || 3579 sopt->ip6o_type == ip6opt_ls) { 3580 if (sopt->ip6o_type == ip6opt_ls) 3581 deleting = B_TRUE; 3582 if (lastpad == NULL) 3583 lastpad = sopt; 3584 sopt = (const struct ip6_opt *) 3585 ((const char *)sopt + olen); 3586 hol -= olen; 3587 continue; 3588 } 3589 } else { 3590 /* if nothing was copied at all, then delete */ 3591 if (tlen == sizeof (*dstopt)) 3592 return (0); 3593 /* last pass; pick up any trailing padding */ 3594 olen = 0; 3595 } 3596 if (deleting) { 3597 /* 3598 * compute aligning effect of deleted material 3599 * to reproduce with pad. 
3600 */ 3601 plen = ((const char *)sopt - 3602 (const char *)lastpad) & 7; 3603 tlen += plen; 3604 if (dopt != NULL) { 3605 if (plen == 1) { 3606 dopt->ip6o_type = IP6OPT_PAD1; 3607 } else if (plen > 1) { 3608 plen -= sizeof (*dopt); 3609 dopt->ip6o_type = IP6OPT_PADN; 3610 dopt->ip6o_len = plen; 3611 if (plen > 0) 3612 bzero(dopt + 1, plen); 3613 } 3614 dopt = (struct ip6_opt *) 3615 ((char *)dopt + plen); 3616 } 3617 deleting = B_FALSE; 3618 lastpad = NULL; 3619 } 3620 /* if there's uncopied padding, then copy that now */ 3621 if (lastpad != NULL) { 3622 olen += (const char *)sopt - 3623 (const char *)lastpad; 3624 sopt = lastpad; 3625 lastpad = NULL; 3626 } 3627 if (dopt != NULL && olen > 0) { 3628 bcopy(sopt, dopt, olen); 3629 dopt = (struct ip6_opt *)((char *)dopt + olen); 3630 } 3631 if (hol == 0) 3632 break; 3633 tlen += olen; 3634 sopt = (const struct ip6_opt *) 3635 ((const char *)sopt + olen); 3636 hol -= olen; 3637 } 3638 /* go back and patch up the length value, rounded upward */ 3639 if (dstopt != NULL) 3640 dstopt->ip6h_len = (tlen - 1) >> 3; 3641 } else { 3642 tlen = hol; 3643 if (dstopt != NULL) 3644 bcopy(srcopt, dstopt, hol); 3645 } 3646 3647 tlen += sizeof (*toh); 3648 if (toh != NULL) 3649 toh->len = tlen; 3650 3651 return (tlen); 3652 } 3653 3654 /* 3655 * Update udp_rcv_opt_len from the packet. 3656 * Called when options received, and when no options received but 3657 * udp_ip_recv_opt_len has previously recorded options. 
3658 */ 3659 static void 3660 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) 3661 { 3662 /* Save the options if any */ 3663 if (opt_len > 0) { 3664 if (opt_len > udp->udp_ip_rcv_options_len) { 3665 /* Need to allocate larger buffer */ 3666 if (udp->udp_ip_rcv_options_len != 0) 3667 mi_free((char *)udp->udp_ip_rcv_options); 3668 udp->udp_ip_rcv_options_len = 0; 3669 udp->udp_ip_rcv_options = 3670 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 3671 if (udp->udp_ip_rcv_options != NULL) 3672 udp->udp_ip_rcv_options_len = opt_len; 3673 } 3674 if (udp->udp_ip_rcv_options_len != 0) { 3675 bcopy(opt, udp->udp_ip_rcv_options, opt_len); 3676 /* Adjust length if we are resusing the space */ 3677 udp->udp_ip_rcv_options_len = opt_len; 3678 } 3679 } else if (udp->udp_ip_rcv_options_len != 0) { 3680 /* Clear out previously recorded options */ 3681 mi_free((char *)udp->udp_ip_rcv_options); 3682 udp->udp_ip_rcv_options = NULL; 3683 udp->udp_ip_rcv_options_len = 0; 3684 } 3685 } 3686 3687 static mblk_t * 3688 udp_queue_fallback(udp_t *udp, mblk_t *mp) 3689 { 3690 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 3691 if (IPCL_IS_NONSTR(udp->udp_connp)) { 3692 /* 3693 * fallback has started but messages have not been moved yet 3694 */ 3695 if (udp->udp_fallback_queue_head == NULL) { 3696 ASSERT(udp->udp_fallback_queue_tail == NULL); 3697 udp->udp_fallback_queue_head = mp; 3698 udp->udp_fallback_queue_tail = mp; 3699 } else { 3700 ASSERT(udp->udp_fallback_queue_tail != NULL); 3701 udp->udp_fallback_queue_tail->b_next = mp; 3702 udp->udp_fallback_queue_tail = mp; 3703 } 3704 return (NULL); 3705 } else { 3706 /* 3707 * Fallback completed, let the caller putnext() the mblk. 3708 */ 3709 return (mp); 3710 } 3711 } 3712 3713 /* 3714 * Deliver data to ULP. In case we have a socket, and it's falling back to 3715 * TPI, then we'll queue the mp for later processing. 
3716 */ 3717 static void 3718 udp_ulp_recv(conn_t *connp, mblk_t *mp) 3719 { 3720 if (IPCL_IS_NONSTR(connp)) { 3721 udp_t *udp = connp->conn_udp; 3722 int error; 3723 3724 if ((*connp->conn_upcalls->su_recv) 3725 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 3726 NULL) < 0) { 3727 mutex_enter(&udp->udp_recv_lock); 3728 if (error == ENOSPC) { 3729 /* 3730 * let's confirm while holding the lock 3731 */ 3732 if ((*connp->conn_upcalls->su_recv) 3733 (connp->conn_upper_handle, NULL, 0, 0, 3734 &error, NULL) < 0) { 3735 ASSERT(error == ENOSPC); 3736 if (error == ENOSPC) { 3737 connp->conn_flow_cntrld = 3738 B_TRUE; 3739 } 3740 } 3741 mutex_exit(&udp->udp_recv_lock); 3742 } else { 3743 ASSERT(error == EOPNOTSUPP); 3744 mp = udp_queue_fallback(udp, mp); 3745 mutex_exit(&udp->udp_recv_lock); 3746 if (mp != NULL) 3747 putnext(connp->conn_rq, mp); 3748 } 3749 } 3750 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 3751 } else { 3752 putnext(connp->conn_rq, mp); 3753 } 3754 } 3755 3756 /* ARGSUSED2 */ 3757 static void 3758 udp_input(void *arg1, mblk_t *mp, void *arg2) 3759 { 3760 conn_t *connp = (conn_t *)arg1; 3761 struct T_unitdata_ind *tudi; 3762 uchar_t *rptr; /* Pointer to IP header */ 3763 int hdr_length; /* Length of IP+UDP headers */ 3764 int opt_len; 3765 int udi_size; /* Size of T_unitdata_ind */ 3766 int mp_len; 3767 udp_t *udp; 3768 udpha_t *udpha; 3769 int ipversion; 3770 ip6_pkt_t ipp; 3771 ip6_t *ip6h; 3772 ip6i_t *ip6i; 3773 mblk_t *mp1; 3774 mblk_t *options_mp = NULL; 3775 ip_pktinfo_t *pinfo = NULL; 3776 cred_t *cr = NULL; 3777 pid_t cpid; 3778 uint32_t udp_ip_rcv_options_len; 3779 udp_bits_t udp_bits; 3780 cred_t *rcr = connp->conn_cred; 3781 udp_stack_t *us; 3782 3783 ASSERT(connp->conn_flags & IPCL_UDPCONN); 3784 3785 udp = connp->conn_udp; 3786 us = udp->udp_us; 3787 rptr = mp->b_rptr; 3788 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3789 ASSERT(OK_32PTR(rptr)); 3790 3791 /* 3792 * IP should have prepended the options data in an M_CTL 
3793 * Check M_CTL "type" to make sure are not here bcos of 3794 * a valid ICMP message 3795 */ 3796 if (DB_TYPE(mp) == M_CTL) { 3797 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3798 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3799 IN_PKTINFO) { 3800 /* 3801 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3802 * has been prepended to the packet by IP. We need to 3803 * extract the mblk and adjust the rptr 3804 */ 3805 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3806 options_mp = mp; 3807 mp = mp->b_cont; 3808 rptr = mp->b_rptr; 3809 UDP_STAT(us, udp_in_pktinfo); 3810 } else { 3811 /* 3812 * ICMP messages. 3813 */ 3814 udp_icmp_error(connp, mp); 3815 return; 3816 } 3817 } 3818 3819 mp_len = msgdsize(mp); 3820 /* 3821 * This is the inbound data path. 3822 * First, we check to make sure the IP version number is correct, 3823 * and then pull the IP and UDP headers into the first mblk. 3824 */ 3825 3826 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 3827 ipp.ipp_fields = 0; 3828 3829 ipversion = IPH_HDR_VERSION(rptr); 3830 3831 rw_enter(&udp->udp_rwlock, RW_READER); 3832 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 3833 udp_bits = udp->udp_bits; 3834 rw_exit(&udp->udp_rwlock); 3835 3836 switch (ipversion) { 3837 case IPV4_VERSION: 3838 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 3839 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 3840 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 3841 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 3842 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 3843 udp->udp_family == AF_INET) { 3844 /* 3845 * Record/update udp_ip_rcv_options with the lock 3846 * held. Not needed for AF_INET6 sockets 3847 * since they don't support a getsockopt of IP_OPTIONS. 3848 */ 3849 rw_enter(&udp->udp_rwlock, RW_WRITER); 3850 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 3851 opt_len); 3852 rw_exit(&udp->udp_rwlock); 3853 } 3854 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. 
*/ 3855 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 3856 udp->udp_ip_recvpktinfo) { 3857 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 3858 ipp.ipp_fields |= IPPF_IFINDEX; 3859 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 3860 } 3861 } 3862 break; 3863 case IPV6_VERSION: 3864 /* 3865 * IPv6 packets can only be received by applications 3866 * that are prepared to receive IPv6 addresses. 3867 * The IP fanout must ensure this. 3868 */ 3869 ASSERT(udp->udp_family == AF_INET6); 3870 3871 ip6h = (ip6_t *)rptr; 3872 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 3873 3874 if (ip6h->ip6_nxt != IPPROTO_UDP) { 3875 uint8_t nexthdrp; 3876 /* Look for ifindex information */ 3877 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3878 ip6i = (ip6i_t *)ip6h; 3879 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 3880 goto tossit; 3881 3882 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3883 ASSERT(ip6i->ip6i_ifindex != 0); 3884 ipp.ipp_fields |= IPPF_IFINDEX; 3885 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3886 } 3887 rptr = (uchar_t *)&ip6i[1]; 3888 mp->b_rptr = rptr; 3889 if (rptr == mp->b_wptr) { 3890 mp1 = mp->b_cont; 3891 freeb(mp); 3892 mp = mp1; 3893 rptr = mp->b_rptr; 3894 } 3895 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 3896 goto tossit; 3897 ip6h = (ip6_t *)rptr; 3898 mp_len = msgdsize(mp); 3899 } 3900 /* 3901 * Find any potentially interesting extension headers 3902 * as well as the length of the IPv6 + extension 3903 * headers. 3904 */ 3905 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 3906 UDPH_SIZE; 3907 ASSERT(nexthdrp == IPPROTO_UDP); 3908 } else { 3909 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 3910 ip6i = NULL; 3911 } 3912 break; 3913 default: 3914 ASSERT(0); 3915 } 3916 3917 /* 3918 * IP inspected the UDP header thus all of it must be in the mblk. 3919 * UDP length check is performed for IPv6 packets and IPv4 packets 3920 * to check if the size of the packet as specified 3921 * by the header is the same as the physical size of the packet. 3922 * FIXME? Didn't IP already check this? 
3923 */ 3924 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 3925 if ((MBLKL(mp) < hdr_length) || 3926 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 3927 goto tossit; 3928 } 3929 3930 3931 /* Walk past the headers unless UDP_RCVHDR was set. */ 3932 if (!udp_bits.udpb_rcvhdr) { 3933 mp->b_rptr = rptr + hdr_length; 3934 mp_len -= hdr_length; 3935 } 3936 3937 /* 3938 * This is the inbound data path. Packets are passed upstream as 3939 * T_UNITDATA_IND messages with full IP headers still attached. 3940 */ 3941 if (udp->udp_family == AF_INET) { 3942 sin_t *sin; 3943 3944 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 3945 3946 /* 3947 * Normally only send up the source address. 3948 * If IP_RECVDSTADDR is set we include the destination IP 3949 * address as an option. With IP_RECVOPTS we include all 3950 * the IP options. 3951 */ 3952 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3953 if (udp_bits.udpb_recvdstaddr) { 3954 udi_size += sizeof (struct T_opthdr) + 3955 sizeof (struct in_addr); 3956 UDP_STAT(us, udp_in_recvdstaddr); 3957 } 3958 3959 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 3960 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3961 udi_size += sizeof (struct T_opthdr) + 3962 sizeof (struct in_pktinfo); 3963 UDP_STAT(us, udp_ip_rcvpktinfo); 3964 } 3965 3966 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 3967 udi_size += sizeof (struct T_opthdr) + opt_len; 3968 UDP_STAT(us, udp_in_recvopts); 3969 } 3970 3971 /* 3972 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 3973 * space accordingly 3974 */ 3975 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 3976 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3977 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 3978 UDP_STAT(us, udp_in_recvif); 3979 } 3980 3981 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 3982 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 3983 udi_size += sizeof (struct T_opthdr) + 3984 sizeof (struct sockaddr_dl); 3985 UDP_STAT(us, 
udp_in_recvslla); 3986 } 3987 3988 if ((udp_bits.udpb_recvucred) && 3989 (cr = msg_getcred(mp, &cpid)) != NULL) { 3990 udi_size += sizeof (struct T_opthdr) + ucredsize; 3991 UDP_STAT(us, udp_in_recvucred); 3992 } 3993 3994 /* 3995 * If SO_TIMESTAMP is set allocate the appropriate sized 3996 * buffer. Since gethrestime() expects a pointer aligned 3997 * argument, we allocate space necessary for extra 3998 * alignment (even though it might not be used). 3999 */ 4000 if (udp_bits.udpb_timestamp) { 4001 udi_size += sizeof (struct T_opthdr) + 4002 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4003 UDP_STAT(us, udp_in_timestamp); 4004 } 4005 4006 /* 4007 * If IP_RECVTTL is set allocate the appropriate sized buffer 4008 */ 4009 if (udp_bits.udpb_recvttl) { 4010 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4011 UDP_STAT(us, udp_in_recvttl); 4012 } 4013 4014 /* Allocate a message block for the T_UNITDATA_IND structure. */ 4015 mp1 = allocb(udi_size, BPRI_MED); 4016 if (mp1 == NULL) { 4017 freemsg(mp); 4018 if (options_mp != NULL) 4019 freeb(options_mp); 4020 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4021 return; 4022 } 4023 mp1->b_cont = mp; 4024 mp = mp1; 4025 mp->b_datap->db_type = M_PROTO; 4026 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4027 mp->b_wptr = (uchar_t *)tudi + udi_size; 4028 tudi->PRIM_type = T_UNITDATA_IND; 4029 tudi->SRC_length = sizeof (sin_t); 4030 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4031 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4032 sizeof (sin_t); 4033 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4034 tudi->OPT_length = udi_size; 4035 sin = (sin_t *)&tudi[1]; 4036 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4037 sin->sin_port = udpha->uha_src_port; 4038 sin->sin_family = udp->udp_family; 4039 *(uint32_t *)&sin->sin_zero[0] = 0; 4040 *(uint32_t *)&sin->sin_zero[4] = 0; 4041 4042 /* 4043 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4044 * IP_RECVTTL has been set. 
4045 */ 4046 if (udi_size != 0) { 4047 /* 4048 * Copy in destination address before options to avoid 4049 * any padding issues. 4050 */ 4051 char *dstopt; 4052 4053 dstopt = (char *)&sin[1]; 4054 if (udp_bits.udpb_recvdstaddr) { 4055 struct T_opthdr *toh; 4056 ipaddr_t *dstptr; 4057 4058 toh = (struct T_opthdr *)dstopt; 4059 toh->level = IPPROTO_IP; 4060 toh->name = IP_RECVDSTADDR; 4061 toh->len = sizeof (struct T_opthdr) + 4062 sizeof (ipaddr_t); 4063 toh->status = 0; 4064 dstopt += sizeof (struct T_opthdr); 4065 dstptr = (ipaddr_t *)dstopt; 4066 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4067 dstopt += sizeof (ipaddr_t); 4068 udi_size -= toh->len; 4069 } 4070 4071 if (udp_bits.udpb_recvopts && opt_len > 0) { 4072 struct T_opthdr *toh; 4073 4074 toh = (struct T_opthdr *)dstopt; 4075 toh->level = IPPROTO_IP; 4076 toh->name = IP_RECVOPTS; 4077 toh->len = sizeof (struct T_opthdr) + opt_len; 4078 toh->status = 0; 4079 dstopt += sizeof (struct T_opthdr); 4080 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 4081 opt_len); 4082 dstopt += opt_len; 4083 udi_size -= toh->len; 4084 } 4085 4086 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 4087 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4088 struct T_opthdr *toh; 4089 struct in_pktinfo *pktinfop; 4090 4091 toh = (struct T_opthdr *)dstopt; 4092 toh->level = IPPROTO_IP; 4093 toh->name = IP_PKTINFO; 4094 toh->len = sizeof (struct T_opthdr) + 4095 sizeof (*pktinfop); 4096 toh->status = 0; 4097 dstopt += sizeof (struct T_opthdr); 4098 pktinfop = (struct in_pktinfo *)dstopt; 4099 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 4100 pktinfop->ipi_spec_dst = 4101 pinfo->ip_pkt_match_addr; 4102 pktinfop->ipi_addr.s_addr = 4103 ((ipha_t *)rptr)->ipha_dst; 4104 4105 dstopt += sizeof (struct in_pktinfo); 4106 udi_size -= toh->len; 4107 } 4108 4109 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4110 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4111 4112 struct T_opthdr *toh; 4113 struct sockaddr_dl *dstptr; 4114 4115 toh = (struct T_opthdr 
*)dstopt; 4116 toh->level = IPPROTO_IP; 4117 toh->name = IP_RECVSLLA; 4118 toh->len = sizeof (struct T_opthdr) + 4119 sizeof (struct sockaddr_dl); 4120 toh->status = 0; 4121 dstopt += sizeof (struct T_opthdr); 4122 dstptr = (struct sockaddr_dl *)dstopt; 4123 bcopy(&pinfo->ip_pkt_slla, dstptr, 4124 sizeof (struct sockaddr_dl)); 4125 dstopt += sizeof (struct sockaddr_dl); 4126 udi_size -= toh->len; 4127 } 4128 4129 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4130 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4131 4132 struct T_opthdr *toh; 4133 uint_t *dstptr; 4134 4135 toh = (struct T_opthdr *)dstopt; 4136 toh->level = IPPROTO_IP; 4137 toh->name = IP_RECVIF; 4138 toh->len = sizeof (struct T_opthdr) + 4139 sizeof (uint_t); 4140 toh->status = 0; 4141 dstopt += sizeof (struct T_opthdr); 4142 dstptr = (uint_t *)dstopt; 4143 *dstptr = pinfo->ip_pkt_ifindex; 4144 dstopt += sizeof (uint_t); 4145 udi_size -= toh->len; 4146 } 4147 4148 if (cr != NULL) { 4149 struct T_opthdr *toh; 4150 4151 toh = (struct T_opthdr *)dstopt; 4152 toh->level = SOL_SOCKET; 4153 toh->name = SCM_UCRED; 4154 toh->len = sizeof (struct T_opthdr) + ucredsize; 4155 toh->status = 0; 4156 dstopt += sizeof (struct T_opthdr); 4157 (void) cred2ucred(cr, cpid, dstopt, rcr); 4158 dstopt += ucredsize; 4159 udi_size -= toh->len; 4160 } 4161 4162 if (udp_bits.udpb_timestamp) { 4163 struct T_opthdr *toh; 4164 4165 toh = (struct T_opthdr *)dstopt; 4166 toh->level = SOL_SOCKET; 4167 toh->name = SCM_TIMESTAMP; 4168 toh->len = sizeof (struct T_opthdr) + 4169 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4170 toh->status = 0; 4171 dstopt += sizeof (struct T_opthdr); 4172 /* Align for gethrestime() */ 4173 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4174 sizeof (intptr_t)); 4175 gethrestime((timestruc_t *)dstopt); 4176 dstopt = (char *)toh + toh->len; 4177 udi_size -= toh->len; 4178 } 4179 4180 /* 4181 * CAUTION: 4182 * Due to aligment issues 4183 * Processing of IP_RECVTTL option 4184 * should always be the last. 
Adding 4185 * any option processing after this will 4186 * cause alignment panic. 4187 */ 4188 if (udp_bits.udpb_recvttl) { 4189 struct T_opthdr *toh; 4190 uint8_t *dstptr; 4191 4192 toh = (struct T_opthdr *)dstopt; 4193 toh->level = IPPROTO_IP; 4194 toh->name = IP_RECVTTL; 4195 toh->len = sizeof (struct T_opthdr) + 4196 sizeof (uint8_t); 4197 toh->status = 0; 4198 dstopt += sizeof (struct T_opthdr); 4199 dstptr = (uint8_t *)dstopt; 4200 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4201 dstopt += sizeof (uint8_t); 4202 udi_size -= toh->len; 4203 } 4204 4205 /* Consumed all of allocated space */ 4206 ASSERT(udi_size == 0); 4207 } 4208 } else { 4209 sin6_t *sin6; 4210 4211 /* 4212 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4213 * 4214 * Normally we only send up the address. If receiving of any 4215 * optional receive side information is enabled, we also send 4216 * that up as options. 4217 */ 4218 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4219 4220 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4221 IPPF_RTHDR|IPPF_IFINDEX)) { 4222 if ((udp_bits.udpb_ipv6_recvhopopts) && 4223 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4224 size_t hlen; 4225 4226 UDP_STAT(us, udp_in_recvhopopts); 4227 hlen = copy_hop_opts(&ipp, NULL); 4228 if (hlen == 0) 4229 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4230 udi_size += hlen; 4231 } 4232 if (((udp_bits.udpb_ipv6_recvdstopts) || 4233 udp_bits.udpb_old_ipv6_recvdstopts) && 4234 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4235 udi_size += sizeof (struct T_opthdr) + 4236 ipp.ipp_dstoptslen; 4237 UDP_STAT(us, udp_in_recvdstopts); 4238 } 4239 if ((((udp_bits.udpb_ipv6_recvdstopts) && 4240 udp_bits.udpb_ipv6_recvrthdr && 4241 (ipp.ipp_fields & IPPF_RTHDR)) || 4242 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 4243 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4244 udi_size += sizeof (struct T_opthdr) + 4245 ipp.ipp_rtdstoptslen; 4246 UDP_STAT(us, udp_in_recvrtdstopts); 4247 } 4248 if ((udp_bits.udpb_ipv6_recvrthdr) && 4249 (ipp.ipp_fields & 
IPPF_RTHDR)) { 4250 udi_size += sizeof (struct T_opthdr) + 4251 ipp.ipp_rthdrlen; 4252 UDP_STAT(us, udp_in_recvrthdr); 4253 } 4254 if ((udp_bits.udpb_ip_recvpktinfo) && 4255 (ipp.ipp_fields & IPPF_IFINDEX)) { 4256 udi_size += sizeof (struct T_opthdr) + 4257 sizeof (struct in6_pktinfo); 4258 UDP_STAT(us, udp_in_recvpktinfo); 4259 } 4260 4261 } 4262 if ((udp_bits.udpb_recvucred) && 4263 (cr = msg_getcred(mp, &cpid)) != NULL) { 4264 udi_size += sizeof (struct T_opthdr) + ucredsize; 4265 UDP_STAT(us, udp_in_recvucred); 4266 } 4267 4268 /* 4269 * If SO_TIMESTAMP is set allocate the appropriate sized 4270 * buffer. Since gethrestime() expects a pointer aligned 4271 * argument, we allocate space necessary for extra 4272 * alignment (even though it might not be used). 4273 */ 4274 if (udp_bits.udpb_timestamp) { 4275 udi_size += sizeof (struct T_opthdr) + 4276 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4277 UDP_STAT(us, udp_in_timestamp); 4278 } 4279 4280 if (udp_bits.udpb_ipv6_recvhoplimit) { 4281 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4282 UDP_STAT(us, udp_in_recvhoplimit); 4283 } 4284 4285 if (udp_bits.udpb_ipv6_recvtclass) { 4286 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4287 UDP_STAT(us, udp_in_recvtclass); 4288 } 4289 4290 mp1 = allocb(udi_size, BPRI_MED); 4291 if (mp1 == NULL) { 4292 freemsg(mp); 4293 if (options_mp != NULL) 4294 freeb(options_mp); 4295 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4296 return; 4297 } 4298 mp1->b_cont = mp; 4299 mp = mp1; 4300 mp->b_datap->db_type = M_PROTO; 4301 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4302 mp->b_wptr = (uchar_t *)tudi + udi_size; 4303 tudi->PRIM_type = T_UNITDATA_IND; 4304 tudi->SRC_length = sizeof (sin6_t); 4305 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4306 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4307 sizeof (sin6_t); 4308 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4309 tudi->OPT_length = udi_size; 4310 sin6 = (sin6_t *)&tudi[1]; 4311 if (ipversion 
== IPV4_VERSION) { 4312 in6_addr_t v6dst; 4313 4314 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4315 &sin6->sin6_addr); 4316 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4317 &v6dst); 4318 sin6->sin6_flowinfo = 0; 4319 sin6->sin6_scope_id = 0; 4320 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4321 connp->conn_zoneid, us->us_netstack); 4322 } else { 4323 sin6->sin6_addr = ip6h->ip6_src; 4324 /* No sin6_flowinfo per API */ 4325 sin6->sin6_flowinfo = 0; 4326 /* For link-scope source pass up scope id */ 4327 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4328 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4329 sin6->sin6_scope_id = ipp.ipp_ifindex; 4330 else 4331 sin6->sin6_scope_id = 0; 4332 sin6->__sin6_src_id = ip_srcid_find_addr( 4333 &ip6h->ip6_dst, connp->conn_zoneid, 4334 us->us_netstack); 4335 } 4336 sin6->sin6_port = udpha->uha_src_port; 4337 sin6->sin6_family = udp->udp_family; 4338 4339 if (udi_size != 0) { 4340 uchar_t *dstopt; 4341 4342 dstopt = (uchar_t *)&sin6[1]; 4343 if ((udp_bits.udpb_ip_recvpktinfo) && 4344 (ipp.ipp_fields & IPPF_IFINDEX)) { 4345 struct T_opthdr *toh; 4346 struct in6_pktinfo *pkti; 4347 4348 toh = (struct T_opthdr *)dstopt; 4349 toh->level = IPPROTO_IPV6; 4350 toh->name = IPV6_PKTINFO; 4351 toh->len = sizeof (struct T_opthdr) + 4352 sizeof (*pkti); 4353 toh->status = 0; 4354 dstopt += sizeof (struct T_opthdr); 4355 pkti = (struct in6_pktinfo *)dstopt; 4356 if (ipversion == IPV6_VERSION) 4357 pkti->ipi6_addr = ip6h->ip6_dst; 4358 else 4359 IN6_IPADDR_TO_V4MAPPED( 4360 ((ipha_t *)rptr)->ipha_dst, 4361 &pkti->ipi6_addr); 4362 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4363 dstopt += sizeof (*pkti); 4364 udi_size -= toh->len; 4365 } 4366 if (udp_bits.udpb_ipv6_recvhoplimit) { 4367 struct T_opthdr *toh; 4368 4369 toh = (struct T_opthdr *)dstopt; 4370 toh->level = IPPROTO_IPV6; 4371 toh->name = IPV6_HOPLIMIT; 4372 toh->len = sizeof (struct T_opthdr) + 4373 sizeof (uint_t); 4374 toh->status = 0; 4375 dstopt += sizeof (struct T_opthdr); 4376 if 
(ipversion == IPV6_VERSION) 4377 *(uint_t *)dstopt = ip6h->ip6_hops; 4378 else 4379 *(uint_t *)dstopt = 4380 ((ipha_t *)rptr)->ipha_ttl; 4381 dstopt += sizeof (uint_t); 4382 udi_size -= toh->len; 4383 } 4384 if (udp_bits.udpb_ipv6_recvtclass) { 4385 struct T_opthdr *toh; 4386 4387 toh = (struct T_opthdr *)dstopt; 4388 toh->level = IPPROTO_IPV6; 4389 toh->name = IPV6_TCLASS; 4390 toh->len = sizeof (struct T_opthdr) + 4391 sizeof (uint_t); 4392 toh->status = 0; 4393 dstopt += sizeof (struct T_opthdr); 4394 if (ipversion == IPV6_VERSION) { 4395 *(uint_t *)dstopt = 4396 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4397 } else { 4398 ipha_t *ipha = (ipha_t *)rptr; 4399 *(uint_t *)dstopt = 4400 ipha->ipha_type_of_service; 4401 } 4402 dstopt += sizeof (uint_t); 4403 udi_size -= toh->len; 4404 } 4405 if ((udp_bits.udpb_ipv6_recvhopopts) && 4406 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4407 size_t hlen; 4408 4409 hlen = copy_hop_opts(&ipp, dstopt); 4410 dstopt += hlen; 4411 udi_size -= hlen; 4412 } 4413 if ((udp_bits.udpb_ipv6_recvdstopts) && 4414 (udp_bits.udpb_ipv6_recvrthdr) && 4415 (ipp.ipp_fields & IPPF_RTHDR) && 4416 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4417 struct T_opthdr *toh; 4418 4419 toh = (struct T_opthdr *)dstopt; 4420 toh->level = IPPROTO_IPV6; 4421 toh->name = IPV6_DSTOPTS; 4422 toh->len = sizeof (struct T_opthdr) + 4423 ipp.ipp_rtdstoptslen; 4424 toh->status = 0; 4425 dstopt += sizeof (struct T_opthdr); 4426 bcopy(ipp.ipp_rtdstopts, dstopt, 4427 ipp.ipp_rtdstoptslen); 4428 dstopt += ipp.ipp_rtdstoptslen; 4429 udi_size -= toh->len; 4430 } 4431 if ((udp_bits.udpb_ipv6_recvrthdr) && 4432 (ipp.ipp_fields & IPPF_RTHDR)) { 4433 struct T_opthdr *toh; 4434 4435 toh = (struct T_opthdr *)dstopt; 4436 toh->level = IPPROTO_IPV6; 4437 toh->name = IPV6_RTHDR; 4438 toh->len = sizeof (struct T_opthdr) + 4439 ipp.ipp_rthdrlen; 4440 toh->status = 0; 4441 dstopt += sizeof (struct T_opthdr); 4442 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4443 dstopt += ipp.ipp_rthdrlen; 4444 udi_size -= 
toh->len; 4445 } 4446 if ((udp_bits.udpb_ipv6_recvdstopts) && 4447 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4448 struct T_opthdr *toh; 4449 4450 toh = (struct T_opthdr *)dstopt; 4451 toh->level = IPPROTO_IPV6; 4452 toh->name = IPV6_DSTOPTS; 4453 toh->len = sizeof (struct T_opthdr) + 4454 ipp.ipp_dstoptslen; 4455 toh->status = 0; 4456 dstopt += sizeof (struct T_opthdr); 4457 bcopy(ipp.ipp_dstopts, dstopt, 4458 ipp.ipp_dstoptslen); 4459 dstopt += ipp.ipp_dstoptslen; 4460 udi_size -= toh->len; 4461 } 4462 if (cr != NULL) { 4463 struct T_opthdr *toh; 4464 4465 toh = (struct T_opthdr *)dstopt; 4466 toh->level = SOL_SOCKET; 4467 toh->name = SCM_UCRED; 4468 toh->len = sizeof (struct T_opthdr) + ucredsize; 4469 toh->status = 0; 4470 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4471 dstopt += toh->len; 4472 udi_size -= toh->len; 4473 } 4474 if (udp_bits.udpb_timestamp) { 4475 struct T_opthdr *toh; 4476 4477 toh = (struct T_opthdr *)dstopt; 4478 toh->level = SOL_SOCKET; 4479 toh->name = SCM_TIMESTAMP; 4480 toh->len = sizeof (struct T_opthdr) + 4481 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4482 toh->status = 0; 4483 dstopt += sizeof (struct T_opthdr); 4484 /* Align for gethrestime() */ 4485 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4486 sizeof (intptr_t)); 4487 gethrestime((timestruc_t *)dstopt); 4488 dstopt = (uchar_t *)toh + toh->len; 4489 udi_size -= toh->len; 4490 } 4491 4492 /* Consumed all of allocated space */ 4493 ASSERT(udi_size == 0); 4494 } 4495 #undef sin6 4496 /* No IP_RECVDSTADDR for IPv6. */ 4497 } 4498 4499 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4500 if (options_mp != NULL) 4501 freeb(options_mp); 4502 4503 udp_ulp_recv(connp, mp); 4504 4505 return; 4506 4507 tossit: 4508 freemsg(mp); 4509 if (options_mp != NULL) 4510 freeb(options_mp); 4511 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4512 } 4513 4514 /* 4515 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 4516 * information that can be changing beneath us. 
 *
 * The UDP group counters are appended to mpctl's data and sent back on q
 * first.  The IPv4 endpoint table, its MLP attribute table, the IPv6
 * endpoint table and its MLP attribute table follow as separate replies;
 * an attribute table that ends up empty is freed instead of being sent.
 * Only endpoints whose zone matches the requesting conn are listed.
 *
 * Returns the copy of mpctl taken on entry (whatever copymsg() produced),
 * or 0 when mpctl is unusable or one of the reply copies cannot be
 * allocated; in that case mpctl and all copies are freed.
 */
mblk_t *
udp_snmp_get(queue_t *q, mblk_t *mpctl)
{
	mblk_t			*mpdata;
	mblk_t			*mp_conn_ctl;
	mblk_t			*mp_attr_ctl;
	mblk_t			*mp6_conn_ctl;
	mblk_t			*mp6_attr_ctl;
	mblk_t			*mp_conn_tail;
	mblk_t			*mp_attr_tail;
	mblk_t			*mp6_conn_tail;
	mblk_t			*mp6_attr_tail;
	struct opthdr		*optp;
	mib2_udpEntry_t		ude;
	mib2_udp6Entry_t	ude6;
	mib2_transportMLPEntry_t mlp;
	int			state;
	zoneid_t		zoneid;
	int			i;
	connf_t			*connfp;
	conn_t			*connp = Q_TO_CONN(q);
	int			v4_conn_idx;
	int			v6_conn_idx;
	boolean_t		needattr;
	udp_t			*udp;
	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
	udp_stack_t		*us = connp->conn_netstack->netstack_udp;
	mblk_t			*mp2ctl;

	/*
	 * make a copy of the original message
	 */
	mp2ctl = copymsg(mpctl);

	/*
	 * One copy of the request per table we reply with.  The copies are
	 * made in order, so on failure every pointer after the failing one
	 * is still NULL (mp6_attr_ctl, being last, never needs freeing).
	 */
	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
	if (mpctl == NULL ||
	    (mpdata = mpctl->b_cont) == NULL ||
	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
		freemsg(mp_conn_ctl);
		freemsg(mp_attr_ctl);
		freemsg(mp6_conn_ctl);
		freemsg(mpctl);
		freemsg(mp2ctl);
		return (0);
	}

	/* Remember the requester's zone; connp is reused as the iterator. */
	zoneid = connp->conn_zoneid;

	/* fixed length structure for IPv4 and IPv6 counters */
	SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
	SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
	/* synchronize 64- and 32-bit counters */
	SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams);
	SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams);

	/* First reply: the UDP group counters themselves. */
	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = 0;
	(void) snmp_append_data(mpdata, (char *)&us->us_udp_mib,
	    sizeof (us->us_udp_mib));
	optp->len = msgdsize(mpdata);
	qreply(q, mpctl);

	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
	v4_conn_idx = v6_conn_idx = 0;

	/* Walk every bucket of the global conn hash looking for UDP conns. */
	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;

		while ((connp = ipcl_get_next_conn(connfp, connp,
		    IPCL_UDPCONN))) {
			udp = connp->conn_udp;
			if (zoneid != connp->conn_zoneid)
				continue;

			/*
			 * Note that the port numbers are sent in
			 * host byte order
			 */

			if (udp->udp_state == TS_UNBND)
				state = MIB2_UDP_unbound;
			else if (udp->udp_state == TS_IDLE)
				state = MIB2_UDP_idle;
			else if (udp->udp_state == TS_DATA_XFER)
				state = MIB2_UDP_connected;
			else
				state = MIB2_UDP_unknown;

			/*
			 * An MLP attribute entry is only emitted for
			 * endpoints that are not single-level.
			 */
			needattr = B_FALSE;
			bzero(&mlp, sizeof (mlp));
			if (connp->conn_mlp_type != mlptSingle) {
				if (connp->conn_mlp_type == mlptShared ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_SHARED;
				if (connp->conn_mlp_type == mlptPrivate ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
				needattr = B_TRUE;
			}

			/*
			 * Create an IPv4 table entry for IPv4 entries and also
			 * any IPv6 entries which are bound to in6addr_any
			 * (i.e. anything a IPv4 peer could connect/send to).
			 */
			if (udp->udp_ipversion == IPV4_VERSION ||
			    (udp->udp_state <= TS_IDLE &&
			    IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) {
				ude.udpEntryInfo.ue_state = state;
				/*
				 * If in6addr_any this will set it to
				 * INADDR_ANY
				 */
				ude.udpLocalAddress =
				    V4_PART_OF_V6(udp->udp_v6src);
				ude.udpLocalPort = ntohs(udp->udp_port);
				if (udp->udp_state == TS_DATA_XFER) {
					/*
					 * Can potentially get here for
					 * v6 socket if another process
					 * (say, ping) has just done a
					 * sendto(), changing the state
					 * from the TS_IDLE above to
					 * TS_DATA_XFER by the time we hit
					 * this part of the code.
					 */
					ude.udpEntryInfo.ue_RemoteAddress =
					    V4_PART_OF_V6(udp->udp_v6dst);
					ude.udpEntryInfo.ue_RemotePort =
					    ntohs(udp->udp_dstport);
				} else {
					ude.udpEntryInfo.ue_RemoteAddress = 0;
					ude.udpEntryInfo.ue_RemotePort = 0;
				}

				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude.udpInstance = (uint32_t)(uintptr_t)udp;
				ude.udpCreationProcess =
				    (udp->udp_open_pid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    udp->udp_open_pid;
				ude.udpCreationTime = udp->udp_open_time;

				(void) snmp_append_data2(mp_conn_ctl->b_cont,
				    &mp_conn_tail, (char *)&ude, sizeof (ude));
				/*
				 * tme_connidx records the position of the
				 * entry just appended to the IPv4 table.
				 */
				mlp.tme_connidx = v4_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp_attr_ctl->b_cont, &mp_attr_tail,
					    (char *)&mlp, sizeof (mlp));
			}
			if (udp->udp_ipversion == IPV6_VERSION) {
				ude6.udp6EntryInfo.ue_state = state;
				ude6.udp6LocalAddress = udp->udp_v6src;
				ude6.udp6LocalPort = ntohs(udp->udp_port);
				ude6.udp6IfIndex = udp->udp_bound_if;
				if (udp->udp_state == TS_DATA_XFER) {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    udp->udp_v6dst;
					ude6.udp6EntryInfo.ue_RemotePort =
					    ntohs(udp->udp_dstport);
				} else {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    sin6_null.sin6_addr;
					ude6.udp6EntryInfo.ue_RemotePort = 0;
				}
				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
				ude6.udp6CreationProcess =
				    (udp->udp_open_pid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    udp->udp_open_pid;
				ude6.udp6CreationTime = udp->udp_open_time;

				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
				    &mp6_conn_tail, (char *)&ude6,
				    sizeof (ude6));
				/*
				 * Same mlp record, re-indexed for the IPv6
				 * table; the IPv4 copy (if any) has already
				 * been appended above.
				 */
				mlp.tme_connidx = v6_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp6_attr_ctl->b_cont,
					    &mp6_attr_tail, (char *)&mlp,
					    sizeof (mlp));
			}
		}
	}

	/* IPv4 UDP endpoints */
	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = MIB2_UDP_ENTRY;
	optp->len = msgdsize(mp_conn_ctl->b_cont);
	qreply(q, mp_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp_attr_ctl);
	else
		qreply(q, mp_attr_ctl);

	/* IPv6 UDP endpoints */
	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = MIB2_UDP6_ENTRY;
	optp->len = msgdsize(mp6_conn_ctl->b_cont);
	qreply(q, mp6_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp6_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp6_attr_ctl);
	else
		qreply(q, mp6_attr_ctl);

	return (mp2ctl);
}

/*
 * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
 * NOTE: Per MIB-II, UDP has no writable data.
 * TODO: If this ever actually tries to set anything, it needs
 * to do the appropriate locking.
4764 */ 4765 /* ARGSUSED */ 4766 int 4767 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4768 uchar_t *ptr, int len) 4769 { 4770 switch (level) { 4771 case MIB2_UDP: 4772 return (0); 4773 default: 4774 return (1); 4775 } 4776 } 4777 4778 static void 4779 udp_report_item(mblk_t *mp, udp_t *udp) 4780 { 4781 char *state; 4782 char addrbuf1[INET6_ADDRSTRLEN]; 4783 char addrbuf2[INET6_ADDRSTRLEN]; 4784 uint_t print_len, buf_len; 4785 4786 buf_len = mp->b_datap->db_lim - mp->b_wptr; 4787 ASSERT(buf_len >= 0); 4788 if (buf_len == 0) 4789 return; 4790 4791 if (udp->udp_state == TS_UNBND) 4792 state = "UNBOUND"; 4793 else if (udp->udp_state == TS_IDLE) 4794 state = "IDLE"; 4795 else if (udp->udp_state == TS_DATA_XFER) 4796 state = "CONNECTED"; 4797 else 4798 state = "UnkState"; 4799 print_len = snprintf((char *)mp->b_wptr, buf_len, 4800 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 4801 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 4802 inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)), 4803 inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)), 4804 ntohs(udp->udp_dstport), state); 4805 if (print_len < buf_len) { 4806 mp->b_wptr += print_len; 4807 } else { 4808 mp->b_wptr += buf_len; 4809 } 4810 } 4811 4812 /* Report for ndd "udp_status" */ 4813 /* ARGSUSED */ 4814 static int 4815 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4816 { 4817 zoneid_t zoneid; 4818 connf_t *connfp; 4819 conn_t *connp = Q_TO_CONN(q); 4820 udp_t *udp = connp->conn_udp; 4821 int i; 4822 udp_stack_t *us = udp->udp_us; 4823 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4824 4825 /* 4826 * Because of the ndd constraint, at most we can have 64K buffer 4827 * to put in all UDP info. So to be more efficient, just 4828 * allocate a 64K buffer here, assuming we need that large buffer. 4829 * This may be a problem as any user can read udp_status. 
Therefore 4830 * we limit the rate of doing this using us_ndd_get_info_interval. 4831 * This should be OK as normal users should not do this too often. 4832 */ 4833 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 4834 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 4835 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 4836 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 4837 return (0); 4838 } 4839 } 4840 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 4841 /* The following may work even if we cannot get a large buf. */ 4842 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 4843 return (0); 4844 } 4845 (void) mi_mpprintf(mp, 4846 "UDP " MI_COL_HDRPAD_STR 4847 /* 12345678[89ABCDEF] */ 4848 " zone lport src addr dest addr port state"); 4849 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 4850 4851 zoneid = connp->conn_zoneid; 4852 4853 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4854 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4855 connp = NULL; 4856 4857 while ((connp = ipcl_get_next_conn(connfp, connp, 4858 IPCL_UDPCONN))) { 4859 udp = connp->conn_udp; 4860 if (zoneid != GLOBAL_ZONEID && 4861 zoneid != connp->conn_zoneid) 4862 continue; 4863 4864 udp_report_item(mp->b_cont, udp); 4865 } 4866 } 4867 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 4868 return (0); 4869 } 4870 4871 /* 4872 * This routine creates a T_UDERROR_IND message and passes it upstream. 4873 * The address and options are copied from the T_UNITDATA_REQ message 4874 * passed in mp. This message is freed. 
4875 */ 4876 static void 4877 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 4878 t_scalar_t err) 4879 { 4880 struct T_unitdata_req *tudr; 4881 mblk_t *mp1; 4882 uchar_t *optaddr; 4883 t_scalar_t optlen; 4884 4885 if (DB_TYPE(mp) == M_DATA) { 4886 ASSERT(destaddr != NULL && destlen != 0); 4887 optaddr = NULL; 4888 optlen = 0; 4889 } else { 4890 if ((mp->b_wptr < mp->b_rptr) || 4891 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 4892 goto done; 4893 } 4894 tudr = (struct T_unitdata_req *)mp->b_rptr; 4895 destaddr = mp->b_rptr + tudr->DEST_offset; 4896 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 4897 destaddr + tudr->DEST_length < mp->b_rptr || 4898 destaddr + tudr->DEST_length > mp->b_wptr) { 4899 goto done; 4900 } 4901 optaddr = mp->b_rptr + tudr->OPT_offset; 4902 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 4903 optaddr + tudr->OPT_length < mp->b_rptr || 4904 optaddr + tudr->OPT_length > mp->b_wptr) { 4905 goto done; 4906 } 4907 destlen = tudr->DEST_length; 4908 optlen = tudr->OPT_length; 4909 } 4910 4911 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 4912 (char *)optaddr, optlen, err); 4913 if (mp1 != NULL) 4914 qreply(q, mp1); 4915 4916 done: 4917 freemsg(mp); 4918 } 4919 4920 /* 4921 * This routine removes a port number association from a stream. It 4922 * is called by udp_wput to handle T_UNBIND_REQ messages. 4923 */ 4924 static void 4925 udp_tpi_unbind(queue_t *q, mblk_t *mp) 4926 { 4927 conn_t *connp = Q_TO_CONN(q); 4928 int error; 4929 4930 error = udp_do_unbind(connp); 4931 if (error) { 4932 if (error < 0) 4933 udp_err_ack(q, mp, -error, 0); 4934 else 4935 udp_err_ack(q, mp, TSYSERR, error); 4936 return; 4937 } 4938 4939 mp = mi_tpi_ok_ack_alloc(mp); 4940 ASSERT(mp != NULL); 4941 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4942 qreply(q, mp); 4943 } 4944 4945 /* 4946 * Don't let port fall into the privileged range. 
4947 * Since the extra privileged ports can be arbitrary we also 4948 * ensure that we exclude those from consideration. 4949 * us->us_epriv_ports is not sorted thus we loop over it until 4950 * there are no changes. 4951 */ 4952 static in_port_t 4953 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 4954 { 4955 int i; 4956 in_port_t nextport; 4957 boolean_t restart = B_FALSE; 4958 udp_stack_t *us = udp->udp_us; 4959 4960 if (random && udp_random_anon_port != 0) { 4961 (void) random_get_pseudo_bytes((uint8_t *)&port, 4962 sizeof (in_port_t)); 4963 /* 4964 * Unless changed by a sys admin, the smallest anon port 4965 * is 32768 and the largest anon port is 65535. It is 4966 * very likely (50%) for the random port to be smaller 4967 * than the smallest anon port. When that happens, 4968 * add port % (anon port range) to the smallest anon 4969 * port to get the random port. It should fall into the 4970 * valid anon port range. 4971 */ 4972 if (port < us->us_smallest_anon_port) { 4973 port = us->us_smallest_anon_port + 4974 port % (us->us_largest_anon_port - 4975 us->us_smallest_anon_port); 4976 } 4977 } 4978 4979 retry: 4980 if (port < us->us_smallest_anon_port) 4981 port = us->us_smallest_anon_port; 4982 4983 if (port > us->us_largest_anon_port) { 4984 port = us->us_smallest_anon_port; 4985 if (restart) 4986 return (0); 4987 restart = B_TRUE; 4988 } 4989 4990 if (port < us->us_smallest_nonpriv_port) 4991 port = us->us_smallest_nonpriv_port; 4992 4993 for (i = 0; i < us->us_num_epriv_ports; i++) { 4994 if (port == us->us_epriv_ports[i]) { 4995 port++; 4996 /* 4997 * Make sure that the port is in the 4998 * valid range. 
4999 */ 5000 goto retry; 5001 } 5002 } 5003 5004 if (is_system_labeled() && 5005 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 5006 port, IPPROTO_UDP, B_TRUE)) != 0) { 5007 port = nextport; 5008 goto retry; 5009 } 5010 5011 return (port); 5012 } 5013 5014 static int 5015 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst, 5016 boolean_t *update_lastdst) 5017 { 5018 int err; 5019 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 5020 udp_t *udp = Q_TO_UDP(wq); 5021 udp_stack_t *us = udp->udp_us; 5022 cred_t *cr; 5023 5024 /* 5025 * All Solaris components should pass a db_credp 5026 * for this message, hence we ASSERT. 5027 * On production kernels we return an error to be robust against 5028 * random streams modules sitting on top of us. 5029 */ 5030 cr = msg_getcred(mp, NULL); 5031 ASSERT(cr != NULL); 5032 if (cr == NULL) 5033 return (EINVAL); 5034 5035 /* Note that we use the cred/label from the message to handle MLP */ 5036 err = tsol_compute_label(cr, dst, 5037 opt_storage, udp->udp_connp->conn_mac_exempt, 5038 us->us_netstack->netstack_ip); 5039 if (err == 0) { 5040 err = tsol_update_options(&udp->udp_ip_snd_options, 5041 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 5042 opt_storage); 5043 } 5044 if (err != 0) { 5045 DTRACE_PROBE4( 5046 tx__ip__log__info__updatelabel__udp, 5047 char *, "queue(1) failed to update options(2) on mp(3)", 5048 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5049 } else { 5050 *update_lastdst = B_TRUE; 5051 } 5052 return (err); 5053 } 5054 5055 static mblk_t * 5056 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 5057 uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg, 5058 cred_t *cr, pid_t pid) 5059 { 5060 udp_t *udp = connp->conn_udp; 5061 mblk_t *mp1 = mp; 5062 mblk_t *mp2; 5063 ipha_t *ipha; 5064 int ip_hdr_length; 5065 uint32_t ip_len; 5066 udpha_t *udpha; 5067 boolean_t lock_held = B_FALSE; 5068 in_port_t uha_src_port; 5069 udpattrs_t attrs; 5070 uchar_t 
ip_snd_opt[IP_MAX_OPT_LENGTH];
	uint32_t	ip_snd_opt_len = 0;
	ip4_pkt_t	pktinfo;
	ip4_pkt_t	*pktinfop = &pktinfo;
	ip_opt_info_t	optinfo;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	udp_stack_t	*us = udp->udp_us;
	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
	queue_t		*q = connp->conn_wq;
	ire_t		*ire;
	in6_addr_t	v6dst;
	boolean_t	update_lastdst = B_FALSE;

	*error = 0;
	pktinfop->ip4_ill_index = 0;
	pktinfop->ip4_addr = INADDR_ANY;
	optinfo.ip_opt_flags = 0;
	optinfo.ip_opt_ill_index = 0;

	/* An unspecified destination is sent to the loopback address. */
	if (v4dst == INADDR_ANY)
		v4dst = htonl(INADDR_LOOPBACK);

	/*
	 * If options passed in, feed it for verification and handling
	 */
	attrs.udpattr_credset = B_FALSE;
	if (IPCL_IS_NONSTR(connp)) {
		/* Non-STREAMS socket: options arrive as ancillary data. */
		if (msg->msg_controllen != 0) {
			attrs.udpattr_ipp4 = pktinfop;
			attrs.udpattr_mb = mp;

			rw_enter(&udp->udp_rwlock, RW_WRITER);
			*error = process_auxiliary_options(connp,
			    msg->msg_control, msg->msg_controllen,
			    &attrs, &udp_opt_obj, udp_opt_set, cr);
			rw_exit(&udp->udp_rwlock);
			if (*error)
				goto done;
		}
	} else {
		/* TPI: options, if any, ride in the T_UNITDATA_REQ. */
		if (DB_TYPE(mp) != M_DATA) {
			mp1 = mp->b_cont;
			if (((struct T_unitdata_req *)
			    mp->b_rptr)->OPT_length != 0) {
				attrs.udpattr_ipp4 = pktinfop;
				attrs.udpattr_mb = mp;
				if (udp_unitdata_opt_process(q, mp, error,
				    &attrs) < 0)
					goto done;
				/*
				 * Note: success in processing options.
				 * mp option buffer represented by
				 * OPT_length/offset now potentially modified
				 * and contain option setting results
				 */
				ASSERT(*error == 0);
			}
		}
	}

	/* mp1 points to the M_DATA mblk carrying the packet */
	ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);

	/*
	 * Determine whether we need to mark the mblk with the user's
	 * credentials.
	 * If labeled then sockfs would have already done this.
	 */
	ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL);

	/*
	 * The cred is attached unless the cached ire already matches this
	 * destination and is an ordinary (non broadcast/local/loopback)
	 * route.
	 */
	ire = connp->conn_ire_cache;
	if (CLASSD(v4dst) || (ire == NULL) || (ire->ire_addr != v4dst) ||
	    (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) {
		if (cr != NULL && msg_getcred(mp, NULL) == NULL)
			mblk_setcred(mp, cr, pid);
	}

	/* From here on every exit must go through done to drop the lock. */
	rw_enter(&udp->udp_rwlock, RW_READER);
	lock_held = B_TRUE;

	/*
	 * Cluster and TSOL note:
	 *    udp.udp_v6lastdst		is shared by Cluster and TSOL
	 *    udp.udp_lastdstport	is used by Cluster
	 *
	 * Both Cluster and TSOL need to update the dest addr and/or port.
	 * Updating is done after both Cluster and TSOL checks, protected
	 * by conn_lock.
	 */
	mutex_enter(&connp->conn_lock);

	if (cl_inet_connect2 != NULL &&
	    (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
	    V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst ||
	    udp->udp_lastdstport != port)) {
		/* conn_lock is dropped around the cluster hook. */
		mutex_exit(&connp->conn_lock);
		*error = 0;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error);
		if (*error != 0) {
			*error = EHOSTUNREACH;
			goto done;
		}
		update_lastdst = B_TRUE;
		mutex_enter(&connp->conn_lock);
	}

	/*
	 * Check if our saved options are valid; update if not.
	 * TSOL Note: Since we are not in WRITER mode, UDP packets
	 * to different destination may require different labels,
	 * or worse, UDP packets to same IP address may require
	 * different labels due to use of shared all-zones address.
	 * We use conn_lock to ensure that lastdst, ip_snd_options,
	 * and ip_snd_options_len are consistent for the current
	 * destination and are updated atomically.
	 */
	if (is_system_labeled()) {
		/* Using UDP MLP requires SCM_UCRED from user */
		if (connp->conn_mlp_type != mlptSingle &&
		    !attrs.udpattr_credset) {
			mutex_exit(&connp->conn_lock);
			DTRACE_PROBE4(
			    tx__ip__log__info__output__udp,
			    char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
			    mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q);
			*error = ECONNREFUSED;
			goto done;
		}
		/*
		 * update label option for this UDP socket if
		 * - the destination has changed, or
		 * - the UDP socket is MLP
		 */
		if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
		    V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst ||
		    connp->conn_mlp_type != mlptSingle) &&
		    (*error = udp_update_label(q, mp, v4dst, &update_lastdst))
		    != 0) {
			mutex_exit(&connp->conn_lock);
			goto done;
		}
	}
	if (update_lastdst) {
		IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst);
		udp->udp_lastdstport = port;
	}
	/* Snapshot the saved IP options while conn_lock is still held. */
	if (udp->udp_ip_snd_options_len > 0) {
		ip_snd_opt_len = udp->udp_ip_snd_options_len;
		bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len);
	}
	mutex_exit(&connp->conn_lock);

	/* Add an IP header */
	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len +
	    (insert_spi ? sizeof (uint32_t) : 0);
	ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length];
	/*
	 * The headers are written in place in front of the data unless the
	 * data block is shared, has too little headroom, or the resulting
	 * header address fails OK_32PTR(); then a separate header mblk is
	 * allocated and linked in front of the data.
	 */
	if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) ||
	    !OK_32PTR(ipha)) {
		mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO);
		if (mp2 == NULL) {
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "allocbfail2");
			*error = ENOMEM;
			goto done;
		}
		/* The headers occupy the last ip_hdr_length bytes of mp2. */
		mp2->b_wptr = DB_LIM(mp2);
		mp2->b_cont = mp1;
		mp1 = mp2;
		if (DB_TYPE(mp) != M_DATA)
			mp->b_cont = mp1;
		else
			mp = mp1;

		ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length);
	}
	/* From here on ip_hdr_length is the IP header (plus options) only. */
	ip_hdr_length -= (UDPH_SIZE + (insert_spi ? sizeof (uint32_t) : 0));
#ifdef	_BIG_ENDIAN
	/* Set version, header length, and tos */
	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
	    ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) |
	    udp->udp_type_of_service);
	/* Set ttl and protocol */
	*(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP;
#else
	/* Set version, header length, and tos */
	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
	    ((udp->udp_type_of_service << 8) |
	    ((IP_VERSION << 4) | (ip_hdr_length>>2)));
	/* Set ttl and protocol */
	*(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl;
#endif
	if (pktinfop->ip4_addr != INADDR_ANY) {
		/* Source chosen through packet info; ask IP to verify it. */
		ipha->ipha_src = pktinfop->ip4_addr;
		optinfo.ip_opt_flags = IP_VERIFY_SRC;
	} else {
		/*
		 * Copy our address into the packet.  If this is zero,
		 * first look at __sin6_src_id for a hint. If we leave the
		 * source as INADDR_ANY then ip will fill in the real source
		 * address.
		 */
		IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src);
		if (srcid != 0 && ipha->ipha_src == INADDR_ANY) {
			in6_addr_t v6src;

			ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid,
			    us->us_netstack);
			IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src);
		}
	}
	uha_src_port = udp->udp_port;
	/*
	 * Without IP options nothing below needs udp_rwlock any more.
	 * With options the lock is kept until they have been copied into
	 * the header further down.
	 */
	if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) {
		rw_exit(&udp->udp_rwlock);
		lock_held = B_FALSE;
	}

	if (pktinfop->ip4_ill_index != 0) {
		optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
	}

	ipha->ipha_fragment_offset_and_flags = 0;
	ipha->ipha_ident = 0;

	mp1->b_rptr = (uchar_t *)ipha;

	ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <=
	    (uintptr_t)UINT_MAX);

	/* Determine length of packet */
	ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha);
	if ((mp2 = mp1->b_cont) != NULL) {
		do {
			ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX);
			ip_len += (uint32_t)MBLKL(mp2);
		} while ((mp2 = mp2->b_cont) != NULL);
	}
	/*
	 * If the size of the packet is greater than the maximum allowed by
	 * ip, return an error. Passing this down could cause panics because
	 * the size will have wrapped and be inconsistent with the msg size.
	 */
	if (ip_len > IP_MAXPACKET) {
		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
		    "udp_wput_end: q %p (%S)", q, "IP length exceeded");
		*error = EMSGSIZE;
		goto done;
	}
	ipha->ipha_length = htons((uint16_t)ip_len);
	/* ip_len now becomes the UDP length, kept in network byte order. */
	ip_len -= ip_hdr_length;
	ip_len = htons((uint16_t)ip_len);
	udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length);

	/* Insert all-0s SPI now. */
	if (insert_spi)
		*((uint32_t *)(udpha + 1)) = 0;

	/*
	 * Copy in the destination address
	 */
	ipha->ipha_dst = v4dst;

	/*
	 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic.
	 */
	if (CLASSD(v4dst))
		ipha->ipha_ttl = udp->udp_multicast_ttl;

	udpha->uha_dst_port = port;
	udpha->uha_src_port = uha_src_port;

	if (ip_snd_opt_len > 0) {
		uint32_t	cksum;

		bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len);
		/* Options are in the header; the reader lock can go now. */
		lock_held = B_FALSE;
		rw_exit(&udp->udp_rwlock);
		/*
		 * Massage source route putting first source route in ipha_dst.
		 * Ignore the destination in T_unitdata_req.
		 * Create a checksum adjustment for a source route, if any.
		 */
		cksum = ip_massage_options(ipha, us->us_netstack);
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) +
		    (ipha->ipha_dst & 0xFFFF);
		if ((int)cksum < 0)
			cksum--;
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		/*
		 * IP does the checksum if uha_checksum is non-zero,
		 * We make it easy for IP to include our pseudo header
		 * by putting our length in uha_checksum.
		 */
		cksum += ip_len;
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		/* There might be a carry. */
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
#ifdef _LITTLE_ENDIAN
		if (us->us_do_checksum)
			ip_len = (cksum << 16) | ip_len;
#else
		if (us->us_do_checksum)
			ip_len = (ip_len << 16) | cksum;
		else
			ip_len <<= 16;
#endif
	} else {
		/*
		 * IP does the checksum if uha_checksum is non-zero,
		 * We make it easy for IP to include our pseudo header
		 * by putting our length in uha_checksum.
		 */
		if (us->us_do_checksum)
			ip_len |= (ip_len << 16);
#ifndef _LITTLE_ENDIAN
		else
			ip_len <<= 16;
#endif
	}
	ASSERT(!lock_held);
	/*
	 * Set UDP length and checksum: ip_len was arranged above so that a
	 * single 32-bit store starting at uha_length fills both fields.
	 */
	*((uint32_t *)&udpha->uha_length) = ip_len;

	if (DB_TYPE(mp) != M_DATA) {
		/* Note: this cr shadows the cr parameter within the block. */
		cred_t *cr;
		pid_t cpid;

		/* Move any cred from the T_UNITDATA_REQ to the packet */
		cr = msg_extractcred(mp, &cpid);
		if (cr != NULL) {
			if (mp1->b_datap->db_credp != NULL)
				crfree(mp1->b_datap->db_credp);
			mp1->b_datap->db_credp = cr;
			mp1->b_datap->db_cpid = cpid;
		}
		ASSERT(mp != mp1);
		/* Only the T_UNITDATA_REQ block is freed; mp1 is sent. */
		freeb(mp);
	}

	/* mp has been consumed and we'll return success */
	ASSERT(*error == 0);
	mp = NULL;

	/* We're done.  Pass the packet to ip. */
	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
	    "udp_wput_end: q %p (%S)", q, "end");

	/*
	 * Anything that needs policy, routing overrides, per-packet options,
	 * IP options, IPP processing or multicast routing goes through the
	 * full IP output path; everything else goes to udp_send_data().
	 */
	if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 ||
	    CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) ||
	    connp->conn_dontroute ||
	    connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 ||
	    optinfo.ip_opt_ill_index != 0 ||
	    ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION ||
	    IPP_ENABLED(IPP_LOCAL_OUT, ipst) ||
	    ipst->ips_ip_g_mrouter != NULL) {
		UDP_STAT(us, udp_ip_send);
		ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT,
		    &optinfo);
	} else {
		udp_send_data(udp, connp->conn_wq, mp1, ipha);
	}

done:
	if (lock_held)
		rw_exit(&udp->udp_rwlock);
	if (*error != 0) {
		/* On failure the message goes back to the caller unfreed. */
		ASSERT(mp != NULL);
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
	}
	return (mp);
}

/*
 * Send an IPv4 UDP packet whose headers have already been built (ipha
 * points at the IP header in mp).  An ire for the destination is taken
 * from, or installed into, the conn's one-entry ire cache.  If the ire
 * qualifies for the fast path the packet is handed to udp_xmit();
 * otherwise it is passed to ip_output().
 */
static void
udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha)
{
	conn_t	*connp = udp->udp_connp;
	ipaddr_t src, dst;
	ire_t	*ire;
	ipif_t	*ipif = NULL;
	mblk_t	*ire_fp_mp;
	boolean_t retry_caching;
	udp_stack_t
*us = udp->udp_us; 5454 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5455 5456 dst = ipha->ipha_dst; 5457 src = ipha->ipha_src; 5458 ASSERT(ipha->ipha_ident == 0); 5459 5460 if (CLASSD(dst)) { 5461 int err; 5462 5463 ipif = conn_get_held_ipif(connp, 5464 &connp->conn_multicast_ipif, &err); 5465 5466 if (ipif == NULL || ipif->ipif_isv6 || 5467 (ipif->ipif_ill->ill_phyint->phyint_flags & 5468 PHYI_LOOPBACK)) { 5469 if (ipif != NULL) 5470 ipif_refrele(ipif); 5471 UDP_STAT(us, udp_ip_send); 5472 ip_output(connp, mp, q, IP_WPUT); 5473 return; 5474 } 5475 } 5476 5477 retry_caching = B_FALSE; 5478 mutex_enter(&connp->conn_lock); 5479 ire = connp->conn_ire_cache; 5480 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 5481 5482 if (ire == NULL || ire->ire_addr != dst || 5483 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 5484 retry_caching = B_TRUE; 5485 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 5486 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 5487 5488 ASSERT(ipif != NULL); 5489 if (!IS_ON_SAME_LAN(stq_ill, ipif->ipif_ill)) 5490 retry_caching = B_TRUE; 5491 } 5492 5493 if (!retry_caching) { 5494 ASSERT(ire != NULL); 5495 IRE_REFHOLD(ire); 5496 mutex_exit(&connp->conn_lock); 5497 } else { 5498 boolean_t cached = B_FALSE; 5499 5500 connp->conn_ire_cache = NULL; 5501 mutex_exit(&connp->conn_lock); 5502 5503 /* Release the old ire */ 5504 if (ire != NULL) { 5505 IRE_REFRELE_NOTR(ire); 5506 ire = NULL; 5507 } 5508 5509 if (CLASSD(dst)) { 5510 ASSERT(ipif != NULL); 5511 ire = ire_ctable_lookup(dst, 0, 0, ipif, 5512 connp->conn_zoneid, msg_getlabel(mp), 5513 MATCH_IRE_ILL, ipst); 5514 } else { 5515 ASSERT(ipif == NULL); 5516 ire = ire_cache_lookup(dst, connp->conn_zoneid, 5517 msg_getlabel(mp), ipst); 5518 } 5519 5520 if (ire == NULL) { 5521 if (ipif != NULL) 5522 ipif_refrele(ipif); 5523 UDP_STAT(us, udp_ire_null); 5524 ip_output(connp, mp, q, IP_WPUT); 5525 return; 5526 } 5527 IRE_REFHOLD_NOTR(ire); 5528 5529 mutex_enter(&connp->conn_lock); 5530 if 
(CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL && 5531 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 5532 irb_t *irb = ire->ire_bucket; 5533 5534 /* 5535 * IRE's created for non-connection oriented transports 5536 * are normally initialized with IRE_MARK_TEMPORARY set 5537 * in the ire_marks. These IRE's are preferentially 5538 * reaped when the hash chain length in the cache 5539 * bucket exceeds the maximum value specified in 5540 * ip[6]_ire_max_bucket_cnt. This can severely affect 5541 * UDP performance if IRE cache entries that we need 5542 * to reuse are continually removed. To remedy this, 5543 * when we cache the IRE in the conn_t, we remove the 5544 * IRE_MARK_TEMPORARY bit from the ire_marks if it was 5545 * set. 5546 */ 5547 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5548 rw_enter(&irb->irb_lock, RW_WRITER); 5549 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5550 ire->ire_marks &= ~IRE_MARK_TEMPORARY; 5551 irb->irb_tmp_ire_cnt--; 5552 } 5553 rw_exit(&irb->irb_lock); 5554 } 5555 connp->conn_ire_cache = ire; 5556 cached = B_TRUE; 5557 } 5558 mutex_exit(&connp->conn_lock); 5559 5560 /* 5561 * We can continue to use the ire but since it was not 5562 * cached, we should drop the extra reference. 5563 */ 5564 if (!cached) 5565 IRE_REFRELE_NOTR(ire); 5566 } 5567 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 5568 ASSERT(!CLASSD(dst) || ipif != NULL); 5569 5570 /* 5571 * Check if we can take the fast-path. 
* Note that "incomplete" ire's (where the link-layer for next hop
	 * is not resolved, or where the fast-path header in nce_fp_mp is not
	 * available yet) are sent down the legacy (slow) path
	 */
	if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) ||
	    (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) ||
	    (ire->ire_max_frag < ntohs(ipha->ipha_length)) ||
	    ((ire->ire_nce == NULL) ||
	    ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) ||
	    connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) {
		if (ipif != NULL)
			ipif_refrele(ipif);
		UDP_STAT(us, udp_ip_ire_send);
		IRE_REFRELE(ire);
		ip_output(connp, mp, q, IP_WPUT);
		return;
	}

	if (src == INADDR_ANY && !connp->conn_unspec_src) {
		if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC))
			ipha->ipha_src = ipif->ipif_src_addr;
		else
			ipha->ipha_src = ire->ire_src_addr;
	}

	if (ipif != NULL)
		ipif_refrele(ipif);

	udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid);
}

/*
 * udp_xmit():
 * Fast-path transmit of an IPv4 UDP datagram whose IP header sits at
 * mp->b_rptr.  The cached link-layer header (ire->ire_nce->nce_fp_mp) is
 * prepended in place and the packet is handed to the driver, either through
 * the ill's direct-call transmit entry point or with putnext() on ire_stq.
 *
 * Both arguments are consumed on every return path:
 *  - the caller's hold on ire is dropped (ire_refrele()/IRE_REFRELE());
 *  - mp is queued with putq(), freed, or passed down.  The firewall hooks
 *    may also leave mp NULL, in which case nothing more is sent.
 *
 * q is checked for a backlog and is passed to ip_multicast_loopback();
 * zoneid is only used for that loopback call.
 */
static void
udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid)
{
	ipaddr_t	src, dst;
	ill_t		*ill;
	mblk_t		*ire_fp_mp;
	uint_t		ire_fp_mp_len;
	uint16_t	*up;
	uint32_t	cksum, hcksum_txflags;
	queue_t		*dev_q;
	udp_t		*udp = connp->conn_udp;
	ipha_t		*ipha = (ipha_t *)mp->b_rptr;
	udp_stack_t	*us = udp->udp_us;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	boolean_t	ll_multicast = B_FALSE;
	boolean_t	direct_send;

	dev_q = ire->ire_stq->q_next;
	ASSERT(dev_q != NULL);

	ill = ire_to_ill(ire);
	ASSERT(ill != NULL);

	/*
	 * For the direct send case, if resetting of conn_direct_blocked
	 * was missed, it is still ok because the putq() would enable
	 * the queue and write service will drain it out.
	 */
	direct_send = ILL_DIRECT_CAPABLE(ill);

	/*
	 * is queue flow controlled?
	 * Only applies to the putnext() path.  If anything is already
	 * queued on q, the conn is draining, or the device queue is blocked,
	 * the packet is either appended with putq() (when
	 * ips_ip_output_queue is set) or dropped.  Note that
	 * ipIfStatsOutDiscards is bumped in both cases.
	 */
	if ((!direct_send) && (q->q_first != NULL || connp->conn_draining ||
	    DEV_Q_FLOW_BLOCKED(dev_q))) {
		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
		if (ipst->ips_ip_output_queue) {
			DTRACE_PROBE1(udp__xmit__putq, conn_t *, connp);
			(void) putq(connp->conn_wq, mp);
		} else {
			freemsg(mp);
		}
		ire_refrele(ire);
		return;
	}

	/*
	 * The caller has already verified that nce_fp_mp exists and fits in
	 * the headroom in front of the IP header.
	 */
	ire_fp_mp = ire->ire_nce->nce_fp_mp;
	ire_fp_mp_len = MBLKL(ire_fp_mp);
	ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len);

	dst = ipha->ipha_dst;
	src = ipha->ipha_src;


	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);

	/*
	 * Take the next IP ident from the per-ire counter; on hosts that are
	 * not big-endian the two bytes are swapped before it goes in the
	 * header.
	 */
	ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1);
#ifndef _BIG_ENDIAN
	ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8);
#endif

	if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
		ASSERT(ill->ill_hcksum_capab != NULL);
		hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags;
	} else {
		hcksum_txflags = 0;
	}

	/* pseudo-header checksum (do it in parts for IP header checksum) */
	cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF);

	ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION);
	up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
	/* A zero UDP checksum field means no UDP checksum is computed here */
	if (*up != 0) {
		IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags,
		    mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH,
		    ntohs(ipha->ipha_length), cksum);

		/* Software checksum? */
		if (DB_CKSUMFLAGS(mp) == 0) {
			UDP_STAT(us, udp_out_sw_cksum);
			UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes,
			    ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH);
		}
	}

	/* The ire's fragment flag is only applied to non-multicast packets */
	if (!CLASSD(dst)) {
		ipha->ipha_fragment_offset_and_flags |=
		    (uint32_t)htons(ire->ire_frag_flag);
	}

	/* Calculate IP header checksum if hardware isn't capable */
	if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) {
		IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0],
		    ((uint16_t *)ipha)[4]);
	}

	if (CLASSD(dst)) {
		/*
		 * Loop a copy back when the group has an ilm entry on this
		 * ill; conn_multicast_loop selects the loopback flag.
		 */
		if (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL) {
			ip_multicast_loopback(q, ill, mp,
			    connp->conn_multicast_loop ? 0 :
			    IP_FF_NO_MCAST_LOOP, zoneid);
		}

		/* If multicast TTL is 0 then we are done */
		if (ipha->ipha_ttl == 0) {
			freemsg(mp);
			ire_refrele(ire);
			return;
		}
		ll_multicast = B_TRUE;
	}

	/* Prepend the cached link-layer header in the mblk's headroom */
	ASSERT(DB_TYPE(ire_fp_mp) == M_DATA);
	mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len;
	bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len);

	UPDATE_OB_PKT_COUNT(ire);
	ire->ire_last_used_time = lbolt;

	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits);
	UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets,
	    ntohs(ipha->ipha_length));

	/*
	 * Run the physical-out firewall hooks.  mp is passed to FW_HOOKS and
	 * tested for NULL afterwards, so the hooks may have consumed the
	 * packet.
	 */
	DTRACE_PROBE4(ip4__physical__out__start,
	    ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp);
	FW_HOOKS(ipst->ips_ip4_physical_out_event,
	    ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp,
	    ll_multicast, ipst);
	DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp);
	if (ipst->ips_ipobs_enabled && mp != NULL) {
		zoneid_t szone;

		szone = ip_get_zoneid_v4(ipha->ipha_src, mp,
		    ipst, ALL_ZONES);
		ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone,
		    ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst);
	}

	if (mp == NULL)
		goto bail;

	DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL,
	    void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill,
	    ipha_t *, ipha, ip6_t *, NULL, int, 0);

	if (direct_send) {
		uintptr_t cookie;
		ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct;

		/*
		 * Hand the packet to the driver's direct transmit function.
		 * A non-zero cookie is treated below as a flow-control
		 * indication.
		 */
		cookie = idd->idd_tx_df(idd->idd_tx_dh, mp,
		    (uintptr_t)connp, 0);
		if (cookie != NULL) {
			idl_tx_list_t *idl_txl;

			/*
			 * Flow controlled.
			 */
			DTRACE_PROBE2(non__null__cookie, uintptr_t,
			    cookie, conn_t *, connp);
			idl_txl = &ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)];
			mutex_enter(&idl_txl->txl_lock);
			/*
			 * Check again after holding txl_lock to see if Tx
			 * ring is still blocked and only then insert the
			 * connp into the drain list.
			 */
			if (connp->conn_direct_blocked ||
			    (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh,
			    cookie) == 0)) {
				mutex_exit(&idl_txl->txl_lock);
				goto bail;
			}
			/*
			 * The drain list slot is keyed by a hash of the
			 * cookie; if it already records a different cookie
			 * the collision is only counted and the conn is not
			 * inserted.
			 */
			if (idl_txl->txl_cookie != NULL &&
			    idl_txl->txl_cookie != cookie) {
				DTRACE_PROBE2(udp__xmit__collision,
				    uintptr_t, cookie,
				    uintptr_t, idl_txl->txl_cookie);
				UDP_STAT(us, udp_cookie_coll);
			} else {
				connp->conn_direct_blocked = B_TRUE;
				idl_txl->txl_cookie = cookie;
				conn_drain_insert(connp, idl_txl);
				DTRACE_PROBE1(udp__xmit__insert,
				    conn_t *, connp);
			}
			mutex_exit(&idl_txl->txl_lock);
		}
	} else {
		DTRACE_PROBE1(udp__xmit__putnext, mblk_t *, mp);
		putnext(ire->ire_stq, mp);
	}
bail:
	IRE_REFRELE(ire);
}

/*
 * udp_update_label_v6():
 * Recompute the IPv6 security label option for destination dst from the
 * credential attached to mp, and install it in the endpoint's sticky
 * options (udp_sticky_ipp / udp_label_len_v6).  On success *update_lastdst
 * is set to B_TRUE; on failure it is left untouched.
 *
 * NOTE: although declared boolean_t, the value returned is 0 on success or
 * an errno value (e.g. EINVAL when mp carries no credential); the caller
 * stores it as an error code.
 */
static boolean_t
udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst,
    boolean_t *update_lastdst)
{
	udp_t		*udp = Q_TO_UDP(wq);
	int		err;
	uchar_t		opt_storage[TSOL_MAX_IPV6_OPTION];
	udp_stack_t	*us = udp->udp_us;
	cred_t		*cr;

	/*
	 * All Solaris components should pass a db_credp
	 * for this message, hence we ASSERT.
	 * On production kernels we return an error to be robust against
	 * random streams modules sitting on top of us.
5813 */ 5814 cr = msg_getcred(mp, NULL); 5815 ASSERT(cr != NULL); 5816 if (cr == NULL) 5817 return (EINVAL); 5818 5819 /* Note that we use the cred/label from the message to handle MLP */ 5820 err = tsol_compute_label_v6(cr, 5821 dst, opt_storage, udp->udp_connp->conn_mac_exempt, 5822 us->us_netstack->netstack_ip); 5823 if (err == 0) { 5824 err = tsol_update_sticky(&udp->udp_sticky_ipp, 5825 &udp->udp_label_len_v6, opt_storage); 5826 } 5827 if (err != 0) { 5828 DTRACE_PROBE4( 5829 tx__ip__log__drop__updatelabel__udp6, 5830 char *, "queue(1) failed to update options(2) on mp(3)", 5831 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5832 } else { 5833 *update_lastdst = B_TRUE; 5834 } 5835 return (err); 5836 } 5837 5838 static int 5839 udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr, 5840 pid_t pid) 5841 { 5842 udp_t *udp = connp->conn_udp; 5843 udp_stack_t *us = udp->udp_us; 5844 ipaddr_t v4dst; 5845 in_port_t dstport; 5846 boolean_t mapped_addr; 5847 struct sockaddr_storage ss; 5848 sin_t *sin; 5849 sin6_t *sin6; 5850 struct sockaddr *addr; 5851 socklen_t addrlen; 5852 int error; 5853 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5854 5855 /* M_DATA for connected socket */ 5856 5857 ASSERT(udp->udp_issocket); 5858 UDP_DBGSTAT(us, udp_data_conn); 5859 5860 mutex_enter(&connp->conn_lock); 5861 if (udp->udp_state != TS_DATA_XFER) { 5862 mutex_exit(&connp->conn_lock); 5863 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5864 UDP_STAT(us, udp_out_err_notconn); 5865 freemsg(mp); 5866 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5867 "udp_wput_end: connp %p (%S)", connp, 5868 "not-connected; address required"); 5869 return (EDESTADDRREQ); 5870 } 5871 5872 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 5873 if (mapped_addr) 5874 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 5875 5876 /* Initialize addr and addrlen as if they're passed in */ 5877 if (udp->udp_family == AF_INET) { 5878 sin = (sin_t *)&ss; 5879 sin->sin_family = AF_INET; 5880 
dstport = sin->sin_port = udp->udp_dstport; 5881 ASSERT(mapped_addr); 5882 sin->sin_addr.s_addr = v4dst; 5883 addr = (struct sockaddr *)sin; 5884 addrlen = sizeof (*sin); 5885 } else { 5886 sin6 = (sin6_t *)&ss; 5887 sin6->sin6_family = AF_INET6; 5888 dstport = sin6->sin6_port = udp->udp_dstport; 5889 sin6->sin6_flowinfo = udp->udp_flowinfo; 5890 sin6->sin6_addr = udp->udp_v6dst; 5891 sin6->sin6_scope_id = 0; 5892 sin6->__sin6_src_id = 0; 5893 addr = (struct sockaddr *)sin6; 5894 addrlen = sizeof (*sin6); 5895 } 5896 mutex_exit(&connp->conn_lock); 5897 5898 if (mapped_addr) { 5899 /* 5900 * Handle both AF_INET and AF_INET6; the latter 5901 * for IPV4 mapped destination addresses. Note 5902 * here that both addr and addrlen point to the 5903 * corresponding struct depending on the address 5904 * family of the socket. 5905 */ 5906 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 5907 insert_spi, msg, cr, pid); 5908 } else { 5909 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid); 5910 } 5911 if (error == 0) { 5912 ASSERT(mp == NULL); 5913 return (0); 5914 } 5915 5916 UDP_STAT(us, udp_out_err_output); 5917 ASSERT(mp != NULL); 5918 if (IPCL_IS_NONSTR(connp)) { 5919 freemsg(mp); 5920 return (error); 5921 } else { 5922 /* mp is freed by the following routine */ 5923 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, 5924 (t_scalar_t)addrlen, (t_scalar_t)error); 5925 return (0); 5926 } 5927 } 5928 5929 /* ARGSUSED */ 5930 static int 5931 udp_send_not_connected(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 5932 socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid) 5933 { 5934 5935 udp_t *udp = connp->conn_udp; 5936 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5937 int error = 0; 5938 sin6_t *sin6; 5939 sin_t *sin; 5940 uint_t srcid; 5941 uint16_t port; 5942 ipaddr_t v4dst; 5943 5944 5945 ASSERT(addr != NULL); 5946 5947 switch (udp->udp_family) { 5948 case AF_INET6: 5949 sin6 = (sin6_t *)addr; 5950 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 
5951 /* 5952 * Destination is a non-IPv4-compatible IPv6 address. 5953 * Send out an IPv6 format packet. 5954 */ 5955 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, 5956 pid); 5957 if (error != 0) 5958 goto ud_error; 5959 5960 return (0); 5961 } 5962 /* 5963 * If the local address is not zero or a mapped address 5964 * return an error. It would be possible to send an IPv4 5965 * packet but the response would never make it back to the 5966 * application since it is bound to a non-mapped address. 5967 */ 5968 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 5969 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 5970 error = EADDRNOTAVAIL; 5971 goto ud_error; 5972 } 5973 /* Send IPv4 packet without modifying udp_ipversion */ 5974 /* Extract port and ipaddr */ 5975 port = sin6->sin6_port; 5976 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 5977 srcid = sin6->__sin6_src_id; 5978 break; 5979 5980 case AF_INET: 5981 sin = (sin_t *)addr; 5982 /* Extract port and ipaddr */ 5983 port = sin->sin_port; 5984 v4dst = sin->sin_addr.s_addr; 5985 srcid = 0; 5986 break; 5987 } 5988 5989 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi, 5990 msg, cr, pid); 5991 5992 if (error == 0) { 5993 ASSERT(mp == NULL); 5994 return (0); 5995 } 5996 5997 ud_error: 5998 ASSERT(mp != NULL); 5999 6000 return (error); 6001 } 6002 6003 /* 6004 * This routine handles all messages passed downstream. It either 6005 * consumes the message or passes it downstream; it never queues a 6006 * a message. 6007 * 6008 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 6009 * is valid when we are directly beneath the stream head, and thus sockfs 6010 * is able to bypass STREAMS and directly call us, passing along the sockaddr 6011 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 6012 * connected endpoints. 
6013 */ 6014 void 6015 udp_wput(queue_t *q, mblk_t *mp) 6016 { 6017 conn_t *connp = Q_TO_CONN(q); 6018 udp_t *udp = connp->conn_udp; 6019 int error = 0; 6020 struct sockaddr *addr; 6021 socklen_t addrlen; 6022 udp_stack_t *us = udp->udp_us; 6023 6024 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6025 "udp_wput_start: queue %p mp %p", q, mp); 6026 6027 /* 6028 * We directly handle several cases here: T_UNITDATA_REQ message 6029 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 6030 * socket. 6031 */ 6032 switch (DB_TYPE(mp)) { 6033 case M_DATA: 6034 /* 6035 * Quick check for error cases. Checks will be done again 6036 * under the lock later on 6037 */ 6038 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 6039 /* Not connected; address is required */ 6040 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6041 UDP_STAT(us, udp_out_err_notconn); 6042 freemsg(mp); 6043 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6044 "udp_wput_end: connp %p (%S)", connp, 6045 "not-connected; address required"); 6046 return; 6047 } 6048 (void) udp_send_connected(connp, mp, NULL, NULL, -1); 6049 return; 6050 6051 case M_PROTO: 6052 case M_PCPROTO: { 6053 struct T_unitdata_req *tudr; 6054 6055 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 6056 tudr = (struct T_unitdata_req *)mp->b_rptr; 6057 6058 /* Handle valid T_UNITDATA_REQ here */ 6059 if (MBLKL(mp) >= sizeof (*tudr) && 6060 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 6061 if (mp->b_cont == NULL) { 6062 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6063 "udp_wput_end: q %p (%S)", q, "badaddr"); 6064 error = EPROTO; 6065 goto ud_error; 6066 } 6067 6068 if (!MBLKIN(mp, 0, tudr->DEST_offset + 6069 tudr->DEST_length)) { 6070 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6071 "udp_wput_end: q %p (%S)", q, "badaddr"); 6072 error = EADDRNOTAVAIL; 6073 goto ud_error; 6074 } 6075 /* 6076 * If a port has not been bound to the stream, fail. 
6077 * This is not a problem when sockfs is directly 6078 * above us, because it will ensure that the socket 6079 * is first bound before allowing data to be sent. 6080 */ 6081 if (udp->udp_state == TS_UNBND) { 6082 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6083 "udp_wput_end: q %p (%S)", q, "outstate"); 6084 error = EPROTO; 6085 goto ud_error; 6086 } 6087 addr = (struct sockaddr *) 6088 &mp->b_rptr[tudr->DEST_offset]; 6089 addrlen = tudr->DEST_length; 6090 if (tudr->OPT_length != 0) 6091 UDP_STAT(us, udp_out_opt); 6092 break; 6093 } 6094 /* FALLTHRU */ 6095 } 6096 default: 6097 udp_wput_other(q, mp); 6098 return; 6099 } 6100 ASSERT(addr != NULL); 6101 6102 error = udp_send_not_connected(connp, mp, addr, addrlen, NULL, NULL, 6103 -1); 6104 if (error != 0) { 6105 ud_error: 6106 UDP_STAT(us, udp_out_err_output); 6107 ASSERT(mp != NULL); 6108 /* mp is freed by the following routine */ 6109 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6110 (t_scalar_t)error); 6111 } 6112 } 6113 6114 /* ARGSUSED */ 6115 static void 6116 udp_wput_fallback(queue_t *wq, mblk_t *mp) 6117 { 6118 #ifdef DEBUG 6119 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 6120 #endif 6121 freemsg(mp); 6122 } 6123 6124 6125 /* 6126 * udp_output_v6(): 6127 * Assumes that udp_wput did some sanity checking on the destination 6128 * address. 
6129 */ 6130 static mblk_t * 6131 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, 6132 struct nmsghdr *msg, cred_t *cr, pid_t pid) 6133 { 6134 ip6_t *ip6h; 6135 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6136 mblk_t *mp1 = mp; 6137 mblk_t *mp2; 6138 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6139 size_t ip_len; 6140 udpha_t *udph; 6141 udp_t *udp = connp->conn_udp; 6142 udp_stack_t *us = udp->udp_us; 6143 queue_t *q = connp->conn_wq; 6144 ip6_pkt_t ipp_s; /* For ancillary data options */ 6145 ip6_pkt_t *ipp = &ipp_s; 6146 ip6_pkt_t *tipp; /* temporary ipp */ 6147 uint32_t csum = 0; 6148 uint_t ignore = 0; 6149 uint_t option_exists = 0, is_sticky = 0; 6150 uint8_t *cp; 6151 uint8_t *nxthdr_ptr; 6152 in6_addr_t ip6_dst; 6153 in_port_t port; 6154 udpattrs_t attrs; 6155 boolean_t opt_present; 6156 ip6_hbh_t *hopoptsptr = NULL; 6157 uint_t hopoptslen = 0; 6158 boolean_t is_ancillary = B_FALSE; 6159 size_t sth_wroff = 0; 6160 ire_t *ire; 6161 boolean_t update_lastdst = B_FALSE; 6162 6163 *error = 0; 6164 6165 /* 6166 * If the local address is a mapped address return 6167 * an error. 6168 * It would be possible to send an IPv6 packet but the 6169 * response would never make it back to the application 6170 * since it is bound to a mapped address. 
6171 */ 6172 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6173 *error = EADDRNOTAVAIL; 6174 goto done; 6175 } 6176 6177 ipp->ipp_fields = 0; 6178 ipp->ipp_sticky_ignored = 0; 6179 6180 /* 6181 * If TPI options passed in, feed it for verification and handling 6182 */ 6183 attrs.udpattr_credset = B_FALSE; 6184 opt_present = B_FALSE; 6185 if (IPCL_IS_NONSTR(connp)) { 6186 if (msg->msg_controllen != 0) { 6187 attrs.udpattr_ipp6 = ipp; 6188 attrs.udpattr_mb = mp; 6189 6190 rw_enter(&udp->udp_rwlock, RW_WRITER); 6191 *error = process_auxiliary_options(connp, 6192 msg->msg_control, msg->msg_controllen, 6193 &attrs, &udp_opt_obj, udp_opt_set, cr); 6194 rw_exit(&udp->udp_rwlock); 6195 if (*error) 6196 goto done; 6197 ASSERT(*error == 0); 6198 opt_present = B_TRUE; 6199 } 6200 } else { 6201 if (DB_TYPE(mp) != M_DATA) { 6202 mp1 = mp->b_cont; 6203 if (((struct T_unitdata_req *) 6204 mp->b_rptr)->OPT_length != 0) { 6205 attrs.udpattr_ipp6 = ipp; 6206 attrs.udpattr_mb = mp; 6207 if (udp_unitdata_opt_process(q, mp, error, 6208 &attrs) < 0) { 6209 goto done; 6210 } 6211 ASSERT(*error == 0); 6212 opt_present = B_TRUE; 6213 } 6214 } 6215 } 6216 6217 /* 6218 * Determine whether we need to mark the mblk with the user's 6219 * credentials. 6220 * If labeled then sockfs would have already done this. 
6221 */ 6222 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 6223 ire = connp->conn_ire_cache; 6224 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || (ire == NULL) || 6225 (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) || 6226 (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) { 6227 if (cr != NULL && msg_getcred(mp, NULL) == NULL) 6228 mblk_setcred(mp, cr, pid); 6229 } 6230 6231 rw_enter(&udp->udp_rwlock, RW_READER); 6232 ignore = ipp->ipp_sticky_ignored; 6233 6234 /* mp1 points to the M_DATA mblk carrying the packet */ 6235 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6236 6237 if (sin6->sin6_scope_id != 0 && 6238 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6239 /* 6240 * IPPF_SCOPE_ID is special. It's neither a sticky 6241 * option nor ancillary data. It needs to be 6242 * explicitly set in options_exists. 6243 */ 6244 option_exists |= IPPF_SCOPE_ID; 6245 } 6246 6247 /* 6248 * Compute the destination address 6249 */ 6250 ip6_dst = sin6->sin6_addr; 6251 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6252 ip6_dst = ipv6_loopback; 6253 6254 port = sin6->sin6_port; 6255 6256 /* 6257 * Cluster and TSOL notes, Cluster check: 6258 * see comments in udp_output_v4(). 6259 */ 6260 mutex_enter(&connp->conn_lock); 6261 6262 if (cl_inet_connect2 != NULL && 6263 (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) || 6264 port != udp->udp_lastdstport)) { 6265 mutex_exit(&connp->conn_lock); 6266 *error = 0; 6267 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error); 6268 if (*error != 0) { 6269 *error = EHOSTUNREACH; 6270 rw_exit(&udp->udp_rwlock); 6271 goto done; 6272 } 6273 update_lastdst = B_TRUE; 6274 mutex_enter(&connp->conn_lock); 6275 } 6276 6277 /* 6278 * If we're not going to the same destination as last time, then 6279 * recompute the label required. This is done in a separate routine to 6280 * avoid blowing up our stack here. 
6281 * 6282 * TSOL Note: Since we are not in WRITER mode, UDP packets 6283 * to different destination may require different labels, 6284 * or worse, UDP packets to same IP address may require 6285 * different labels due to use of shared all-zones address. 6286 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6287 * and sticky ipp_hopoptslen are consistent for the current 6288 * destination and are updated atomically. 6289 */ 6290 if (is_system_labeled()) { 6291 /* Using UDP MLP requires SCM_UCRED from user */ 6292 if (connp->conn_mlp_type != mlptSingle && 6293 !attrs.udpattr_credset) { 6294 DTRACE_PROBE4( 6295 tx__ip__log__info__output__udp6, 6296 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6297 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6298 *error = ECONNREFUSED; 6299 rw_exit(&udp->udp_rwlock); 6300 mutex_exit(&connp->conn_lock); 6301 goto done; 6302 } 6303 /* 6304 * update label option for this UDP socket if 6305 * - the destination has changed, or 6306 * - the UDP socket is MLP 6307 */ 6308 if ((opt_present || 6309 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6310 connp->conn_mlp_type != mlptSingle) && 6311 (*error = udp_update_label_v6(q, mp, &ip6_dst, 6312 &update_lastdst)) != 0) { 6313 rw_exit(&udp->udp_rwlock); 6314 mutex_exit(&connp->conn_lock); 6315 goto done; 6316 } 6317 } 6318 6319 if (update_lastdst) { 6320 udp->udp_v6lastdst = ip6_dst; 6321 udp->udp_lastdstport = port; 6322 } 6323 6324 /* 6325 * If there's a security label here, then we ignore any options the 6326 * user may try to set. We keep the peer's label as a hidden sticky 6327 * option. We make a private copy of this label before releasing the 6328 * lock so that label is kept consistent with the destination addr. 
6329 */ 6330 if (udp->udp_label_len_v6 > 0) { 6331 ignore &= ~IPPF_HOPOPTS; 6332 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6333 } 6334 6335 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6336 /* No sticky options nor ancillary data. */ 6337 mutex_exit(&connp->conn_lock); 6338 goto no_options; 6339 } 6340 6341 /* 6342 * Go through the options figuring out where each is going to 6343 * come from and build two masks. The first mask indicates if 6344 * the option exists at all. The second mask indicates if the 6345 * option is sticky or ancillary. 6346 */ 6347 if (!(ignore & IPPF_HOPOPTS)) { 6348 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6349 option_exists |= IPPF_HOPOPTS; 6350 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6351 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6352 option_exists |= IPPF_HOPOPTS; 6353 is_sticky |= IPPF_HOPOPTS; 6354 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6355 hopoptsptr = kmem_alloc( 6356 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6357 if (hopoptsptr == NULL) { 6358 *error = ENOMEM; 6359 mutex_exit(&connp->conn_lock); 6360 goto done; 6361 } 6362 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6363 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6364 hopoptslen); 6365 udp_ip_hdr_len += hopoptslen; 6366 } 6367 } 6368 mutex_exit(&connp->conn_lock); 6369 6370 if (!(ignore & IPPF_RTHDR)) { 6371 if (ipp->ipp_fields & IPPF_RTHDR) { 6372 option_exists |= IPPF_RTHDR; 6373 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6374 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6375 option_exists |= IPPF_RTHDR; 6376 is_sticky |= IPPF_RTHDR; 6377 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6378 } 6379 } 6380 6381 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6382 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6383 option_exists |= IPPF_RTDSTOPTS; 6384 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6385 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6386 option_exists |= IPPF_RTDSTOPTS; 6387 
is_sticky |= IPPF_RTDSTOPTS; 6388 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6389 } 6390 } 6391 6392 if (!(ignore & IPPF_DSTOPTS)) { 6393 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6394 option_exists |= IPPF_DSTOPTS; 6395 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6396 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6397 option_exists |= IPPF_DSTOPTS; 6398 is_sticky |= IPPF_DSTOPTS; 6399 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6400 } 6401 } 6402 6403 if (!(ignore & IPPF_IFINDEX)) { 6404 if (ipp->ipp_fields & IPPF_IFINDEX) { 6405 option_exists |= IPPF_IFINDEX; 6406 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6407 option_exists |= IPPF_IFINDEX; 6408 is_sticky |= IPPF_IFINDEX; 6409 } 6410 } 6411 6412 if (!(ignore & IPPF_ADDR)) { 6413 if (ipp->ipp_fields & IPPF_ADDR) { 6414 option_exists |= IPPF_ADDR; 6415 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6416 option_exists |= IPPF_ADDR; 6417 is_sticky |= IPPF_ADDR; 6418 } 6419 } 6420 6421 if (!(ignore & IPPF_DONTFRAG)) { 6422 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6423 option_exists |= IPPF_DONTFRAG; 6424 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6425 option_exists |= IPPF_DONTFRAG; 6426 is_sticky |= IPPF_DONTFRAG; 6427 } 6428 } 6429 6430 if (!(ignore & IPPF_USE_MIN_MTU)) { 6431 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6432 option_exists |= IPPF_USE_MIN_MTU; 6433 } else if (udp->udp_sticky_ipp.ipp_fields & 6434 IPPF_USE_MIN_MTU) { 6435 option_exists |= IPPF_USE_MIN_MTU; 6436 is_sticky |= IPPF_USE_MIN_MTU; 6437 } 6438 } 6439 6440 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6441 option_exists |= IPPF_HOPLIMIT; 6442 /* IPV6_HOPLIMIT can never be sticky */ 6443 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6444 6445 if (!(ignore & IPPF_UNICAST_HOPS) && 6446 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6447 option_exists |= IPPF_UNICAST_HOPS; 6448 is_sticky |= IPPF_UNICAST_HOPS; 6449 } 6450 6451 if 
(!(ignore & IPPF_MULTICAST_HOPS) && 6452 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6453 option_exists |= IPPF_MULTICAST_HOPS; 6454 is_sticky |= IPPF_MULTICAST_HOPS; 6455 } 6456 6457 if (!(ignore & IPPF_TCLASS)) { 6458 if (ipp->ipp_fields & IPPF_TCLASS) { 6459 option_exists |= IPPF_TCLASS; 6460 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6461 option_exists |= IPPF_TCLASS; 6462 is_sticky |= IPPF_TCLASS; 6463 } 6464 } 6465 6466 if (!(ignore & IPPF_NEXTHOP) && 6467 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 6468 option_exists |= IPPF_NEXTHOP; 6469 is_sticky |= IPPF_NEXTHOP; 6470 } 6471 6472 no_options: 6473 6474 /* 6475 * If any options carried in the ip6i_t were specified, we 6476 * need to account for the ip6i_t in the data we'll be sending 6477 * down. 6478 */ 6479 if (option_exists & IPPF_HAS_IP6I) 6480 udp_ip_hdr_len += sizeof (ip6i_t); 6481 6482 /* check/fix buffer config, setup pointers into it */ 6483 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6484 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6485 !OK_32PTR(ip6h)) { 6486 6487 /* Try to get everything in a single mblk next time */ 6488 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6489 udp->udp_max_hdr_len = udp_ip_hdr_len; 6490 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 6491 } 6492 6493 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 6494 if (mp2 == NULL) { 6495 *error = ENOMEM; 6496 rw_exit(&udp->udp_rwlock); 6497 goto done; 6498 } 6499 mp2->b_wptr = DB_LIM(mp2); 6500 mp2->b_cont = mp1; 6501 mp1 = mp2; 6502 if (DB_TYPE(mp) != M_DATA) 6503 mp->b_cont = mp1; 6504 else 6505 mp = mp1; 6506 6507 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6508 } 6509 mp1->b_rptr = (unsigned char *)ip6h; 6510 ip6i = (ip6i_t *)ip6h; 6511 6512 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&udp->udp_sticky_ipp : ipp) 6513 if (option_exists & IPPF_HAS_IP6I) { 6514 ip6h = (ip6_t *)&ip6i[1]; 6515 ip6i->ip6i_flags = 0; 6516 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6517 6518 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6519 if (option_exists & IPPF_SCOPE_ID) { 6520 ip6i->ip6i_flags |= IP6I_IFINDEX; 6521 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6522 } else if (option_exists & IPPF_IFINDEX) { 6523 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6524 ASSERT(tipp->ipp_ifindex != 0); 6525 ip6i->ip6i_flags |= IP6I_IFINDEX; 6526 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6527 } 6528 6529 if (option_exists & IPPF_ADDR) { 6530 /* 6531 * Enable per-packet source address verification if 6532 * IPV6_PKTINFO specified the source address. 6533 * ip6_src is set in the transport's _wput function. 6534 */ 6535 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6536 } 6537 6538 if (option_exists & IPPF_DONTFRAG) { 6539 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6540 } 6541 6542 if (option_exists & IPPF_USE_MIN_MTU) { 6543 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6544 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6545 } 6546 6547 if (option_exists & IPPF_NEXTHOP) { 6548 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6549 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6550 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6551 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6552 } 6553 6554 /* 6555 * tell IP this is an ip6i_t private header 6556 */ 6557 ip6i->ip6i_nxt = IPPROTO_RAW; 6558 } 6559 6560 /* Initialize IPv6 header */ 6561 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6562 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6563 6564 /* Set the hoplimit of the outgoing packet. */ 6565 if (option_exists & IPPF_HOPLIMIT) { 6566 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
*/ 6567 ip6h->ip6_hops = ipp->ipp_hoplimit; 6568 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6569 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6570 ip6h->ip6_hops = udp->udp_multicast_ttl; 6571 if (option_exists & IPPF_MULTICAST_HOPS) 6572 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6573 } else { 6574 ip6h->ip6_hops = udp->udp_ttl; 6575 if (option_exists & IPPF_UNICAST_HOPS) 6576 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6577 } 6578 6579 if (option_exists & IPPF_ADDR) { 6580 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6581 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6582 ip6h->ip6_src = tipp->ipp_addr; 6583 } else { 6584 /* 6585 * The source address was not set using IPV6_PKTINFO. 6586 * First look at the bound source. 6587 * If unspecified fallback to __sin6_src_id. 6588 */ 6589 ip6h->ip6_src = udp->udp_v6src; 6590 if (sin6->__sin6_src_id != 0 && 6591 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6592 ip_srcid_find_id(sin6->__sin6_src_id, 6593 &ip6h->ip6_src, connp->conn_zoneid, 6594 us->us_netstack); 6595 } 6596 } 6597 6598 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6599 cp = (uint8_t *)&ip6h[1]; 6600 6601 /* 6602 * Here's where we have to start stringing together 6603 * any extension headers in the right order: 6604 * Hop-by-hop, destination, routing, and final destination opts. 
6605 */ 6606 if (option_exists & IPPF_HOPOPTS) { 6607 /* Hop-by-hop options */ 6608 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6609 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6610 if (hopoptslen == 0) { 6611 hopoptsptr = tipp->ipp_hopopts; 6612 hopoptslen = tipp->ipp_hopoptslen; 6613 is_ancillary = B_TRUE; 6614 } 6615 6616 *nxthdr_ptr = IPPROTO_HOPOPTS; 6617 nxthdr_ptr = &hbh->ip6h_nxt; 6618 6619 bcopy(hopoptsptr, cp, hopoptslen); 6620 cp += hopoptslen; 6621 6622 if (hopoptsptr != NULL && !is_ancillary) { 6623 kmem_free(hopoptsptr, hopoptslen); 6624 hopoptsptr = NULL; 6625 hopoptslen = 0; 6626 } 6627 } 6628 /* 6629 * En-route destination options 6630 * Only do them if there's a routing header as well 6631 */ 6632 if (option_exists & IPPF_RTDSTOPTS) { 6633 ip6_dest_t *dst = (ip6_dest_t *)cp; 6634 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6635 6636 *nxthdr_ptr = IPPROTO_DSTOPTS; 6637 nxthdr_ptr = &dst->ip6d_nxt; 6638 6639 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6640 cp += tipp->ipp_rtdstoptslen; 6641 } 6642 /* 6643 * Routing header next 6644 */ 6645 if (option_exists & IPPF_RTHDR) { 6646 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6647 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6648 6649 *nxthdr_ptr = IPPROTO_ROUTING; 6650 nxthdr_ptr = &rt->ip6r_nxt; 6651 6652 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6653 cp += tipp->ipp_rthdrlen; 6654 } 6655 /* 6656 * Do ultimate destination options 6657 */ 6658 if (option_exists & IPPF_DSTOPTS) { 6659 ip6_dest_t *dest = (ip6_dest_t *)cp; 6660 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6661 6662 *nxthdr_ptr = IPPROTO_DSTOPTS; 6663 nxthdr_ptr = &dest->ip6d_nxt; 6664 6665 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6666 cp += tipp->ipp_dstoptslen; 6667 } 6668 /* 6669 * Now set the last header pointer to the proto passed in 6670 */ 6671 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6672 *nxthdr_ptr = IPPROTO_UDP; 6673 6674 /* Update UDP header */ 6675 udph = (udpha_t *)((uchar_t *)ip6i + 
udp_ip_hdr_len - UDPH_SIZE); 6676 udph->uha_dst_port = sin6->sin6_port; 6677 udph->uha_src_port = udp->udp_port; 6678 6679 /* 6680 * Copy in the destination address 6681 */ 6682 ip6h->ip6_dst = ip6_dst; 6683 6684 ip6h->ip6_vcf = 6685 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 6686 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6687 6688 if (option_exists & IPPF_TCLASS) { 6689 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6690 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6691 tipp->ipp_tclass); 6692 } 6693 rw_exit(&udp->udp_rwlock); 6694 6695 if (option_exists & IPPF_RTHDR) { 6696 ip6_rthdr_t *rth; 6697 6698 /* 6699 * Perform any processing needed for source routing. 6700 * We know that all extension headers will be in the same mblk 6701 * as the IPv6 header. 6702 */ 6703 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6704 if (rth != NULL && rth->ip6r_segleft != 0) { 6705 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6706 /* 6707 * Drop packet - only support Type 0 routing. 6708 * Notify the application as well. 6709 */ 6710 *error = EPROTO; 6711 goto done; 6712 } 6713 6714 /* 6715 * rth->ip6r_len is twice the number of 6716 * addresses in the header. Thus it must be even. 6717 */ 6718 if (rth->ip6r_len & 0x1) { 6719 *error = EPROTO; 6720 goto done; 6721 } 6722 /* 6723 * Shuffle the routing header and ip6_dst 6724 * addresses, and get the checksum difference 6725 * between the first hop (in ip6_dst) and 6726 * the destination (in the last routing hdr entry). 6727 */ 6728 csum = ip_massage_options_v6(ip6h, rth, 6729 us->us_netstack); 6730 /* 6731 * Verify that the first hop isn't a mapped address. 6732 * Routers along the path need to do this verification 6733 * for subsequent hops. 
6734 */ 6735 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6736 *error = EADDRNOTAVAIL; 6737 goto done; 6738 } 6739 6740 cp += (rth->ip6r_len + 1)*8; 6741 } 6742 } 6743 6744 /* count up length of UDP packet */ 6745 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6746 if ((mp2 = mp1->b_cont) != NULL) { 6747 do { 6748 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6749 ip_len += (uint32_t)MBLKL(mp2); 6750 } while ((mp2 = mp2->b_cont) != NULL); 6751 } 6752 6753 /* 6754 * If the size of the packet is greater than the maximum allowed by 6755 * ip, return an error. Passing this down could cause panics because 6756 * the size will have wrapped and be inconsistent with the msg size. 6757 */ 6758 if (ip_len > IP_MAXPACKET) { 6759 *error = EMSGSIZE; 6760 goto done; 6761 } 6762 6763 /* Store the UDP length. Subtract length of extension hdrs */ 6764 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 6765 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 6766 6767 /* 6768 * We make it easy for IP to include our pseudo header 6769 * by putting our length in uh_checksum, modified (if 6770 * we have a routing header) by the checksum difference 6771 * between the ultimate destination and first hop addresses. 6772 * Note: UDP over IPv6 must always checksum the packet. 
6773 */ 6774 csum += udph->uha_length; 6775 csum = (csum & 0xFFFF) + (csum >> 16); 6776 udph->uha_checksum = (uint16_t)csum; 6777 6778 #ifdef _LITTLE_ENDIAN 6779 ip_len = htons(ip_len); 6780 #endif 6781 ip6h->ip6_plen = ip_len; 6782 6783 if (DB_TYPE(mp) != M_DATA) { 6784 cred_t *cr; 6785 pid_t cpid; 6786 6787 /* Move any cred from the T_UNITDATA_REQ to the packet */ 6788 cr = msg_extractcred(mp, &cpid); 6789 if (cr != NULL) { 6790 if (mp1->b_datap->db_credp != NULL) 6791 crfree(mp1->b_datap->db_credp); 6792 mp1->b_datap->db_credp = cr; 6793 mp1->b_datap->db_cpid = cpid; 6794 } 6795 6796 ASSERT(mp != mp1); 6797 freeb(mp); 6798 } 6799 6800 /* mp has been consumed and we'll return success */ 6801 ASSERT(*error == 0); 6802 mp = NULL; 6803 6804 /* We're done. Pass the packet to IP */ 6805 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 6806 ip_output_v6(connp, mp1, q, IP_WPUT); 6807 6808 done: 6809 if (sth_wroff != 0) { 6810 (void) proto_set_tx_wroff(RD(q), connp, 6811 udp->udp_max_hdr_len + us->us_wroff_extra); 6812 } 6813 if (hopoptsptr != NULL && !is_ancillary) { 6814 kmem_free(hopoptsptr, hopoptslen); 6815 hopoptsptr = NULL; 6816 } 6817 if (*error != 0) { 6818 ASSERT(mp != NULL); 6819 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6820 } 6821 return (mp); 6822 } 6823 6824 6825 static int 6826 i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6827 { 6828 sin_t *sin = (sin_t *)sa; 6829 sin6_t *sin6 = (sin6_t *)sa; 6830 6831 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6832 6833 if (udp->udp_state != TS_DATA_XFER) 6834 return (ENOTCONN); 6835 6836 switch (udp->udp_family) { 6837 case AF_INET: 6838 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6839 6840 if (*salenp < sizeof (sin_t)) 6841 return (EINVAL); 6842 6843 *salenp = sizeof (sin_t); 6844 *sin = sin_null; 6845 sin->sin_family = AF_INET; 6846 sin->sin_port = udp->udp_dstport; 6847 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 6848 break; 6849 6850 case AF_INET6: 6851 if (*salenp < sizeof (sin6_t)) 6852 
return (EINVAL); 6853 6854 *salenp = sizeof (sin6_t); 6855 *sin6 = sin6_null; 6856 sin6->sin6_family = AF_INET6; 6857 sin6->sin6_port = udp->udp_dstport; 6858 sin6->sin6_addr = udp->udp_v6dst; 6859 sin6->sin6_flowinfo = udp->udp_flowinfo; 6860 break; 6861 } 6862 6863 return (0); 6864 } 6865 6866 static int 6867 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6868 { 6869 sin_t *sin = (sin_t *)sa; 6870 sin6_t *sin6 = (sin6_t *)sa; 6871 6872 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6873 6874 switch (udp->udp_family) { 6875 case AF_INET: 6876 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6877 6878 if (*salenp < sizeof (sin_t)) 6879 return (EINVAL); 6880 6881 *salenp = sizeof (sin_t); 6882 *sin = sin_null; 6883 sin->sin_family = AF_INET; 6884 sin->sin_port = udp->udp_port; 6885 6886 /* 6887 * If udp_v6src is unspecified, we might be bound to broadcast 6888 * / multicast. Use udp_bound_v6src as local address instead 6889 * (that could also still be unspecified). 6890 */ 6891 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 6892 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6893 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 6894 } else { 6895 sin->sin_addr.s_addr = 6896 V4_PART_OF_V6(udp->udp_bound_v6src); 6897 } 6898 break; 6899 6900 case AF_INET6: 6901 if (*salenp < sizeof (sin6_t)) 6902 return (EINVAL); 6903 6904 *salenp = sizeof (sin6_t); 6905 *sin6 = sin6_null; 6906 sin6->sin6_family = AF_INET6; 6907 sin6->sin6_port = udp->udp_port; 6908 sin6->sin6_flowinfo = udp->udp_flowinfo; 6909 6910 /* 6911 * If udp_v6src is unspecified, we might be bound to broadcast 6912 * / multicast. Use udp_bound_v6src as local address instead 6913 * (that could also still be unspecified). 6914 */ 6915 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) 6916 sin6->sin6_addr = udp->udp_v6src; 6917 else 6918 sin6->sin6_addr = udp->udp_bound_v6src; 6919 break; 6920 } 6921 6922 return (0); 6923 } 6924 6925 /* 6926 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
6927 */ 6928 static void 6929 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 6930 { 6931 void *data; 6932 mblk_t *datamp = mp->b_cont; 6933 udp_t *udp = Q_TO_UDP(q); 6934 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 6935 6936 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 6937 cmdp->cb_error = EPROTO; 6938 qreply(q, mp); 6939 return; 6940 } 6941 data = datamp->b_rptr; 6942 6943 rw_enter(&udp->udp_rwlock, RW_READER); 6944 switch (cmdp->cb_cmd) { 6945 case TI_GETPEERNAME: 6946 cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len); 6947 break; 6948 case TI_GETMYNAME: 6949 cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len); 6950 break; 6951 default: 6952 cmdp->cb_error = EINVAL; 6953 break; 6954 } 6955 rw_exit(&udp->udp_rwlock); 6956 6957 qreply(q, mp); 6958 } 6959 6960 static void 6961 udp_disable_direct_sockfs(udp_t *udp) 6962 { 6963 udp->udp_issocket = B_FALSE; 6964 if (udp->udp_direct_sockfs) { 6965 /* 6966 * Disable read-side synchronous stream interface and 6967 * drain any queued data. 
6968 */ 6969 udp_rcv_drain(udp->udp_connp->conn_rq, udp, B_FALSE); 6970 ASSERT(!udp->udp_direct_sockfs); 6971 UDP_STAT(udp->udp_us, udp_sock_fallback); 6972 } 6973 } 6974 6975 static void 6976 udp_wput_other(queue_t *q, mblk_t *mp) 6977 { 6978 uchar_t *rptr = mp->b_rptr; 6979 struct datab *db; 6980 struct iocblk *iocp; 6981 cred_t *cr; 6982 conn_t *connp = Q_TO_CONN(q); 6983 udp_t *udp = connp->conn_udp; 6984 udp_stack_t *us; 6985 6986 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 6987 "udp_wput_other_start: q %p", q); 6988 6989 us = udp->udp_us; 6990 db = mp->b_datap; 6991 6992 switch (db->db_type) { 6993 case M_CMD: 6994 udp_wput_cmdblk(q, mp); 6995 return; 6996 6997 case M_PROTO: 6998 case M_PCPROTO: 6999 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 7000 freemsg(mp); 7001 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7002 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 7003 return; 7004 } 7005 switch (((t_primp_t)rptr)->type) { 7006 case T_ADDR_REQ: 7007 udp_addr_req(q, mp); 7008 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7009 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 7010 return; 7011 case O_T_BIND_REQ: 7012 case T_BIND_REQ: 7013 udp_tpi_bind(q, mp); 7014 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7015 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 7016 return; 7017 case T_CONN_REQ: 7018 udp_tpi_connect(q, mp); 7019 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7020 "udp_wput_other_end: q %p (%S)", q, "connreq"); 7021 return; 7022 case T_CAPABILITY_REQ: 7023 udp_capability_req(q, mp); 7024 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7025 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 7026 return; 7027 case T_INFO_REQ: 7028 udp_info_req(q, mp); 7029 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7030 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 7031 return; 7032 case T_UNITDATA_REQ: 7033 /* 7034 * If a T_UNITDATA_REQ gets here, the address must 7035 * be bad. Valid T_UNITDATA_REQs are handled 7036 * in udp_wput. 
7037 */ 7038 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 7039 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7040 "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); 7041 return; 7042 case T_UNBIND_REQ: 7043 udp_tpi_unbind(q, mp); 7044 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7045 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 7046 return; 7047 case T_SVR4_OPTMGMT_REQ: 7048 /* 7049 * All Solaris components should pass a db_credp 7050 * for this TPI message, hence we ASSERT. 7051 * But in case there is some other M_PROTO that looks 7052 * like a TPI message sent by some other kernel 7053 * component, we check and return an error. 7054 */ 7055 cr = msg_getcred(mp, NULL); 7056 ASSERT(cr != NULL); 7057 if (cr == NULL) { 7058 udp_err_ack(q, mp, TSYSERR, EINVAL); 7059 return; 7060 } 7061 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 7062 cr)) { 7063 (void) svr4_optcom_req(q, 7064 mp, cr, &udp_opt_obj, B_TRUE); 7065 } 7066 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7067 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 7068 return; 7069 7070 case T_OPTMGMT_REQ: 7071 /* 7072 * All Solaris components should pass a db_credp 7073 * for this TPI message, hence we ASSERT. 7074 * But in case there is some other M_PROTO that looks 7075 * like a TPI message sent by some other kernel 7076 * component, we check and return an error. 7077 */ 7078 cr = msg_getcred(mp, NULL); 7079 ASSERT(cr != NULL); 7080 if (cr == NULL) { 7081 udp_err_ack(q, mp, TSYSERR, EINVAL); 7082 return; 7083 } 7084 (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); 7085 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7086 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 7087 return; 7088 7089 case T_DISCON_REQ: 7090 udp_tpi_disconnect(q, mp); 7091 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7092 "udp_wput_other_end: q %p (%S)", q, "disconreq"); 7093 return; 7094 7095 /* The following TPI message is not supported by udp. 
*/ 7096 case O_T_CONN_RES: 7097 case T_CONN_RES: 7098 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7099 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7100 "udp_wput_other_end: q %p (%S)", q, 7101 "connres/disconreq"); 7102 return; 7103 7104 /* The following 3 TPI messages are illegal for udp. */ 7105 case T_DATA_REQ: 7106 case T_EXDATA_REQ: 7107 case T_ORDREL_REQ: 7108 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7109 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7110 "udp_wput_other_end: q %p (%S)", q, 7111 "data/exdata/ordrel"); 7112 return; 7113 default: 7114 break; 7115 } 7116 break; 7117 case M_FLUSH: 7118 if (*rptr & FLUSHW) 7119 flushq(q, FLUSHDATA); 7120 break; 7121 case M_IOCTL: 7122 iocp = (struct iocblk *)mp->b_rptr; 7123 switch (iocp->ioc_cmd) { 7124 case TI_GETPEERNAME: 7125 if (udp->udp_state != TS_DATA_XFER) { 7126 /* 7127 * If a default destination address has not 7128 * been associated with the stream, then we 7129 * don't know the peer's name. 7130 */ 7131 iocp->ioc_error = ENOTCONN; 7132 iocp->ioc_count = 0; 7133 mp->b_datap->db_type = M_IOCACK; 7134 qreply(q, mp); 7135 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7136 "udp_wput_other_end: q %p (%S)", q, 7137 "getpeername"); 7138 return; 7139 } 7140 /* FALLTHRU */ 7141 case TI_GETMYNAME: { 7142 /* 7143 * For TI_GETPEERNAME and TI_GETMYNAME, we first 7144 * need to copyin the user's strbuf structure. 7145 * Processing will continue in the M_IOCDATA case 7146 * below. 
7147 */ 7148 mi_copyin(q, mp, NULL, 7149 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 7150 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7151 "udp_wput_other_end: q %p (%S)", q, "getmyname"); 7152 return; 7153 } 7154 case ND_SET: 7155 /* nd_getset performs the necessary checking */ 7156 case ND_GET: 7157 if (nd_getset(q, us->us_nd, mp)) { 7158 qreply(q, mp); 7159 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7160 "udp_wput_other_end: q %p (%S)", q, "get"); 7161 return; 7162 } 7163 break; 7164 case _SIOCSOCKFALLBACK: 7165 /* 7166 * Either sockmod is about to be popped and the 7167 * socket would now be treated as a plain stream, 7168 * or a module is about to be pushed so we could 7169 * no longer use read-side synchronous stream. 7170 * Drain any queued data and disable direct sockfs 7171 * interface from now on. 7172 */ 7173 if (!udp->udp_issocket) { 7174 DB_TYPE(mp) = M_IOCNAK; 7175 iocp->ioc_error = EINVAL; 7176 } else { 7177 udp_disable_direct_sockfs(udp); 7178 7179 DB_TYPE(mp) = M_IOCACK; 7180 iocp->ioc_error = 0; 7181 } 7182 iocp->ioc_count = 0; 7183 iocp->ioc_rval = 0; 7184 qreply(q, mp); 7185 return; 7186 default: 7187 break; 7188 } 7189 break; 7190 case M_IOCDATA: 7191 udp_wput_iocdata(q, mp); 7192 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7193 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7194 return; 7195 default: 7196 /* Unrecognized messages are passed through without change. */ 7197 break; 7198 } 7199 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7200 "udp_wput_other_end: q %p (%S)", q, "end"); 7201 ip_output(connp, mp, q, IP_WPUT); 7202 } 7203 7204 /* 7205 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7206 * messages. 7207 */ 7208 static void 7209 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7210 { 7211 mblk_t *mp1; 7212 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 7213 STRUCT_HANDLE(strbuf, sb); 7214 udp_t *udp = Q_TO_UDP(q); 7215 int error; 7216 uint_t addrlen; 7217 7218 /* Make sure it is one of ours. 
*/ 7219 switch (iocp->ioc_cmd) { 7220 case TI_GETMYNAME: 7221 case TI_GETPEERNAME: 7222 break; 7223 default: 7224 ip_output(udp->udp_connp, mp, q, IP_WPUT); 7225 return; 7226 } 7227 7228 switch (mi_copy_state(q, mp, &mp1)) { 7229 case -1: 7230 return; 7231 case MI_COPY_CASE(MI_COPY_IN, 1): 7232 break; 7233 case MI_COPY_CASE(MI_COPY_OUT, 1): 7234 /* 7235 * The address has been copied out, so now 7236 * copyout the strbuf. 7237 */ 7238 mi_copyout(q, mp); 7239 return; 7240 case MI_COPY_CASE(MI_COPY_OUT, 2): 7241 /* 7242 * The address and strbuf have been copied out. 7243 * We're done, so just acknowledge the original 7244 * M_IOCTL. 7245 */ 7246 mi_copy_done(q, mp, 0); 7247 return; 7248 default: 7249 /* 7250 * Something strange has happened, so acknowledge 7251 * the original M_IOCTL with an EPROTO error. 7252 */ 7253 mi_copy_done(q, mp, EPROTO); 7254 return; 7255 } 7256 7257 /* 7258 * Now we have the strbuf structure for TI_GETMYNAME 7259 * and TI_GETPEERNAME. Next we copyout the requested 7260 * address and then we'll copyout the strbuf. 7261 */ 7262 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 7263 addrlen = udp->udp_family == AF_INET ? 
sizeof (sin_t) : sizeof (sin6_t); 7264 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7265 mi_copy_done(q, mp, EINVAL); 7266 return; 7267 } 7268 7269 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7270 7271 if (mp1 == NULL) 7272 return; 7273 7274 rw_enter(&udp->udp_rwlock, RW_READER); 7275 switch (iocp->ioc_cmd) { 7276 case TI_GETMYNAME: 7277 error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen); 7278 break; 7279 case TI_GETPEERNAME: 7280 error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen); 7281 break; 7282 } 7283 rw_exit(&udp->udp_rwlock); 7284 7285 if (error != 0) { 7286 mi_copy_done(q, mp, error); 7287 } else { 7288 mp1->b_wptr += addrlen; 7289 STRUCT_FSET(sb, len, addrlen); 7290 7291 /* Copy out the address */ 7292 mi_copyout(q, mp); 7293 } 7294 } 7295 7296 static int 7297 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7298 udpattrs_t *udpattrs) 7299 { 7300 struct T_unitdata_req *udreqp; 7301 int is_absreq_failure; 7302 cred_t *cr; 7303 7304 ASSERT(((t_primp_t)mp->b_rptr)->type); 7305 7306 /* 7307 * All Solaris components should pass a db_credp 7308 * for this TPI message, hence we should ASSERT. 7309 * However, RPC (svc_clts_ksend) does this odd thing where it 7310 * passes the options from a T_UNITDATA_IND unchanged in a 7311 * T_UNITDATA_REQ. While that is the right thing to do for 7312 * some options, SCM_UCRED being the key one, this also makes it 7313 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 
7314 */ 7315 cr = msg_getcred(mp, NULL); 7316 if (cr == NULL) { 7317 cr = Q_TO_CONN(q)->conn_cred; 7318 } 7319 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7320 7321 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 7322 udreqp->OPT_offset, cr, &udp_opt_obj, 7323 udpattrs, &is_absreq_failure); 7324 7325 if (*errorp != 0) { 7326 /* 7327 * Note: No special action needed in this 7328 * module for "is_absreq_failure" 7329 */ 7330 return (-1); /* failure */ 7331 } 7332 ASSERT(is_absreq_failure == 0); 7333 return (0); /* success */ 7334 } 7335 7336 void 7337 udp_ddi_g_init(void) 7338 { 7339 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7340 udp_opt_obj.odb_opt_arr_cnt); 7341 7342 /* 7343 * We want to be informed each time a stack is created or 7344 * destroyed in the kernel, so we can maintain the 7345 * set of udp_stack_t's. 7346 */ 7347 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 7348 } 7349 7350 void 7351 udp_ddi_g_destroy(void) 7352 { 7353 netstack_unregister(NS_UDP); 7354 } 7355 7356 #define INET_NAME "ip" 7357 7358 /* 7359 * Initialize the UDP stack instance. 7360 */ 7361 static void * 7362 udp_stack_init(netstackid_t stackid, netstack_t *ns) 7363 { 7364 udp_stack_t *us; 7365 udpparam_t *pa; 7366 int i; 7367 int error = 0; 7368 major_t major; 7369 7370 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 7371 us->us_netstack = ns; 7372 7373 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 7374 us->us_epriv_ports[0] = 2049; 7375 us->us_epriv_ports[1] = 4045; 7376 7377 /* 7378 * The smallest anonymous port in the priviledged port range which UDP 7379 * looks for free port. Use in the option UDP_ANONPRIVBIND. 7380 */ 7381 us->us_min_anonpriv_port = 512; 7382 7383 us->us_bind_fanout_size = udp_bind_fanout_size; 7384 7385 /* Roundup variable that might have been modified in /etc/system */ 7386 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 7387 /* Not a power of two. 
Round up to nearest power of two */ 7388 for (i = 0; i < 31; i++) { 7389 if (us->us_bind_fanout_size < (1 << i)) 7390 break; 7391 } 7392 us->us_bind_fanout_size = 1 << i; 7393 } 7394 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 7395 sizeof (udp_fanout_t), KM_SLEEP); 7396 for (i = 0; i < us->us_bind_fanout_size; i++) { 7397 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7398 NULL); 7399 } 7400 7401 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 7402 7403 us->us_param_arr = pa; 7404 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 7405 7406 (void) udp_param_register(&us->us_nd, 7407 us->us_param_arr, A_CNT(udp_param_arr)); 7408 7409 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 7410 us->us_mibkp = udp_kstat_init(stackid); 7411 7412 major = mod_name_to_major(INET_NAME); 7413 error = ldi_ident_from_major(major, &us->us_ldi_ident); 7414 ASSERT(error == 0); 7415 return (us); 7416 } 7417 7418 /* 7419 * Free the UDP stack instance. 
7420 */ 7421 static void 7422 udp_stack_fini(netstackid_t stackid, void *arg) 7423 { 7424 udp_stack_t *us = (udp_stack_t *)arg; 7425 int i; 7426 7427 for (i = 0; i < us->us_bind_fanout_size; i++) { 7428 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 7429 } 7430 7431 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 7432 sizeof (udp_fanout_t)); 7433 7434 us->us_bind_fanout = NULL; 7435 7436 nd_free(&us->us_nd); 7437 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 7438 us->us_param_arr = NULL; 7439 7440 udp_kstat_fini(stackid, us->us_mibkp); 7441 us->us_mibkp = NULL; 7442 7443 udp_kstat2_fini(stackid, us->us_kstat); 7444 us->us_kstat = NULL; 7445 bzero(&us->us_statistics, sizeof (us->us_statistics)); 7446 7447 ldi_ident_release(us->us_ldi_ident); 7448 kmem_free(us, sizeof (*us)); 7449 } 7450 7451 static void * 7452 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 7453 { 7454 kstat_t *ksp; 7455 7456 udp_stat_t template = { 7457 { "udp_ip_send", KSTAT_DATA_UINT64 }, 7458 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 7459 { "udp_ire_null", KSTAT_DATA_UINT64 }, 7460 { "udp_drain", KSTAT_DATA_UINT64 }, 7461 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 7462 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 7463 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 7464 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7465 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7466 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7467 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7468 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7469 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7470 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7471 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7472 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7473 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7474 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7475 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7476 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7477 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7478 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 
7479 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7480 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7481 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 7482 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7483 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7484 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7485 #ifdef DEBUG 7486 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7487 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7488 #endif 7489 }; 7490 7491 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7492 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 7493 KSTAT_FLAG_VIRTUAL, stackid); 7494 7495 if (ksp == NULL) 7496 return (NULL); 7497 7498 bcopy(&template, us_statisticsp, sizeof (template)); 7499 ksp->ks_data = (void *)us_statisticsp; 7500 ksp->ks_private = (void *)(uintptr_t)stackid; 7501 7502 kstat_install(ksp); 7503 return (ksp); 7504 } 7505 7506 static void 7507 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 7508 { 7509 if (ksp != NULL) { 7510 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7511 kstat_delete_netstack(ksp, stackid); 7512 } 7513 } 7514 7515 static void * 7516 udp_kstat_init(netstackid_t stackid) 7517 { 7518 kstat_t *ksp; 7519 7520 udp_named_kstat_t template = { 7521 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 7522 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7523 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 7524 { "entrySize", KSTAT_DATA_INT32, 0 }, 7525 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7526 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7527 }; 7528 7529 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 7530 KSTAT_TYPE_NAMED, 7531 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 7532 7533 if (ksp == NULL || ksp->ks_data == NULL) 7534 return (NULL); 7535 7536 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7537 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7538 7539 bcopy(&template, ksp->ks_data, sizeof (template)); 7540 ksp->ks_update = udp_kstat_update; 7541 ksp->ks_private = (void 
*)(uintptr_t)stackid; 7542 7543 kstat_install(ksp); 7544 return (ksp); 7545 } 7546 7547 static void 7548 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 7549 { 7550 if (ksp != NULL) { 7551 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7552 kstat_delete_netstack(ksp, stackid); 7553 } 7554 } 7555 7556 static int 7557 udp_kstat_update(kstat_t *kp, int rw) 7558 { 7559 udp_named_kstat_t *udpkp; 7560 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 7561 netstack_t *ns; 7562 udp_stack_t *us; 7563 7564 if ((kp == NULL) || (kp->ks_data == NULL)) 7565 return (EIO); 7566 7567 if (rw == KSTAT_WRITE) 7568 return (EACCES); 7569 7570 ns = netstack_find_by_stackid(stackid); 7571 if (ns == NULL) 7572 return (-1); 7573 us = ns->netstack_udp; 7574 if (us == NULL) { 7575 netstack_rele(ns); 7576 return (-1); 7577 } 7578 udpkp = (udp_named_kstat_t *)kp->ks_data; 7579 7580 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 7581 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 7582 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 7583 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 7584 netstack_rele(ns); 7585 return (0); 7586 } 7587 7588 /* 7589 * Read-side synchronous stream info entry point, called as a 7590 * result of handling certain STREAMS ioctl operations. 
7591 */ 7592 static int 7593 udp_rinfop(queue_t *q, infod_t *dp) 7594 { 7595 mblk_t *mp; 7596 uint_t cmd = dp->d_cmd; 7597 int res = 0; 7598 int error = 0; 7599 udp_t *udp = Q_TO_UDP(q); 7600 struct stdata *stp = STREAM(q); 7601 7602 mutex_enter(&udp->udp_drain_lock); 7603 /* If shutdown on read has happened, return nothing */ 7604 mutex_enter(&stp->sd_lock); 7605 if (stp->sd_flag & STREOF) { 7606 mutex_exit(&stp->sd_lock); 7607 goto done; 7608 } 7609 mutex_exit(&stp->sd_lock); 7610 7611 if ((mp = udp->udp_rcv_list_head) == NULL) 7612 goto done; 7613 7614 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7615 7616 if (cmd & INFOD_COUNT) { 7617 /* 7618 * Return the number of messages. 7619 */ 7620 dp->d_count += udp->udp_rcv_msgcnt; 7621 res |= INFOD_COUNT; 7622 } 7623 if (cmd & INFOD_BYTES) { 7624 /* 7625 * Return size of all data messages. 7626 */ 7627 dp->d_bytes += udp->udp_rcv_cnt; 7628 res |= INFOD_BYTES; 7629 } 7630 if (cmd & INFOD_FIRSTBYTES) { 7631 /* 7632 * Return size of first data message. 7633 */ 7634 dp->d_bytes = msgdsize(mp); 7635 res |= INFOD_FIRSTBYTES; 7636 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7637 } 7638 if (cmd & INFOD_COPYOUT) { 7639 mblk_t *mp1 = mp->b_cont; 7640 int n; 7641 /* 7642 * Return data contents of first message. 7643 */ 7644 ASSERT(DB_TYPE(mp1) == M_DATA); 7645 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7646 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7647 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7648 UIO_READ, dp->d_uiop)) != 0) { 7649 goto done; 7650 } 7651 mp1 = mp1->b_cont; 7652 } 7653 res |= INFOD_COPYOUT; 7654 dp->d_cmd &= ~INFOD_COPYOUT; 7655 } 7656 done: 7657 mutex_exit(&udp->udp_drain_lock); 7658 7659 dp->d_res |= res; 7660 7661 return (error); 7662 } 7663 7664 /* 7665 * Read-side synchronous stream entry point. This is called as a result 7666 * of recv/read operation done at sockfs, and is guaranteed to execute 7667 * outside of the interrupt thread context. 
It returns a single datagram 7668 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7669 */ 7670 static int 7671 udp_rrw(queue_t *q, struiod_t *dp) 7672 { 7673 mblk_t *mp; 7674 udp_t *udp = Q_TO_UDP(q); 7675 udp_stack_t *us = udp->udp_us; 7676 7677 /* 7678 * Dequeue datagram from the head of the list and return 7679 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7680 * set/cleared depending on whether or not there's data 7681 * remaining in the list. 7682 */ 7683 mutex_enter(&udp->udp_drain_lock); 7684 if (!udp->udp_direct_sockfs) { 7685 mutex_exit(&udp->udp_drain_lock); 7686 UDP_STAT(us, udp_rrw_busy); 7687 return (EBUSY); 7688 } 7689 if ((mp = udp->udp_rcv_list_head) != NULL) { 7690 uint_t size = msgdsize(mp); 7691 7692 /* Last datagram in the list? */ 7693 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 7694 udp->udp_rcv_list_tail = NULL; 7695 mp->b_next = NULL; 7696 7697 udp->udp_rcv_cnt -= size; 7698 udp->udp_rcv_msgcnt--; 7699 UDP_STAT(us, udp_rrw_msgcnt); 7700 7701 /* No longer flow-controlling? */ 7702 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 7703 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 7704 udp->udp_drain_qfull = B_FALSE; 7705 } 7706 if (udp->udp_rcv_list_head == NULL) { 7707 /* 7708 * Either we just dequeued the last datagram or 7709 * we get here from sockfs and have nothing to 7710 * return; in this case clear RSLEEP. 7711 */ 7712 ASSERT(udp->udp_rcv_cnt == 0); 7713 ASSERT(udp->udp_rcv_msgcnt == 0); 7714 ASSERT(udp->udp_rcv_list_tail == NULL); 7715 STR_WAKEUP_CLEAR(STREAM(q)); 7716 } else { 7717 /* 7718 * More data follows; we need udp_rrw() to be 7719 * called in future to pick up the rest. 
7720 */ 7721 STR_WAKEUP_SET(STREAM(q)); 7722 } 7723 mutex_exit(&udp->udp_drain_lock); 7724 dp->d_mp = mp; 7725 return (0); 7726 } 7727 7728 /* 7729 * Enqueue a completely-built T_UNITDATA_IND message into the receive 7730 * list; this is typically executed within the interrupt thread context 7731 * and so we do things as quickly as possible. 7732 */ 7733 static void 7734 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 7735 { 7736 ASSERT(q == RD(q)); 7737 ASSERT(pkt_len == msgdsize(mp)); 7738 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 7739 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 7740 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 7741 7742 mutex_enter(&udp->udp_drain_lock); 7743 /* 7744 * Wake up and signal the receiving app; it is okay to do this 7745 * before enqueueing the mp because we are holding the drain lock. 7746 * One of the advantages of synchronous stream is the ability for 7747 * us to find out when the application performs a read on the 7748 * socket by way of udp_rrw() entry point being called. We need 7749 * to generate SIGPOLL/SIGIO for each received data in the case 7750 * of asynchronous socket just as in the strrput() case. However, 7751 * we only wake the application up when necessary, i.e. during the 7752 * first enqueue. When udp_rrw() is called, we send up a single 7753 * datagram upstream and call STR_WAKEUP_SET() again when there 7754 * are still data remaining in our receive queue. 7755 */ 7756 STR_WAKEUP_SENDSIG(STREAM(q), udp->udp_rcv_list_head); 7757 if (udp->udp_rcv_list_head == NULL) 7758 udp->udp_rcv_list_head = mp; 7759 else 7760 udp->udp_rcv_list_tail->b_next = mp; 7761 udp->udp_rcv_list_tail = mp; 7762 udp->udp_rcv_cnt += pkt_len; 7763 udp->udp_rcv_msgcnt++; 7764 7765 /* Need to flow-control? 
*/ 7766 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 7767 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 7768 udp->udp_drain_qfull = B_TRUE; 7769 7770 mutex_exit(&udp->udp_drain_lock); 7771 } 7772 7773 /* 7774 * Drain the contents of receive list to the module upstream; we do 7775 * this during close or when we fallback to the slow mode due to 7776 * sockmod being popped or a module being pushed on top of us. 7777 */ 7778 static void 7779 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 7780 { 7781 mblk_t *mp; 7782 udp_stack_t *us = udp->udp_us; 7783 7784 mutex_enter(&udp->udp_drain_lock); 7785 /* 7786 * There is no race with a concurrent udp_input() sending 7787 * up packets using putnext() after we have cleared the 7788 * udp_direct_sockfs flag but before we have completed 7789 * sending up the packets in udp_rcv_list, since we are 7790 * either a writer or we have quiesced the conn. 7791 */ 7792 udp->udp_direct_sockfs = B_FALSE; 7793 mutex_exit(&udp->udp_drain_lock); 7794 7795 if (udp->udp_rcv_list_head != NULL) 7796 UDP_STAT(us, udp_drain); 7797 7798 /* 7799 * Send up everything via putnext(); note here that we 7800 * don't need the udp_drain_lock to protect us since 7801 * nothing can enter udp_rrw() and that we currently 7802 * have exclusive access to this udp. 
7803 */ 7804 while ((mp = udp->udp_rcv_list_head) != NULL) { 7805 udp->udp_rcv_list_head = mp->b_next; 7806 mp->b_next = NULL; 7807 udp->udp_rcv_cnt -= msgdsize(mp); 7808 udp->udp_rcv_msgcnt--; 7809 if (closing) { 7810 freemsg(mp); 7811 } else { 7812 ASSERT(q == RD(q)); 7813 putnext(q, mp); 7814 } 7815 } 7816 ASSERT(udp->udp_rcv_cnt == 0); 7817 ASSERT(udp->udp_rcv_msgcnt == 0); 7818 ASSERT(udp->udp_rcv_list_head == NULL); 7819 udp->udp_rcv_list_tail = NULL; 7820 udp->udp_drain_qfull = B_FALSE; 7821 } 7822 7823 static size_t 7824 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7825 { 7826 udp_stack_t *us = udp->udp_us; 7827 7828 /* We add a bit of extra buffering */ 7829 size += size >> 1; 7830 if (size > us->us_max_buf) 7831 size = us->us_max_buf; 7832 7833 udp->udp_rcv_hiwat = size; 7834 return (size); 7835 } 7836 7837 /* 7838 * For the lower queue so that UDP can be a dummy mux. 7839 * Nobody should be sending 7840 * packets up this stream 7841 */ 7842 static void 7843 udp_lrput(queue_t *q, mblk_t *mp) 7844 { 7845 mblk_t *mp1; 7846 7847 switch (mp->b_datap->db_type) { 7848 case M_FLUSH: 7849 /* Turn around */ 7850 if (*mp->b_rptr & FLUSHW) { 7851 *mp->b_rptr &= ~FLUSHR; 7852 qreply(q, mp); 7853 return; 7854 } 7855 break; 7856 } 7857 /* Could receive messages that passed through ar_rput */ 7858 for (mp1 = mp; mp1; mp1 = mp1->b_cont) 7859 mp1->b_prev = mp1->b_next = NULL; 7860 freemsg(mp); 7861 } 7862 7863 /* 7864 * For the lower queue so that UDP can be a dummy mux. 7865 * Nobody should be sending packets down this stream. 7866 */ 7867 /* ARGSUSED */ 7868 void 7869 udp_lwput(queue_t *q, mblk_t *mp) 7870 { 7871 freemsg(mp); 7872 } 7873 7874 /* 7875 * Below routines for UDP socket module. 
7876 */ 7877 7878 static conn_t * 7879 udp_do_open(cred_t *credp, boolean_t isv6, int flags) 7880 { 7881 udp_t *udp; 7882 conn_t *connp; 7883 zoneid_t zoneid; 7884 netstack_t *ns; 7885 udp_stack_t *us; 7886 7887 ns = netstack_find_by_cred(credp); 7888 ASSERT(ns != NULL); 7889 us = ns->netstack_udp; 7890 ASSERT(us != NULL); 7891 7892 /* 7893 * For exclusive stacks we set the zoneid to zero 7894 * to make UDP operate as if in the global zone. 7895 */ 7896 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 7897 zoneid = GLOBAL_ZONEID; 7898 else 7899 zoneid = crgetzoneid(credp); 7900 7901 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 7902 7903 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 7904 if (connp == NULL) { 7905 netstack_rele(ns); 7906 return (NULL); 7907 } 7908 udp = connp->conn_udp; 7909 7910 /* 7911 * ipcl_conn_create did a netstack_hold. Undo the hold that was 7912 * done by netstack_find_by_cred() 7913 */ 7914 netstack_rele(ns); 7915 7916 rw_enter(&udp->udp_rwlock, RW_WRITER); 7917 ASSERT(connp->conn_ulp == IPPROTO_UDP); 7918 ASSERT(connp->conn_udp == udp); 7919 ASSERT(udp->udp_connp == connp); 7920 7921 /* Set the initial state of the stream and the privilege status. 
*/ 7922 udp->udp_state = TS_UNBND; 7923 if (isv6) { 7924 udp->udp_family = AF_INET6; 7925 udp->udp_ipversion = IPV6_VERSION; 7926 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 7927 udp->udp_ttl = us->us_ipv6_hoplimit; 7928 connp->conn_af_isv6 = B_TRUE; 7929 connp->conn_flags |= IPCL_ISV6; 7930 } else { 7931 udp->udp_family = AF_INET; 7932 udp->udp_ipversion = IPV4_VERSION; 7933 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 7934 udp->udp_ttl = us->us_ipv4_ttl; 7935 connp->conn_af_isv6 = B_FALSE; 7936 connp->conn_flags &= ~IPCL_ISV6; 7937 } 7938 7939 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 7940 udp->udp_pending_op = -1; 7941 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 7942 connp->conn_zoneid = zoneid; 7943 7944 udp->udp_open_time = lbolt64; 7945 udp->udp_open_pid = curproc->p_pid; 7946 7947 /* 7948 * If the caller has the process-wide flag set, then default to MAC 7949 * exempt mode. This allows read-down to unlabeled hosts. 7950 */ 7951 if (getpflags(NET_MAC_AWARE, credp) != 0) 7952 connp->conn_mac_exempt = B_TRUE; 7953 7954 connp->conn_ulp_labeled = is_system_labeled(); 7955 7956 udp->udp_us = us; 7957 7958 connp->conn_recv = udp_input; 7959 crhold(credp); 7960 connp->conn_cred = credp; 7961 7962 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 7963 7964 rw_exit(&udp->udp_rwlock); 7965 7966 return (connp); 7967 } 7968 7969 /* ARGSUSED */ 7970 sock_lower_handle_t 7971 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 7972 uint_t *smodep, int *errorp, int flags, cred_t *credp) 7973 { 7974 udp_t *udp = NULL; 7975 udp_stack_t *us; 7976 conn_t *connp; 7977 boolean_t isv6; 7978 7979 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 7980 (proto != 0 && proto != IPPROTO_UDP)) { 7981 *errorp = EPROTONOSUPPORT; 7982 return (NULL); 7983 } 7984 7985 if (family == AF_INET6) 7986 isv6 = B_TRUE; 7987 else 7988 isv6 = B_FALSE; 7989 7990 connp = udp_do_open(credp, isv6, flags); 7991 if (connp == 
NULL) { 7992 *errorp = ENOMEM; 7993 return (NULL); 7994 } 7995 7996 udp = connp->conn_udp; 7997 ASSERT(udp != NULL); 7998 us = udp->udp_us; 7999 ASSERT(us != NULL); 8000 8001 udp->udp_issocket = B_TRUE; 8002 connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET; 8003 8004 /* Set flow control */ 8005 rw_enter(&udp->udp_rwlock, RW_WRITER); 8006 (void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat); 8007 udp->udp_rcv_disply_hiwat = us->us_recv_hiwat; 8008 udp->udp_rcv_lowat = udp_mod_info.mi_lowat; 8009 udp->udp_xmit_hiwat = us->us_xmit_hiwat; 8010 udp->udp_xmit_lowat = us->us_xmit_lowat; 8011 8012 if (udp->udp_family == AF_INET6) { 8013 /* Build initial header template for transmit */ 8014 if ((*errorp = udp_build_hdrs(udp)) != 0) { 8015 rw_exit(&udp->udp_rwlock); 8016 ipcl_conn_destroy(connp); 8017 return (NULL); 8018 } 8019 } 8020 rw_exit(&udp->udp_rwlock); 8021 8022 connp->conn_flow_cntrld = B_FALSE; 8023 8024 ASSERT(us->us_ldi_ident != NULL); 8025 8026 if ((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) { 8027 ip1dbg(("udp_create: create of IP helper stream failed\n")); 8028 udp_do_close(connp); 8029 return (NULL); 8030 } 8031 8032 /* Set the send flow control */ 8033 connp->conn_wq->q_hiwat = us->us_xmit_hiwat; 8034 connp->conn_wq->q_lowat = us->us_xmit_lowat; 8035 8036 mutex_enter(&connp->conn_lock); 8037 connp->conn_state_flags &= ~CONN_INCIPIENT; 8038 mutex_exit(&connp->conn_lock); 8039 8040 *errorp = 0; 8041 *smodep = SM_ATOMIC; 8042 *sock_downcalls = &sock_udp_downcalls; 8043 return ((sock_lower_handle_t)connp); 8044 } 8045 8046 /* ARGSUSED */ 8047 void 8048 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 8049 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 8050 { 8051 conn_t *connp = (conn_t *)proto_handle; 8052 udp_t *udp = connp->conn_udp; 8053 udp_stack_t *us = udp->udp_us; 8054 struct sock_proto_props sopp; 8055 8056 /* All Solaris components should pass a cred for this operation. 
*/ 8057 ASSERT(cr != NULL); 8058 8059 connp->conn_upcalls = sock_upcalls; 8060 connp->conn_upper_handle = sock_handle; 8061 8062 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | 8063 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 8064 sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 8065 sopp.sopp_maxblk = INFPSZ; 8066 sopp.sopp_rxhiwat = udp->udp_rcv_hiwat; 8067 sopp.sopp_maxaddrlen = sizeof (sin6_t); 8068 sopp.sopp_maxpsz = 8069 (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 8070 UDP_MAXPACKET_IPV6; 8071 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 : 8072 udp_mod_info.mi_minpsz; 8073 8074 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 8075 &sopp); 8076 } 8077 8078 static void 8079 udp_do_close(conn_t *connp) 8080 { 8081 udp_t *udp; 8082 8083 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 8084 udp = connp->conn_udp; 8085 8086 udp_quiesce_conn(connp); 8087 ip_quiesce_conn(connp); 8088 8089 if (!IPCL_IS_NONSTR(connp)) { 8090 /* 8091 * Disable read-side synchronous stream 8092 * interface and drain any queued data. 8093 */ 8094 ASSERT(connp->conn_wq != NULL); 8095 udp_rcv_drain(connp->conn_wq, udp, B_TRUE); 8096 ASSERT(!udp->udp_direct_sockfs); 8097 8098 ASSERT(connp->conn_rq != NULL); 8099 qprocsoff(connp->conn_rq); 8100 } 8101 8102 ASSERT(udp->udp_rcv_cnt == 0); 8103 ASSERT(udp->udp_rcv_msgcnt == 0); 8104 ASSERT(udp->udp_rcv_list_head == NULL); 8105 ASSERT(udp->udp_rcv_list_tail == NULL); 8106 8107 udp_close_free(connp); 8108 8109 /* 8110 * Now we are truly single threaded on this stream, and can 8111 * delete the things hanging off the connp, and finally the connp. 8112 * We removed this connp from the fanout list, it cannot be 8113 * accessed thru the fanouts, and we already waited for the 8114 * conn_ref to drop to 0. We are already in close, so 8115 * there cannot be any other thread from the top. qprocsoff 8116 * has completed, and service has completed or won't run in 8117 * future. 
8118 */ 8119 ASSERT(connp->conn_ref == 1); 8120 if (!IPCL_IS_NONSTR(connp)) { 8121 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 8122 } else { 8123 ip_free_helper_stream(connp); 8124 } 8125 8126 connp->conn_ref--; 8127 ipcl_conn_destroy(connp); 8128 } 8129 8130 /* ARGSUSED */ 8131 int 8132 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 8133 { 8134 conn_t *connp = (conn_t *)proto_handle; 8135 8136 /* All Solaris components should pass a cred for this operation. */ 8137 ASSERT(cr != NULL); 8138 8139 udp_do_close(connp); 8140 return (0); 8141 } 8142 8143 static int 8144 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 8145 boolean_t bind_to_req_port_only) 8146 { 8147 sin_t *sin; 8148 sin6_t *sin6; 8149 sin6_t sin6addr; 8150 in_port_t port; /* Host byte order */ 8151 in_port_t requested_port; /* Host byte order */ 8152 int count; 8153 in6_addr_t v6src; 8154 int loopmax; 8155 udp_fanout_t *udpf; 8156 in_port_t lport; /* Network byte order */ 8157 zoneid_t zoneid; 8158 udp_t *udp; 8159 boolean_t is_inaddr_any; 8160 mlp_type_t addrtype, mlptype; 8161 udp_stack_t *us; 8162 int error = 0; 8163 mblk_t *mp = NULL; 8164 8165 udp = connp->conn_udp; 8166 us = udp->udp_us; 8167 8168 if (udp->udp_state != TS_UNBND) { 8169 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8170 "udp_bind: bad state, %u", udp->udp_state); 8171 return (-TOUTSTATE); 8172 } 8173 8174 switch (len) { 8175 case 0: 8176 if (udp->udp_family == AF_INET) { 8177 sin = (sin_t *)&sin6addr; 8178 *sin = sin_null; 8179 sin->sin_family = AF_INET; 8180 sin->sin_addr.s_addr = INADDR_ANY; 8181 udp->udp_ipversion = IPV4_VERSION; 8182 } else { 8183 ASSERT(udp->udp_family == AF_INET6); 8184 sin6 = (sin6_t *)&sin6addr; 8185 *sin6 = sin6_null; 8186 sin6->sin6_family = AF_INET6; 8187 V6_SET_ZERO(sin6->sin6_addr); 8188 udp->udp_ipversion = IPV6_VERSION; 8189 } 8190 port = 0; 8191 break; 8192 8193 case sizeof (sin_t): /* Complete IPv4 address */ 8194 sin = (sin_t *)sa; 
8195 8196 if (sin == NULL || !OK_32PTR((char *)sin)) 8197 return (EINVAL); 8198 8199 if (udp->udp_family != AF_INET || 8200 sin->sin_family != AF_INET) { 8201 return (EAFNOSUPPORT); 8202 } 8203 port = ntohs(sin->sin_port); 8204 break; 8205 8206 case sizeof (sin6_t): /* complete IPv6 address */ 8207 sin6 = (sin6_t *)sa; 8208 8209 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 8210 return (EINVAL); 8211 8212 if (udp->udp_family != AF_INET6 || 8213 sin6->sin6_family != AF_INET6) { 8214 return (EAFNOSUPPORT); 8215 } 8216 port = ntohs(sin6->sin6_port); 8217 break; 8218 8219 default: /* Invalid request */ 8220 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8221 "udp_bind: bad ADDR_length length %u", len); 8222 return (-TBADADDR); 8223 } 8224 8225 requested_port = port; 8226 8227 if (requested_port == 0 || !bind_to_req_port_only) 8228 bind_to_req_port_only = B_FALSE; 8229 else /* T_BIND_REQ and requested_port != 0 */ 8230 bind_to_req_port_only = B_TRUE; 8231 8232 if (requested_port == 0) { 8233 /* 8234 * If the application passed in zero for the port number, it 8235 * doesn't care which port number we bind to. Get one in the 8236 * valid range. 8237 */ 8238 if (udp->udp_anon_priv_bind) { 8239 port = udp_get_next_priv_port(udp); 8240 } else { 8241 port = udp_update_next_port(udp, 8242 us->us_next_port_to_try, B_TRUE); 8243 } 8244 } else { 8245 /* 8246 * If the port is in the well-known privileged range, 8247 * make sure the caller was privileged. 8248 */ 8249 int i; 8250 boolean_t priv = B_FALSE; 8251 8252 if (port < us->us_smallest_nonpriv_port) { 8253 priv = B_TRUE; 8254 } else { 8255 for (i = 0; i < us->us_num_epriv_ports; i++) { 8256 if (port == us->us_epriv_ports[i]) { 8257 priv = B_TRUE; 8258 break; 8259 } 8260 } 8261 } 8262 8263 if (priv) { 8264 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 8265 return (-TACCES); 8266 } 8267 } 8268 8269 if (port == 0) 8270 return (-TNOADDR); 8271 8272 /* 8273 * The state must be TS_UNBND. 
TPI mandates that users must send 8274 * TPI primitives only 1 at a time and wait for the response before 8275 * sending the next primitive. 8276 */ 8277 rw_enter(&udp->udp_rwlock, RW_WRITER); 8278 if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) { 8279 rw_exit(&udp->udp_rwlock); 8280 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8281 "udp_bind: bad state, %u", udp->udp_state); 8282 return (-TOUTSTATE); 8283 } 8284 /* XXX how to remove the T_BIND_REQ? Should set it before calling */ 8285 udp->udp_pending_op = T_BIND_REQ; 8286 /* 8287 * Copy the source address into our udp structure. This address 8288 * may still be zero; if so, IP will fill in the correct address 8289 * each time an outbound packet is passed to it. Since the udp is 8290 * not yet in the bind hash list, we don't grab the uf_lock to 8291 * change udp_ipversion 8292 */ 8293 if (udp->udp_family == AF_INET) { 8294 ASSERT(sin != NULL); 8295 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8296 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8297 udp->udp_ip_snd_options_len; 8298 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 8299 } else { 8300 ASSERT(sin6 != NULL); 8301 v6src = sin6->sin6_addr; 8302 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 8303 /* 8304 * no need to hold the uf_lock to set the udp_ipversion 8305 * since we are not yet in the fanout list 8306 */ 8307 udp->udp_ipversion = IPV4_VERSION; 8308 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 8309 UDPH_SIZE + udp->udp_ip_snd_options_len; 8310 } else { 8311 udp->udp_ipversion = IPV6_VERSION; 8312 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8313 } 8314 } 8315 8316 /* 8317 * If udp_reuseaddr is not set, then we have to make sure that 8318 * the IP address and port number the application requested 8319 * (or we selected for the application) is not being used by 8320 * another stream. If another stream is already using the 8321 * requested IP address and port, the behavior depends on 8322 * "bind_to_req_port_only". 
If set the bind fails; otherwise we 8323 * search for any an unused port to bind to the the stream. 8324 * 8325 * As per the BSD semantics, as modified by the Deering multicast 8326 * changes, if udp_reuseaddr is set, then we allow multiple binds 8327 * to the same port independent of the local IP address. 8328 * 8329 * This is slightly different than in SunOS 4.X which did not 8330 * support IP multicast. Note that the change implemented by the 8331 * Deering multicast code effects all binds - not only binding 8332 * to IP multicast addresses. 8333 * 8334 * Note that when binding to port zero we ignore SO_REUSEADDR in 8335 * order to guarantee a unique port. 8336 */ 8337 8338 count = 0; 8339 if (udp->udp_anon_priv_bind) { 8340 /* 8341 * loopmax = (IPPORT_RESERVED-1) - 8342 * us->us_min_anonpriv_port + 1 8343 */ 8344 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 8345 } else { 8346 loopmax = us->us_largest_anon_port - 8347 us->us_smallest_anon_port + 1; 8348 } 8349 8350 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 8351 zoneid = connp->conn_zoneid; 8352 8353 for (;;) { 8354 udp_t *udp1; 8355 boolean_t found_exclbind = B_FALSE; 8356 8357 /* 8358 * Walk through the list of udp streams bound to 8359 * requested port with the same IP address. 8360 */ 8361 lport = htons(port); 8362 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 8363 us->us_bind_fanout_size)]; 8364 mutex_enter(&udpf->uf_lock); 8365 for (udp1 = udpf->uf_udp; udp1 != NULL; 8366 udp1 = udp1->udp_bind_hash) { 8367 if (lport != udp1->udp_port) 8368 continue; 8369 8370 /* 8371 * On a labeled system, we must treat bindings to ports 8372 * on shared IP addresses by sockets with MAC exemption 8373 * privilege as being in all zones, as there's 8374 * otherwise no way to identify the right receiver. 
8375 */ 8376 if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) || 8377 IPCL_ZONE_MATCH(connp, 8378 udp1->udp_connp->conn_zoneid)) && 8379 !connp->conn_mac_exempt && \ 8380 !udp1->udp_connp->conn_mac_exempt) 8381 continue; 8382 8383 /* 8384 * If UDP_EXCLBIND is set for either the bound or 8385 * binding endpoint, the semantics of bind 8386 * is changed according to the following chart. 8387 * 8388 * spec = specified address (v4 or v6) 8389 * unspec = unspecified address (v4 or v6) 8390 * A = specified addresses are different for endpoints 8391 * 8392 * bound bind to allowed? 8393 * ------------------------------------- 8394 * unspec unspec no 8395 * unspec spec no 8396 * spec unspec no 8397 * spec spec yes if A 8398 * 8399 * For labeled systems, SO_MAC_EXEMPT behaves the same 8400 * as UDP_EXCLBIND, except that zoneid is ignored. 8401 */ 8402 if (udp1->udp_exclbind || udp->udp_exclbind || 8403 udp1->udp_connp->conn_mac_exempt || 8404 connp->conn_mac_exempt) { 8405 if (V6_OR_V4_INADDR_ANY( 8406 udp1->udp_bound_v6src) || 8407 is_inaddr_any || 8408 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 8409 &v6src)) { 8410 found_exclbind = B_TRUE; 8411 break; 8412 } 8413 continue; 8414 } 8415 8416 /* 8417 * Check ipversion to allow IPv4 and IPv6 sockets to 8418 * have disjoint port number spaces. 8419 */ 8420 if (udp->udp_ipversion != udp1->udp_ipversion) { 8421 8422 /* 8423 * On the first time through the loop, if the 8424 * the user intentionally specified a 8425 * particular port number, then ignore any 8426 * bindings of the other protocol that may 8427 * conflict. This allows the user to bind IPv6 8428 * alone and get both v4 and v6, or bind both 8429 * both and get each seperately. On subsequent 8430 * times through the loop, we're checking a 8431 * port that we chose (not the user) and thus 8432 * we do not allow casual duplicate bindings. 8433 */ 8434 if (count == 0 && requested_port != 0) 8435 continue; 8436 } 8437 8438 /* 8439 * No difference depending on SO_REUSEADDR. 
8440 * 8441 * If existing port is bound to a 8442 * non-wildcard IP address and 8443 * the requesting stream is bound to 8444 * a distinct different IP addresses 8445 * (non-wildcard, also), keep going. 8446 */ 8447 if (!is_inaddr_any && 8448 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 8449 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 8450 &v6src)) { 8451 continue; 8452 } 8453 break; 8454 } 8455 8456 if (!found_exclbind && 8457 (udp->udp_reuseaddr && requested_port != 0)) { 8458 break; 8459 } 8460 8461 if (udp1 == NULL) { 8462 /* 8463 * No other stream has this IP address 8464 * and port number. We can use it. 8465 */ 8466 break; 8467 } 8468 mutex_exit(&udpf->uf_lock); 8469 if (bind_to_req_port_only) { 8470 /* 8471 * We get here only when requested port 8472 * is bound (and only first of the for() 8473 * loop iteration). 8474 * 8475 * The semantics of this bind request 8476 * require it to fail so we return from 8477 * the routine (and exit the loop). 8478 * 8479 */ 8480 udp->udp_pending_op = -1; 8481 rw_exit(&udp->udp_rwlock); 8482 return (-TADDRBUSY); 8483 } 8484 8485 if (udp->udp_anon_priv_bind) { 8486 port = udp_get_next_priv_port(udp); 8487 } else { 8488 if ((count == 0) && (requested_port != 0)) { 8489 /* 8490 * If the application wants us to find 8491 * a port, get one to start with. Set 8492 * requested_port to 0, so that we will 8493 * update us->us_next_port_to_try below. 8494 */ 8495 port = udp_update_next_port(udp, 8496 us->us_next_port_to_try, B_TRUE); 8497 requested_port = 0; 8498 } else { 8499 port = udp_update_next_port(udp, port + 1, 8500 B_FALSE); 8501 } 8502 } 8503 8504 if (port == 0 || ++count >= loopmax) { 8505 /* 8506 * We've tried every possible port number and 8507 * there are none available, so send an error 8508 * to the user. 8509 */ 8510 udp->udp_pending_op = -1; 8511 rw_exit(&udp->udp_rwlock); 8512 return (-TNOADDR); 8513 } 8514 } 8515 8516 /* 8517 * Copy the source address into our udp structure. 
This address 8518 * may still be zero; if so, ip will fill in the correct address 8519 * each time an outbound packet is passed to it. 8520 * If we are binding to a broadcast or multicast address then 8521 * udp_post_ip_bind_connect will clear the source address 8522 * when udp_do_bind success. 8523 */ 8524 udp->udp_v6src = udp->udp_bound_v6src = v6src; 8525 udp->udp_port = lport; 8526 /* 8527 * Now reset the the next anonymous port if the application requested 8528 * an anonymous port, or we handed out the next anonymous port. 8529 */ 8530 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 8531 us->us_next_port_to_try = port + 1; 8532 } 8533 8534 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */ 8535 if (udp->udp_family == AF_INET) { 8536 sin->sin_port = udp->udp_port; 8537 } else { 8538 sin6->sin6_port = udp->udp_port; 8539 /* Rebuild the header template */ 8540 error = udp_build_hdrs(udp); 8541 if (error != 0) { 8542 udp->udp_pending_op = -1; 8543 rw_exit(&udp->udp_rwlock); 8544 mutex_exit(&udpf->uf_lock); 8545 return (error); 8546 } 8547 } 8548 udp->udp_state = TS_IDLE; 8549 udp_bind_hash_insert(udpf, udp); 8550 mutex_exit(&udpf->uf_lock); 8551 rw_exit(&udp->udp_rwlock); 8552 8553 if (cl_inet_bind) { 8554 /* 8555 * Running in cluster mode - register bind information 8556 */ 8557 if (udp->udp_ipversion == IPV4_VERSION) { 8558 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8559 IPPROTO_UDP, AF_INET, 8560 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8561 (in_port_t)udp->udp_port, NULL); 8562 } else { 8563 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8564 IPPROTO_UDP, AF_INET6, 8565 (uint8_t *)&(udp->udp_v6src), 8566 (in_port_t)udp->udp_port, NULL); 8567 } 8568 } 8569 8570 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 8571 if (is_system_labeled() && (!connp->conn_anon_port || 8572 connp->conn_anon_mlp)) { 8573 uint16_t mlpport; 8574 zone_t *zone; 8575 8576 zone = crgetzone(cr); 8577 connp->conn_mlp_type = 
udp->udp_recvucred ? mlptBoth : 8578 mlptSingle; 8579 addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION, 8580 &v6src, us->us_netstack->netstack_ip); 8581 if (addrtype == mlptSingle) { 8582 rw_enter(&udp->udp_rwlock, RW_WRITER); 8583 udp->udp_pending_op = -1; 8584 rw_exit(&udp->udp_rwlock); 8585 connp->conn_anon_port = B_FALSE; 8586 connp->conn_mlp_type = mlptSingle; 8587 return (-TNOADDR); 8588 } 8589 mlpport = connp->conn_anon_port ? PMAPPORT : port; 8590 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 8591 addrtype); 8592 if (mlptype != mlptSingle && 8593 (connp->conn_mlp_type == mlptSingle || 8594 secpolicy_net_bindmlp(cr) != 0)) { 8595 if (udp->udp_debug) { 8596 (void) strlog(UDP_MOD_ID, 0, 1, 8597 SL_ERROR|SL_TRACE, 8598 "udp_bind: no priv for multilevel port %d", 8599 mlpport); 8600 } 8601 rw_enter(&udp->udp_rwlock, RW_WRITER); 8602 udp->udp_pending_op = -1; 8603 rw_exit(&udp->udp_rwlock); 8604 connp->conn_anon_port = B_FALSE; 8605 connp->conn_mlp_type = mlptSingle; 8606 return (-TACCES); 8607 } 8608 8609 /* 8610 * If we're specifically binding a shared IP address and the 8611 * port is MLP on shared addresses, then check to see if this 8612 * zone actually owns the MLP. Reject if not. 8613 */ 8614 if (mlptype == mlptShared && addrtype == mlptShared) { 8615 /* 8616 * No need to handle exclusive-stack zones since 8617 * ALL_ZONES only applies to the shared stack. 
8618 */ 8619 zoneid_t mlpzone; 8620 8621 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 8622 htons(mlpport)); 8623 if (connp->conn_zoneid != mlpzone) { 8624 if (udp->udp_debug) { 8625 (void) strlog(UDP_MOD_ID, 0, 1, 8626 SL_ERROR|SL_TRACE, 8627 "udp_bind: attempt to bind port " 8628 "%d on shared addr in zone %d " 8629 "(should be %d)", 8630 mlpport, connp->conn_zoneid, 8631 mlpzone); 8632 } 8633 rw_enter(&udp->udp_rwlock, RW_WRITER); 8634 udp->udp_pending_op = -1; 8635 rw_exit(&udp->udp_rwlock); 8636 connp->conn_anon_port = B_FALSE; 8637 connp->conn_mlp_type = mlptSingle; 8638 return (-TACCES); 8639 } 8640 } 8641 if (connp->conn_anon_port) { 8642 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 8643 port, B_TRUE); 8644 if (error != 0) { 8645 if (udp->udp_debug) { 8646 (void) strlog(UDP_MOD_ID, 0, 1, 8647 SL_ERROR|SL_TRACE, 8648 "udp_bind: cannot establish anon " 8649 "MLP for port %d", port); 8650 } 8651 rw_enter(&udp->udp_rwlock, RW_WRITER); 8652 udp->udp_pending_op = -1; 8653 rw_exit(&udp->udp_rwlock); 8654 connp->conn_anon_port = B_FALSE; 8655 connp->conn_mlp_type = mlptSingle; 8656 return (-TACCES); 8657 } 8658 } 8659 connp->conn_mlp_type = mlptype; 8660 } 8661 8662 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8663 /* 8664 * Append a request for an IRE if udp_v6src not 8665 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 
8666 */ 8667 mp = allocb(sizeof (ire_t), BPRI_HI); 8668 if (!mp) { 8669 rw_enter(&udp->udp_rwlock, RW_WRITER); 8670 udp->udp_pending_op = -1; 8671 rw_exit(&udp->udp_rwlock); 8672 return (ENOMEM); 8673 } 8674 mp->b_wptr += sizeof (ire_t); 8675 mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8676 } 8677 if (udp->udp_family == AF_INET6) { 8678 ASSERT(udp->udp_connp->conn_af_isv6); 8679 error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP, 8680 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 8681 } else { 8682 ASSERT(!udp->udp_connp->conn_af_isv6); 8683 error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP, 8684 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, 8685 B_TRUE); 8686 } 8687 8688 (void) udp_post_ip_bind_connect(udp, mp, error); 8689 return (error); 8690 } 8691 8692 int 8693 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 8694 socklen_t len, cred_t *cr) 8695 { 8696 int error; 8697 conn_t *connp; 8698 8699 /* All Solaris components should pass a cred for this operation. */ 8700 ASSERT(cr != NULL); 8701 8702 connp = (conn_t *)proto_handle; 8703 8704 if (sa == NULL) 8705 error = udp_do_unbind(connp); 8706 else 8707 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 8708 8709 if (error < 0) { 8710 if (error == -TOUTSTATE) 8711 error = EINVAL; 8712 else 8713 error = proto_tlitosyserr(-error); 8714 } 8715 8716 return (error); 8717 } 8718 8719 static int 8720 udp_implicit_bind(conn_t *connp, cred_t *cr) 8721 { 8722 int error; 8723 8724 /* All Solaris components should pass a cred for this operation. */ 8725 ASSERT(cr != NULL); 8726 8727 error = udp_do_bind(connp, NULL, 0, cr, B_FALSE); 8728 return ((error < 0) ? proto_tlitosyserr(-error) : error); 8729 } 8730 8731 /* 8732 * This routine removes a port number association from a stream. It 8733 * is called by udp_unbind and udp_tpi_unbind. 
8734 */ 8735 static int 8736 udp_do_unbind(conn_t *connp) 8737 { 8738 udp_t *udp = connp->conn_udp; 8739 udp_fanout_t *udpf; 8740 udp_stack_t *us = udp->udp_us; 8741 8742 if (cl_inet_unbind != NULL) { 8743 /* 8744 * Running in cluster mode - register unbind information 8745 */ 8746 if (udp->udp_ipversion == IPV4_VERSION) { 8747 (*cl_inet_unbind)( 8748 connp->conn_netstack->netstack_stackid, 8749 IPPROTO_UDP, AF_INET, 8750 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8751 (in_port_t)udp->udp_port, NULL); 8752 } else { 8753 (*cl_inet_unbind)( 8754 connp->conn_netstack->netstack_stackid, 8755 IPPROTO_UDP, AF_INET6, 8756 (uint8_t *)&(udp->udp_v6src), 8757 (in_port_t)udp->udp_port, NULL); 8758 } 8759 } 8760 8761 rw_enter(&udp->udp_rwlock, RW_WRITER); 8762 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8763 rw_exit(&udp->udp_rwlock); 8764 return (-TOUTSTATE); 8765 } 8766 udp->udp_pending_op = T_UNBIND_REQ; 8767 rw_exit(&udp->udp_rwlock); 8768 8769 /* 8770 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 8771 * and therefore ip_unbind must never return NULL. 8772 */ 8773 ip_unbind(connp); 8774 8775 /* 8776 * Once we're unbound from IP, the pending operation may be cleared 8777 * here. 
8778 */ 8779 rw_enter(&udp->udp_rwlock, RW_WRITER); 8780 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8781 us->us_bind_fanout_size)]; 8782 8783 mutex_enter(&udpf->uf_lock); 8784 udp_bind_hash_remove(udp, B_TRUE); 8785 V6_SET_ZERO(udp->udp_v6src); 8786 V6_SET_ZERO(udp->udp_bound_v6src); 8787 udp->udp_port = 0; 8788 mutex_exit(&udpf->uf_lock); 8789 8790 udp->udp_pending_op = -1; 8791 udp->udp_state = TS_UNBND; 8792 if (udp->udp_family == AF_INET6) 8793 (void) udp_build_hdrs(udp); 8794 rw_exit(&udp->udp_rwlock); 8795 8796 return (0); 8797 } 8798 8799 static int 8800 udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error) 8801 { 8802 ire_t *ire; 8803 udp_fanout_t *udpf; 8804 udp_stack_t *us = udp->udp_us; 8805 8806 ASSERT(udp->udp_pending_op != -1); 8807 rw_enter(&udp->udp_rwlock, RW_WRITER); 8808 if (error == 0) { 8809 /* For udp_do_connect() success */ 8810 /* udp_do_bind() success will do nothing in here */ 8811 /* 8812 * If a broadcast/multicast address was bound, set 8813 * the source address to 0. 8814 * This ensures no datagrams with broadcast address 8815 * as source address are emitted (which would violate 8816 * RFC1122 - Hosts requirements) 8817 * 8818 * Note that when connecting the returned IRE is 8819 * for the destination address and we only perform 8820 * the broadcast check for the source address (it 8821 * is OK to connect to a broadcast/multicast address.) 8822 */ 8823 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 8824 ire = (ire_t *)ire_mp->b_rptr; 8825 8826 /* 8827 * Note: we get IRE_BROADCAST for IPv6 to "mark" a 8828 * multicast local address. 8829 */ 8830 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8831 us->us_bind_fanout_size)]; 8832 if (ire->ire_type == IRE_BROADCAST && 8833 udp->udp_state != TS_DATA_XFER) { 8834 ASSERT(udp->udp_pending_op == T_BIND_REQ || 8835 udp->udp_pending_op == O_T_BIND_REQ); 8836 /* 8837 * This was just a local bind to a broadcast 8838 * addr. 
8839 */ 8840 mutex_enter(&udpf->uf_lock); 8841 V6_SET_ZERO(udp->udp_v6src); 8842 mutex_exit(&udpf->uf_lock); 8843 if (udp->udp_family == AF_INET6) 8844 (void) udp_build_hdrs(udp); 8845 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8846 if (udp->udp_family == AF_INET6) 8847 (void) udp_build_hdrs(udp); 8848 } 8849 } 8850 } else { 8851 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8852 us->us_bind_fanout_size)]; 8853 mutex_enter(&udpf->uf_lock); 8854 8855 if (udp->udp_state == TS_DATA_XFER) { 8856 /* Connect failed */ 8857 /* Revert back to the bound source */ 8858 udp->udp_v6src = udp->udp_bound_v6src; 8859 udp->udp_state = TS_IDLE; 8860 } else { 8861 /* For udp_do_bind() failed */ 8862 V6_SET_ZERO(udp->udp_v6src); 8863 V6_SET_ZERO(udp->udp_bound_v6src); 8864 udp->udp_state = TS_UNBND; 8865 udp_bind_hash_remove(udp, B_TRUE); 8866 udp->udp_port = 0; 8867 } 8868 mutex_exit(&udpf->uf_lock); 8869 if (udp->udp_family == AF_INET6) 8870 (void) udp_build_hdrs(udp); 8871 } 8872 udp->udp_pending_op = -1; 8873 rw_exit(&udp->udp_rwlock); 8874 if (ire_mp != NULL) 8875 freeb(ire_mp); 8876 return (error); 8877 } 8878 8879 /* 8880 * It associates a default destination address with the stream. 
8881 */ 8882 static int 8883 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 8884 cred_t *cr) 8885 { 8886 sin6_t *sin6; 8887 sin_t *sin; 8888 in6_addr_t v6dst; 8889 ipaddr_t v4dst; 8890 uint16_t dstport; 8891 uint32_t flowinfo; 8892 mblk_t *ire_mp; 8893 udp_fanout_t *udpf; 8894 udp_t *udp, *udp1; 8895 ushort_t ipversion; 8896 udp_stack_t *us; 8897 int error; 8898 8899 udp = connp->conn_udp; 8900 us = udp->udp_us; 8901 8902 /* 8903 * Address has been verified by the caller 8904 */ 8905 switch (len) { 8906 default: 8907 /* 8908 * Should never happen 8909 */ 8910 return (EINVAL); 8911 8912 case sizeof (sin_t): 8913 sin = (sin_t *)sa; 8914 v4dst = sin->sin_addr.s_addr; 8915 dstport = sin->sin_port; 8916 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8917 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8918 ipversion = IPV4_VERSION; 8919 break; 8920 8921 case sizeof (sin6_t): 8922 sin6 = (sin6_t *)sa; 8923 v6dst = sin6->sin6_addr; 8924 dstport = sin6->sin6_port; 8925 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 8926 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 8927 ipversion = IPV4_VERSION; 8928 flowinfo = 0; 8929 } else { 8930 ipversion = IPV6_VERSION; 8931 flowinfo = sin6->sin6_flowinfo; 8932 } 8933 break; 8934 } 8935 8936 if (dstport == 0) 8937 return (-TBADADDR); 8938 8939 rw_enter(&udp->udp_rwlock, RW_WRITER); 8940 8941 /* 8942 * This UDP must have bound to a port already before doing a connect. 8943 * TPI mandates that users must send TPI primitives only 1 at a time 8944 * and wait for the response before sending the next primitive. 
8945 */ 8946 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8947 rw_exit(&udp->udp_rwlock); 8948 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8949 "udp_connect: bad state, %u", udp->udp_state); 8950 return (-TOUTSTATE); 8951 } 8952 udp->udp_pending_op = T_CONN_REQ; 8953 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 8954 8955 if (ipversion == IPV4_VERSION) { 8956 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8957 udp->udp_ip_snd_options_len; 8958 } else { 8959 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8960 } 8961 8962 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8963 us->us_bind_fanout_size)]; 8964 8965 mutex_enter(&udpf->uf_lock); 8966 if (udp->udp_state == TS_DATA_XFER) { 8967 /* Already connected - clear out state */ 8968 udp->udp_v6src = udp->udp_bound_v6src; 8969 udp->udp_state = TS_IDLE; 8970 } 8971 8972 /* 8973 * Create a default IP header with no IP options. 8974 */ 8975 udp->udp_dstport = dstport; 8976 udp->udp_ipversion = ipversion; 8977 if (ipversion == IPV4_VERSION) { 8978 /* 8979 * Interpret a zero destination to mean loopback. 8980 * Update the T_CONN_REQ (sin/sin6) since it is used to 8981 * generate the T_CONN_CON. 8982 */ 8983 if (v4dst == INADDR_ANY) { 8984 v4dst = htonl(INADDR_LOOPBACK); 8985 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8986 if (udp->udp_family == AF_INET) { 8987 sin->sin_addr.s_addr = v4dst; 8988 } else { 8989 sin6->sin6_addr = v6dst; 8990 } 8991 } 8992 udp->udp_v6dst = v6dst; 8993 udp->udp_flowinfo = 0; 8994 8995 /* 8996 * If the destination address is multicast and 8997 * an outgoing multicast interface has been set, 8998 * use the address of that interface as our 8999 * source address if no source address has been set. 
9000 */ 9001 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 9002 CLASSD(v4dst) && 9003 udp->udp_multicast_if_addr != INADDR_ANY) { 9004 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 9005 &udp->udp_v6src); 9006 } 9007 } else { 9008 ASSERT(udp->udp_ipversion == IPV6_VERSION); 9009 /* 9010 * Interpret a zero destination to mean loopback. 9011 * Update the T_CONN_REQ (sin/sin6) since it is used to 9012 * generate the T_CONN_CON. 9013 */ 9014 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 9015 v6dst = ipv6_loopback; 9016 sin6->sin6_addr = v6dst; 9017 } 9018 udp->udp_v6dst = v6dst; 9019 udp->udp_flowinfo = flowinfo; 9020 /* 9021 * If the destination address is multicast and 9022 * an outgoing multicast interface has been set, 9023 * then the ip bind logic will pick the correct source 9024 * address (i.e. matching the outgoing multicast interface). 9025 */ 9026 } 9027 9028 /* 9029 * Verify that the src/port/dst/port is unique for all 9030 * connections in TS_DATA_XFER 9031 */ 9032 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 9033 if (udp1->udp_state != TS_DATA_XFER) 9034 continue; 9035 if (udp->udp_port != udp1->udp_port || 9036 udp->udp_ipversion != udp1->udp_ipversion || 9037 dstport != udp1->udp_dstport || 9038 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 9039 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) || 9040 !(IPCL_ZONE_MATCH(udp->udp_connp, 9041 udp1->udp_connp->conn_zoneid) || 9042 IPCL_ZONE_MATCH(udp1->udp_connp, 9043 udp->udp_connp->conn_zoneid))) 9044 continue; 9045 mutex_exit(&udpf->uf_lock); 9046 udp->udp_pending_op = -1; 9047 rw_exit(&udp->udp_rwlock); 9048 return (-TBADADDR); 9049 } 9050 9051 if (cl_inet_connect2 != NULL) { 9052 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error); 9053 if (error != 0) { 9054 mutex_exit(&udpf->uf_lock); 9055 udp->udp_pending_op = -1; 9056 rw_exit(&udp->udp_rwlock); 9057 return (-TBADADDR); 9058 } 9059 } 9060 9061 udp->udp_state = TS_DATA_XFER; 9062 
mutex_exit(&udpf->uf_lock); 9063 9064 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 9065 if (ire_mp == NULL) { 9066 mutex_enter(&udpf->uf_lock); 9067 udp->udp_state = TS_IDLE; 9068 udp->udp_pending_op = -1; 9069 mutex_exit(&udpf->uf_lock); 9070 rw_exit(&udp->udp_rwlock); 9071 return (ENOMEM); 9072 } 9073 9074 rw_exit(&udp->udp_rwlock); 9075 9076 ire_mp->b_wptr += sizeof (ire_t); 9077 ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE; 9078 9079 if (udp->udp_family == AF_INET) { 9080 error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP, 9081 &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port, 9082 V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport, 9083 B_TRUE, B_TRUE, cr); 9084 } else { 9085 error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP, 9086 &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst, 9087 &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE, cr); 9088 } 9089 9090 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 9091 } 9092 9093 /* ARGSUSED */ 9094 static int 9095 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 9096 socklen_t len, sock_connid_t *id, cred_t *cr) 9097 { 9098 conn_t *connp = (conn_t *)proto_handle; 9099 udp_t *udp = connp->conn_udp; 9100 int error; 9101 boolean_t did_bind = B_FALSE; 9102 9103 /* All Solaris components should pass a cred for this operation. */ 9104 ASSERT(cr != NULL); 9105 9106 if (sa == NULL) { 9107 /* 9108 * Disconnect 9109 * Make sure we are connected 9110 */ 9111 if (udp->udp_state != TS_DATA_XFER) 9112 return (EINVAL); 9113 9114 error = udp_disconnect(connp); 9115 return (error); 9116 } 9117 9118 error = proto_verify_ip_addr(udp->udp_family, sa, len); 9119 if (error != 0) 9120 goto done; 9121 9122 /* do an implicit bind if necessary */ 9123 if (udp->udp_state == TS_UNBND) { 9124 error = udp_implicit_bind(connp, cr); 9125 /* 9126 * We could be racing with an actual bind, in which case 9127 * we would see EPROTO. We cross our fingers and try 9128 * to connect. 
9129 */ 9130 if (!(error == 0 || error == EPROTO)) 9131 goto done; 9132 did_bind = B_TRUE; 9133 } 9134 /* 9135 * set SO_DGRAM_ERRIND 9136 */ 9137 udp->udp_dgram_errind = B_TRUE; 9138 9139 error = udp_do_connect(connp, sa, len, cr); 9140 9141 if (error != 0 && did_bind) { 9142 int unbind_err; 9143 9144 unbind_err = udp_do_unbind(connp); 9145 ASSERT(unbind_err == 0); 9146 } 9147 9148 if (error == 0) { 9149 *id = 0; 9150 (*connp->conn_upcalls->su_connected) 9151 (connp->conn_upper_handle, 0, NULL, -1); 9152 } else if (error < 0) { 9153 error = proto_tlitosyserr(-error); 9154 } 9155 9156 done: 9157 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 9158 /* 9159 * No need to hold locks to set state 9160 * after connect failure socket state is undefined 9161 * We set the state only to imitate old sockfs behavior 9162 */ 9163 udp->udp_state = TS_IDLE; 9164 } 9165 return (error); 9166 } 9167 9168 /* ARGSUSED */ 9169 int 9170 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 9171 cred_t *cr) 9172 { 9173 conn_t *connp = (conn_t *)proto_handle; 9174 udp_t *udp = connp->conn_udp; 9175 udp_stack_t *us = udp->udp_us; 9176 int error = 0; 9177 9178 ASSERT(DB_TYPE(mp) == M_DATA); 9179 9180 /* All Solaris components should pass a cred for this operation. */ 9181 ASSERT(cr != NULL); 9182 9183 /* If labeled then sockfs should have already set db_credp */ 9184 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 9185 9186 /* 9187 * If the socket is connected and no change in destination 9188 */ 9189 if (msg->msg_namelen == 0) { 9190 error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid); 9191 if (error == EDESTADDRREQ) 9192 return (error); 9193 else 9194 return (udp->udp_dgram_errind ? error : 0); 9195 } 9196 9197 /* 9198 * Do an implicit bind if necessary. 9199 */ 9200 if (udp->udp_state == TS_UNBND) { 9201 error = udp_implicit_bind(connp, cr); 9202 /* 9203 * We could be racing with an actual bind, in which case 9204 * we would see EPROTO. 
We cross our fingers and try 9205 * to send. 9206 */ 9207 if (!(error == 0 || error == EPROTO)) { 9208 freemsg(mp); 9209 return (error); 9210 } 9211 } 9212 9213 rw_enter(&udp->udp_rwlock, RW_WRITER); 9214 9215 if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) { 9216 rw_exit(&udp->udp_rwlock); 9217 freemsg(mp); 9218 return (EISCONN); 9219 } 9220 9221 9222 if (udp->udp_delayed_error != 0) { 9223 boolean_t match; 9224 9225 error = udp->udp_delayed_error; 9226 match = B_FALSE; 9227 udp->udp_delayed_error = 0; 9228 switch (udp->udp_family) { 9229 case AF_INET: { 9230 /* Compare just IP address and port */ 9231 sin_t *sin1 = (sin_t *)msg->msg_name; 9232 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 9233 9234 if (msg->msg_namelen == sizeof (sin_t) && 9235 sin1->sin_port == sin2->sin_port && 9236 sin1->sin_addr.s_addr == sin2->sin_addr.s_addr) 9237 match = B_TRUE; 9238 9239 break; 9240 } 9241 case AF_INET6: { 9242 sin6_t *sin1 = (sin6_t *)msg->msg_name; 9243 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 9244 9245 if (msg->msg_namelen == sizeof (sin6_t) && 9246 sin1->sin6_port == sin2->sin6_port && 9247 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 9248 &sin2->sin6_addr)) 9249 match = B_TRUE; 9250 break; 9251 } 9252 default: 9253 ASSERT(0); 9254 } 9255 9256 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 9257 9258 if (match) { 9259 rw_exit(&udp->udp_rwlock); 9260 freemsg(mp); 9261 return (error); 9262 } 9263 } 9264 9265 error = proto_verify_ip_addr(udp->udp_family, 9266 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 9267 rw_exit(&udp->udp_rwlock); 9268 9269 if (error != 0) { 9270 freemsg(mp); 9271 return (error); 9272 } 9273 9274 error = udp_send_not_connected(connp, mp, 9275 (struct sockaddr *)msg->msg_name, msg->msg_namelen, msg, cr, 9276 curproc->p_pid); 9277 if (error != 0) { 9278 UDP_STAT(us, udp_out_err_output); 9279 freemsg(mp); 9280 } 9281 return (udp->udp_dgram_errind ? 
error : 0); 9282 } 9283 9284 int 9285 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 9286 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb) 9287 { 9288 conn_t *connp = (conn_t *)proto_handle; 9289 udp_t *udp; 9290 struct T_capability_ack tca; 9291 struct sockaddr_in6 laddr, faddr; 9292 socklen_t laddrlen, faddrlen; 9293 short opts; 9294 struct stroptions *stropt; 9295 mblk_t *stropt_mp; 9296 int error; 9297 9298 udp = connp->conn_udp; 9299 9300 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 9301 9302 /* 9303 * setup the fallback stream that was allocated 9304 */ 9305 connp->conn_dev = (dev_t)RD(q)->q_ptr; 9306 connp->conn_minor_arena = WR(q)->q_ptr; 9307 9308 RD(q)->q_ptr = WR(q)->q_ptr = connp; 9309 9310 WR(q)->q_qinfo = &udp_winit; 9311 9312 connp->conn_rq = RD(q); 9313 connp->conn_wq = WR(q); 9314 9315 /* Notify stream head about options before sending up data */ 9316 stropt_mp->b_datap->db_type = M_SETOPTS; 9317 stropt_mp->b_wptr += sizeof (*stropt); 9318 stropt = (struct stroptions *)stropt_mp->b_rptr; 9319 stropt->so_flags = SO_WROFF | SO_HIWAT; 9320 stropt->so_wroff = 9321 (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra); 9322 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 9323 putnext(RD(q), stropt_mp); 9324 9325 /* 9326 * Free the helper stream 9327 */ 9328 ip_free_helper_stream(connp); 9329 9330 if (!direct_sockfs) 9331 udp_disable_direct_sockfs(udp); 9332 9333 /* 9334 * Collect the information needed to sync with the sonode 9335 */ 9336 udp_do_capability_ack(udp, &tca, TC1_INFO); 9337 9338 laddrlen = faddrlen = sizeof (sin6_t); 9339 (void) udp_getsockname((sock_lower_handle_t)connp, 9340 (struct sockaddr *)&laddr, &laddrlen, CRED()); 9341 error = udp_getpeername((sock_lower_handle_t)connp, 9342 (struct sockaddr *)&faddr, &faddrlen, CRED()); 9343 if (error != 0) 9344 faddrlen = 0; 9345 9346 opts = 0; 9347 if (udp->udp_dgram_errind) 9348 opts |= SO_DGRAM_ERRIND; 9349 if (udp->udp_dontroute) 9350 
opts |= SO_DONTROUTE; 9351 9352 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 9353 (struct sockaddr *)&laddr, laddrlen, 9354 (struct sockaddr *)&faddr, faddrlen, opts); 9355 9356 mutex_enter(&udp->udp_recv_lock); 9357 /* 9358 * Attempts to send data up during fallback will result in it being 9359 * queued in udp_t. Now we push up any queued packets. 9360 */ 9361 while (udp->udp_fallback_queue_head != NULL) { 9362 mblk_t *mp; 9363 mp = udp->udp_fallback_queue_head; 9364 udp->udp_fallback_queue_head = mp->b_next; 9365 mutex_exit(&udp->udp_recv_lock); 9366 mp->b_next = NULL; 9367 putnext(RD(q), mp); 9368 mutex_enter(&udp->udp_recv_lock); 9369 } 9370 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 9371 /* 9372 * No longer a streams less socket 9373 */ 9374 rw_enter(&udp->udp_rwlock, RW_WRITER); 9375 connp->conn_flags &= ~IPCL_NONSTR; 9376 rw_exit(&udp->udp_rwlock); 9377 9378 mutex_exit(&udp->udp_recv_lock); 9379 9380 ASSERT(connp->conn_ref >= 1); 9381 9382 return (0); 9383 } 9384 9385 static int 9386 udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9387 { 9388 sin_t *sin = (sin_t *)sa; 9389 sin6_t *sin6 = (sin6_t *)sa; 9390 9391 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9392 ASSERT(udp != NULL); 9393 9394 if (udp->udp_state != TS_DATA_XFER) 9395 return (ENOTCONN); 9396 9397 switch (udp->udp_family) { 9398 case AF_INET: 9399 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9400 9401 if (*salenp < sizeof (sin_t)) 9402 return (EINVAL); 9403 9404 *salenp = sizeof (sin_t); 9405 *sin = sin_null; 9406 sin->sin_family = AF_INET; 9407 sin->sin_port = udp->udp_dstport; 9408 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 9409 break; 9410 case AF_INET6: 9411 if (*salenp < sizeof (sin6_t)) 9412 return (EINVAL); 9413 9414 *salenp = sizeof (sin6_t); 9415 *sin6 = sin6_null; 9416 sin6->sin6_family = AF_INET6; 9417 sin6->sin6_port = udp->udp_dstport; 9418 sin6->sin6_addr = udp->udp_v6dst; 9419 sin6->sin6_flowinfo = udp->udp_flowinfo; 9420 break; 9421 
} 9422 9423 return (0); 9424 } 9425 9426 /* ARGSUSED */ 9427 int 9428 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9429 socklen_t *salenp, cred_t *cr) 9430 { 9431 conn_t *connp = (conn_t *)proto_handle; 9432 udp_t *udp = connp->conn_udp; 9433 int error; 9434 9435 /* All Solaris components should pass a cred for this operation. */ 9436 ASSERT(cr != NULL); 9437 9438 ASSERT(udp != NULL); 9439 9440 rw_enter(&udp->udp_rwlock, RW_READER); 9441 9442 error = udp_do_getpeername(udp, sa, salenp); 9443 9444 rw_exit(&udp->udp_rwlock); 9445 9446 return (error); 9447 } 9448 9449 static int 9450 udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9451 { 9452 sin_t *sin = (sin_t *)sa; 9453 sin6_t *sin6 = (sin6_t *)sa; 9454 9455 ASSERT(udp != NULL); 9456 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9457 9458 switch (udp->udp_family) { 9459 case AF_INET: 9460 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9461 9462 if (*salenp < sizeof (sin_t)) 9463 return (EINVAL); 9464 9465 *salenp = sizeof (sin_t); 9466 *sin = sin_null; 9467 sin->sin_family = AF_INET; 9468 if (udp->udp_state == TS_UNBND) { 9469 break; 9470 } 9471 sin->sin_port = udp->udp_port; 9472 9473 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 9474 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9475 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 9476 } else { 9477 /* 9478 * INADDR_ANY 9479 * udp_v6src is not set, we might be bound to 9480 * broadcast/multicast. 
Use udp_bound_v6src as 9481 * local address instead (that could 9482 * also still be INADDR_ANY) 9483 */ 9484 sin->sin_addr.s_addr = 9485 V4_PART_OF_V6(udp->udp_bound_v6src); 9486 } 9487 break; 9488 9489 case AF_INET6: 9490 if (*salenp < sizeof (sin6_t)) 9491 return (EINVAL); 9492 9493 *salenp = sizeof (sin6_t); 9494 *sin6 = sin6_null; 9495 sin6->sin6_family = AF_INET6; 9496 if (udp->udp_state == TS_UNBND) { 9497 break; 9498 } 9499 sin6->sin6_port = udp->udp_port; 9500 9501 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9502 sin6->sin6_addr = udp->udp_v6src; 9503 } else { 9504 /* 9505 * UNSPECIFIED 9506 * udp_v6src is not set, we might be bound to 9507 * broadcast/multicast. Use udp_bound_v6src as 9508 * local address instead (that could 9509 * also still be UNSPECIFIED) 9510 */ 9511 sin6->sin6_addr = udp->udp_bound_v6src; 9512 } 9513 } 9514 return (0); 9515 } 9516 9517 /* ARGSUSED */ 9518 int 9519 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9520 socklen_t *salenp, cred_t *cr) 9521 { 9522 conn_t *connp = (conn_t *)proto_handle; 9523 udp_t *udp = connp->conn_udp; 9524 int error; 9525 9526 /* All Solaris components should pass a cred for this operation. */ 9527 ASSERT(cr != NULL); 9528 9529 ASSERT(udp != NULL); 9530 rw_enter(&udp->udp_rwlock, RW_READER); 9531 9532 error = udp_do_getsockname(udp, sa, salenp); 9533 9534 rw_exit(&udp->udp_rwlock); 9535 9536 return (error); 9537 } 9538 9539 int 9540 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9541 void *optvalp, socklen_t *optlen, cred_t *cr) 9542 { 9543 conn_t *connp = (conn_t *)proto_handle; 9544 udp_t *udp = connp->conn_udp; 9545 int error; 9546 t_uscalar_t max_optbuf_len; 9547 void *optvalp_buf; 9548 int len; 9549 9550 /* All Solaris components should pass a cred for this operation. 
*/ 9551 ASSERT(cr != NULL); 9552 9553 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 9554 udp_opt_obj.odb_opt_des_arr, 9555 udp_opt_obj.odb_opt_arr_cnt, 9556 udp_opt_obj.odb_topmost_tpiprovider, 9557 B_FALSE, B_TRUE, cr); 9558 if (error != 0) { 9559 if (error < 0) 9560 error = proto_tlitosyserr(-error); 9561 return (error); 9562 } 9563 9564 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 9565 rw_enter(&udp->udp_rwlock, RW_READER); 9566 len = udp_opt_get(connp, level, option_name, optvalp_buf); 9567 rw_exit(&udp->udp_rwlock); 9568 9569 if (len < 0) { 9570 /* 9571 * Pass on to IP 9572 */ 9573 kmem_free(optvalp_buf, max_optbuf_len); 9574 return (ip_get_options(connp, level, option_name, 9575 optvalp, optlen, cr)); 9576 } else { 9577 /* 9578 * update optlen and copy option value 9579 */ 9580 t_uscalar_t size = MIN(len, *optlen); 9581 bcopy(optvalp_buf, optvalp, size); 9582 bcopy(&size, optlen, sizeof (size)); 9583 9584 kmem_free(optvalp_buf, max_optbuf_len); 9585 return (0); 9586 } 9587 } 9588 9589 int 9590 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9591 const void *optvalp, socklen_t optlen, cred_t *cr) 9592 { 9593 conn_t *connp = (conn_t *)proto_handle; 9594 udp_t *udp = connp->conn_udp; 9595 int error; 9596 9597 /* All Solaris components should pass a cred for this operation. 
*/ 9598 ASSERT(cr != NULL); 9599 9600 error = proto_opt_check(level, option_name, optlen, NULL, 9601 udp_opt_obj.odb_opt_des_arr, 9602 udp_opt_obj.odb_opt_arr_cnt, 9603 udp_opt_obj.odb_topmost_tpiprovider, 9604 B_TRUE, B_FALSE, cr); 9605 9606 if (error != 0) { 9607 if (error < 0) 9608 error = proto_tlitosyserr(-error); 9609 return (error); 9610 } 9611 9612 rw_enter(&udp->udp_rwlock, RW_WRITER); 9613 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 9614 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 9615 NULL, cr); 9616 rw_exit(&udp->udp_rwlock); 9617 9618 if (error < 0) { 9619 /* 9620 * Pass on to ip 9621 */ 9622 error = ip_set_options(connp, level, option_name, optvalp, 9623 optlen, cr); 9624 } 9625 9626 return (error); 9627 } 9628 9629 void 9630 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 9631 { 9632 conn_t *connp = (conn_t *)proto_handle; 9633 udp_t *udp = connp->conn_udp; 9634 9635 mutex_enter(&udp->udp_recv_lock); 9636 connp->conn_flow_cntrld = B_FALSE; 9637 mutex_exit(&udp->udp_recv_lock); 9638 } 9639 9640 /* ARGSUSED */ 9641 int 9642 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 9643 { 9644 conn_t *connp = (conn_t *)proto_handle; 9645 9646 /* All Solaris components should pass a cred for this operation. */ 9647 ASSERT(cr != NULL); 9648 9649 /* shut down the send side */ 9650 if (how != SHUT_RD) 9651 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9652 SOCK_OPCTL_SHUT_SEND, 0); 9653 /* shut down the recv side */ 9654 if (how != SHUT_WR) 9655 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9656 SOCK_OPCTL_SHUT_RECV, 0); 9657 return (0); 9658 } 9659 9660 int 9661 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 9662 int mode, int32_t *rvalp, cred_t *cr) 9663 { 9664 conn_t *connp = (conn_t *)proto_handle; 9665 int error; 9666 9667 /* All Solaris components should pass a cred for this operation. 
*/ 9668 ASSERT(cr != NULL); 9669 9670 switch (cmd) { 9671 case ND_SET: 9672 case ND_GET: 9673 case _SIOCSOCKFALLBACK: 9674 case TI_GETPEERNAME: 9675 case TI_GETMYNAME: 9676 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 9677 cmd)); 9678 error = EINVAL; 9679 break; 9680 default: 9681 /* 9682 * Pass on to IP using helper stream 9683 */ 9684 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 9685 cmd, arg, mode, cr, rvalp); 9686 break; 9687 } 9688 return (error); 9689 } 9690 9691 /* ARGSUSED */ 9692 int 9693 udp_accept(sock_lower_handle_t lproto_handle, 9694 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 9695 cred_t *cr) 9696 { 9697 return (EOPNOTSUPP); 9698 } 9699 9700 /* ARGSUSED */ 9701 int 9702 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 9703 { 9704 return (EOPNOTSUPP); 9705 } 9706 9707 sock_downcalls_t sock_udp_downcalls = { 9708 udp_activate, /* sd_activate */ 9709 udp_accept, /* sd_accept */ 9710 udp_bind, /* sd_bind */ 9711 udp_listen, /* sd_listen */ 9712 udp_connect, /* sd_connect */ 9713 udp_getpeername, /* sd_getpeername */ 9714 udp_getsockname, /* sd_getsockname */ 9715 udp_getsockopt, /* sd_getsockopt */ 9716 udp_setsockopt, /* sd_setsockopt */ 9717 udp_send, /* sd_send */ 9718 NULL, /* sd_send_uio */ 9719 NULL, /* sd_recv_uio */ 9720 NULL, /* sd_poll */ 9721 udp_shutdown, /* sd_shutdown */ 9722 udp_clr_flowctrl, /* sd_setflowctrl */ 9723 udp_ioctl, /* sd_ioctl */ 9724 udp_close /* sd_close */ 9725 }; 9726