1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/pattr.h> 31 #include <sys/stropts.h> 32 #include <sys/strlog.h> 33 #include <sys/strsun.h> 34 #include <sys/time.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/suntpi.h> 42 #include <sys/xti_inet.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/ucred.h> 46 #include <sys/zone.h> 47 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/sockio.h> 51 #include <sys/vtrace.h> 52 #include <sys/sdt.h> 53 #include <sys/debug.h> 54 #include <sys/isa_defs.h> 55 #include <sys/random.h> 56 #include <netinet/in.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 #include <netinet/udp.h> 60 #include <net/if.h> 61 #include <net/route.h> 62 63 #include <inet/common.h> 64 #include <inet/ip.h> 65 #include <inet/ip_impl.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_ire.h> 68 #include <inet/ip_if.h> 69 #include <inet/ip_multi.h> 70 #include <inet/ip_ndp.h> 71 #include <inet/proto_set.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/optcom.h> 75 #include <inet/snmpcom.h> 76 #include <inet/kstatcom.h> 77 #include <inet/udp_impl.h> 78 #include <inet/ipclassifier.h> 79 #include <inet/ipsec_impl.h> 80 #include <inet/ipp_common.h> 81 #include <sys/squeue_impl.h> 82 #include <inet/ipnet.h> 83 #include <sys/ethernet.h> 84 85 /* 86 * The ipsec_info.h header file is here since it has the definition for the 87 * M_CTL message types used by IP to convey information to the ULP. The 88 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 
89 */ 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 93 #include <sys/tsol/label.h> 94 #include <sys/tsol/tnet.h> 95 #include <rpc/pmap_prot.h> 96 97 /* 98 * Synchronization notes: 99 * 100 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 101 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock. 102 * We also use conn_lock when updating things that affect the IP classifier 103 * lookup. 104 * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock. 105 * 106 * The fanout lock uf_lock: 107 * When a UDP endpoint is bound to a local port, it is inserted into 108 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 109 * The size of the array is controlled by the udp_bind_fanout_size variable. 110 * This variable can be changed in /etc/system if the default value is 111 * not large enough. Each bind hash bucket is protected by a per bucket 112 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 113 * structure and a few other fields in the udp_t. A UDP endpoint is removed 114 * from the bind hash list only when it is being unbound or being closed. 115 * The per bucket lock also protects a UDP endpoint's state changes. 116 * 117 * The udp_rwlock: 118 * This protects most of the other fields in the udp_t. The exact list of 119 * fields which are protected by each of the above locks is documented in 120 * the udp_t structure definition. 121 * 122 * Plumbing notes: 123 * UDP is always a device driver. For compatibility with mibopen() code 124 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 125 * dummy module. 126 * 127 * The above implies that we don't support any intermediate module to 128 * reside in between /dev/ip and udp -- in fact, we never supported such 129 * scenario in the past as the inter-layer communication semantics have 130 * always been private. 
131 */ 132 133 /* For /etc/system control */ 134 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 135 136 /* Option processing attrs */ 137 typedef struct udpattrs_s { 138 union { 139 ip6_pkt_t *udpattr_ipp6; /* For V6 */ 140 ip4_pkt_t *udpattr_ipp4; /* For V4 */ 141 } udpattr_ippu; 142 #define udpattr_ipp6 udpattr_ippu.udpattr_ipp6 143 #define udpattr_ipp4 udpattr_ippu.udpattr_ipp4 144 mblk_t *udpattr_mb; 145 boolean_t udpattr_credset; 146 } udpattrs_t; 147 148 static void udp_addr_req(queue_t *q, mblk_t *mp); 149 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 150 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 151 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 152 static int udp_build_hdrs(udp_t *udp); 153 static void udp_capability_req(queue_t *q, mblk_t *mp); 154 static int udp_tpi_close(queue_t *q, int flags); 155 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 156 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 157 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 158 int sys_error); 159 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 160 t_scalar_t tlierr, int unixerr); 161 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 162 cred_t *cr); 163 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 164 char *value, caddr_t cp, cred_t *cr); 165 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 166 char *value, caddr_t cp, cred_t *cr); 167 static void udp_icmp_error(conn_t *, mblk_t *); 168 static void udp_icmp_error_ipv6(conn_t *, mblk_t *); 169 static void udp_info_req(queue_t *q, mblk_t *mp); 170 static void udp_input(void *, mblk_t *, void *); 171 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 172 t_scalar_t addr_length); 173 static void udp_lrput(queue_t *, mblk_t *); 174 static void udp_lwput(queue_t *, mblk_t *); 175 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 176 cred_t *credp, 
boolean_t isv6); 177 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 178 cred_t *credp); 179 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 180 cred_t *credp); 181 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 182 int *errorp, udpattrs_t *udpattrs); 183 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 184 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 185 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 186 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 187 cred_t *cr); 188 static int udp_rinfop(queue_t *q, infod_t *dp); 189 static int udp_rrw(queue_t *q, struiod_t *dp); 190 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, 191 ipha_t *ipha); 192 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 193 t_scalar_t destlen, t_scalar_t err); 194 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 195 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 196 boolean_t random); 197 static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, 198 int *, boolean_t, struct nmsghdr *, cred_t *, pid_t); 199 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 200 int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid); 201 static void udp_wput_other(queue_t *q, mblk_t *mp); 202 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 203 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 204 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 205 206 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 207 static void udp_stack_fini(netstackid_t stackid, void *arg); 208 209 static void *udp_kstat_init(netstackid_t stackid); 210 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 211 static void *udp_kstat2_init(netstackid_t, udp_stat_t *); 212 static void udp_kstat2_fini(netstackid_t, kstat_t *); 213 static int 
udp_kstat_update(kstat_t *kp, int rw); 214 215 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 216 uint_t pkt_len); 217 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 218 static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t); 219 220 static int udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *, 221 cred_t *, pid_t); 222 static void udp_ulp_recv(conn_t *, mblk_t *); 223 224 /* Common routine for TPI and socket module */ 225 static conn_t *udp_do_open(cred_t *, boolean_t, int); 226 static void udp_do_close(conn_t *); 227 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 228 boolean_t); 229 static int udp_do_unbind(conn_t *); 230 static int udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *); 231 static int udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *); 232 233 int udp_getsockname(sock_lower_handle_t, 234 struct sockaddr *, socklen_t *, cred_t *); 235 int udp_getpeername(sock_lower_handle_t, 236 struct sockaddr *, socklen_t *, cred_t *); 237 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 238 cred_t *cr); 239 static int udp_post_ip_bind_connect(udp_t *, mblk_t *, int); 240 241 #define UDP_RECV_HIWATER (56 * 1024) 242 #define UDP_RECV_LOWATER 128 243 #define UDP_XMIT_HIWATER (56 * 1024) 244 #define UDP_XMIT_LOWATER 1024 245 246 /* 247 * The following is defined in tcp.c 248 */ 249 extern int (*cl_inet_connect2)(netstackid_t stack_id, 250 uint8_t protocol, boolean_t is_outgoing, 251 sa_family_t addr_family, 252 uint8_t *laddrp, in_port_t lport, 253 uint8_t *faddrp, in_port_t fport, void *args); 254 255 /* 256 * Checks if the given destination addr/port is allowed out. 257 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 258 * Called for each connect() and for sendto()/sendmsg() to a different 259 * destination. 260 * For connect(), called in udp_connect(). 261 * For sendto()/sendmsg(), called in udp_output_v{4,6}(). 
262 * 263 * This macro assumes that the cl_inet_connect2 hook is not NULL. 264 * Please check this before calling this macro. 265 * 266 * void 267 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 268 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 269 */ 270 #define CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) { \ 271 (err) = 0; \ 272 /* \ 273 * Running in cluster mode - check and register active \ 274 * "connection" information \ 275 */ \ 276 if ((udp)->udp_ipversion == IPV4_VERSION) \ 277 (err) = (*cl_inet_connect2)( \ 278 (cp)->conn_netstack->netstack_stackid, \ 279 IPPROTO_UDP, is_outgoing, AF_INET, \ 280 (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]), \ 281 (udp)->udp_port, \ 282 (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]), \ 283 (in_port_t)(fport), NULL); \ 284 else \ 285 (err) = (*cl_inet_connect2)( \ 286 (cp)->conn_netstack->netstack_stackid, \ 287 IPPROTO_UDP, is_outgoing, AF_INET6, \ 288 (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port, \ 289 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 290 } 291 292 static struct module_info udp_mod_info = { 293 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 294 }; 295 296 /* 297 * Entry points for UDP as a device. 298 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 
299 */ 300 static struct qinit udp_rinitv4 = { 301 NULL, NULL, udp_openv4, udp_tpi_close, NULL, 302 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 303 }; 304 305 static struct qinit udp_rinitv6 = { 306 NULL, NULL, udp_openv6, udp_tpi_close, NULL, 307 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 308 }; 309 310 static struct qinit udp_winit = { 311 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, 312 &udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE 313 }; 314 315 /* UDP entry point during fallback */ 316 struct qinit udp_fallback_sock_winit = { 317 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 318 }; 319 320 /* 321 * UDP needs to handle I_LINK and I_PLINK since ifconfig 322 * likes to use it as a place to hang the various streams. 323 */ 324 static struct qinit udp_lrinit = { 325 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, 326 &udp_mod_info 327 }; 328 329 static struct qinit udp_lwinit = { 330 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, 331 &udp_mod_info 332 }; 333 334 /* For AF_INET aka /dev/udp */ 335 struct streamtab udpinfov4 = { 336 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 337 }; 338 339 /* For AF_INET6 aka /dev/udp6 */ 340 struct streamtab udpinfov6 = { 341 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 342 }; 343 344 static sin_t sin_null; /* Zero address for quick clears */ 345 static sin6_t sin6_null; /* Zero address for quick clears */ 346 347 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 348 349 /* Default structure copied into T_INFO_ACK messages */ 350 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 351 T_INFO_ACK, 352 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 353 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 354 T_INVALID, /* CDATA_size. udp does not support connect data. */ 355 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 356 sizeof (sin_t), /* ADDR_size. 
*/ 357 0, /* OPT_size - not initialized here */ 358 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 359 T_CLTS, /* SERV_type. udp supports connection-less. */ 360 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 361 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 362 }; 363 364 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 365 366 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 367 T_INFO_ACK, 368 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 369 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 370 T_INVALID, /* CDATA_size. udp does not support connect data. */ 371 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 372 sizeof (sin6_t), /* ADDR_size. */ 373 0, /* OPT_size - not initialized here */ 374 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 375 T_CLTS, /* SERV_type. udp supports connection-less. */ 376 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 377 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 378 }; 379 380 /* largest UDP port number */ 381 #define UDP_MAX_PORT 65535 382 383 /* 384 * Table of ND variables supported by udp. These are loaded into us_nd 385 * in udp_open. 386 * All of these are alterable, within the min/max values given, at run time. 
387 */ 388 /* BEGIN CSTYLED */ 389 udpparam_t udp_param_arr[] = { 390 /*min max value name */ 391 { 0L, 256, 32, "udp_wroff_extra" }, 392 { 1L, 255, 255, "udp_ipv4_ttl" }, 393 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 394 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 395 { 0, 1, 1, "udp_do_checksum" }, 396 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 397 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 398 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 399 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 400 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 401 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 402 }; 403 /* END CSTYLED */ 404 405 /* Setable in /etc/system */ 406 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 407 uint32_t udp_random_anon_port = 1; 408 409 /* 410 * Hook functions to enable cluster networking. 411 * On non-clustered systems these vectors must always be NULL 412 */ 413 414 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol, 415 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 416 void *args) = NULL; 417 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, 418 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 419 void *args) = NULL; 420 421 typedef union T_primitives *t_primp_t; 422 423 /* 424 * Return the next anonymous port in the privileged port range for 425 * bind checking. 426 * 427 * Trusted Extension (TX) notes: TX allows administrator to mark or 428 * reserve ports as Multilevel ports (MLP). MLP has special function 429 * on TX systems. Once a port is made MLP, it's not available as 430 * ordinary port. This creates "holes" in the port name space. It 431 * may be necessary to skip the "holes" find a suitable anon port. 
432 */ 433 static in_port_t 434 udp_get_next_priv_port(udp_t *udp) 435 { 436 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 437 in_port_t nextport; 438 boolean_t restart = B_FALSE; 439 udp_stack_t *us = udp->udp_us; 440 441 retry: 442 if (next_priv_port < us->us_min_anonpriv_port || 443 next_priv_port >= IPPORT_RESERVED) { 444 next_priv_port = IPPORT_RESERVED - 1; 445 if (restart) 446 return (0); 447 restart = B_TRUE; 448 } 449 450 if (is_system_labeled() && 451 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 452 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 453 next_priv_port = nextport; 454 goto retry; 455 } 456 457 return (next_priv_port--); 458 } 459 460 /* 461 * Hash list removal routine for udp_t structures. 462 */ 463 static void 464 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 465 { 466 udp_t *udpnext; 467 kmutex_t *lockp; 468 udp_stack_t *us = udp->udp_us; 469 470 if (udp->udp_ptpbhn == NULL) 471 return; 472 473 /* 474 * Extract the lock pointer in case there are concurrent 475 * hash_remove's for this instance. 
476 */ 477 ASSERT(udp->udp_port != 0); 478 if (!caller_holds_lock) { 479 lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 480 us->us_bind_fanout_size)].uf_lock; 481 ASSERT(lockp != NULL); 482 mutex_enter(lockp); 483 } 484 if (udp->udp_ptpbhn != NULL) { 485 udpnext = udp->udp_bind_hash; 486 if (udpnext != NULL) { 487 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 488 udp->udp_bind_hash = NULL; 489 } 490 *udp->udp_ptpbhn = udpnext; 491 udp->udp_ptpbhn = NULL; 492 } 493 if (!caller_holds_lock) { 494 mutex_exit(lockp); 495 } 496 } 497 498 static void 499 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 500 { 501 udp_t **udpp; 502 udp_t *udpnext; 503 504 ASSERT(MUTEX_HELD(&uf->uf_lock)); 505 ASSERT(udp->udp_ptpbhn == NULL); 506 udpp = &uf->uf_udp; 507 udpnext = udpp[0]; 508 if (udpnext != NULL) { 509 /* 510 * If the new udp bound to the INADDR_ANY address 511 * and the first one in the list is not bound to 512 * INADDR_ANY we skip all entries until we find the 513 * first one bound to INADDR_ANY. 514 * This makes sure that applications binding to a 515 * specific address get preference over those binding to 516 * INADDR_ANY. 517 */ 518 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 519 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 520 while ((udpnext = udpp[0]) != NULL && 521 !V6_OR_V4_INADDR_ANY( 522 udpnext->udp_bound_v6src)) { 523 udpp = &(udpnext->udp_bind_hash); 524 } 525 if (udpnext != NULL) 526 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 527 } else { 528 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 529 } 530 } 531 udp->udp_bind_hash = udpnext; 532 udp->udp_ptpbhn = udpp; 533 udpp[0] = udp; 534 } 535 536 /* 537 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 538 * passed to udp_wput. 539 * It associates a port number and local address with the stream. 540 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 541 * protocol type (IPPROTO_UDP) placed in the message following the address. 
542 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 543 * (Called as writer.) 544 * 545 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 546 * without setting SO_REUSEADDR. This is needed so that they 547 * can be viewed as two independent transport protocols. 548 * However, anonymouns ports are allocated from the same range to avoid 549 * duplicating the us->us_next_port_to_try. 550 */ 551 static void 552 udp_tpi_bind(queue_t *q, mblk_t *mp) 553 { 554 sin_t *sin; 555 sin6_t *sin6; 556 mblk_t *mp1; 557 struct T_bind_req *tbr; 558 conn_t *connp; 559 udp_t *udp; 560 int error; 561 struct sockaddr *sa; 562 cred_t *cr; 563 564 /* 565 * All Solaris components should pass a db_credp 566 * for this TPI message, hence we ASSERT. 567 * But in case there is some other M_PROTO that looks 568 * like a TPI message sent by some other kernel 569 * component, we check and return an error. 570 */ 571 cr = msg_getcred(mp, NULL); 572 ASSERT(cr != NULL); 573 if (cr == NULL) { 574 udp_err_ack(q, mp, TSYSERR, EINVAL); 575 return; 576 } 577 578 connp = Q_TO_CONN(q); 579 udp = connp->conn_udp; 580 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 581 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 582 "udp_bind: bad req, len %u", 583 (uint_t)(mp->b_wptr - mp->b_rptr)); 584 udp_err_ack(q, mp, TPROTO, 0); 585 return; 586 } 587 if (udp->udp_state != TS_UNBND) { 588 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 589 "udp_bind: bad state, %u", udp->udp_state); 590 udp_err_ack(q, mp, TOUTSTATE, 0); 591 return; 592 } 593 /* 594 * Reallocate the message to make sure we have enough room for an 595 * address and the protocol type. 596 */ 597 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 598 if (!mp1) { 599 udp_err_ack(q, mp, TSYSERR, ENOMEM); 600 return; 601 } 602 603 mp = mp1; 604 605 /* Reset the message type in preparation for shipping it back. 
*/ 606 DB_TYPE(mp) = M_PCPROTO; 607 608 tbr = (struct T_bind_req *)mp->b_rptr; 609 switch (tbr->ADDR_length) { 610 case 0: /* Request for a generic port */ 611 tbr->ADDR_offset = sizeof (struct T_bind_req); 612 if (udp->udp_family == AF_INET) { 613 tbr->ADDR_length = sizeof (sin_t); 614 sin = (sin_t *)&tbr[1]; 615 *sin = sin_null; 616 sin->sin_family = AF_INET; 617 mp->b_wptr = (uchar_t *)&sin[1]; 618 sa = (struct sockaddr *)sin; 619 } else { 620 ASSERT(udp->udp_family == AF_INET6); 621 tbr->ADDR_length = sizeof (sin6_t); 622 sin6 = (sin6_t *)&tbr[1]; 623 *sin6 = sin6_null; 624 sin6->sin6_family = AF_INET6; 625 mp->b_wptr = (uchar_t *)&sin6[1]; 626 sa = (struct sockaddr *)sin6; 627 } 628 break; 629 630 case sizeof (sin_t): /* Complete IPv4 address */ 631 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 632 sizeof (sin_t)); 633 if (sa == NULL || !OK_32PTR((char *)sa)) { 634 udp_err_ack(q, mp, TSYSERR, EINVAL); 635 return; 636 } 637 if (udp->udp_family != AF_INET || 638 sa->sa_family != AF_INET) { 639 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 640 return; 641 } 642 break; 643 644 case sizeof (sin6_t): /* complete IPv6 address */ 645 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 646 sizeof (sin6_t)); 647 if (sa == NULL || !OK_32PTR((char *)sa)) { 648 udp_err_ack(q, mp, TSYSERR, EINVAL); 649 return; 650 } 651 if (udp->udp_family != AF_INET6 || 652 sa->sa_family != AF_INET6) { 653 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 654 return; 655 } 656 break; 657 658 default: /* Invalid request */ 659 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 660 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 661 udp_err_ack(q, mp, TBADADDR, 0); 662 return; 663 } 664 665 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 666 tbr->PRIM_type != O_T_BIND_REQ); 667 668 if (error != 0) { 669 if (error > 0) { 670 udp_err_ack(q, mp, TSYSERR, error); 671 } else { 672 udp_err_ack(q, mp, -error, 0); 673 } 674 } else { 675 tbr->PRIM_type = T_BIND_ACK; 676 
qreply(q, mp); 677 } 678 } 679 680 /* 681 * This routine handles each T_CONN_REQ message passed to udp. It 682 * associates a default destination address with the stream. 683 * 684 * This routine sends down a T_BIND_REQ to IP with the following mblks: 685 * T_BIND_REQ - specifying local and remote address/port 686 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 687 * T_OK_ACK - for the T_CONN_REQ 688 * T_CONN_CON - to keep the TPI user happy 689 * 690 * The connect completes in udp_do_connect. 691 * When a T_BIND_ACK is received information is extracted from the IRE 692 * and the two appended messages are sent to the TPI user. 693 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 694 * convert it to an error ack for the appropriate primitive. 695 */ 696 static void 697 udp_tpi_connect(queue_t *q, mblk_t *mp) 698 { 699 udp_t *udp; 700 conn_t *connp = Q_TO_CONN(q); 701 int error; 702 socklen_t len; 703 struct sockaddr *sa; 704 struct T_conn_req *tcr; 705 cred_t *cr; 706 707 /* 708 * All Solaris components should pass a db_credp 709 * for this TPI message, hence we ASSERT. 710 * But in case there is some other M_PROTO that looks 711 * like a TPI message sent by some other kernel 712 * component, we check and return an error. 713 */ 714 cr = msg_getcred(mp, NULL); 715 ASSERT(cr != NULL); 716 if (cr == NULL) { 717 udp_err_ack(q, mp, TSYSERR, EINVAL); 718 return; 719 } 720 721 udp = connp->conn_udp; 722 tcr = (struct T_conn_req *)mp->b_rptr; 723 724 /* A bit of sanity checking */ 725 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 726 udp_err_ack(q, mp, TPROTO, 0); 727 return; 728 } 729 730 if (tcr->OPT_length != 0) { 731 udp_err_ack(q, mp, TBADOPT, 0); 732 return; 733 } 734 735 /* 736 * Determine packet type based on type of address passed in 737 * the request should contain an IPv4 or IPv6 address. 
738 * Make sure that address family matches the type of 739 * family of the the address passed down 740 */ 741 len = tcr->DEST_length; 742 switch (tcr->DEST_length) { 743 default: 744 udp_err_ack(q, mp, TBADADDR, 0); 745 return; 746 747 case sizeof (sin_t): 748 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 749 sizeof (sin_t)); 750 break; 751 752 case sizeof (sin6_t): 753 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 754 sizeof (sin6_t)); 755 break; 756 } 757 758 error = proto_verify_ip_addr(udp->udp_family, sa, len); 759 if (error != 0) { 760 udp_err_ack(q, mp, TSYSERR, error); 761 return; 762 } 763 764 error = udp_do_connect(connp, sa, len, cr); 765 if (error != 0) { 766 if (error < 0) 767 udp_err_ack(q, mp, -error, 0); 768 else 769 udp_err_ack(q, mp, TSYSERR, error); 770 } else { 771 mblk_t *mp1; 772 /* 773 * We have to send a connection confirmation to 774 * keep TLI happy. 775 */ 776 if (udp->udp_family == AF_INET) { 777 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 778 sizeof (sin_t), NULL, 0); 779 } else { 780 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 781 sizeof (sin6_t), NULL, 0); 782 } 783 if (mp1 == NULL) { 784 udp_err_ack(q, mp, TSYSERR, ENOMEM); 785 return; 786 } 787 788 /* 789 * Send ok_ack for T_CONN_REQ 790 */ 791 mp = mi_tpi_ok_ack_alloc(mp); 792 if (mp == NULL) { 793 /* Unable to reuse the T_CONN_REQ for the ack. 
*/ 794 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 795 return; 796 } 797 798 putnext(connp->conn_rq, mp); 799 putnext(connp->conn_rq, mp1); 800 } 801 } 802 803 static int 804 udp_tpi_close(queue_t *q, int flags) 805 { 806 conn_t *connp; 807 808 if (flags & SO_FALLBACK) { 809 /* 810 * stream is being closed while in fallback 811 * simply free the resources that were allocated 812 */ 813 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 814 qprocsoff(q); 815 goto done; 816 } 817 818 connp = Q_TO_CONN(q); 819 udp_do_close(connp); 820 done: 821 q->q_ptr = WR(q)->q_ptr = NULL; 822 return (0); 823 } 824 825 /* 826 * Called in the close path to quiesce the conn 827 */ 828 void 829 udp_quiesce_conn(conn_t *connp) 830 { 831 udp_t *udp = connp->conn_udp; 832 833 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 834 /* 835 * Running in cluster mode - register unbind information 836 */ 837 if (udp->udp_ipversion == IPV4_VERSION) { 838 (*cl_inet_unbind)( 839 connp->conn_netstack->netstack_stackid, 840 IPPROTO_UDP, AF_INET, 841 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 842 (in_port_t)udp->udp_port, NULL); 843 } else { 844 (*cl_inet_unbind)( 845 connp->conn_netstack->netstack_stackid, 846 IPPROTO_UDP, AF_INET6, 847 (uint8_t *)(&(udp->udp_v6src)), 848 (in_port_t)udp->udp_port, NULL); 849 } 850 } 851 852 udp_bind_hash_remove(udp, B_FALSE); 853 854 } 855 856 void 857 udp_close_free(conn_t *connp) 858 { 859 udp_t *udp = connp->conn_udp; 860 861 /* If there are any options associated with the stream, free them. 
*/ 862 if (udp->udp_ip_snd_options != NULL) { 863 mi_free((char *)udp->udp_ip_snd_options); 864 udp->udp_ip_snd_options = NULL; 865 udp->udp_ip_snd_options_len = 0; 866 } 867 868 if (udp->udp_ip_rcv_options != NULL) { 869 mi_free((char *)udp->udp_ip_rcv_options); 870 udp->udp_ip_rcv_options = NULL; 871 udp->udp_ip_rcv_options_len = 0; 872 } 873 874 /* Free memory associated with sticky options */ 875 if (udp->udp_sticky_hdrs_len != 0) { 876 kmem_free(udp->udp_sticky_hdrs, 877 udp->udp_sticky_hdrs_len); 878 udp->udp_sticky_hdrs = NULL; 879 udp->udp_sticky_hdrs_len = 0; 880 } 881 if (udp->udp_last_cred != NULL) { 882 crfree(udp->udp_last_cred); 883 udp->udp_last_cred = NULL; 884 } 885 if (udp->udp_effective_cred != NULL) { 886 crfree(udp->udp_effective_cred); 887 udp->udp_effective_cred = NULL; 888 } 889 890 ip6_pkt_free(&udp->udp_sticky_ipp); 891 892 /* 893 * Clear any fields which the kmem_cache constructor clears. 894 * Only udp_connp needs to be preserved. 895 * TBD: We should make this more efficient to avoid clearing 896 * everything. 
897 */ 898 ASSERT(udp->udp_connp == connp); 899 bzero(udp, sizeof (udp_t)); 900 udp->udp_connp = connp; 901 } 902 903 static int 904 udp_do_disconnect(conn_t *connp) 905 { 906 udp_t *udp; 907 mblk_t *ire_mp; 908 udp_fanout_t *udpf; 909 udp_stack_t *us; 910 int error; 911 912 udp = connp->conn_udp; 913 us = udp->udp_us; 914 rw_enter(&udp->udp_rwlock, RW_WRITER); 915 if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) { 916 rw_exit(&udp->udp_rwlock); 917 return (-TOUTSTATE); 918 } 919 udp->udp_pending_op = T_DISCON_REQ; 920 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 921 us->us_bind_fanout_size)]; 922 mutex_enter(&udpf->uf_lock); 923 udp->udp_v6src = udp->udp_bound_v6src; 924 udp->udp_state = TS_IDLE; 925 mutex_exit(&udpf->uf_lock); 926 927 if (udp->udp_family == AF_INET6) { 928 /* Rebuild the header template */ 929 error = udp_build_hdrs(udp); 930 if (error != 0) { 931 udp->udp_pending_op = -1; 932 rw_exit(&udp->udp_rwlock); 933 return (error); 934 } 935 } 936 937 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 938 if (ire_mp == NULL) { 939 mutex_enter(&udpf->uf_lock); 940 udp->udp_pending_op = -1; 941 mutex_exit(&udpf->uf_lock); 942 rw_exit(&udp->udp_rwlock); 943 return (ENOMEM); 944 } 945 946 rw_exit(&udp->udp_rwlock); 947 948 if (udp->udp_family == AF_INET6) { 949 error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP, 950 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 951 } else { 952 error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP, 953 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE); 954 } 955 956 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 957 } 958 959 960 static void 961 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 962 { 963 conn_t *connp = Q_TO_CONN(q); 964 int error; 965 966 /* 967 * Allocate the largest primitive we need to send back 968 * T_error_ack is > than T_ok_ack 969 */ 970 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 971 if (mp == NULL) { 972 /* Unable to reuse the T_DISCON_REQ for the 
ack. */ 973 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 974 return; 975 } 976 977 error = udp_do_disconnect(connp); 978 979 if (error != 0) { 980 if (error < 0) { 981 udp_err_ack(q, mp, -error, 0); 982 } else { 983 udp_err_ack(q, mp, TSYSERR, error); 984 } 985 } else { 986 mp = mi_tpi_ok_ack_alloc(mp); 987 ASSERT(mp != NULL); 988 qreply(q, mp); 989 } 990 } 991 992 int 993 udp_disconnect(conn_t *connp) 994 { 995 int error; 996 udp_t *udp = connp->conn_udp; 997 998 udp->udp_dgram_errind = B_FALSE; 999 1000 error = udp_do_disconnect(connp); 1001 1002 if (error < 0) 1003 error = proto_tlitosyserr(-error); 1004 1005 return (error); 1006 } 1007 1008 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 1009 static void 1010 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1011 { 1012 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1013 qreply(q, mp); 1014 } 1015 1016 /* Shorthand to generate and send TPI error acks to our client */ 1017 static void 1018 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 1019 int sys_error) 1020 { 1021 struct T_error_ack *teackp; 1022 1023 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1024 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1025 teackp = (struct T_error_ack *)mp->b_rptr; 1026 teackp->ERROR_prim = primitive; 1027 teackp->TLI_error = t_error; 1028 teackp->UNIX_error = sys_error; 1029 qreply(q, mp); 1030 } 1031 } 1032 1033 /*ARGSUSED*/ 1034 static int 1035 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 1036 { 1037 int i; 1038 udp_t *udp = Q_TO_UDP(q); 1039 udp_stack_t *us = udp->udp_us; 1040 1041 for (i = 0; i < us->us_num_epriv_ports; i++) { 1042 if (us->us_epriv_ports[i] != 0) 1043 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 1044 } 1045 return (0); 1046 } 1047 1048 /* ARGSUSED */ 1049 static int 1050 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1051 cred_t *cr) 1052 { 
1053 long new_value; 1054 int i; 1055 udp_t *udp = Q_TO_UDP(q); 1056 udp_stack_t *us = udp->udp_us; 1057 1058 /* 1059 * Fail the request if the new value does not lie within the 1060 * port number limits. 1061 */ 1062 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1063 new_value <= 0 || new_value >= 65536) { 1064 return (EINVAL); 1065 } 1066 1067 /* Check if the value is already in the list */ 1068 for (i = 0; i < us->us_num_epriv_ports; i++) { 1069 if (new_value == us->us_epriv_ports[i]) { 1070 return (EEXIST); 1071 } 1072 } 1073 /* Find an empty slot */ 1074 for (i = 0; i < us->us_num_epriv_ports; i++) { 1075 if (us->us_epriv_ports[i] == 0) 1076 break; 1077 } 1078 if (i == us->us_num_epriv_ports) { 1079 return (EOVERFLOW); 1080 } 1081 1082 /* Set the new value */ 1083 us->us_epriv_ports[i] = (in_port_t)new_value; 1084 return (0); 1085 } 1086 1087 /* ARGSUSED */ 1088 static int 1089 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1090 cred_t *cr) 1091 { 1092 long new_value; 1093 int i; 1094 udp_t *udp = Q_TO_UDP(q); 1095 udp_stack_t *us = udp->udp_us; 1096 1097 /* 1098 * Fail the request if the new value does not lie within the 1099 * port number limits. 1100 */ 1101 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1102 new_value <= 0 || new_value >= 65536) { 1103 return (EINVAL); 1104 } 1105 1106 /* Check that the value is already in the list */ 1107 for (i = 0; i < us->us_num_epriv_ports; i++) { 1108 if (us->us_epriv_ports[i] == new_value) 1109 break; 1110 } 1111 if (i == us->us_num_epriv_ports) { 1112 return (ESRCH); 1113 } 1114 1115 /* Clear the value */ 1116 us->us_epriv_ports[i] = 0; 1117 return (0); 1118 } 1119 1120 /* At minimum we need 4 bytes of UDP header */ 1121 #define ICMP_MIN_UDP_HDR 4 1122 1123 /* 1124 * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP. 1125 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 
* Assumes that IP has pulled up everything up to and including the ICMP header.
 *
 * IPv6 packets are handed to udp_icmp_error_ipv6().  For IPv4 only
 * "port unreachable" and "protocol unreachable" are treated as permanent
 * (ECONNREFUSED); everything else is dropped.  The message is always
 * consumed.
 */
static void
udp_icmp_error(conn_t *connp, mblk_t *mp)
{
	udp_t	*udp = connp->conn_udp;
	ipha_t	*outer_ipha = (ipha_t *)mp->b_rptr;
	ipha_t	*inner_ipha;
	icmph_t	*icmph;
	udpha_t	*udpha;
	mblk_t	*ind_mp = NULL;
	sin_t	sin;
	sin6_t	sin6;
	int	error = 0;

	ASSERT(OK_32PTR(mp->b_rptr));

	if (IPH_HDR_VERSION(outer_ipha) != IPV4_VERSION) {
		ASSERT(IPH_HDR_VERSION(outer_ipha) == IPV6_VERSION);
		udp_icmp_error_ipv6(connp, mp);
		return;
	}
	ASSERT(IPH_HDR_VERSION(outer_ipha) == IPV4_VERSION);

	/* The ICMP header follows the outer IP header ... */
	icmph = (icmph_t *)&mp->b_rptr[IPH_HDR_LENGTH(outer_ipha)];
	/* ... then comes the offending packet: inner IP header, then UDP. */
	inner_ipha = (ipha_t *)&icmph[1];
	udpha = (udpha_t *)((char *)inner_ipha + IPH_HDR_LENGTH(inner_ipha));

	/*
	 * Only port/protocol unreachable are permanent.  For "fragmentation
	 * needed" IP has already adjusted the path MTU; all remaining
	 * types and codes are transient.
	 */
	if (icmph->icmph_type == ICMP_DEST_UNREACHABLE &&
	    (icmph->icmph_code == ICMP_PORT_UNREACHABLE ||
	    icmph->icmph_code == ICMP_PROTOCOL_UNREACHABLE)) {
		error = ECONNREFUSED;
	}

	/*
	 * Nothing to report, or the application has not asked for
	 * T_UDERROR_IND (the socket layer enables this automatically when
	 * connected).
	 */
	if (error == 0 || !udp->udp_dgram_errind) {
		freemsg(mp);
		return;
	}

	switch (udp->udp_family) {
	case AF_INET:
		sin = sin_null;
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = inner_ipha->ipha_dst;
		sin.sin_port = udpha->uha_dst_port;
		if (!IPCL_IS_NONSTR(connp)) {
			ind_mp = mi_tpi_uderror_ind((char *)&sin,
			    sizeof (sin_t), NULL, 0, error);
			break;
		}
		rw_enter(&udp->udp_rwlock, RW_WRITER);
		if (udp->udp_state != TS_DATA_XFER) {
			/* Not connected: remember the error and its address */
			udp->udp_delayed_error = error;
			*((sin_t *)&udp->udp_delayed_addr) = sin;
		} else if (sin.sin_port == udp->udp_dstport &&
		    sin.sin_addr.s_addr == V4_PART_OF_V6(udp->udp_v6dst)) {
			/* Connected and the error is from our peer */
			rw_exit(&udp->udp_rwlock);
			(*connp->conn_upcalls->su_set_error)
			    (connp->conn_upper_handle, error);
			goto done;
		}
		rw_exit(&udp->udp_rwlock);
		break;
	case AF_INET6:
		sin6 = sin6_null;
		sin6.sin6_family = AF_INET6;
		IN6_IPADDR_TO_V4MAPPED(inner_ipha->ipha_dst, &sin6.sin6_addr);
		sin6.sin6_port = udpha->uha_dst_port;
		if (!IPCL_IS_NONSTR(connp)) {
			ind_mp = mi_tpi_uderror_ind((char *)&sin6,
			    sizeof (sin6_t), NULL, 0, error);
			break;
		}
		rw_enter(&udp->udp_rwlock, RW_WRITER);
		if (udp->udp_state != TS_DATA_XFER) {
			/* Not connected: remember the error and its address */
			udp->udp_delayed_error = error;
			*((sin6_t *)&udp->udp_delayed_addr) = sin6;
		} else if (sin6.sin6_port == udp->udp_dstport &&
		    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, &udp->udp_v6dst)) {
			/* Connected and the error is from our peer */
			rw_exit(&udp->udp_rwlock);
			(*connp->conn_upcalls->su_set_error)
			    (connp->conn_upper_handle, error);
			goto done;
		}
		rw_exit(&udp->udp_rwlock);
		break;
	}
	if (ind_mp != NULL)
		putnext(connp->conn_rq, ind_mp);
done:
	ASSERT(!RW_ISWRITER(&udp->udp_rwlock));
	freemsg(mp);
}

/*
 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
1261 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1262 * Assumes that IP has pulled up all the extension headers as well as the 1263 * ICMPv6 header. 1264 */ 1265 static void 1266 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1267 { 1268 icmp6_t *icmp6; 1269 ip6_t *ip6h, *outer_ip6h; 1270 uint16_t iph_hdr_length; 1271 uint8_t *nexthdrp; 1272 udpha_t *udpha; 1273 sin6_t sin6; 1274 mblk_t *mp1; 1275 int error = 0; 1276 udp_t *udp = connp->conn_udp; 1277 udp_stack_t *us = udp->udp_us; 1278 1279 outer_ip6h = (ip6_t *)mp->b_rptr; 1280 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1281 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1282 else 1283 iph_hdr_length = IPV6_HDR_LEN; 1284 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1285 ip6h = (ip6_t *)&icmp6[1]; 1286 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1287 freemsg(mp); 1288 return; 1289 } 1290 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1291 1292 switch (icmp6->icmp6_type) { 1293 case ICMP6_DST_UNREACH: 1294 switch (icmp6->icmp6_code) { 1295 case ICMP6_DST_UNREACH_NOPORT: 1296 error = ECONNREFUSED; 1297 break; 1298 case ICMP6_DST_UNREACH_ADMIN: 1299 case ICMP6_DST_UNREACH_NOROUTE: 1300 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1301 case ICMP6_DST_UNREACH_ADDR: 1302 /* Transient errors */ 1303 break; 1304 default: 1305 break; 1306 } 1307 break; 1308 case ICMP6_PACKET_TOO_BIG: { 1309 struct T_unitdata_ind *tudi; 1310 struct T_opthdr *toh; 1311 size_t udi_size; 1312 mblk_t *newmp; 1313 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1314 sizeof (struct ip6_mtuinfo); 1315 sin6_t *sin6; 1316 struct ip6_mtuinfo *mtuinfo; 1317 1318 /* 1319 * If the application has requested to receive path mtu 1320 * information, send up an empty message containing an 1321 * IPV6_PATHMTU ancillary data item. 
1322 */ 1323 if (!udp->udp_ipv6_recvpathmtu) 1324 break; 1325 1326 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1327 opt_length; 1328 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1329 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1330 break; 1331 } 1332 1333 /* 1334 * newmp->b_cont is left to NULL on purpose. This is an 1335 * empty message containing only ancillary data. 1336 */ 1337 newmp->b_datap->db_type = M_PROTO; 1338 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1339 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1340 tudi->PRIM_type = T_UNITDATA_IND; 1341 tudi->SRC_length = sizeof (sin6_t); 1342 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1343 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1344 tudi->OPT_length = opt_length; 1345 1346 sin6 = (sin6_t *)&tudi[1]; 1347 bzero(sin6, sizeof (sin6_t)); 1348 sin6->sin6_family = AF_INET6; 1349 sin6->sin6_addr = udp->udp_v6dst; 1350 1351 toh = (struct T_opthdr *)&sin6[1]; 1352 toh->level = IPPROTO_IPV6; 1353 toh->name = IPV6_PATHMTU; 1354 toh->len = opt_length; 1355 toh->status = 0; 1356 1357 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1358 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1359 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1360 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1361 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1362 /* 1363 * We've consumed everything we need from the original 1364 * message. Free it, then send our empty message. 
1365 */ 1366 freemsg(mp); 1367 udp_ulp_recv(connp, newmp); 1368 1369 return; 1370 } 1371 case ICMP6_TIME_EXCEEDED: 1372 /* Transient errors */ 1373 break; 1374 case ICMP6_PARAM_PROB: 1375 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1376 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1377 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1378 (uchar_t *)nexthdrp) { 1379 error = ECONNREFUSED; 1380 break; 1381 } 1382 break; 1383 } 1384 if (error == 0) { 1385 freemsg(mp); 1386 return; 1387 } 1388 1389 /* 1390 * Deliver T_UDERROR_IND when the application has asked for it. 1391 * The socket layer enables this automatically when connected. 1392 */ 1393 if (!udp->udp_dgram_errind) { 1394 freemsg(mp); 1395 return; 1396 } 1397 1398 sin6 = sin6_null; 1399 sin6.sin6_family = AF_INET6; 1400 sin6.sin6_addr = ip6h->ip6_dst; 1401 sin6.sin6_port = udpha->uha_dst_port; 1402 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1403 1404 if (IPCL_IS_NONSTR(connp)) { 1405 rw_enter(&udp->udp_rwlock, RW_WRITER); 1406 if (udp->udp_state == TS_DATA_XFER) { 1407 if (sin6.sin6_port == udp->udp_dstport && 1408 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1409 &udp->udp_v6dst)) { 1410 rw_exit(&udp->udp_rwlock); 1411 (*connp->conn_upcalls->su_set_error) 1412 (connp->conn_upper_handle, error); 1413 goto done; 1414 } 1415 } else { 1416 udp->udp_delayed_error = error; 1417 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1418 } 1419 rw_exit(&udp->udp_rwlock); 1420 } else { 1421 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1422 NULL, 0, error); 1423 if (mp1 != NULL) 1424 putnext(connp->conn_rq, mp1); 1425 } 1426 done: 1427 ASSERT(!RW_ISWRITER(&udp->udp_rwlock)); 1428 freemsg(mp); 1429 } 1430 1431 /* 1432 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1433 * The local address is filled in if endpoint is bound. 
The remote address 1434 * is filled in if remote address has been precified ("connected endpoint") 1435 * (The concept of connected CLTS sockets is alien to published TPI 1436 * but we support it anyway). 1437 */ 1438 static void 1439 udp_addr_req(queue_t *q, mblk_t *mp) 1440 { 1441 sin_t *sin; 1442 sin6_t *sin6; 1443 mblk_t *ackmp; 1444 struct T_addr_ack *taa; 1445 udp_t *udp = Q_TO_UDP(q); 1446 1447 /* Make it large enough for worst case */ 1448 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1449 2 * sizeof (sin6_t), 1); 1450 if (ackmp == NULL) { 1451 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1452 return; 1453 } 1454 taa = (struct T_addr_ack *)ackmp->b_rptr; 1455 1456 bzero(taa, sizeof (struct T_addr_ack)); 1457 ackmp->b_wptr = (uchar_t *)&taa[1]; 1458 1459 taa->PRIM_type = T_ADDR_ACK; 1460 ackmp->b_datap->db_type = M_PCPROTO; 1461 rw_enter(&udp->udp_rwlock, RW_READER); 1462 /* 1463 * Note: Following code assumes 32 bit alignment of basic 1464 * data structures like sin_t and struct T_addr_ack. 1465 */ 1466 if (udp->udp_state != TS_UNBND) { 1467 /* 1468 * Fill in local address first 1469 */ 1470 taa->LOCADDR_offset = sizeof (*taa); 1471 if (udp->udp_family == AF_INET) { 1472 taa->LOCADDR_length = sizeof (sin_t); 1473 sin = (sin_t *)&taa[1]; 1474 /* Fill zeroes and then initialize non-zero fields */ 1475 *sin = sin_null; 1476 sin->sin_family = AF_INET; 1477 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 1478 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1479 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 1480 sin->sin_addr.s_addr); 1481 } else { 1482 /* 1483 * INADDR_ANY 1484 * udp_v6src is not set, we might be bound to 1485 * broadcast/multicast. 
Use udp_bound_v6src as 1486 * local address instead (that could 1487 * also still be INADDR_ANY) 1488 */ 1489 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 1490 sin->sin_addr.s_addr); 1491 } 1492 sin->sin_port = udp->udp_port; 1493 ackmp->b_wptr = (uchar_t *)&sin[1]; 1494 if (udp->udp_state == TS_DATA_XFER) { 1495 /* 1496 * connected, fill remote address too 1497 */ 1498 taa->REMADDR_length = sizeof (sin_t); 1499 /* assumed 32-bit alignment */ 1500 taa->REMADDR_offset = taa->LOCADDR_offset + 1501 taa->LOCADDR_length; 1502 1503 sin = (sin_t *)(ackmp->b_rptr + 1504 taa->REMADDR_offset); 1505 /* initialize */ 1506 *sin = sin_null; 1507 sin->sin_family = AF_INET; 1508 sin->sin_addr.s_addr = 1509 V4_PART_OF_V6(udp->udp_v6dst); 1510 sin->sin_port = udp->udp_dstport; 1511 ackmp->b_wptr = (uchar_t *)&sin[1]; 1512 } 1513 } else { 1514 taa->LOCADDR_length = sizeof (sin6_t); 1515 sin6 = (sin6_t *)&taa[1]; 1516 /* Fill zeroes and then initialize non-zero fields */ 1517 *sin6 = sin6_null; 1518 sin6->sin6_family = AF_INET6; 1519 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1520 sin6->sin6_addr = udp->udp_v6src; 1521 } else { 1522 /* 1523 * UNSPECIFIED 1524 * udp_v6src is not set, we might be bound to 1525 * broadcast/multicast. 
Use udp_bound_v6src as 1526 * local address instead (that could 1527 * also still be UNSPECIFIED) 1528 */ 1529 sin6->sin6_addr = 1530 udp->udp_bound_v6src; 1531 } 1532 sin6->sin6_port = udp->udp_port; 1533 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1534 if (udp->udp_state == TS_DATA_XFER) { 1535 /* 1536 * connected, fill remote address too 1537 */ 1538 taa->REMADDR_length = sizeof (sin6_t); 1539 /* assumed 32-bit alignment */ 1540 taa->REMADDR_offset = taa->LOCADDR_offset + 1541 taa->LOCADDR_length; 1542 1543 sin6 = (sin6_t *)(ackmp->b_rptr + 1544 taa->REMADDR_offset); 1545 /* initialize */ 1546 *sin6 = sin6_null; 1547 sin6->sin6_family = AF_INET6; 1548 sin6->sin6_addr = udp->udp_v6dst; 1549 sin6->sin6_port = udp->udp_dstport; 1550 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1551 } 1552 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1553 } 1554 } 1555 rw_exit(&udp->udp_rwlock); 1556 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1557 qreply(q, ackmp); 1558 } 1559 1560 static void 1561 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1562 { 1563 if (udp->udp_family == AF_INET) { 1564 *tap = udp_g_t_info_ack_ipv4; 1565 } else { 1566 *tap = udp_g_t_info_ack_ipv6; 1567 } 1568 tap->CURRENT_state = udp->udp_state; 1569 tap->OPT_size = udp_max_optsize; 1570 } 1571 1572 static void 1573 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1574 t_uscalar_t cap_bits1) 1575 { 1576 tcap->CAP_bits1 = 0; 1577 1578 if (cap_bits1 & TC1_INFO) { 1579 udp_copy_info(&tcap->INFO_ack, udp); 1580 tcap->CAP_bits1 |= TC1_INFO; 1581 } 1582 } 1583 1584 /* 1585 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1586 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1587 * udp_g_t_info_ack. The current state of the stream is copied from 1588 * udp_state. 
1589 */ 1590 static void 1591 udp_capability_req(queue_t *q, mblk_t *mp) 1592 { 1593 t_uscalar_t cap_bits1; 1594 struct T_capability_ack *tcap; 1595 udp_t *udp = Q_TO_UDP(q); 1596 1597 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1598 1599 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1600 mp->b_datap->db_type, T_CAPABILITY_ACK); 1601 if (!mp) 1602 return; 1603 1604 tcap = (struct T_capability_ack *)mp->b_rptr; 1605 udp_do_capability_ack(udp, tcap, cap_bits1); 1606 1607 qreply(q, mp); 1608 } 1609 1610 /* 1611 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1612 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1613 * The current state of the stream is copied from udp_state. 1614 */ 1615 static void 1616 udp_info_req(queue_t *q, mblk_t *mp) 1617 { 1618 udp_t *udp = Q_TO_UDP(q); 1619 1620 /* Create a T_INFO_ACK message. */ 1621 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1622 T_INFO_ACK); 1623 if (!mp) 1624 return; 1625 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1626 qreply(q, mp); 1627 } 1628 1629 /* 1630 * IP recognizes seven kinds of bind requests: 1631 * 1632 * - A zero-length address binds only to the protocol number. 1633 * 1634 * - A 4-byte address is treated as a request to 1635 * validate that the address is a valid local IPv4 1636 * address, appropriate for an application to bind to. 1637 * IP does the verification, but does not make any note 1638 * of the address at this time. 1639 * 1640 * - A 16-byte address contains is treated as a request 1641 * to validate a local IPv6 address, as the 4-byte 1642 * address case above. 1643 * 1644 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1645 * use it for the inbound fanout of packets. 1646 * 1647 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1648 * use it for the inbound fanout of packets. 
1649 * 1650 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1651 * information consisting of local and remote addresses 1652 * and ports. In this case, the addresses are both 1653 * validated as appropriate for this operation, and, if 1654 * so, the information is retained for use in the 1655 * inbound fanout. 1656 * 1657 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1658 * fanout information, like the 12-byte case above. 1659 * 1660 * IP will also fill in the IRE request mblk with information 1661 * regarding our peer. In all cases, we notify IP of our protocol 1662 * type by appending a single protocol byte to the bind request. 1663 */ 1664 static mblk_t * 1665 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 1666 { 1667 char *cp; 1668 mblk_t *mp; 1669 struct T_bind_req *tbr; 1670 ipa_conn_t *ac; 1671 ipa6_conn_t *ac6; 1672 sin_t *sin; 1673 sin6_t *sin6; 1674 1675 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1676 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 1677 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1678 if (!mp) 1679 return (mp); 1680 mp->b_datap->db_type = M_PROTO; 1681 tbr = (struct T_bind_req *)mp->b_rptr; 1682 tbr->PRIM_type = bind_prim; 1683 tbr->ADDR_offset = sizeof (*tbr); 1684 tbr->CONIND_number = 0; 1685 tbr->ADDR_length = addr_length; 1686 cp = (char *)&tbr[1]; 1687 switch (addr_length) { 1688 case sizeof (ipa_conn_t): 1689 ASSERT(udp->udp_family == AF_INET); 1690 /* Append a request for an IRE */ 1691 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1692 if (!mp->b_cont) { 1693 freemsg(mp); 1694 return (NULL); 1695 } 1696 mp->b_cont->b_wptr += sizeof (ire_t); 1697 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1698 1699 /* cp known to be 32 bit aligned */ 1700 ac = (ipa_conn_t *)cp; 1701 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 1702 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 1703 ac->ac_fport = udp->udp_dstport; 1704 ac->ac_lport = udp->udp_port; 1705 
break; 1706 1707 case sizeof (ipa6_conn_t): 1708 ASSERT(udp->udp_family == AF_INET6); 1709 /* Append a request for an IRE */ 1710 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1711 if (!mp->b_cont) { 1712 freemsg(mp); 1713 return (NULL); 1714 } 1715 mp->b_cont->b_wptr += sizeof (ire_t); 1716 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1717 1718 /* cp known to be 32 bit aligned */ 1719 ac6 = (ipa6_conn_t *)cp; 1720 ac6->ac6_laddr = udp->udp_v6src; 1721 ac6->ac6_faddr = udp->udp_v6dst; 1722 ac6->ac6_fport = udp->udp_dstport; 1723 ac6->ac6_lport = udp->udp_port; 1724 break; 1725 1726 case sizeof (sin_t): 1727 ASSERT(udp->udp_family == AF_INET); 1728 /* Append a request for an IRE */ 1729 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1730 if (!mp->b_cont) { 1731 freemsg(mp); 1732 return (NULL); 1733 } 1734 mp->b_cont->b_wptr += sizeof (ire_t); 1735 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1736 1737 sin = (sin_t *)cp; 1738 *sin = sin_null; 1739 sin->sin_family = AF_INET; 1740 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 1741 sin->sin_port = udp->udp_port; 1742 break; 1743 1744 case sizeof (sin6_t): 1745 ASSERT(udp->udp_family == AF_INET6); 1746 /* Append a request for an IRE */ 1747 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1748 if (!mp->b_cont) { 1749 freemsg(mp); 1750 return (NULL); 1751 } 1752 mp->b_cont->b_wptr += sizeof (ire_t); 1753 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1754 1755 sin6 = (sin6_t *)cp; 1756 *sin6 = sin6_null; 1757 sin6->sin6_family = AF_INET6; 1758 sin6->sin6_addr = udp->udp_bound_v6src; 1759 sin6->sin6_port = udp->udp_port; 1760 break; 1761 } 1762 /* Add protocol number to end */ 1763 cp[addr_length] = (char)IPPROTO_UDP; 1764 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1765 return (mp); 1766 } 1767 1768 /* For /dev/udp aka AF_INET open */ 1769 static int 1770 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1771 { 1772 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1773 } 
1774 1775 /* For /dev/udp6 aka AF_INET6 open */ 1776 static int 1777 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1778 { 1779 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1780 } 1781 1782 /* 1783 * This is the open routine for udp. It allocates a udp_t structure for 1784 * the stream and, on the first open of the module, creates an ND table. 1785 */ 1786 /*ARGSUSED2*/ 1787 static int 1788 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1789 boolean_t isv6) 1790 { 1791 int error; 1792 udp_t *udp; 1793 conn_t *connp; 1794 dev_t conn_dev; 1795 udp_stack_t *us; 1796 vmem_t *minor_arena; 1797 1798 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 1799 1800 /* If the stream is already open, return immediately. */ 1801 if (q->q_ptr != NULL) 1802 return (0); 1803 1804 if (sflag == MODOPEN) 1805 return (EINVAL); 1806 1807 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1808 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1809 minor_arena = ip_minor_arena_la; 1810 } else { 1811 /* 1812 * Either minor numbers in the large arena were exhausted 1813 * or a non socket application is doing the open. 1814 * Try to allocate from the small arena. 
1815 */ 1816 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1817 return (EBUSY); 1818 1819 minor_arena = ip_minor_arena_sa; 1820 } 1821 1822 if (flag & SO_FALLBACK) { 1823 /* 1824 * Non streams socket needs a stream to fallback to 1825 */ 1826 RD(q)->q_ptr = (void *)conn_dev; 1827 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1828 WR(q)->q_ptr = (void *)minor_arena; 1829 qprocson(q); 1830 return (0); 1831 } 1832 1833 connp = udp_do_open(credp, isv6, KM_SLEEP); 1834 if (connp == NULL) { 1835 inet_minor_free(minor_arena, conn_dev); 1836 return (ENOMEM); 1837 } 1838 udp = connp->conn_udp; 1839 us = udp->udp_us; 1840 1841 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1842 connp->conn_dev = conn_dev; 1843 connp->conn_minor_arena = minor_arena; 1844 1845 /* 1846 * Initialize the udp_t structure for this stream. 1847 */ 1848 q->q_ptr = connp; 1849 WR(q)->q_ptr = connp; 1850 connp->conn_rq = q; 1851 connp->conn_wq = WR(q); 1852 1853 rw_enter(&udp->udp_rwlock, RW_WRITER); 1854 ASSERT(connp->conn_ulp == IPPROTO_UDP); 1855 ASSERT(connp->conn_udp == udp); 1856 ASSERT(udp->udp_connp == connp); 1857 1858 if (flag & SO_SOCKSTR) { 1859 connp->conn_flags |= IPCL_SOCKET; 1860 udp->udp_issocket = B_TRUE; 1861 udp->udp_direct_sockfs = B_TRUE; 1862 } 1863 1864 q->q_hiwat = us->us_recv_hiwat; 1865 WR(q)->q_hiwat = us->us_xmit_hiwat; 1866 WR(q)->q_lowat = us->us_xmit_lowat; 1867 1868 qprocson(q); 1869 1870 if (udp->udp_family == AF_INET6) { 1871 /* Build initial header template for transmit */ 1872 if ((error = udp_build_hdrs(udp)) != 0) { 1873 rw_exit(&udp->udp_rwlock); 1874 qprocsoff(q); 1875 inet_minor_free(minor_arena, conn_dev); 1876 ipcl_conn_destroy(connp); 1877 return (error); 1878 } 1879 } 1880 rw_exit(&udp->udp_rwlock); 1881 1882 /* Set the Stream head write offset and high watermark. */ 1883 (void) proto_set_tx_wroff(q, connp, 1884 udp->udp_max_hdr_len + us->us_wroff_extra); 1885 /* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? 
*/ 1886 (void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat)); 1887 1888 mutex_enter(&connp->conn_lock); 1889 connp->conn_state_flags &= ~CONN_INCIPIENT; 1890 mutex_exit(&connp->conn_lock); 1891 return (0); 1892 } 1893 1894 /* 1895 * Which UDP options OK to set through T_UNITDATA_REQ... 1896 */ 1897 /* ARGSUSED */ 1898 static boolean_t 1899 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1900 { 1901 return (B_TRUE); 1902 } 1903 1904 /* 1905 * This routine gets default values of certain options whose default 1906 * values are maintained by protcol specific code 1907 */ 1908 /* ARGSUSED */ 1909 int 1910 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1911 { 1912 udp_t *udp = Q_TO_UDP(q); 1913 udp_stack_t *us = udp->udp_us; 1914 int *i1 = (int *)ptr; 1915 1916 switch (level) { 1917 case IPPROTO_IP: 1918 switch (name) { 1919 case IP_MULTICAST_TTL: 1920 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1921 return (sizeof (uchar_t)); 1922 case IP_MULTICAST_LOOP: 1923 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1924 return (sizeof (uchar_t)); 1925 } 1926 break; 1927 case IPPROTO_IPV6: 1928 switch (name) { 1929 case IPV6_MULTICAST_HOPS: 1930 *i1 = IP_DEFAULT_MULTICAST_TTL; 1931 return (sizeof (int)); 1932 case IPV6_MULTICAST_LOOP: 1933 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1934 return (sizeof (int)); 1935 case IPV6_UNICAST_HOPS: 1936 *i1 = us->us_ipv6_hoplimit; 1937 return (sizeof (int)); 1938 } 1939 break; 1940 } 1941 return (-1); 1942 } 1943 1944 /* 1945 * This routine retrieves the current status of socket options. 1946 * It returns the size of the option retrieved. 
1947 */ 1948 static int 1949 udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 1950 { 1951 udp_t *udp = connp->conn_udp; 1952 udp_stack_t *us = udp->udp_us; 1953 int *i1 = (int *)ptr; 1954 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 1955 int len; 1956 1957 ASSERT(RW_READ_HELD(&udp->udp_rwlock)); 1958 switch (level) { 1959 case SOL_SOCKET: 1960 switch (name) { 1961 case SO_DEBUG: 1962 *i1 = udp->udp_debug; 1963 break; /* goto sizeof (int) option return */ 1964 case SO_REUSEADDR: 1965 *i1 = udp->udp_reuseaddr; 1966 break; /* goto sizeof (int) option return */ 1967 case SO_TYPE: 1968 *i1 = SOCK_DGRAM; 1969 break; /* goto sizeof (int) option return */ 1970 1971 /* 1972 * The following three items are available here, 1973 * but are only meaningful to IP. 1974 */ 1975 case SO_DONTROUTE: 1976 *i1 = udp->udp_dontroute; 1977 break; /* goto sizeof (int) option return */ 1978 case SO_USELOOPBACK: 1979 *i1 = udp->udp_useloopback; 1980 break; /* goto sizeof (int) option return */ 1981 case SO_BROADCAST: 1982 *i1 = udp->udp_broadcast; 1983 break; /* goto sizeof (int) option return */ 1984 1985 case SO_SNDBUF: 1986 *i1 = udp->udp_xmit_hiwat; 1987 break; /* goto sizeof (int) option return */ 1988 case SO_RCVBUF: 1989 *i1 = udp->udp_rcv_disply_hiwat; 1990 break; /* goto sizeof (int) option return */ 1991 case SO_DGRAM_ERRIND: 1992 *i1 = udp->udp_dgram_errind; 1993 break; /* goto sizeof (int) option return */ 1994 case SO_RECVUCRED: 1995 *i1 = udp->udp_recvucred; 1996 break; /* goto sizeof (int) option return */ 1997 case SO_TIMESTAMP: 1998 *i1 = udp->udp_timestamp; 1999 break; /* goto sizeof (int) option return */ 2000 case SO_ANON_MLP: 2001 *i1 = connp->conn_anon_mlp; 2002 break; /* goto sizeof (int) option return */ 2003 case SO_MAC_EXEMPT: 2004 *i1 = connp->conn_mac_exempt; 2005 break; /* goto sizeof (int) option return */ 2006 case SO_ALLZONES: 2007 *i1 = connp->conn_allzones; 2008 break; /* goto sizeof (int) option return */ 2009 case SO_EXCLBIND: 2010 *i1 = 
udp->udp_exclbind ? SO_EXCLBIND : 0; 2011 break; 2012 case SO_PROTOTYPE: 2013 *i1 = IPPROTO_UDP; 2014 break; 2015 case SO_DOMAIN: 2016 *i1 = udp->udp_family; 2017 break; 2018 default: 2019 return (-1); 2020 } 2021 break; 2022 case IPPROTO_IP: 2023 if (udp->udp_family != AF_INET) 2024 return (-1); 2025 switch (name) { 2026 case IP_OPTIONS: 2027 case T_IP_OPTIONS: 2028 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 2029 if (len > 0) { 2030 bcopy(udp->udp_ip_rcv_options + 2031 udp->udp_label_len, ptr, len); 2032 } 2033 return (len); 2034 case IP_TOS: 2035 case T_IP_TOS: 2036 *i1 = (int)udp->udp_type_of_service; 2037 break; /* goto sizeof (int) option return */ 2038 case IP_TTL: 2039 *i1 = (int)udp->udp_ttl; 2040 break; /* goto sizeof (int) option return */ 2041 case IP_DHCPINIT_IF: 2042 return (-EINVAL); 2043 case IP_NEXTHOP: 2044 case IP_RECVPKTINFO: 2045 /* 2046 * This also handles IP_PKTINFO. 2047 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2048 * Differentiation is based on the size of the argument 2049 * passed in. 2050 * This option is handled in IP which will return an 2051 * error for IP_PKTINFO as it's not supported as a 2052 * sticky option. 
2053 */ 2054 return (-EINVAL); 2055 case IP_MULTICAST_IF: 2056 /* 0 address if not set */ 2057 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 2058 return (sizeof (ipaddr_t)); 2059 case IP_MULTICAST_TTL: 2060 *(uchar_t *)ptr = udp->udp_multicast_ttl; 2061 return (sizeof (uchar_t)); 2062 case IP_MULTICAST_LOOP: 2063 *ptr = connp->conn_multicast_loop; 2064 return (sizeof (uint8_t)); 2065 case IP_RECVOPTS: 2066 *i1 = udp->udp_recvopts; 2067 break; /* goto sizeof (int) option return */ 2068 case IP_RECVDSTADDR: 2069 *i1 = udp->udp_recvdstaddr; 2070 break; /* goto sizeof (int) option return */ 2071 case IP_RECVIF: 2072 *i1 = udp->udp_recvif; 2073 break; /* goto sizeof (int) option return */ 2074 case IP_RECVSLLA: 2075 *i1 = udp->udp_recvslla; 2076 break; /* goto sizeof (int) option return */ 2077 case IP_RECVTTL: 2078 *i1 = udp->udp_recvttl; 2079 break; /* goto sizeof (int) option return */ 2080 case IP_ADD_MEMBERSHIP: 2081 case IP_DROP_MEMBERSHIP: 2082 case IP_BLOCK_SOURCE: 2083 case IP_UNBLOCK_SOURCE: 2084 case IP_ADD_SOURCE_MEMBERSHIP: 2085 case IP_DROP_SOURCE_MEMBERSHIP: 2086 case MCAST_JOIN_GROUP: 2087 case MCAST_LEAVE_GROUP: 2088 case MCAST_BLOCK_SOURCE: 2089 case MCAST_UNBLOCK_SOURCE: 2090 case MCAST_JOIN_SOURCE_GROUP: 2091 case MCAST_LEAVE_SOURCE_GROUP: 2092 /* cannot "get" the value for these */ 2093 return (-1); 2094 case IP_BOUND_IF: 2095 /* Zero if not set */ 2096 *i1 = udp->udp_bound_if; 2097 break; /* goto sizeof (int) option return */ 2098 case IP_UNSPEC_SRC: 2099 *i1 = udp->udp_unspec_source; 2100 break; /* goto sizeof (int) option return */ 2101 case IP_BROADCAST_TTL: 2102 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 2103 return (sizeof (uchar_t)); 2104 default: 2105 return (-1); 2106 } 2107 break; 2108 case IPPROTO_IPV6: 2109 if (udp->udp_family != AF_INET6) 2110 return (-1); 2111 switch (name) { 2112 case IPV6_UNICAST_HOPS: 2113 *i1 = (unsigned int)udp->udp_ttl; 2114 break; /* goto sizeof (int) option return */ 2115 case IPV6_MULTICAST_IF: 2116 /* 0 
index if not set */ 2117 *i1 = udp->udp_multicast_if_index; 2118 break; /* goto sizeof (int) option return */ 2119 case IPV6_MULTICAST_HOPS: 2120 *i1 = udp->udp_multicast_ttl; 2121 break; /* goto sizeof (int) option return */ 2122 case IPV6_MULTICAST_LOOP: 2123 *i1 = connp->conn_multicast_loop; 2124 break; /* goto sizeof (int) option return */ 2125 case IPV6_JOIN_GROUP: 2126 case IPV6_LEAVE_GROUP: 2127 case MCAST_JOIN_GROUP: 2128 case MCAST_LEAVE_GROUP: 2129 case MCAST_BLOCK_SOURCE: 2130 case MCAST_UNBLOCK_SOURCE: 2131 case MCAST_JOIN_SOURCE_GROUP: 2132 case MCAST_LEAVE_SOURCE_GROUP: 2133 /* cannot "get" the value for these */ 2134 return (-1); 2135 case IPV6_BOUND_IF: 2136 /* Zero if not set */ 2137 *i1 = udp->udp_bound_if; 2138 break; /* goto sizeof (int) option return */ 2139 case IPV6_UNSPEC_SRC: 2140 *i1 = udp->udp_unspec_source; 2141 break; /* goto sizeof (int) option return */ 2142 case IPV6_RECVPKTINFO: 2143 *i1 = udp->udp_ip_recvpktinfo; 2144 break; /* goto sizeof (int) option return */ 2145 case IPV6_RECVTCLASS: 2146 *i1 = udp->udp_ipv6_recvtclass; 2147 break; /* goto sizeof (int) option return */ 2148 case IPV6_RECVPATHMTU: 2149 *i1 = udp->udp_ipv6_recvpathmtu; 2150 break; /* goto sizeof (int) option return */ 2151 case IPV6_RECVHOPLIMIT: 2152 *i1 = udp->udp_ipv6_recvhoplimit; 2153 break; /* goto sizeof (int) option return */ 2154 case IPV6_RECVHOPOPTS: 2155 *i1 = udp->udp_ipv6_recvhopopts; 2156 break; /* goto sizeof (int) option return */ 2157 case IPV6_RECVDSTOPTS: 2158 *i1 = udp->udp_ipv6_recvdstopts; 2159 break; /* goto sizeof (int) option return */ 2160 case _OLD_IPV6_RECVDSTOPTS: 2161 *i1 = udp->udp_old_ipv6_recvdstopts; 2162 break; /* goto sizeof (int) option return */ 2163 case IPV6_RECVRTHDRDSTOPTS: 2164 *i1 = udp->udp_ipv6_recvrthdrdstopts; 2165 break; /* goto sizeof (int) option return */ 2166 case IPV6_RECVRTHDR: 2167 *i1 = udp->udp_ipv6_recvrthdr; 2168 break; /* goto sizeof (int) option return */ 2169 case IPV6_PKTINFO: { 2170 /* XXX assumes 
that caller has room for max size! */ 2171 struct in6_pktinfo *pkti; 2172 2173 pkti = (struct in6_pktinfo *)ptr; 2174 if (ipp->ipp_fields & IPPF_IFINDEX) 2175 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2176 else 2177 pkti->ipi6_ifindex = 0; 2178 if (ipp->ipp_fields & IPPF_ADDR) 2179 pkti->ipi6_addr = ipp->ipp_addr; 2180 else 2181 pkti->ipi6_addr = ipv6_all_zeros; 2182 return (sizeof (struct in6_pktinfo)); 2183 } 2184 case IPV6_TCLASS: 2185 if (ipp->ipp_fields & IPPF_TCLASS) 2186 *i1 = ipp->ipp_tclass; 2187 else 2188 *i1 = IPV6_FLOW_TCLASS( 2189 IPV6_DEFAULT_VERS_AND_FLOW); 2190 break; /* goto sizeof (int) option return */ 2191 case IPV6_NEXTHOP: { 2192 sin6_t *sin6 = (sin6_t *)ptr; 2193 2194 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2195 return (0); 2196 *sin6 = sin6_null; 2197 sin6->sin6_family = AF_INET6; 2198 sin6->sin6_addr = ipp->ipp_nexthop; 2199 return (sizeof (sin6_t)); 2200 } 2201 case IPV6_HOPOPTS: 2202 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2203 return (0); 2204 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 2205 return (0); 2206 /* 2207 * The cipso/label option is added by kernel. 2208 * User is not usually aware of this option. 2209 * We copy out the hbh opt after the label option. 
2210 */ 2211 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 2212 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2213 if (udp->udp_label_len_v6 > 0) { 2214 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2215 ptr[1] = (ipp->ipp_hopoptslen - 2216 udp->udp_label_len_v6 + 7) / 8 - 1; 2217 } 2218 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2219 case IPV6_RTHDRDSTOPTS: 2220 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2221 return (0); 2222 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2223 return (ipp->ipp_rtdstoptslen); 2224 case IPV6_RTHDR: 2225 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2226 return (0); 2227 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2228 return (ipp->ipp_rthdrlen); 2229 case IPV6_DSTOPTS: 2230 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 2231 return (0); 2232 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2233 return (ipp->ipp_dstoptslen); 2234 case IPV6_PATHMTU: 2235 return (ip_fill_mtuinfo(&udp->udp_v6dst, 2236 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 2237 us->us_netstack)); 2238 default: 2239 return (-1); 2240 } 2241 break; 2242 case IPPROTO_UDP: 2243 switch (name) { 2244 case UDP_ANONPRIVBIND: 2245 *i1 = udp->udp_anon_priv_bind; 2246 break; 2247 case UDP_EXCLBIND: 2248 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 2249 break; 2250 case UDP_RCVHDR: 2251 *i1 = udp->udp_rcvhdr ? 1 : 0; 2252 break; 2253 case UDP_NAT_T_ENDPOINT: 2254 *i1 = udp->udp_nat_t_endpoint; 2255 break; 2256 default: 2257 return (-1); 2258 } 2259 break; 2260 default: 2261 return (-1); 2262 } 2263 return (sizeof (int)); 2264 } 2265 2266 int 2267 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2268 { 2269 udp_t *udp; 2270 int err; 2271 2272 udp = Q_TO_UDP(q); 2273 2274 rw_enter(&udp->udp_rwlock, RW_READER); 2275 err = udp_opt_get(Q_TO_CONN(q), level, name, ptr); 2276 rw_exit(&udp->udp_rwlock); 2277 return (err); 2278 } 2279 2280 /* 2281 * This routine sets socket options. 
2282 */ 2283 /* ARGSUSED */ 2284 static int 2285 udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2286 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2287 void *thisdg_attrs, boolean_t checkonly) 2288 { 2289 udpattrs_t *attrs = thisdg_attrs; 2290 int *i1 = (int *)invalp; 2291 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2292 udp_t *udp = connp->conn_udp; 2293 udp_stack_t *us = udp->udp_us; 2294 int error; 2295 uint_t newlen; 2296 size_t sth_wroff; 2297 2298 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 2299 /* 2300 * For fixed length options, no sanity check 2301 * of passed in length is done. It is assumed *_optcom_req() 2302 * routines do the right thing. 2303 */ 2304 switch (level) { 2305 case SOL_SOCKET: 2306 switch (name) { 2307 case SO_REUSEADDR: 2308 if (!checkonly) { 2309 udp->udp_reuseaddr = onoff; 2310 PASS_OPT_TO_IP(connp); 2311 } 2312 break; 2313 case SO_DEBUG: 2314 if (!checkonly) 2315 udp->udp_debug = onoff; 2316 break; 2317 /* 2318 * The following three items are available here, 2319 * but are only meaningful to IP. 
2320 */ 2321 case SO_DONTROUTE: 2322 if (!checkonly) { 2323 udp->udp_dontroute = onoff; 2324 PASS_OPT_TO_IP(connp); 2325 } 2326 break; 2327 case SO_USELOOPBACK: 2328 if (!checkonly) { 2329 udp->udp_useloopback = onoff; 2330 PASS_OPT_TO_IP(connp); 2331 } 2332 break; 2333 case SO_BROADCAST: 2334 if (!checkonly) { 2335 udp->udp_broadcast = onoff; 2336 PASS_OPT_TO_IP(connp); 2337 } 2338 break; 2339 2340 case SO_SNDBUF: 2341 if (*i1 > us->us_max_buf) { 2342 *outlenp = 0; 2343 return (ENOBUFS); 2344 } 2345 if (!checkonly) { 2346 udp->udp_xmit_hiwat = *i1; 2347 connp->conn_wq->q_hiwat = *i1; 2348 } 2349 break; 2350 case SO_RCVBUF: 2351 if (*i1 > us->us_max_buf) { 2352 *outlenp = 0; 2353 return (ENOBUFS); 2354 } 2355 if (!checkonly) { 2356 int size; 2357 2358 udp->udp_rcv_disply_hiwat = *i1; 2359 size = udp_set_rcv_hiwat(udp, *i1); 2360 rw_exit(&udp->udp_rwlock); 2361 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2362 size); 2363 rw_enter(&udp->udp_rwlock, RW_WRITER); 2364 } 2365 break; 2366 case SO_DGRAM_ERRIND: 2367 if (!checkonly) 2368 udp->udp_dgram_errind = onoff; 2369 break; 2370 case SO_RECVUCRED: 2371 if (!checkonly) 2372 udp->udp_recvucred = onoff; 2373 break; 2374 case SO_ALLZONES: 2375 /* 2376 * "soft" error (negative) 2377 * option not handled at this level 2378 * Do not modify *outlenp. 
2379 */ 2380 return (-EINVAL); 2381 case SO_TIMESTAMP: 2382 if (!checkonly) 2383 udp->udp_timestamp = onoff; 2384 break; 2385 case SO_ANON_MLP: 2386 if (!checkonly) { 2387 connp->conn_anon_mlp = onoff; 2388 PASS_OPT_TO_IP(connp); 2389 } 2390 break; 2391 case SO_MAC_EXEMPT: 2392 if (secpolicy_net_mac_aware(cr) != 0 || 2393 udp->udp_state != TS_UNBND) 2394 return (EACCES); 2395 if (!checkonly) { 2396 connp->conn_mac_exempt = onoff; 2397 PASS_OPT_TO_IP(connp); 2398 } 2399 break; 2400 case SCM_UCRED: { 2401 struct ucred_s *ucr; 2402 cred_t *cr, *newcr; 2403 ts_label_t *tsl; 2404 2405 /* 2406 * Only sockets that have proper privileges and are 2407 * bound to MLPs will have any other value here, so 2408 * this implicitly tests for privilege to set label. 2409 */ 2410 if (connp->conn_mlp_type == mlptSingle) 2411 break; 2412 ucr = (struct ucred_s *)invalp; 2413 if (inlen != ucredsize || 2414 ucr->uc_labeloff < sizeof (*ucr) || 2415 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 2416 return (EINVAL); 2417 if (!checkonly) { 2418 mblk_t *mb; 2419 pid_t cpid; 2420 2421 if (attrs == NULL || 2422 (mb = attrs->udpattr_mb) == NULL) 2423 return (EINVAL); 2424 if ((cr = msg_getcred(mb, &cpid)) == NULL) 2425 cr = udp->udp_connp->conn_cred; 2426 ASSERT(cr != NULL); 2427 if ((tsl = crgetlabel(cr)) == NULL) 2428 return (EINVAL); 2429 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 2430 tsl->tsl_doi, KM_NOSLEEP); 2431 if (newcr == NULL) 2432 return (ENOSR); 2433 mblk_setcred(mb, newcr, cpid); 2434 attrs->udpattr_credset = B_TRUE; 2435 crfree(newcr); 2436 } 2437 break; 2438 } 2439 case SO_EXCLBIND: 2440 if (!checkonly) 2441 udp->udp_exclbind = onoff; 2442 break; 2443 case SO_RCVTIMEO: 2444 case SO_SNDTIMEO: 2445 /* 2446 * Pass these two options in order for third part 2447 * protocol usage. Here just return directly. 
2448 */ 2449 return (0); 2450 default: 2451 *outlenp = 0; 2452 return (EINVAL); 2453 } 2454 break; 2455 case IPPROTO_IP: 2456 if (udp->udp_family != AF_INET) { 2457 *outlenp = 0; 2458 return (ENOPROTOOPT); 2459 } 2460 switch (name) { 2461 case IP_OPTIONS: 2462 case T_IP_OPTIONS: 2463 /* Save options for use by IP. */ 2464 newlen = inlen + udp->udp_label_len; 2465 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 2466 *outlenp = 0; 2467 return (EINVAL); 2468 } 2469 if (checkonly) 2470 break; 2471 2472 /* 2473 * Update the stored options taking into account 2474 * any CIPSO option which we should not overwrite. 2475 */ 2476 if (!tsol_option_set(&udp->udp_ip_snd_options, 2477 &udp->udp_ip_snd_options_len, 2478 udp->udp_label_len, invalp, inlen)) { 2479 *outlenp = 0; 2480 return (ENOMEM); 2481 } 2482 2483 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2484 UDPH_SIZE + udp->udp_ip_snd_options_len; 2485 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 2486 rw_exit(&udp->udp_rwlock); 2487 (void) proto_set_tx_wroff(connp->conn_rq, connp, 2488 sth_wroff); 2489 rw_enter(&udp->udp_rwlock, RW_WRITER); 2490 break; 2491 2492 case IP_TTL: 2493 if (!checkonly) { 2494 udp->udp_ttl = (uchar_t)*i1; 2495 } 2496 break; 2497 case IP_TOS: 2498 case T_IP_TOS: 2499 if (!checkonly) { 2500 udp->udp_type_of_service = (uchar_t)*i1; 2501 } 2502 break; 2503 case IP_MULTICAST_IF: { 2504 /* 2505 * TODO should check OPTMGMT reply and undo this if 2506 * there is an error. 
2507 */ 2508 struct in_addr *inap = (struct in_addr *)invalp; 2509 if (!checkonly) { 2510 udp->udp_multicast_if_addr = 2511 inap->s_addr; 2512 PASS_OPT_TO_IP(connp); 2513 } 2514 break; 2515 } 2516 case IP_MULTICAST_TTL: 2517 if (!checkonly) 2518 udp->udp_multicast_ttl = *invalp; 2519 break; 2520 case IP_MULTICAST_LOOP: 2521 if (!checkonly) { 2522 connp->conn_multicast_loop = *invalp; 2523 PASS_OPT_TO_IP(connp); 2524 } 2525 break; 2526 case IP_RECVOPTS: 2527 if (!checkonly) 2528 udp->udp_recvopts = onoff; 2529 break; 2530 case IP_RECVDSTADDR: 2531 if (!checkonly) 2532 udp->udp_recvdstaddr = onoff; 2533 break; 2534 case IP_RECVIF: 2535 if (!checkonly) { 2536 udp->udp_recvif = onoff; 2537 PASS_OPT_TO_IP(connp); 2538 } 2539 break; 2540 case IP_RECVSLLA: 2541 if (!checkonly) { 2542 udp->udp_recvslla = onoff; 2543 PASS_OPT_TO_IP(connp); 2544 } 2545 break; 2546 case IP_RECVTTL: 2547 if (!checkonly) 2548 udp->udp_recvttl = onoff; 2549 break; 2550 case IP_PKTINFO: { 2551 /* 2552 * This also handles IP_RECVPKTINFO. 2553 * IP_PKTINFO and IP_RECVPKTINFO have same value. 2554 * Differentiation is based on the size of the 2555 * argument passed in. 2556 */ 2557 struct in_pktinfo *pktinfop; 2558 ip4_pkt_t *attr_pktinfop; 2559 2560 if (checkonly) 2561 break; 2562 2563 if (inlen == sizeof (int)) { 2564 /* 2565 * This is IP_RECVPKTINFO option. 2566 * Keep a local copy of whether this option is 2567 * set or not and pass it down to IP for 2568 * processing. 2569 */ 2570 2571 udp->udp_ip_recvpktinfo = onoff; 2572 return (-EINVAL); 2573 } 2574 2575 if (attrs == NULL || 2576 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 2577 /* 2578 * sticky option or no buffer to return 2579 * the results. 
2580 */ 2581 return (EINVAL); 2582 } 2583 2584 if (inlen != sizeof (struct in_pktinfo)) 2585 return (EINVAL); 2586 2587 pktinfop = (struct in_pktinfo *)invalp; 2588 2589 /* 2590 * At least one of the values should be specified 2591 */ 2592 if (pktinfop->ipi_ifindex == 0 && 2593 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2594 return (EINVAL); 2595 } 2596 2597 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2598 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2599 2600 break; 2601 } 2602 case IP_ADD_MEMBERSHIP: 2603 case IP_DROP_MEMBERSHIP: 2604 case IP_BLOCK_SOURCE: 2605 case IP_UNBLOCK_SOURCE: 2606 case IP_ADD_SOURCE_MEMBERSHIP: 2607 case IP_DROP_SOURCE_MEMBERSHIP: 2608 case MCAST_JOIN_GROUP: 2609 case MCAST_LEAVE_GROUP: 2610 case MCAST_BLOCK_SOURCE: 2611 case MCAST_UNBLOCK_SOURCE: 2612 case MCAST_JOIN_SOURCE_GROUP: 2613 case MCAST_LEAVE_SOURCE_GROUP: 2614 case IP_SEC_OPT: 2615 case IP_NEXTHOP: 2616 case IP_DHCPINIT_IF: 2617 /* 2618 * "soft" error (negative) 2619 * option not handled at this level 2620 * Do not modify *outlenp. 
2621 */ 2622 return (-EINVAL); 2623 case IP_BOUND_IF: 2624 if (!checkonly) { 2625 udp->udp_bound_if = *i1; 2626 PASS_OPT_TO_IP(connp); 2627 } 2628 break; 2629 case IP_UNSPEC_SRC: 2630 if (!checkonly) { 2631 udp->udp_unspec_source = onoff; 2632 PASS_OPT_TO_IP(connp); 2633 } 2634 break; 2635 case IP_BROADCAST_TTL: 2636 if (!checkonly) 2637 connp->conn_broadcast_ttl = *invalp; 2638 break; 2639 default: 2640 *outlenp = 0; 2641 return (EINVAL); 2642 } 2643 break; 2644 case IPPROTO_IPV6: { 2645 ip6_pkt_t *ipp; 2646 boolean_t sticky; 2647 2648 if (udp->udp_family != AF_INET6) { 2649 *outlenp = 0; 2650 return (ENOPROTOOPT); 2651 } 2652 /* 2653 * Deal with both sticky options and ancillary data 2654 */ 2655 sticky = B_FALSE; 2656 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 2657 NULL) { 2658 /* sticky options, or none */ 2659 ipp = &udp->udp_sticky_ipp; 2660 sticky = B_TRUE; 2661 } 2662 2663 switch (name) { 2664 case IPV6_MULTICAST_IF: 2665 if (!checkonly) { 2666 udp->udp_multicast_if_index = *i1; 2667 PASS_OPT_TO_IP(connp); 2668 } 2669 break; 2670 case IPV6_UNICAST_HOPS: 2671 /* -1 means use default */ 2672 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2673 *outlenp = 0; 2674 return (EINVAL); 2675 } 2676 if (!checkonly) { 2677 if (*i1 == -1) { 2678 udp->udp_ttl = ipp->ipp_unicast_hops = 2679 us->us_ipv6_hoplimit; 2680 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2681 /* Pass modified value to IP. 
*/ 2682 *i1 = udp->udp_ttl; 2683 } else { 2684 udp->udp_ttl = ipp->ipp_unicast_hops = 2685 (uint8_t)*i1; 2686 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2687 } 2688 /* Rebuild the header template */ 2689 error = udp_build_hdrs(udp); 2690 if (error != 0) { 2691 *outlenp = 0; 2692 return (error); 2693 } 2694 } 2695 break; 2696 case IPV6_MULTICAST_HOPS: 2697 /* -1 means use default */ 2698 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2699 *outlenp = 0; 2700 return (EINVAL); 2701 } 2702 if (!checkonly) { 2703 if (*i1 == -1) { 2704 udp->udp_multicast_ttl = 2705 ipp->ipp_multicast_hops = 2706 IP_DEFAULT_MULTICAST_TTL; 2707 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2708 /* Pass modified value to IP. */ 2709 *i1 = udp->udp_multicast_ttl; 2710 } else { 2711 udp->udp_multicast_ttl = 2712 ipp->ipp_multicast_hops = 2713 (uint8_t)*i1; 2714 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2715 } 2716 } 2717 break; 2718 case IPV6_MULTICAST_LOOP: 2719 if (*i1 != 0 && *i1 != 1) { 2720 *outlenp = 0; 2721 return (EINVAL); 2722 } 2723 if (!checkonly) { 2724 connp->conn_multicast_loop = *i1; 2725 PASS_OPT_TO_IP(connp); 2726 } 2727 break; 2728 case IPV6_JOIN_GROUP: 2729 case IPV6_LEAVE_GROUP: 2730 case MCAST_JOIN_GROUP: 2731 case MCAST_LEAVE_GROUP: 2732 case MCAST_BLOCK_SOURCE: 2733 case MCAST_UNBLOCK_SOURCE: 2734 case MCAST_JOIN_SOURCE_GROUP: 2735 case MCAST_LEAVE_SOURCE_GROUP: 2736 /* 2737 * "soft" error (negative) 2738 * option not handled at this level 2739 * Note: Do not modify *outlenp 2740 */ 2741 return (-EINVAL); 2742 case IPV6_BOUND_IF: 2743 if (!checkonly) { 2744 udp->udp_bound_if = *i1; 2745 PASS_OPT_TO_IP(connp); 2746 } 2747 break; 2748 case IPV6_UNSPEC_SRC: 2749 if (!checkonly) { 2750 udp->udp_unspec_source = onoff; 2751 PASS_OPT_TO_IP(connp); 2752 } 2753 break; 2754 /* 2755 * Set boolean switches for ancillary data delivery 2756 */ 2757 case IPV6_RECVPKTINFO: 2758 if (!checkonly) { 2759 udp->udp_ip_recvpktinfo = onoff; 2760 PASS_OPT_TO_IP(connp); 2761 } 2762 break; 2763 case IPV6_RECVTCLASS: 
2764 if (!checkonly) { 2765 udp->udp_ipv6_recvtclass = onoff; 2766 PASS_OPT_TO_IP(connp); 2767 } 2768 break; 2769 case IPV6_RECVPATHMTU: 2770 if (!checkonly) { 2771 udp->udp_ipv6_recvpathmtu = onoff; 2772 PASS_OPT_TO_IP(connp); 2773 } 2774 break; 2775 case IPV6_RECVHOPLIMIT: 2776 if (!checkonly) { 2777 udp->udp_ipv6_recvhoplimit = onoff; 2778 PASS_OPT_TO_IP(connp); 2779 } 2780 break; 2781 case IPV6_RECVHOPOPTS: 2782 if (!checkonly) { 2783 udp->udp_ipv6_recvhopopts = onoff; 2784 PASS_OPT_TO_IP(connp); 2785 } 2786 break; 2787 case IPV6_RECVDSTOPTS: 2788 if (!checkonly) { 2789 udp->udp_ipv6_recvdstopts = onoff; 2790 PASS_OPT_TO_IP(connp); 2791 } 2792 break; 2793 case _OLD_IPV6_RECVDSTOPTS: 2794 if (!checkonly) 2795 udp->udp_old_ipv6_recvdstopts = onoff; 2796 break; 2797 case IPV6_RECVRTHDRDSTOPTS: 2798 if (!checkonly) { 2799 udp->udp_ipv6_recvrthdrdstopts = onoff; 2800 PASS_OPT_TO_IP(connp); 2801 } 2802 break; 2803 case IPV6_RECVRTHDR: 2804 if (!checkonly) { 2805 udp->udp_ipv6_recvrthdr = onoff; 2806 PASS_OPT_TO_IP(connp); 2807 } 2808 break; 2809 /* 2810 * Set sticky options or ancillary data. 2811 * If sticky options, (re)build any extension headers 2812 * that might be needed as a result. 2813 */ 2814 case IPV6_PKTINFO: 2815 /* 2816 * The source address and ifindex are verified 2817 * in ip_opt_set(). For ancillary data the 2818 * source address is checked in ip_wput_v6. 
2819 */ 2820 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2821 return (EINVAL); 2822 if (checkonly) 2823 break; 2824 2825 if (inlen == 0) { 2826 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2827 ipp->ipp_sticky_ignored |= 2828 (IPPF_IFINDEX|IPPF_ADDR); 2829 } else { 2830 struct in6_pktinfo *pkti; 2831 2832 pkti = (struct in6_pktinfo *)invalp; 2833 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2834 ipp->ipp_addr = pkti->ipi6_addr; 2835 if (ipp->ipp_ifindex != 0) 2836 ipp->ipp_fields |= IPPF_IFINDEX; 2837 else 2838 ipp->ipp_fields &= ~IPPF_IFINDEX; 2839 if (!IN6_IS_ADDR_UNSPECIFIED( 2840 &ipp->ipp_addr)) 2841 ipp->ipp_fields |= IPPF_ADDR; 2842 else 2843 ipp->ipp_fields &= ~IPPF_ADDR; 2844 } 2845 if (sticky) { 2846 error = udp_build_hdrs(udp); 2847 if (error != 0) 2848 return (error); 2849 PASS_OPT_TO_IP(connp); 2850 } 2851 break; 2852 case IPV6_HOPLIMIT: 2853 if (sticky) 2854 return (EINVAL); 2855 if (inlen != 0 && inlen != sizeof (int)) 2856 return (EINVAL); 2857 if (checkonly) 2858 break; 2859 2860 if (inlen == 0) { 2861 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2862 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2863 } else { 2864 if (*i1 > 255 || *i1 < -1) 2865 return (EINVAL); 2866 if (*i1 == -1) 2867 ipp->ipp_hoplimit = 2868 us->us_ipv6_hoplimit; 2869 else 2870 ipp->ipp_hoplimit = *i1; 2871 ipp->ipp_fields |= IPPF_HOPLIMIT; 2872 } 2873 break; 2874 case IPV6_TCLASS: 2875 if (inlen != 0 && inlen != sizeof (int)) 2876 return (EINVAL); 2877 if (checkonly) 2878 break; 2879 2880 if (inlen == 0) { 2881 ipp->ipp_fields &= ~IPPF_TCLASS; 2882 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2883 } else { 2884 if (*i1 > 255 || *i1 < -1) 2885 return (EINVAL); 2886 if (*i1 == -1) 2887 ipp->ipp_tclass = 0; 2888 else 2889 ipp->ipp_tclass = *i1; 2890 ipp->ipp_fields |= IPPF_TCLASS; 2891 } 2892 if (sticky) { 2893 error = udp_build_hdrs(udp); 2894 if (error != 0) 2895 return (error); 2896 } 2897 break; 2898 case IPV6_NEXTHOP: 2899 /* 2900 * IP will verify that the nexthop is reachable 2901 * 
and fail for sticky options. 2902 */ 2903 if (inlen != 0 && inlen != sizeof (sin6_t)) 2904 return (EINVAL); 2905 if (checkonly) 2906 break; 2907 2908 if (inlen == 0) { 2909 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2910 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2911 } else { 2912 sin6_t *sin6 = (sin6_t *)invalp; 2913 2914 if (sin6->sin6_family != AF_INET6) { 2915 return (EAFNOSUPPORT); 2916 } 2917 if (IN6_IS_ADDR_V4MAPPED( 2918 &sin6->sin6_addr)) 2919 return (EADDRNOTAVAIL); 2920 ipp->ipp_nexthop = sin6->sin6_addr; 2921 if (!IN6_IS_ADDR_UNSPECIFIED( 2922 &ipp->ipp_nexthop)) 2923 ipp->ipp_fields |= IPPF_NEXTHOP; 2924 else 2925 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2926 } 2927 if (sticky) { 2928 error = udp_build_hdrs(udp); 2929 if (error != 0) 2930 return (error); 2931 PASS_OPT_TO_IP(connp); 2932 } 2933 break; 2934 case IPV6_HOPOPTS: { 2935 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2936 /* 2937 * Sanity checks - minimum size, size a multiple of 2938 * eight bytes, and matching size passed in. 2939 */ 2940 if (inlen != 0 && 2941 inlen != (8 * (hopts->ip6h_len + 1))) 2942 return (EINVAL); 2943 2944 if (checkonly) 2945 break; 2946 2947 error = optcom_pkt_set(invalp, inlen, sticky, 2948 (uchar_t **)&ipp->ipp_hopopts, 2949 &ipp->ipp_hopoptslen, 2950 sticky ? udp->udp_label_len_v6 : 0); 2951 if (error != 0) 2952 return (error); 2953 if (ipp->ipp_hopoptslen == 0) { 2954 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2955 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2956 } else { 2957 ipp->ipp_fields |= IPPF_HOPOPTS; 2958 } 2959 if (sticky) { 2960 error = udp_build_hdrs(udp); 2961 if (error != 0) 2962 return (error); 2963 } 2964 break; 2965 } 2966 case IPV6_RTHDRDSTOPTS: { 2967 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2968 2969 /* 2970 * Sanity checks - minimum size, size a multiple of 2971 * eight bytes, and matching size passed in. 
2972 */ 2973 if (inlen != 0 && 2974 inlen != (8 * (dopts->ip6d_len + 1))) 2975 return (EINVAL); 2976 2977 if (checkonly) 2978 break; 2979 2980 if (inlen == 0) { 2981 if (sticky && 2982 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2983 kmem_free(ipp->ipp_rtdstopts, 2984 ipp->ipp_rtdstoptslen); 2985 ipp->ipp_rtdstopts = NULL; 2986 ipp->ipp_rtdstoptslen = 0; 2987 } 2988 2989 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2990 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2991 } else { 2992 error = optcom_pkt_set(invalp, inlen, sticky, 2993 (uchar_t **)&ipp->ipp_rtdstopts, 2994 &ipp->ipp_rtdstoptslen, 0); 2995 if (error != 0) 2996 return (error); 2997 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2998 } 2999 if (sticky) { 3000 error = udp_build_hdrs(udp); 3001 if (error != 0) 3002 return (error); 3003 } 3004 break; 3005 } 3006 case IPV6_DSTOPTS: { 3007 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3008 3009 /* 3010 * Sanity checks - minimum size, size a multiple of 3011 * eight bytes, and matching size passed in. 3012 */ 3013 if (inlen != 0 && 3014 inlen != (8 * (dopts->ip6d_len + 1))) 3015 return (EINVAL); 3016 3017 if (checkonly) 3018 break; 3019 3020 if (inlen == 0) { 3021 if (sticky && 3022 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3023 kmem_free(ipp->ipp_dstopts, 3024 ipp->ipp_dstoptslen); 3025 ipp->ipp_dstopts = NULL; 3026 ipp->ipp_dstoptslen = 0; 3027 } 3028 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3029 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3030 } else { 3031 error = optcom_pkt_set(invalp, inlen, sticky, 3032 (uchar_t **)&ipp->ipp_dstopts, 3033 &ipp->ipp_dstoptslen, 0); 3034 if (error != 0) 3035 return (error); 3036 ipp->ipp_fields |= IPPF_DSTOPTS; 3037 } 3038 if (sticky) { 3039 error = udp_build_hdrs(udp); 3040 if (error != 0) 3041 return (error); 3042 } 3043 break; 3044 } 3045 case IPV6_RTHDR: { 3046 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3047 3048 /* 3049 * Sanity checks - minimum size, size a multiple of 3050 * eight bytes, and matching size passed in. 
3051 */ 3052 if (inlen != 0 && 3053 inlen != (8 * (rt->ip6r_len + 1))) 3054 return (EINVAL); 3055 3056 if (checkonly) 3057 break; 3058 3059 if (inlen == 0) { 3060 if (sticky && 3061 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3062 kmem_free(ipp->ipp_rthdr, 3063 ipp->ipp_rthdrlen); 3064 ipp->ipp_rthdr = NULL; 3065 ipp->ipp_rthdrlen = 0; 3066 } 3067 ipp->ipp_fields &= ~IPPF_RTHDR; 3068 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3069 } else { 3070 error = optcom_pkt_set(invalp, inlen, sticky, 3071 (uchar_t **)&ipp->ipp_rthdr, 3072 &ipp->ipp_rthdrlen, 0); 3073 if (error != 0) 3074 return (error); 3075 ipp->ipp_fields |= IPPF_RTHDR; 3076 } 3077 if (sticky) { 3078 error = udp_build_hdrs(udp); 3079 if (error != 0) 3080 return (error); 3081 } 3082 break; 3083 } 3084 3085 case IPV6_DONTFRAG: 3086 if (checkonly) 3087 break; 3088 3089 if (onoff) { 3090 ipp->ipp_fields |= IPPF_DONTFRAG; 3091 } else { 3092 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3093 } 3094 break; 3095 3096 case IPV6_USE_MIN_MTU: 3097 if (inlen != sizeof (int)) 3098 return (EINVAL); 3099 3100 if (*i1 < -1 || *i1 > 1) 3101 return (EINVAL); 3102 3103 if (checkonly) 3104 break; 3105 3106 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3107 ipp->ipp_use_min_mtu = *i1; 3108 break; 3109 3110 case IPV6_SEC_OPT: 3111 case IPV6_SRC_PREFERENCES: 3112 case IPV6_V6ONLY: 3113 /* Handled at the IP level */ 3114 return (-EINVAL); 3115 default: 3116 *outlenp = 0; 3117 return (EINVAL); 3118 } 3119 break; 3120 } /* end IPPROTO_IPV6 */ 3121 case IPPROTO_UDP: 3122 switch (name) { 3123 case UDP_ANONPRIVBIND: 3124 if ((error = secpolicy_net_privaddr(cr, 0, 3125 IPPROTO_UDP)) != 0) { 3126 *outlenp = 0; 3127 return (error); 3128 } 3129 if (!checkonly) { 3130 udp->udp_anon_priv_bind = onoff; 3131 } 3132 break; 3133 case UDP_EXCLBIND: 3134 if (!checkonly) 3135 udp->udp_exclbind = onoff; 3136 break; 3137 case UDP_RCVHDR: 3138 if (!checkonly) 3139 udp->udp_rcvhdr = onoff; 3140 break; 3141 case UDP_NAT_T_ENDPOINT: 3142 if ((error = secpolicy_ip_config(cr, 
B_FALSE)) != 0) { 3143 *outlenp = 0; 3144 return (error); 3145 } 3146 3147 /* 3148 * Use udp_family instead so we can avoid ambiguitites 3149 * with AF_INET6 sockets that may switch from IPv4 3150 * to IPv6. 3151 */ 3152 if (udp->udp_family != AF_INET) { 3153 *outlenp = 0; 3154 return (EAFNOSUPPORT); 3155 } 3156 3157 if (!checkonly) { 3158 int size; 3159 3160 udp->udp_nat_t_endpoint = onoff; 3161 3162 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3163 UDPH_SIZE + udp->udp_ip_snd_options_len; 3164 3165 /* Also, adjust wroff */ 3166 if (onoff) { 3167 udp->udp_max_hdr_len += 3168 sizeof (uint32_t); 3169 } 3170 size = udp->udp_max_hdr_len + 3171 us->us_wroff_extra; 3172 (void) proto_set_tx_wroff(connp->conn_rq, connp, 3173 size); 3174 } 3175 break; 3176 default: 3177 *outlenp = 0; 3178 return (EINVAL); 3179 } 3180 break; 3181 default: 3182 *outlenp = 0; 3183 return (EINVAL); 3184 } 3185 /* 3186 * Common case of OK return with outval same as inval. 3187 */ 3188 if (invalp != outvalp) { 3189 /* don't trust bcopy for identical src/dst */ 3190 (void) bcopy(invalp, outvalp, inlen); 3191 } 3192 *outlenp = inlen; 3193 return (0); 3194 } 3195 3196 int 3197 udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3198 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3199 void *thisdg_attrs, cred_t *cr) 3200 { 3201 int error; 3202 boolean_t checkonly; 3203 3204 error = 0; 3205 switch (optset_context) { 3206 case SETFN_OPTCOM_CHECKONLY: 3207 checkonly = B_TRUE; 3208 /* 3209 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3210 * inlen != 0 implies value supplied and 3211 * we have to "pretend" to set it. 3212 * inlen == 0 implies that there is no 3213 * value part in T_CHECK request and just validation 3214 * done elsewhere should be enough, we just return here. 
3215 */ 3216 if (inlen == 0) { 3217 *outlenp = 0; 3218 goto done; 3219 } 3220 break; 3221 case SETFN_OPTCOM_NEGOTIATE: 3222 checkonly = B_FALSE; 3223 break; 3224 case SETFN_UD_NEGOTIATE: 3225 case SETFN_CONN_NEGOTIATE: 3226 checkonly = B_FALSE; 3227 /* 3228 * Negotiating local and "association-related" options 3229 * through T_UNITDATA_REQ. 3230 * 3231 * Following routine can filter out ones we do not 3232 * want to be "set" this way. 3233 */ 3234 if (!udp_opt_allow_udr_set(level, name)) { 3235 *outlenp = 0; 3236 error = EINVAL; 3237 goto done; 3238 } 3239 break; 3240 default: 3241 /* 3242 * We should never get here 3243 */ 3244 *outlenp = 0; 3245 error = EINVAL; 3246 goto done; 3247 } 3248 3249 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3250 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3251 3252 error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3253 outvalp, cr, thisdg_attrs, checkonly); 3254 done: 3255 return (error); 3256 } 3257 3258 /* ARGSUSED */ 3259 int 3260 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3261 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3262 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3263 { 3264 conn_t *connp = Q_TO_CONN(q); 3265 int error; 3266 udp_t *udp = connp->conn_udp; 3267 3268 rw_enter(&udp->udp_rwlock, RW_WRITER); 3269 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 3270 outlenp, outvalp, thisdg_attrs, cr); 3271 rw_exit(&udp->udp_rwlock); 3272 return (error); 3273 } 3274 3275 /* 3276 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3277 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3278 * headers, and the udp header. 3279 * Returns failure if can't allocate memory. 
3280 */ 3281 static int 3282 udp_build_hdrs(udp_t *udp) 3283 { 3284 udp_stack_t *us = udp->udp_us; 3285 uchar_t *hdrs; 3286 uint_t hdrs_len; 3287 ip6_t *ip6h; 3288 ip6i_t *ip6i; 3289 udpha_t *udpha; 3290 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3291 size_t sth_wroff; 3292 conn_t *connp = udp->udp_connp; 3293 3294 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 3295 ASSERT(connp != NULL); 3296 3297 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3298 ASSERT(hdrs_len != 0); 3299 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3300 /* Need to reallocate */ 3301 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3302 if (hdrs == NULL) 3303 return (ENOMEM); 3304 3305 if (udp->udp_sticky_hdrs_len != 0) { 3306 kmem_free(udp->udp_sticky_hdrs, 3307 udp->udp_sticky_hdrs_len); 3308 } 3309 udp->udp_sticky_hdrs = hdrs; 3310 udp->udp_sticky_hdrs_len = hdrs_len; 3311 } 3312 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3313 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3314 3315 /* Set header fields not in ipp */ 3316 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3317 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3318 ip6h = (ip6_t *)&ip6i[1]; 3319 } else { 3320 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3321 } 3322 3323 if (!(ipp->ipp_fields & IPPF_ADDR)) 3324 ip6h->ip6_src = udp->udp_v6src; 3325 3326 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3327 udpha->uha_src_port = udp->udp_port; 3328 3329 /* Try to get everything in a single mblk */ 3330 if (hdrs_len > udp->udp_max_hdr_len) { 3331 udp->udp_max_hdr_len = hdrs_len; 3332 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3333 rw_exit(&udp->udp_rwlock); 3334 (void) proto_set_tx_wroff(udp->udp_connp->conn_rq, 3335 udp->udp_connp, sth_wroff); 3336 rw_enter(&udp->udp_rwlock, RW_WRITER); 3337 } 3338 return (0); 3339 } 3340 3341 /* 3342 * This routine retrieves the value of an ND variable in a udpparam_t 3343 * structure. It is called through nd_getset when a user reads the 3344 * variable. 
3345 */ 3346 /* ARGSUSED */ 3347 static int 3348 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3349 { 3350 udpparam_t *udppa = (udpparam_t *)cp; 3351 3352 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 3353 return (0); 3354 } 3355 3356 /* 3357 * Walk through the param array specified registering each element with the 3358 * named dispatch (ND) handler. 3359 */ 3360 static boolean_t 3361 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3362 { 3363 for (; cnt-- > 0; udppa++) { 3364 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3365 if (!nd_load(ndp, udppa->udp_param_name, 3366 udp_param_get, udp_param_set, 3367 (caddr_t)udppa)) { 3368 nd_free(ndp); 3369 return (B_FALSE); 3370 } 3371 } 3372 } 3373 if (!nd_load(ndp, "udp_extra_priv_ports", 3374 udp_extra_priv_ports_get, NULL, NULL)) { 3375 nd_free(ndp); 3376 return (B_FALSE); 3377 } 3378 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3379 NULL, udp_extra_priv_ports_add, NULL)) { 3380 nd_free(ndp); 3381 return (B_FALSE); 3382 } 3383 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3384 NULL, udp_extra_priv_ports_del, NULL)) { 3385 nd_free(ndp); 3386 return (B_FALSE); 3387 } 3388 return (B_TRUE); 3389 } 3390 3391 /* This routine sets an ND variable in a udpparam_t structure. */ 3392 /* ARGSUSED */ 3393 static int 3394 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3395 { 3396 long new_value; 3397 udpparam_t *udppa = (udpparam_t *)cp; 3398 3399 /* 3400 * Fail the request if the new value does not lie within the 3401 * required bounds. 3402 */ 3403 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3404 new_value < udppa->udp_param_min || 3405 new_value > udppa->udp_param_max) { 3406 return (EINVAL); 3407 } 3408 3409 /* Set the new value */ 3410 udppa->udp_param_value = new_value; 3411 return (0); 3412 } 3413 3414 /* 3415 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 3416 * T_opthdr) and return the number of bytes copied. 
'dbuf' may be NULL to 3417 * just count the length needed for allocation. If 'dbuf' is non-NULL, 3418 * then it's assumed to be allocated to be large enough. 3419 * 3420 * Returns zero if trimming of the security option causes all options to go 3421 * away. 3422 */ 3423 static size_t 3424 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 3425 { 3426 struct T_opthdr *toh; 3427 size_t hol = ipp->ipp_hopoptslen; 3428 ip6_hbh_t *dstopt = NULL; 3429 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 3430 size_t tlen, olen, plen; 3431 boolean_t deleting; 3432 const struct ip6_opt *sopt, *lastpad; 3433 struct ip6_opt *dopt; 3434 3435 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 3436 toh->level = IPPROTO_IPV6; 3437 toh->name = IPV6_HOPOPTS; 3438 toh->status = 0; 3439 dstopt = (ip6_hbh_t *)(toh + 1); 3440 } 3441 3442 /* 3443 * If labeling is enabled, then skip the label option 3444 * but get other options if there are any. 3445 */ 3446 if (is_system_labeled()) { 3447 dopt = NULL; 3448 if (dstopt != NULL) { 3449 /* will fill in ip6h_len later */ 3450 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 3451 dopt = (struct ip6_opt *)(dstopt + 1); 3452 } 3453 sopt = (const struct ip6_opt *)(srcopt + 1); 3454 hol -= sizeof (*srcopt); 3455 tlen = sizeof (*dstopt); 3456 lastpad = NULL; 3457 deleting = B_FALSE; 3458 /* 3459 * This loop finds the first (lastpad pointer) of any number of 3460 * pads that preceeds the security option, then treats the 3461 * security option as though it were a pad, and then finds the 3462 * next non-pad option (or end of list). 3463 * 3464 * It then treats the entire block as one big pad. To preserve 3465 * alignment of any options that follow, or just the end of the 3466 * list, it computes a minimal new padding size that keeps the 3467 * same alignment for the next option. 3468 * 3469 * If it encounters just a sequence of pads with no security 3470 * option, those are copied as-is rather than collapsed. 
3471 * 3472 * Note that to handle the end of list case, the code makes one 3473 * loop with 'hol' set to zero. 3474 */ 3475 for (;;) { 3476 if (hol > 0) { 3477 if (sopt->ip6o_type == IP6OPT_PAD1) { 3478 if (lastpad == NULL) 3479 lastpad = sopt; 3480 sopt = (const struct ip6_opt *) 3481 &sopt->ip6o_len; 3482 hol--; 3483 continue; 3484 } 3485 olen = sopt->ip6o_len + sizeof (*sopt); 3486 if (olen > hol) 3487 olen = hol; 3488 if (sopt->ip6o_type == IP6OPT_PADN || 3489 sopt->ip6o_type == ip6opt_ls) { 3490 if (sopt->ip6o_type == ip6opt_ls) 3491 deleting = B_TRUE; 3492 if (lastpad == NULL) 3493 lastpad = sopt; 3494 sopt = (const struct ip6_opt *) 3495 ((const char *)sopt + olen); 3496 hol -= olen; 3497 continue; 3498 } 3499 } else { 3500 /* if nothing was copied at all, then delete */ 3501 if (tlen == sizeof (*dstopt)) 3502 return (0); 3503 /* last pass; pick up any trailing padding */ 3504 olen = 0; 3505 } 3506 if (deleting) { 3507 /* 3508 * compute aligning effect of deleted material 3509 * to reproduce with pad. 
3510 */ 3511 plen = ((const char *)sopt - 3512 (const char *)lastpad) & 7; 3513 tlen += plen; 3514 if (dopt != NULL) { 3515 if (plen == 1) { 3516 dopt->ip6o_type = IP6OPT_PAD1; 3517 } else if (plen > 1) { 3518 plen -= sizeof (*dopt); 3519 dopt->ip6o_type = IP6OPT_PADN; 3520 dopt->ip6o_len = plen; 3521 if (plen > 0) 3522 bzero(dopt + 1, plen); 3523 } 3524 dopt = (struct ip6_opt *) 3525 ((char *)dopt + plen); 3526 } 3527 deleting = B_FALSE; 3528 lastpad = NULL; 3529 } 3530 /* if there's uncopied padding, then copy that now */ 3531 if (lastpad != NULL) { 3532 olen += (const char *)sopt - 3533 (const char *)lastpad; 3534 sopt = lastpad; 3535 lastpad = NULL; 3536 } 3537 if (dopt != NULL && olen > 0) { 3538 bcopy(sopt, dopt, olen); 3539 dopt = (struct ip6_opt *)((char *)dopt + olen); 3540 } 3541 if (hol == 0) 3542 break; 3543 tlen += olen; 3544 sopt = (const struct ip6_opt *) 3545 ((const char *)sopt + olen); 3546 hol -= olen; 3547 } 3548 /* go back and patch up the length value, rounded upward */ 3549 if (dstopt != NULL) 3550 dstopt->ip6h_len = (tlen - 1) >> 3; 3551 } else { 3552 tlen = hol; 3553 if (dstopt != NULL) 3554 bcopy(srcopt, dstopt, hol); 3555 } 3556 3557 tlen += sizeof (*toh); 3558 if (toh != NULL) 3559 toh->len = tlen; 3560 3561 return (tlen); 3562 } 3563 3564 /* 3565 * Update udp_rcv_opt_len from the packet. 3566 * Called when options received, and when no options received but 3567 * udp_ip_recv_opt_len has previously recorded options. 
3568 */ 3569 static void 3570 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) 3571 { 3572 /* Save the options if any */ 3573 if (opt_len > 0) { 3574 if (opt_len > udp->udp_ip_rcv_options_len) { 3575 /* Need to allocate larger buffer */ 3576 if (udp->udp_ip_rcv_options_len != 0) 3577 mi_free((char *)udp->udp_ip_rcv_options); 3578 udp->udp_ip_rcv_options_len = 0; 3579 udp->udp_ip_rcv_options = 3580 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 3581 if (udp->udp_ip_rcv_options != NULL) 3582 udp->udp_ip_rcv_options_len = opt_len; 3583 } 3584 if (udp->udp_ip_rcv_options_len != 0) { 3585 bcopy(opt, udp->udp_ip_rcv_options, opt_len); 3586 /* Adjust length if we are resusing the space */ 3587 udp->udp_ip_rcv_options_len = opt_len; 3588 } 3589 } else if (udp->udp_ip_rcv_options_len != 0) { 3590 /* Clear out previously recorded options */ 3591 mi_free((char *)udp->udp_ip_rcv_options); 3592 udp->udp_ip_rcv_options = NULL; 3593 udp->udp_ip_rcv_options_len = 0; 3594 } 3595 } 3596 3597 static mblk_t * 3598 udp_queue_fallback(udp_t *udp, mblk_t *mp) 3599 { 3600 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 3601 if (IPCL_IS_NONSTR(udp->udp_connp)) { 3602 /* 3603 * fallback has started but messages have not been moved yet 3604 */ 3605 if (udp->udp_fallback_queue_head == NULL) { 3606 ASSERT(udp->udp_fallback_queue_tail == NULL); 3607 udp->udp_fallback_queue_head = mp; 3608 udp->udp_fallback_queue_tail = mp; 3609 } else { 3610 ASSERT(udp->udp_fallback_queue_tail != NULL); 3611 udp->udp_fallback_queue_tail->b_next = mp; 3612 udp->udp_fallback_queue_tail = mp; 3613 } 3614 return (NULL); 3615 } else { 3616 /* 3617 * Fallback completed, let the caller putnext() the mblk. 3618 */ 3619 return (mp); 3620 } 3621 } 3622 3623 /* 3624 * Deliver data to ULP. In case we have a socket, and it's falling back to 3625 * TPI, then we'll queue the mp for later processing. 
3626 */ 3627 static void 3628 udp_ulp_recv(conn_t *connp, mblk_t *mp) 3629 { 3630 if (IPCL_IS_NONSTR(connp)) { 3631 udp_t *udp = connp->conn_udp; 3632 int error; 3633 3634 if ((*connp->conn_upcalls->su_recv) 3635 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 3636 NULL) < 0) { 3637 mutex_enter(&udp->udp_recv_lock); 3638 if (error == ENOSPC) { 3639 /* 3640 * let's confirm while holding the lock 3641 */ 3642 if ((*connp->conn_upcalls->su_recv) 3643 (connp->conn_upper_handle, NULL, 0, 0, 3644 &error, NULL) < 0) { 3645 ASSERT(error == ENOSPC); 3646 if (error == ENOSPC) { 3647 connp->conn_flow_cntrld = 3648 B_TRUE; 3649 } 3650 } 3651 mutex_exit(&udp->udp_recv_lock); 3652 } else { 3653 ASSERT(error == EOPNOTSUPP); 3654 mp = udp_queue_fallback(udp, mp); 3655 mutex_exit(&udp->udp_recv_lock); 3656 if (mp != NULL) 3657 putnext(connp->conn_rq, mp); 3658 } 3659 } 3660 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 3661 } else { 3662 putnext(connp->conn_rq, mp); 3663 } 3664 } 3665 3666 /* ARGSUSED2 */ 3667 static void 3668 udp_input(void *arg1, mblk_t *mp, void *arg2) 3669 { 3670 conn_t *connp = (conn_t *)arg1; 3671 struct T_unitdata_ind *tudi; 3672 uchar_t *rptr; /* Pointer to IP header */ 3673 int hdr_length; /* Length of IP+UDP headers */ 3674 int opt_len; 3675 int udi_size; /* Size of T_unitdata_ind */ 3676 int mp_len; 3677 udp_t *udp; 3678 udpha_t *udpha; 3679 int ipversion; 3680 ip6_pkt_t ipp; 3681 ip6_t *ip6h; 3682 ip6i_t *ip6i; 3683 mblk_t *mp1; 3684 mblk_t *options_mp = NULL; 3685 ip_pktinfo_t *pinfo = NULL; 3686 cred_t *cr = NULL; 3687 pid_t cpid; 3688 uint32_t udp_ip_rcv_options_len; 3689 udp_bits_t udp_bits; 3690 cred_t *rcr = connp->conn_cred; 3691 udp_stack_t *us; 3692 3693 ASSERT(connp->conn_flags & IPCL_UDPCONN); 3694 3695 udp = connp->conn_udp; 3696 us = udp->udp_us; 3697 rptr = mp->b_rptr; 3698 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3699 ASSERT(OK_32PTR(rptr)); 3700 3701 /* 3702 * IP should have prepended the options data in an M_CTL 
3703 * Check M_CTL "type" to make sure are not here bcos of 3704 * a valid ICMP message 3705 */ 3706 if (DB_TYPE(mp) == M_CTL) { 3707 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3708 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3709 IN_PKTINFO) { 3710 /* 3711 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3712 * has been prepended to the packet by IP. We need to 3713 * extract the mblk and adjust the rptr 3714 */ 3715 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3716 options_mp = mp; 3717 mp = mp->b_cont; 3718 rptr = mp->b_rptr; 3719 UDP_STAT(us, udp_in_pktinfo); 3720 } else { 3721 /* 3722 * ICMP messages. 3723 */ 3724 udp_icmp_error(connp, mp); 3725 return; 3726 } 3727 } 3728 3729 mp_len = msgdsize(mp); 3730 /* 3731 * This is the inbound data path. 3732 * First, we check to make sure the IP version number is correct, 3733 * and then pull the IP and UDP headers into the first mblk. 3734 */ 3735 3736 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 3737 ipp.ipp_fields = 0; 3738 3739 ipversion = IPH_HDR_VERSION(rptr); 3740 3741 rw_enter(&udp->udp_rwlock, RW_READER); 3742 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 3743 udp_bits = udp->udp_bits; 3744 rw_exit(&udp->udp_rwlock); 3745 3746 switch (ipversion) { 3747 case IPV4_VERSION: 3748 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 3749 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 3750 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 3751 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 3752 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 3753 udp->udp_family == AF_INET) { 3754 /* 3755 * Record/update udp_ip_rcv_options with the lock 3756 * held. Not needed for AF_INET6 sockets 3757 * since they don't support a getsockopt of IP_OPTIONS. 3758 */ 3759 rw_enter(&udp->udp_rwlock, RW_WRITER); 3760 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 3761 opt_len); 3762 rw_exit(&udp->udp_rwlock); 3763 } 3764 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. 
*/ 3765 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 3766 udp->udp_ip_recvpktinfo) { 3767 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 3768 ipp.ipp_fields |= IPPF_IFINDEX; 3769 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 3770 } 3771 } 3772 break; 3773 case IPV6_VERSION: 3774 /* 3775 * IPv6 packets can only be received by applications 3776 * that are prepared to receive IPv6 addresses. 3777 * The IP fanout must ensure this. 3778 */ 3779 ASSERT(udp->udp_family == AF_INET6); 3780 3781 ip6h = (ip6_t *)rptr; 3782 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 3783 3784 if (ip6h->ip6_nxt != IPPROTO_UDP) { 3785 uint8_t nexthdrp; 3786 /* Look for ifindex information */ 3787 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3788 ip6i = (ip6i_t *)ip6h; 3789 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 3790 goto tossit; 3791 3792 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3793 ASSERT(ip6i->ip6i_ifindex != 0); 3794 ipp.ipp_fields |= IPPF_IFINDEX; 3795 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3796 } 3797 rptr = (uchar_t *)&ip6i[1]; 3798 mp->b_rptr = rptr; 3799 if (rptr == mp->b_wptr) { 3800 mp1 = mp->b_cont; 3801 freeb(mp); 3802 mp = mp1; 3803 rptr = mp->b_rptr; 3804 } 3805 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 3806 goto tossit; 3807 ip6h = (ip6_t *)rptr; 3808 mp_len = msgdsize(mp); 3809 } 3810 /* 3811 * Find any potentially interesting extension headers 3812 * as well as the length of the IPv6 + extension 3813 * headers. 3814 */ 3815 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 3816 UDPH_SIZE; 3817 ASSERT(nexthdrp == IPPROTO_UDP); 3818 } else { 3819 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 3820 ip6i = NULL; 3821 } 3822 break; 3823 default: 3824 ASSERT(0); 3825 } 3826 3827 /* 3828 * IP inspected the UDP header thus all of it must be in the mblk. 3829 * UDP length check is performed for IPv6 packets and IPv4 packets 3830 * to check if the size of the packet as specified 3831 * by the header is the same as the physical size of the packet. 3832 * FIXME? Didn't IP already check this? 
3833 */ 3834 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 3835 if ((MBLKL(mp) < hdr_length) || 3836 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 3837 goto tossit; 3838 } 3839 3840 3841 /* Walk past the headers unless UDP_RCVHDR was set. */ 3842 if (!udp_bits.udpb_rcvhdr) { 3843 mp->b_rptr = rptr + hdr_length; 3844 mp_len -= hdr_length; 3845 } 3846 3847 /* 3848 * This is the inbound data path. Packets are passed upstream as 3849 * T_UNITDATA_IND messages with full IP headers still attached. 3850 */ 3851 if (udp->udp_family == AF_INET) { 3852 sin_t *sin; 3853 3854 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 3855 3856 /* 3857 * Normally only send up the source address. 3858 * If IP_RECVDSTADDR is set we include the destination IP 3859 * address as an option. With IP_RECVOPTS we include all 3860 * the IP options. 3861 */ 3862 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3863 if (udp_bits.udpb_recvdstaddr) { 3864 udi_size += sizeof (struct T_opthdr) + 3865 sizeof (struct in_addr); 3866 UDP_STAT(us, udp_in_recvdstaddr); 3867 } 3868 3869 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 3870 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3871 udi_size += sizeof (struct T_opthdr) + 3872 sizeof (struct in_pktinfo); 3873 UDP_STAT(us, udp_ip_rcvpktinfo); 3874 } 3875 3876 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 3877 udi_size += sizeof (struct T_opthdr) + opt_len; 3878 UDP_STAT(us, udp_in_recvopts); 3879 } 3880 3881 /* 3882 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 3883 * space accordingly 3884 */ 3885 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 3886 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3887 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 3888 UDP_STAT(us, udp_in_recvif); 3889 } 3890 3891 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 3892 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 3893 udi_size += sizeof (struct T_opthdr) + 3894 sizeof (struct sockaddr_dl); 3895 UDP_STAT(us, 
udp_in_recvslla); 3896 } 3897 3898 if ((udp_bits.udpb_recvucred) && 3899 (cr = msg_getcred(mp, &cpid)) != NULL) { 3900 udi_size += sizeof (struct T_opthdr) + ucredsize; 3901 UDP_STAT(us, udp_in_recvucred); 3902 } 3903 3904 /* 3905 * If SO_TIMESTAMP is set allocate the appropriate sized 3906 * buffer. Since gethrestime() expects a pointer aligned 3907 * argument, we allocate space necessary for extra 3908 * alignment (even though it might not be used). 3909 */ 3910 if (udp_bits.udpb_timestamp) { 3911 udi_size += sizeof (struct T_opthdr) + 3912 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3913 UDP_STAT(us, udp_in_timestamp); 3914 } 3915 3916 /* 3917 * If IP_RECVTTL is set allocate the appropriate sized buffer 3918 */ 3919 if (udp_bits.udpb_recvttl) { 3920 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 3921 UDP_STAT(us, udp_in_recvttl); 3922 } 3923 3924 /* Allocate a message block for the T_UNITDATA_IND structure. */ 3925 mp1 = allocb(udi_size, BPRI_MED); 3926 if (mp1 == NULL) { 3927 freemsg(mp); 3928 if (options_mp != NULL) 3929 freeb(options_mp); 3930 BUMP_MIB(&us->us_udp_mib, udpInErrors); 3931 return; 3932 } 3933 mp1->b_cont = mp; 3934 mp = mp1; 3935 mp->b_datap->db_type = M_PROTO; 3936 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3937 mp->b_wptr = (uchar_t *)tudi + udi_size; 3938 tudi->PRIM_type = T_UNITDATA_IND; 3939 tudi->SRC_length = sizeof (sin_t); 3940 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3941 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3942 sizeof (sin_t); 3943 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3944 tudi->OPT_length = udi_size; 3945 sin = (sin_t *)&tudi[1]; 3946 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 3947 sin->sin_port = udpha->uha_src_port; 3948 sin->sin_family = udp->udp_family; 3949 *(uint32_t *)&sin->sin_zero[0] = 0; 3950 *(uint32_t *)&sin->sin_zero[4] = 0; 3951 3952 /* 3953 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 3954 * IP_RECVTTL has been set. 
3955 */ 3956 if (udi_size != 0) { 3957 /* 3958 * Copy in destination address before options to avoid 3959 * any padding issues. 3960 */ 3961 char *dstopt; 3962 3963 dstopt = (char *)&sin[1]; 3964 if (udp_bits.udpb_recvdstaddr) { 3965 struct T_opthdr *toh; 3966 ipaddr_t *dstptr; 3967 3968 toh = (struct T_opthdr *)dstopt; 3969 toh->level = IPPROTO_IP; 3970 toh->name = IP_RECVDSTADDR; 3971 toh->len = sizeof (struct T_opthdr) + 3972 sizeof (ipaddr_t); 3973 toh->status = 0; 3974 dstopt += sizeof (struct T_opthdr); 3975 dstptr = (ipaddr_t *)dstopt; 3976 *dstptr = ((ipha_t *)rptr)->ipha_dst; 3977 dstopt += sizeof (ipaddr_t); 3978 udi_size -= toh->len; 3979 } 3980 3981 if (udp_bits.udpb_recvopts && opt_len > 0) { 3982 struct T_opthdr *toh; 3983 3984 toh = (struct T_opthdr *)dstopt; 3985 toh->level = IPPROTO_IP; 3986 toh->name = IP_RECVOPTS; 3987 toh->len = sizeof (struct T_opthdr) + opt_len; 3988 toh->status = 0; 3989 dstopt += sizeof (struct T_opthdr); 3990 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 3991 opt_len); 3992 dstopt += opt_len; 3993 udi_size -= toh->len; 3994 } 3995 3996 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 3997 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3998 struct T_opthdr *toh; 3999 struct in_pktinfo *pktinfop; 4000 4001 toh = (struct T_opthdr *)dstopt; 4002 toh->level = IPPROTO_IP; 4003 toh->name = IP_PKTINFO; 4004 toh->len = sizeof (struct T_opthdr) + 4005 sizeof (*pktinfop); 4006 toh->status = 0; 4007 dstopt += sizeof (struct T_opthdr); 4008 pktinfop = (struct in_pktinfo *)dstopt; 4009 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 4010 pktinfop->ipi_spec_dst = 4011 pinfo->ip_pkt_match_addr; 4012 pktinfop->ipi_addr.s_addr = 4013 ((ipha_t *)rptr)->ipha_dst; 4014 4015 dstopt += sizeof (struct in_pktinfo); 4016 udi_size -= toh->len; 4017 } 4018 4019 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4020 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4021 4022 struct T_opthdr *toh; 4023 struct sockaddr_dl *dstptr; 4024 4025 toh = (struct T_opthdr 
*)dstopt; 4026 toh->level = IPPROTO_IP; 4027 toh->name = IP_RECVSLLA; 4028 toh->len = sizeof (struct T_opthdr) + 4029 sizeof (struct sockaddr_dl); 4030 toh->status = 0; 4031 dstopt += sizeof (struct T_opthdr); 4032 dstptr = (struct sockaddr_dl *)dstopt; 4033 bcopy(&pinfo->ip_pkt_slla, dstptr, 4034 sizeof (struct sockaddr_dl)); 4035 dstopt += sizeof (struct sockaddr_dl); 4036 udi_size -= toh->len; 4037 } 4038 4039 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4040 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4041 4042 struct T_opthdr *toh; 4043 uint_t *dstptr; 4044 4045 toh = (struct T_opthdr *)dstopt; 4046 toh->level = IPPROTO_IP; 4047 toh->name = IP_RECVIF; 4048 toh->len = sizeof (struct T_opthdr) + 4049 sizeof (uint_t); 4050 toh->status = 0; 4051 dstopt += sizeof (struct T_opthdr); 4052 dstptr = (uint_t *)dstopt; 4053 *dstptr = pinfo->ip_pkt_ifindex; 4054 dstopt += sizeof (uint_t); 4055 udi_size -= toh->len; 4056 } 4057 4058 if (cr != NULL) { 4059 struct T_opthdr *toh; 4060 4061 toh = (struct T_opthdr *)dstopt; 4062 toh->level = SOL_SOCKET; 4063 toh->name = SCM_UCRED; 4064 toh->len = sizeof (struct T_opthdr) + ucredsize; 4065 toh->status = 0; 4066 dstopt += sizeof (struct T_opthdr); 4067 (void) cred2ucred(cr, cpid, dstopt, rcr); 4068 dstopt += ucredsize; 4069 udi_size -= toh->len; 4070 } 4071 4072 if (udp_bits.udpb_timestamp) { 4073 struct T_opthdr *toh; 4074 4075 toh = (struct T_opthdr *)dstopt; 4076 toh->level = SOL_SOCKET; 4077 toh->name = SCM_TIMESTAMP; 4078 toh->len = sizeof (struct T_opthdr) + 4079 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4080 toh->status = 0; 4081 dstopt += sizeof (struct T_opthdr); 4082 /* Align for gethrestime() */ 4083 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4084 sizeof (intptr_t)); 4085 gethrestime((timestruc_t *)dstopt); 4086 dstopt = (char *)toh + toh->len; 4087 udi_size -= toh->len; 4088 } 4089 4090 /* 4091 * CAUTION: 4092 * Due to aligment issues 4093 * Processing of IP_RECVTTL option 4094 * should always be the last. 
Adding 4095 * any option processing after this will 4096 * cause alignment panic. 4097 */ 4098 if (udp_bits.udpb_recvttl) { 4099 struct T_opthdr *toh; 4100 uint8_t *dstptr; 4101 4102 toh = (struct T_opthdr *)dstopt; 4103 toh->level = IPPROTO_IP; 4104 toh->name = IP_RECVTTL; 4105 toh->len = sizeof (struct T_opthdr) + 4106 sizeof (uint8_t); 4107 toh->status = 0; 4108 dstopt += sizeof (struct T_opthdr); 4109 dstptr = (uint8_t *)dstopt; 4110 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4111 dstopt += sizeof (uint8_t); 4112 udi_size -= toh->len; 4113 } 4114 4115 /* Consumed all of allocated space */ 4116 ASSERT(udi_size == 0); 4117 } 4118 } else { 4119 sin6_t *sin6; 4120 4121 /* 4122 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4123 * 4124 * Normally we only send up the address. If receiving of any 4125 * optional receive side information is enabled, we also send 4126 * that up as options. 4127 */ 4128 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4129 4130 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4131 IPPF_RTHDR|IPPF_IFINDEX)) { 4132 if ((udp_bits.udpb_ipv6_recvhopopts) && 4133 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4134 size_t hlen; 4135 4136 UDP_STAT(us, udp_in_recvhopopts); 4137 hlen = copy_hop_opts(&ipp, NULL); 4138 if (hlen == 0) 4139 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4140 udi_size += hlen; 4141 } 4142 if (((udp_bits.udpb_ipv6_recvdstopts) || 4143 udp_bits.udpb_old_ipv6_recvdstopts) && 4144 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4145 udi_size += sizeof (struct T_opthdr) + 4146 ipp.ipp_dstoptslen; 4147 UDP_STAT(us, udp_in_recvdstopts); 4148 } 4149 if ((((udp_bits.udpb_ipv6_recvdstopts) && 4150 udp_bits.udpb_ipv6_recvrthdr && 4151 (ipp.ipp_fields & IPPF_RTHDR)) || 4152 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 4153 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4154 udi_size += sizeof (struct T_opthdr) + 4155 ipp.ipp_rtdstoptslen; 4156 UDP_STAT(us, udp_in_recvrtdstopts); 4157 } 4158 if ((udp_bits.udpb_ipv6_recvrthdr) && 4159 (ipp.ipp_fields & 
IPPF_RTHDR)) { 4160 udi_size += sizeof (struct T_opthdr) + 4161 ipp.ipp_rthdrlen; 4162 UDP_STAT(us, udp_in_recvrthdr); 4163 } 4164 if ((udp_bits.udpb_ip_recvpktinfo) && 4165 (ipp.ipp_fields & IPPF_IFINDEX)) { 4166 udi_size += sizeof (struct T_opthdr) + 4167 sizeof (struct in6_pktinfo); 4168 UDP_STAT(us, udp_in_recvpktinfo); 4169 } 4170 4171 } 4172 if ((udp_bits.udpb_recvucred) && 4173 (cr = msg_getcred(mp, &cpid)) != NULL) { 4174 udi_size += sizeof (struct T_opthdr) + ucredsize; 4175 UDP_STAT(us, udp_in_recvucred); 4176 } 4177 4178 /* 4179 * If SO_TIMESTAMP is set allocate the appropriate sized 4180 * buffer. Since gethrestime() expects a pointer aligned 4181 * argument, we allocate space necessary for extra 4182 * alignment (even though it might not be used). 4183 */ 4184 if (udp_bits.udpb_timestamp) { 4185 udi_size += sizeof (struct T_opthdr) + 4186 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4187 UDP_STAT(us, udp_in_timestamp); 4188 } 4189 4190 if (udp_bits.udpb_ipv6_recvhoplimit) { 4191 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4192 UDP_STAT(us, udp_in_recvhoplimit); 4193 } 4194 4195 if (udp_bits.udpb_ipv6_recvtclass) { 4196 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4197 UDP_STAT(us, udp_in_recvtclass); 4198 } 4199 4200 mp1 = allocb(udi_size, BPRI_MED); 4201 if (mp1 == NULL) { 4202 freemsg(mp); 4203 if (options_mp != NULL) 4204 freeb(options_mp); 4205 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4206 return; 4207 } 4208 mp1->b_cont = mp; 4209 mp = mp1; 4210 mp->b_datap->db_type = M_PROTO; 4211 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4212 mp->b_wptr = (uchar_t *)tudi + udi_size; 4213 tudi->PRIM_type = T_UNITDATA_IND; 4214 tudi->SRC_length = sizeof (sin6_t); 4215 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4216 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4217 sizeof (sin6_t); 4218 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4219 tudi->OPT_length = udi_size; 4220 sin6 = (sin6_t *)&tudi[1]; 4221 if (ipversion 
== IPV4_VERSION) { 4222 in6_addr_t v6dst; 4223 4224 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4225 &sin6->sin6_addr); 4226 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4227 &v6dst); 4228 sin6->sin6_flowinfo = 0; 4229 sin6->sin6_scope_id = 0; 4230 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4231 connp->conn_zoneid, us->us_netstack); 4232 } else { 4233 sin6->sin6_addr = ip6h->ip6_src; 4234 /* No sin6_flowinfo per API */ 4235 sin6->sin6_flowinfo = 0; 4236 /* For link-scope source pass up scope id */ 4237 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4238 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4239 sin6->sin6_scope_id = ipp.ipp_ifindex; 4240 else 4241 sin6->sin6_scope_id = 0; 4242 sin6->__sin6_src_id = ip_srcid_find_addr( 4243 &ip6h->ip6_dst, connp->conn_zoneid, 4244 us->us_netstack); 4245 } 4246 sin6->sin6_port = udpha->uha_src_port; 4247 sin6->sin6_family = udp->udp_family; 4248 4249 if (udi_size != 0) { 4250 uchar_t *dstopt; 4251 4252 dstopt = (uchar_t *)&sin6[1]; 4253 if ((udp_bits.udpb_ip_recvpktinfo) && 4254 (ipp.ipp_fields & IPPF_IFINDEX)) { 4255 struct T_opthdr *toh; 4256 struct in6_pktinfo *pkti; 4257 4258 toh = (struct T_opthdr *)dstopt; 4259 toh->level = IPPROTO_IPV6; 4260 toh->name = IPV6_PKTINFO; 4261 toh->len = sizeof (struct T_opthdr) + 4262 sizeof (*pkti); 4263 toh->status = 0; 4264 dstopt += sizeof (struct T_opthdr); 4265 pkti = (struct in6_pktinfo *)dstopt; 4266 if (ipversion == IPV6_VERSION) 4267 pkti->ipi6_addr = ip6h->ip6_dst; 4268 else 4269 IN6_IPADDR_TO_V4MAPPED( 4270 ((ipha_t *)rptr)->ipha_dst, 4271 &pkti->ipi6_addr); 4272 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4273 dstopt += sizeof (*pkti); 4274 udi_size -= toh->len; 4275 } 4276 if (udp_bits.udpb_ipv6_recvhoplimit) { 4277 struct T_opthdr *toh; 4278 4279 toh = (struct T_opthdr *)dstopt; 4280 toh->level = IPPROTO_IPV6; 4281 toh->name = IPV6_HOPLIMIT; 4282 toh->len = sizeof (struct T_opthdr) + 4283 sizeof (uint_t); 4284 toh->status = 0; 4285 dstopt += sizeof (struct T_opthdr); 4286 if 
(ipversion == IPV6_VERSION) 4287 *(uint_t *)dstopt = ip6h->ip6_hops; 4288 else 4289 *(uint_t *)dstopt = 4290 ((ipha_t *)rptr)->ipha_ttl; 4291 dstopt += sizeof (uint_t); 4292 udi_size -= toh->len; 4293 } 4294 if (udp_bits.udpb_ipv6_recvtclass) { 4295 struct T_opthdr *toh; 4296 4297 toh = (struct T_opthdr *)dstopt; 4298 toh->level = IPPROTO_IPV6; 4299 toh->name = IPV6_TCLASS; 4300 toh->len = sizeof (struct T_opthdr) + 4301 sizeof (uint_t); 4302 toh->status = 0; 4303 dstopt += sizeof (struct T_opthdr); 4304 if (ipversion == IPV6_VERSION) { 4305 *(uint_t *)dstopt = 4306 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4307 } else { 4308 ipha_t *ipha = (ipha_t *)rptr; 4309 *(uint_t *)dstopt = 4310 ipha->ipha_type_of_service; 4311 } 4312 dstopt += sizeof (uint_t); 4313 udi_size -= toh->len; 4314 } 4315 if ((udp_bits.udpb_ipv6_recvhopopts) && 4316 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4317 size_t hlen; 4318 4319 hlen = copy_hop_opts(&ipp, dstopt); 4320 dstopt += hlen; 4321 udi_size -= hlen; 4322 } 4323 if ((udp_bits.udpb_ipv6_recvdstopts) && 4324 (udp_bits.udpb_ipv6_recvrthdr) && 4325 (ipp.ipp_fields & IPPF_RTHDR) && 4326 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4327 struct T_opthdr *toh; 4328 4329 toh = (struct T_opthdr *)dstopt; 4330 toh->level = IPPROTO_IPV6; 4331 toh->name = IPV6_DSTOPTS; 4332 toh->len = sizeof (struct T_opthdr) + 4333 ipp.ipp_rtdstoptslen; 4334 toh->status = 0; 4335 dstopt += sizeof (struct T_opthdr); 4336 bcopy(ipp.ipp_rtdstopts, dstopt, 4337 ipp.ipp_rtdstoptslen); 4338 dstopt += ipp.ipp_rtdstoptslen; 4339 udi_size -= toh->len; 4340 } 4341 if ((udp_bits.udpb_ipv6_recvrthdr) && 4342 (ipp.ipp_fields & IPPF_RTHDR)) { 4343 struct T_opthdr *toh; 4344 4345 toh = (struct T_opthdr *)dstopt; 4346 toh->level = IPPROTO_IPV6; 4347 toh->name = IPV6_RTHDR; 4348 toh->len = sizeof (struct T_opthdr) + 4349 ipp.ipp_rthdrlen; 4350 toh->status = 0; 4351 dstopt += sizeof (struct T_opthdr); 4352 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4353 dstopt += ipp.ipp_rthdrlen; 4354 udi_size -= 
toh->len; 4355 } 4356 if ((udp_bits.udpb_ipv6_recvdstopts) && 4357 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4358 struct T_opthdr *toh; 4359 4360 toh = (struct T_opthdr *)dstopt; 4361 toh->level = IPPROTO_IPV6; 4362 toh->name = IPV6_DSTOPTS; 4363 toh->len = sizeof (struct T_opthdr) + 4364 ipp.ipp_dstoptslen; 4365 toh->status = 0; 4366 dstopt += sizeof (struct T_opthdr); 4367 bcopy(ipp.ipp_dstopts, dstopt, 4368 ipp.ipp_dstoptslen); 4369 dstopt += ipp.ipp_dstoptslen; 4370 udi_size -= toh->len; 4371 } 4372 if (cr != NULL) { 4373 struct T_opthdr *toh; 4374 4375 toh = (struct T_opthdr *)dstopt; 4376 toh->level = SOL_SOCKET; 4377 toh->name = SCM_UCRED; 4378 toh->len = sizeof (struct T_opthdr) + ucredsize; 4379 toh->status = 0; 4380 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4381 dstopt += toh->len; 4382 udi_size -= toh->len; 4383 } 4384 if (udp_bits.udpb_timestamp) { 4385 struct T_opthdr *toh; 4386 4387 toh = (struct T_opthdr *)dstopt; 4388 toh->level = SOL_SOCKET; 4389 toh->name = SCM_TIMESTAMP; 4390 toh->len = sizeof (struct T_opthdr) + 4391 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4392 toh->status = 0; 4393 dstopt += sizeof (struct T_opthdr); 4394 /* Align for gethrestime() */ 4395 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4396 sizeof (intptr_t)); 4397 gethrestime((timestruc_t *)dstopt); 4398 dstopt = (uchar_t *)toh + toh->len; 4399 udi_size -= toh->len; 4400 } 4401 4402 /* Consumed all of allocated space */ 4403 ASSERT(udi_size == 0); 4404 } 4405 #undef sin6 4406 /* No IP_RECVDSTADDR for IPv6. */ 4407 } 4408 4409 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4410 if (options_mp != NULL) 4411 freeb(options_mp); 4412 4413 udp_ulp_recv(connp, mp); 4414 4415 return; 4416 4417 tossit: 4418 freemsg(mp); 4419 if (options_mp != NULL) 4420 freeb(options_mp); 4421 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4422 } 4423 4424 /* 4425 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 4426 * information that can be changing beneath us. 
 */
/*
 * 'q' identifies the requesting conn (Q_TO_CONN); 'mpctl' is the request
 * message whose b_cont receives the data.  Up to five replies are sent with
 * qreply(), in this order: the UDP MIB counters, the IPv4 endpoint table,
 * the IPv4 MLP attribute table, the IPv6 endpoint table and the IPv6 MLP
 * attribute table.  An attribute table is freed rather than sent when it is
 * empty.  Only conns in the caller's zone are reported.
 *
 * Returns a copy of the original request (made before it is modified), or
 * 0 after freeing everything if 'mpctl' is unusable or a copy cannot be
 * made.  Note the returned copy may itself be NULL if that first copymsg()
 * failed while the later ones succeeded.
 */
mblk_t *
udp_snmp_get(queue_t *q, mblk_t *mpctl)
{
	mblk_t			*mpdata;
	mblk_t			*mp_conn_ctl;
	mblk_t			*mp_attr_ctl;
	mblk_t			*mp6_conn_ctl;
	mblk_t			*mp6_attr_ctl;
	mblk_t			*mp_conn_tail;
	mblk_t			*mp_attr_tail;
	mblk_t			*mp6_conn_tail;
	mblk_t			*mp6_attr_tail;
	struct opthdr		*optp;
	mib2_udpEntry_t		ude;
	mib2_udp6Entry_t	ude6;
	mib2_transportMLPEntry_t mlp;
	int			state;
	zoneid_t		zoneid;
	int			i;
	connf_t			*connfp;
	conn_t			*connp = Q_TO_CONN(q);
	int			v4_conn_idx;
	int			v6_conn_idx;
	boolean_t		needattr;
	udp_t			*udp;
	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
	udp_stack_t		*us = connp->conn_netstack->netstack_udp;
	mblk_t			*mp2ctl;

	/*
	 * make a copy of the original message
	 */
	mp2ctl = copymsg(mpctl);

	/*
	 * One control-message copy per reply table.  The copies are made in
	 * a short-circuit chain, so on failure only the ones made so far are
	 * non-NULL; mp6_attr_ctl is last in the chain and therefore never
	 * needs freeing here.
	 */
	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
	if (mpctl == NULL ||
	    (mpdata = mpctl->b_cont) == NULL ||
	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
		freemsg(mp_conn_ctl);
		freemsg(mp_attr_ctl);
		freemsg(mp6_conn_ctl);
		freemsg(mpctl);
		freemsg(mp2ctl);
		return (0);
	}

	zoneid = connp->conn_zoneid;

	/* fixed length structure for IPv4 and IPv6 counters */
	SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
	SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
	/* synchronize 64- and 32-bit counters */
	SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams);
	SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams);

	/* First reply: the scalar UDP MIB counters, sent in mpctl itself */
	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = 0;
	(void) snmp_append_data(mpdata, (char *)&us->us_udp_mib,
	    sizeof (us->us_udp_mib));
	optp->len = msgdsize(mpdata);
	qreply(q, mpctl);

	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
	v4_conn_idx = v6_conn_idx = 0;

	/* Walk every bucket of the global conn hash, UDP conns only */
	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;

		while ((connp = ipcl_get_next_conn(connfp, connp,
		    IPCL_UDPCONN))) {
			udp = connp->conn_udp;
			if (zoneid != connp->conn_zoneid)
				continue;

			/*
			 * Note that the port numbers are sent in
			 * host byte order
			 */

			if (udp->udp_state == TS_UNBND)
				state = MIB2_UDP_unbound;
			else if (udp->udp_state == TS_IDLE)
				state = MIB2_UDP_idle;
			else if (udp->udp_state == TS_DATA_XFER)
				state = MIB2_UDP_connected;
			else
				state = MIB2_UDP_unknown;

			/*
			 * Build the MLP attribute flags; an attribute row is
			 * emitted only when at least one flag applies.
			 */
			needattr = B_FALSE;
			bzero(&mlp, sizeof (mlp));
			if (connp->conn_mlp_type != mlptSingle) {
				if (connp->conn_mlp_type == mlptShared ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_SHARED;
				if (connp->conn_mlp_type == mlptPrivate ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
				needattr = B_TRUE;
			}
			if (connp->conn_anon_mlp) {
				mlp.tme_flags |= MIB2_TMEF_ANONMLP;
				needattr = B_TRUE;
			}
			if (connp->conn_mac_exempt) {
				mlp.tme_flags |= MIB2_TMEF_MACEXEMPT;
				needattr = B_TRUE;
			}

			/*
			 * Create an IPv4 table entry for IPv4 entries and also
			 * any IPv6 entries which are bound to in6addr_any
			 * (i.e. anything a IPv4 peer could connect/send to).
			 */
			if (udp->udp_ipversion == IPV4_VERSION ||
			    (udp->udp_state <= TS_IDLE &&
			    IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) {
				ude.udpEntryInfo.ue_state = state;
				/*
				 * If in6addr_any this will set it to
				 * INADDR_ANY
				 */
				ude.udpLocalAddress =
				    V4_PART_OF_V6(udp->udp_v6src);
				ude.udpLocalPort = ntohs(udp->udp_port);
				if (udp->udp_state == TS_DATA_XFER) {
					/*
					 * Can potentially get here for
					 * v6 socket if another process
					 * (say, ping) has just done a
					 * sendto(), changing the state
					 * from the TS_IDLE above to
					 * TS_DATA_XFER by the time we hit
					 * this part of the code.
					 */
					ude.udpEntryInfo.ue_RemoteAddress =
					    V4_PART_OF_V6(udp->udp_v6dst);
					ude.udpEntryInfo.ue_RemotePort =
					    ntohs(udp->udp_dstport);
				} else {
					ude.udpEntryInfo.ue_RemoteAddress = 0;
					ude.udpEntryInfo.ue_RemotePort = 0;
				}

				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude.udpInstance = (uint32_t)(uintptr_t)udp;
				ude.udpCreationProcess =
				    (udp->udp_open_pid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    udp->udp_open_pid;
				ude.udpCreationTime = udp->udp_open_time;

				(void) snmp_append_data2(mp_conn_ctl->b_cont,
				    &mp_conn_tail, (char *)&ude, sizeof (ude));
				/* Row index ties the attribute to this entry */
				mlp.tme_connidx = v4_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp_attr_ctl->b_cont, &mp_attr_tail,
					    (char *)&mlp, sizeof (mlp));
			}
			if (udp->udp_ipversion == IPV6_VERSION) {
				ude6.udp6EntryInfo.ue_state  = state;
				ude6.udp6LocalAddress = udp->udp_v6src;
				ude6.udp6LocalPort = ntohs(udp->udp_port);
				ude6.udp6IfIndex = udp->udp_bound_if;
				if (udp->udp_state == TS_DATA_XFER) {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    udp->udp_v6dst;
					ude6.udp6EntryInfo.ue_RemotePort =
					    ntohs(udp->udp_dstport);
				} else {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    sin6_null.sin6_addr;
					ude6.udp6EntryInfo.ue_RemotePort = 0;
				}
				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
				ude6.udp6CreationProcess =
				    (udp->udp_open_pid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    udp->udp_open_pid;
				ude6.udp6CreationTime = udp->udp_open_time;

				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
				    &mp6_conn_tail, (char *)&ude6,
				    sizeof (ude6));
				/* Row index ties the attribute to this entry */
				mlp.tme_connidx = v6_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp6_attr_ctl->b_cont,
					    &mp6_attr_tail, (char *)&mlp,
					    sizeof (mlp));
			}
		}
	}

	/* IPv4 UDP endpoints */
	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = MIB2_UDP_ENTRY;
	optp->len = msgdsize(mp_conn_ctl->b_cont);
	qreply(q, mp_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp_attr_ctl->b_cont);
	/* An empty attribute table is dropped instead of being sent */
	if (optp->len == 0)
		freemsg(mp_attr_ctl);
	else
		qreply(q, mp_attr_ctl);

	/* IPv6 UDP endpoints */
	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = MIB2_UDP6_ENTRY;
	optp->len = msgdsize(mp6_conn_ctl->b_cont);
	qreply(q, mp6_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp6_attr_ctl->b_cont);
	/* An empty attribute table is dropped instead of being sent */
	if (optp->len == 0)
		freemsg(mp6_attr_ctl);
	else
		qreply(q, mp6_attr_ctl);

	return (mp2ctl);
}

/*
 * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
 * NOTE: Per MIB-II, UDP has no writable data.
 * TODO:  If this ever actually tries to set anything, it needs to be
 * to do the appropriate locking.
 */
/* ARGSUSED */
int
udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
    uchar_t *ptr, int len)
{
	switch (level) {
	case MIB2_UDP:
		/* Every set aimed at the UDP MIB is rejected */
		return (0);
	default:
		/* Not a UDP request: report it as acceptable */
		return (1);
	}
}

/*
 * This routine creates a T_UDERROR_IND message and passes it upstream.
 * The address and options are copied from the T_UNITDATA_REQ message
 * passed in mp.  This message is freed.
4700 */ 4701 static void 4702 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 4703 t_scalar_t err) 4704 { 4705 struct T_unitdata_req *tudr; 4706 mblk_t *mp1; 4707 uchar_t *optaddr; 4708 t_scalar_t optlen; 4709 4710 if (DB_TYPE(mp) == M_DATA) { 4711 ASSERT(destaddr != NULL && destlen != 0); 4712 optaddr = NULL; 4713 optlen = 0; 4714 } else { 4715 if ((mp->b_wptr < mp->b_rptr) || 4716 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 4717 goto done; 4718 } 4719 tudr = (struct T_unitdata_req *)mp->b_rptr; 4720 destaddr = mp->b_rptr + tudr->DEST_offset; 4721 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 4722 destaddr + tudr->DEST_length < mp->b_rptr || 4723 destaddr + tudr->DEST_length > mp->b_wptr) { 4724 goto done; 4725 } 4726 optaddr = mp->b_rptr + tudr->OPT_offset; 4727 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 4728 optaddr + tudr->OPT_length < mp->b_rptr || 4729 optaddr + tudr->OPT_length > mp->b_wptr) { 4730 goto done; 4731 } 4732 destlen = tudr->DEST_length; 4733 optlen = tudr->OPT_length; 4734 } 4735 4736 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 4737 (char *)optaddr, optlen, err); 4738 if (mp1 != NULL) 4739 qreply(q, mp1); 4740 4741 done: 4742 freemsg(mp); 4743 } 4744 4745 /* 4746 * This routine removes a port number association from a stream. It 4747 * is called by udp_wput to handle T_UNBIND_REQ messages. 4748 */ 4749 static void 4750 udp_tpi_unbind(queue_t *q, mblk_t *mp) 4751 { 4752 conn_t *connp = Q_TO_CONN(q); 4753 int error; 4754 4755 error = udp_do_unbind(connp); 4756 if (error) { 4757 if (error < 0) 4758 udp_err_ack(q, mp, -error, 0); 4759 else 4760 udp_err_ack(q, mp, TSYSERR, error); 4761 return; 4762 } 4763 4764 mp = mi_tpi_ok_ack_alloc(mp); 4765 ASSERT(mp != NULL); 4766 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4767 qreply(q, mp); 4768 } 4769 4770 /* 4771 * Don't let port fall into the privileged range. 
4772 * Since the extra privileged ports can be arbitrary we also 4773 * ensure that we exclude those from consideration. 4774 * us->us_epriv_ports is not sorted thus we loop over it until 4775 * there are no changes. 4776 */ 4777 static in_port_t 4778 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 4779 { 4780 int i; 4781 in_port_t nextport; 4782 boolean_t restart = B_FALSE; 4783 udp_stack_t *us = udp->udp_us; 4784 4785 if (random && udp_random_anon_port != 0) { 4786 (void) random_get_pseudo_bytes((uint8_t *)&port, 4787 sizeof (in_port_t)); 4788 /* 4789 * Unless changed by a sys admin, the smallest anon port 4790 * is 32768 and the largest anon port is 65535. It is 4791 * very likely (50%) for the random port to be smaller 4792 * than the smallest anon port. When that happens, 4793 * add port % (anon port range) to the smallest anon 4794 * port to get the random port. It should fall into the 4795 * valid anon port range. 4796 */ 4797 if (port < us->us_smallest_anon_port) { 4798 port = us->us_smallest_anon_port + 4799 port % (us->us_largest_anon_port - 4800 us->us_smallest_anon_port); 4801 } 4802 } 4803 4804 retry: 4805 if (port < us->us_smallest_anon_port) 4806 port = us->us_smallest_anon_port; 4807 4808 if (port > us->us_largest_anon_port) { 4809 port = us->us_smallest_anon_port; 4810 if (restart) 4811 return (0); 4812 restart = B_TRUE; 4813 } 4814 4815 if (port < us->us_smallest_nonpriv_port) 4816 port = us->us_smallest_nonpriv_port; 4817 4818 for (i = 0; i < us->us_num_epriv_ports; i++) { 4819 if (port == us->us_epriv_ports[i]) { 4820 port++; 4821 /* 4822 * Make sure that the port is in the 4823 * valid range. 
4824 */ 4825 goto retry; 4826 } 4827 } 4828 4829 if (is_system_labeled() && 4830 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 4831 port, IPPROTO_UDP, B_TRUE)) != 0) { 4832 port = nextport; 4833 goto retry; 4834 } 4835 4836 return (port); 4837 } 4838 4839 static int 4840 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) 4841 { 4842 int err; 4843 cred_t *cred; 4844 cred_t *orig_cred = NULL; 4845 cred_t *effective_cred = NULL; 4846 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4847 udp_t *udp = Q_TO_UDP(wq); 4848 udp_stack_t *us = udp->udp_us; 4849 4850 /* 4851 * All Solaris components should pass a db_credp 4852 * for this message, hence we ASSERT. 4853 * On production kernels we return an error to be robust against 4854 * random streams modules sitting on top of us. 4855 */ 4856 cred = orig_cred = msg_getcred(mp, NULL); 4857 ASSERT(cred != NULL); 4858 if (cred == NULL) 4859 return (EINVAL); 4860 4861 /* 4862 * Verify the destination is allowed to receive packets at 4863 * the security label of the message data. tsol_check_dest() 4864 * may create a new effective cred for this message with a 4865 * modified label or label flags. Note that we use the cred/label 4866 * from the message to handle MLP 4867 */ 4868 if ((err = tsol_check_dest(cred, &dst, IPV4_VERSION, 4869 udp->udp_connp->conn_mac_exempt, &effective_cred)) != 0) 4870 goto done; 4871 if (effective_cred != NULL) 4872 cred = effective_cred; 4873 4874 /* 4875 * Calculate the security label to be placed in the text 4876 * of the message (if any). 4877 */ 4878 if ((err = tsol_compute_label(cred, dst, opt_storage, 4879 us->us_netstack->netstack_ip)) != 0) 4880 goto done; 4881 4882 /* 4883 * Insert the security label in the cached ip options, 4884 * removing any old label that may exist. 
4885 */ 4886 if ((err = tsol_update_options(&udp->udp_ip_snd_options, 4887 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 4888 opt_storage)) != 0) 4889 goto done; 4890 4891 /* 4892 * Save the destination address and creds we used to 4893 * generate the security label text. 4894 */ 4895 if (cred != udp->udp_effective_cred) { 4896 if (udp->udp_effective_cred != NULL) 4897 crfree(udp->udp_effective_cred); 4898 crhold(cred); 4899 udp->udp_effective_cred = cred; 4900 } 4901 if (orig_cred != udp->udp_last_cred) { 4902 if (udp->udp_last_cred != NULL) 4903 crfree(udp->udp_last_cred); 4904 crhold(orig_cred); 4905 udp->udp_last_cred = orig_cred; 4906 } 4907 done: 4908 if (effective_cred != NULL) 4909 crfree(effective_cred); 4910 4911 if (err != 0) { 4912 DTRACE_PROBE4( 4913 tx__ip__log__info__updatelabel__udp, 4914 char *, "queue(1) failed to update options(2) on mp(3)", 4915 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4916 } 4917 return (err); 4918 } 4919 4920 static mblk_t * 4921 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 4922 uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg, 4923 cred_t *cr, pid_t pid) 4924 { 4925 udp_t *udp = connp->conn_udp; 4926 mblk_t *mp1 = mp; 4927 mblk_t *mp2; 4928 ipha_t *ipha; 4929 int ip_hdr_length; 4930 uint32_t ip_len; 4931 udpha_t *udpha; 4932 boolean_t lock_held = B_FALSE; 4933 in_port_t uha_src_port; 4934 udpattrs_t attrs; 4935 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 4936 uint32_t ip_snd_opt_len = 0; 4937 ip4_pkt_t pktinfo; 4938 ip4_pkt_t *pktinfop = &pktinfo; 4939 ip_opt_info_t optinfo; 4940 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4941 udp_stack_t *us = udp->udp_us; 4942 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 4943 queue_t *q = connp->conn_wq; 4944 ire_t *ire; 4945 in6_addr_t v6dst; 4946 boolean_t update_lastdst = B_FALSE; 4947 4948 *error = 0; 4949 pktinfop->ip4_ill_index = 0; 4950 pktinfop->ip4_addr = INADDR_ANY; 4951 optinfo.ip_opt_flags = 0; 4952 
	optinfo.ip_opt_ill_index = 0;

	/* An unspecified destination is sent to the loopback address. */
	if (v4dst == INADDR_ANY)
		v4dst = htonl(INADDR_LOOPBACK);

	/*
	 * If options passed in, feed it for verification and handling.
	 * Non-STREAMS sockets pass them as ancillary data in msg; otherwise
	 * they are carried by the T_unitdata_req at the head of mp.
	 */
	attrs.udpattr_credset = B_FALSE;
	if (IPCL_IS_NONSTR(connp)) {
		if (msg->msg_controllen != 0) {
			attrs.udpattr_ipp4 = pktinfop;
			attrs.udpattr_mb = mp;

			rw_enter(&udp->udp_rwlock, RW_WRITER);
			*error = process_auxiliary_options(connp,
			    msg->msg_control, msg->msg_controllen,
			    &attrs, &udp_opt_obj, udp_opt_set, cr);
			rw_exit(&udp->udp_rwlock);
			if (*error)
				goto done;
		}
	} else {
		if (DB_TYPE(mp) != M_DATA) {
			mp1 = mp->b_cont;
			if (((struct T_unitdata_req *)
			    mp->b_rptr)->OPT_length != 0) {
				attrs.udpattr_ipp4 = pktinfop;
				attrs.udpattr_mb = mp;
				if (udp_unitdata_opt_process(q, mp, error,
				    &attrs) < 0)
					goto done;
				/*
				 * Note: success in processing options.
				 * mp option buffer represented by
				 * OPT_length/offset now potentially modified
				 * and contain option setting results
				 */
				ASSERT(*error == 0);
			}
		}
	}

	/* mp1 points to the M_DATA mblk carrying the packet */
	ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);

	/*
	 * Determine whether we need to mark the mblk with the user's
	 * credentials.
	 * If labeled then sockfs would have already done this.
	 */
	ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL);

	/*
	 * The cred is only attached when the cached ire cannot be used for
	 * this destination as a plain unicast route (multicast destination,
	 * no or mismatching cached ire, or a broadcast/local/loopback ire).
	 */
	ire = connp->conn_ire_cache;
	if (CLASSD(v4dst) || (ire == NULL) || (ire->ire_addr != v4dst) ||
	    (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) {
		if (cr != NULL && msg_getcred(mp, NULL) == NULL)
			mblk_setcred(mp, cr, pid);
	}

	/* From here until lock_held is cleared, "done" must drop the lock. */
	rw_enter(&udp->udp_rwlock, RW_READER);
	lock_held = B_TRUE;

	/*
	 * Cluster and TSOL note:
	 *    udp.udp_v6lastdst		is shared by Cluster and TSOL
	 *    udp.udp_lastdstport	is used by Cluster
	 *
	 * Both Cluster and TSOL need to update the dest addr and/or port.
	 * Updating is done after both Cluster and TSOL checks, protected
	 * by conn_lock.
	 */
	mutex_enter(&connp->conn_lock);

	if (cl_inet_connect2 != NULL &&
	    (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
	    V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst ||
	    udp->udp_lastdstport != port)) {
		/* conn_lock is not held across the cluster callback. */
		mutex_exit(&connp->conn_lock);
		*error = 0;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error);
		if (*error != 0) {
			*error = EHOSTUNREACH;
			goto done;
		}
		update_lastdst = B_TRUE;
		mutex_enter(&connp->conn_lock);
	}

	/*
	 * Check if our saved options are valid; update if not.
	 * TSOL Note: Since we are not in WRITER mode, UDP packets
	 * to different destination may require different labels,
	 * or worse, UDP packets to same IP address may require
	 * different labels due to use of shared all-zones address.
	 * We use conn_lock to ensure that lastdst, ip_snd_options,
	 * and ip_snd_options_len are consistent for the current
	 * destination and are updated atomically.
	 */
	if (is_system_labeled()) {
		cred_t	*credp;
		pid_t	cpid;

		/* Using UDP MLP requires SCM_UCRED from user */
		if (connp->conn_mlp_type != mlptSingle &&
		    !attrs.udpattr_credset) {
			mutex_exit(&connp->conn_lock);
			DTRACE_PROBE4(
			    tx__ip__log__info__output__udp,
			    char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
			    mblk_t *, mp, udpattrs_t *, &attrs, queue_t *, q);
			*error = EINVAL;
			goto done;
		}
		/*
		 * Update label option for this UDP socket if
		 * - the destination has changed,
		 * - the UDP socket is MLP, or
		 * - the cred attached to the mblk changed.
		 */
		credp = msg_getcred(mp, &cpid);
		if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
		    V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst ||
		    connp->conn_mlp_type != mlptSingle ||
		    credp != udp->udp_last_cred) {
			if ((*error = udp_update_label(q, mp, v4dst)) != 0) {
				mutex_exit(&connp->conn_lock);
				goto done;
			}
			update_lastdst = B_TRUE;
		}

		/*
		 * Attach the effective cred to the mblk to ensure future
		 * routing decisions will be based on its label.
		 */
		mblk_setcred(mp, udp->udp_effective_cred, cpid);
	}
	if (update_lastdst) {
		IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst);
		udp->udp_lastdstport = port;
	}
	/* Snapshot the cached IP options while conn_lock is still held. */
	if (udp->udp_ip_snd_options_len > 0) {
		ip_snd_opt_len = udp->udp_ip_snd_options_len;
		bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len);
	}
	mutex_exit(&connp->conn_lock);

	/*
	 * Add an IP header.  It is built in front of the data in mp1 when
	 * that data block is not shared, has enough room before b_rptr and
	 * the resulting header pointer passes OK_32PTR(); otherwise a new
	 * mblk is allocated for the headers and the data is chained behind
	 * it.
	 */
	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len +
	    (insert_spi ? sizeof (uint32_t) : 0);
	ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length];
	if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) ||
	    !OK_32PTR(ipha)) {
		mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO);
		if (mp2 == NULL) {
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "allocbfail2");
			*error = ENOMEM;
			goto done;
		}
		/* Headers are written at the tail end of the new block. */
		mp2->b_wptr = DB_LIM(mp2);
		mp2->b_cont = mp1;
		mp1 = mp2;
		if (DB_TYPE(mp) != M_DATA)
			mp->b_cont = mp1;
		else
			mp = mp1;
		ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length);
	}
	/* From here on ip_hdr_length covers the IP header (and options) only */
	ip_hdr_length -= (UDPH_SIZE + (insert_spi ? sizeof (uint32_t) : 0));
#ifdef	_BIG_ENDIAN
	/* Set version, header length, and tos */
	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
	    ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) |
	    udp->udp_type_of_service);
	/* Set ttl and protocol */
	*(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP;
#else
	/* Set version, header length, and tos */
	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
	    ((udp->udp_type_of_service << 8) |
	    ((IP_VERSION << 4) | (ip_hdr_length>>2)));
	/* Set ttl and protocol */
	*(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl;
#endif
	if (pktinfop->ip4_addr != INADDR_ANY) {
		/* Source address supplied through the per-packet options. */
		ipha->ipha_src = pktinfop->ip4_addr;
		optinfo.ip_opt_flags = IP_VERIFY_SRC;
	} else {
		/*
		 * Copy our address into the packet.  If this is zero,
		 * first look at __sin6_src_id for a hint. If we leave the
		 * source as INADDR_ANY then ip will fill in the real source
		 * address.
		 */
		IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src);
		if (srcid != 0 && ipha->ipha_src == INADDR_ANY) {
			in6_addr_t v6src;

			ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid,
			    us->us_netstack);
			IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src);
		}
	}
	uha_src_port = udp->udp_port;
	/*
	 * Without IP options udp_rwlock is dropped here; with options it is
	 * dropped further down, once they have been copied into the header.
	 */
	if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) {
		rw_exit(&udp->udp_rwlock);
		lock_held = B_FALSE;
	}

	if (pktinfop->ip4_ill_index != 0) {
		optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
	}

	ipha->ipha_fragment_offset_and_flags = 0;
	ipha->ipha_ident = 0;

	mp1->b_rptr = (uchar_t *)ipha;

	ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <=
	    (uintptr_t)UINT_MAX);

	/* Determine length of packet */
	ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha);
	if ((mp2 = mp1->b_cont) != NULL) {
		do {
			ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX);
			ip_len += (uint32_t)MBLKL(mp2);
		} while ((mp2 = mp2->b_cont) != NULL);
	}
	/*
	 * If the size of the packet is greater than the maximum allowed by
	 * ip, return an error. Passing this down could cause panics because
	 * the size will have wrapped and be inconsistent with the msg size.
	 */
	if (ip_len > IP_MAXPACKET) {
		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
		    "udp_wput_end: q %p (%S)", q, "IP length exceeded");
		*error = EMSGSIZE;
		goto done;
	}
	ipha->ipha_length = htons((uint16_t)ip_len);
	/* ip_len now becomes the UDP length, kept in network byte order. */
	ip_len -= ip_hdr_length;
	ip_len = htons((uint16_t)ip_len);
	udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length);

	/* Insert all-0s SPI now. */
	if (insert_spi)
		*((uint32_t *)(udpha + 1)) = 0;

	/*
	 * Copy in the destination address
	 */
	ipha->ipha_dst = v4dst;

	/*
	 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic.
	 */
	if (CLASSD(v4dst))
		ipha->ipha_ttl = udp->udp_multicast_ttl;

	udpha->uha_dst_port = port;
	udpha->uha_src_port = uha_src_port;

	if (ip_snd_opt_len > 0) {
		uint32_t	cksum;

		bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len);
		lock_held = B_FALSE;
		rw_exit(&udp->udp_rwlock);
		/*
		 * Massage source route putting first source route in ipha_dst.
		 * Ignore the destination in T_unitdata_req.
		 * Create a checksum adjustment for a source route, if any.
		 */
		cksum = ip_massage_options(ipha, us->us_netstack);
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) +
		    (ipha->ipha_dst & 0xFFFF);
		if ((int)cksum < 0)
			cksum--;
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		/*
		 * IP does the checksum if uha_checksum is non-zero,
		 * We make it easy for IP to include our pseudo header
		 * by putting our length in uha_checksum.
		 */
		cksum += ip_len;
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		/* There might be a carry. */
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		/*
		 * ip_len is turned into the 32-bit word that is stored over
		 * uha_length and the field following it in one write below.
		 */
#ifdef _LITTLE_ENDIAN
		if (us->us_do_checksum)
			ip_len = (cksum << 16) | ip_len;
#else
		if (us->us_do_checksum)
			ip_len = (ip_len << 16) | cksum;
		else
			ip_len <<= 16;
#endif
	} else {
		/*
		 * IP does the checksum if uha_checksum is non-zero,
		 * We make it easy for IP to include our pseudo header
		 * by putting our length in uha_checksum.
		 */
		if (us->us_do_checksum)
			ip_len |= (ip_len << 16);
#ifndef _LITTLE_ENDIAN
		else
			ip_len <<= 16;
#endif
	}
	ASSERT(!lock_held);
	/* Set UDP length and checksum */
	*((uint32_t *)&udpha->uha_length) = ip_len;

	if (DB_TYPE(mp) != M_DATA) {
		cred_t *cr;
		pid_t cpid;

		/* Move any cred from the T_UNITDATA_REQ to the packet */
		cr = msg_extractcred(mp, &cpid);
		if (cr != NULL) {
			if (mp1->b_datap->db_credp != NULL)
				crfree(mp1->b_datap->db_credp);
			mp1->b_datap->db_credp = cr;
			mp1->b_datap->db_cpid = cpid;
		}
		ASSERT(mp != mp1);
		/* Only the request header block is freed; mp1 lives on. */
		freeb(mp);
	}

	/* mp has been consumed and we'll return success */
	ASSERT(*error == 0);
	mp = NULL;

	/* We're done.  Pass the packet to ip. */
	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
	    "udp_wput_end: q %p (%S)", q, "end");

	/*
	 * Anything that needs policy, per-packet option, IP option, IPP or
	 * multicast-router handling goes through ip_output_options(); the
	 * rest is handed to udp_send_data().
	 */
	if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 ||
	    CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) ||
	    connp->conn_dontroute ||
	    connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 ||
	    optinfo.ip_opt_ill_index != 0 ||
	    ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION ||
	    IPP_ENABLED(IPP_LOCAL_OUT, ipst) ||
	    ipst->ips_ip_g_mrouter != NULL) {
		UDP_STAT(us, udp_ip_send);
		ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT,
		    &optinfo);
	} else {
		udp_send_data(udp, connp->conn_wq, mp1, ipha);
	}

done:
	if (lock_held)
		rw_exit(&udp->udp_rwlock);
	if (*error != 0) {
		ASSERT(mp != NULL);
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
	}
	return (mp);
}

/*
 * Send a fully formed IPv4/UDP packet (mp, with its IP header at ipha).
 *
 * The conn's cached ire is used when it still matches the destination;
 * otherwise a new ire is looked up and, if allowed, cached in the conn.
 * When the ire qualifies for the fast path the packet is passed to
 * udp_xmit(); in every other case it is given to ip_output().  For a
 * multicast destination a hold on the conn's multicast ipif is taken and
 * released again before returning.
 */
static void
udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha)
{
	conn_t	*connp = udp->udp_connp;
	ipaddr_t src, dst;
	ire_t	*ire;
	ipif_t	*ipif = NULL;
	mblk_t	*ire_fp_mp;
	boolean_t retry_caching;
	udp_stack_t *us = udp->udp_us;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	dst = ipha->ipha_dst;
	src = ipha->ipha_src;
	ASSERT(ipha->ipha_ident == 0);

	if (CLASSD(dst)) {
		int err;

		ipif = conn_get_held_ipif(connp,
		    &connp->conn_multicast_ipif, &err);

		/*
		 * No usable IPv4, non-loopback multicast interface: let
		 * ip_output() deal with the packet.
		 */
		if (ipif == NULL || ipif->ipif_isv6 ||
		    (ipif->ipif_ill->ill_phyint->phyint_flags &
		    PHYI_LOOPBACK)) {
			if (ipif != NULL)
				ipif_refrele(ipif);
			UDP_STAT(us, udp_ip_send);
			ip_output(connp, mp, q, IP_WPUT);
			return;
		}
	}

	retry_caching = B_FALSE;
	mutex_enter(&connp->conn_lock);
	ire = connp->conn_ire_cache;
	ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT));

	/* Decide whether the cached ire can be reused for this packet. */
	if (ire == NULL || ire->ire_addr != dst ||
	    (ire->ire_marks & IRE_MARK_CONDEMNED)) {
		retry_caching = B_TRUE;
	} else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) {
		ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr;

		ASSERT(ipif != NULL);
		if (!IS_ON_SAME_LAN(stq_ill, ipif->ipif_ill))
			retry_caching = B_TRUE;
	}

	if (!retry_caching) {
		ASSERT(ire != NULL);
		IRE_REFHOLD(ire);
		mutex_exit(&connp->conn_lock);
	} else {
		boolean_t cached = B_FALSE;

		connp->conn_ire_cache = NULL;
		mutex_exit(&connp->conn_lock);

		/* Release the old ire */
		if (ire != NULL) {
			IRE_REFRELE_NOTR(ire);
			ire = NULL;
		}

		if (CLASSD(dst)) {
			ASSERT(ipif != NULL);
			ire = ire_ctable_lookup(dst, 0, 0, ipif,
			    connp->conn_zoneid, msg_getlabel(mp),
			    MATCH_IRE_ILL, ipst);
		} else {
			ASSERT(ipif == NULL);
			ire = ire_cache_lookup(dst, connp->conn_zoneid,
			    msg_getlabel(mp), ipst);
		}

		if (ire == NULL) {
			if (ipif != NULL)
				ipif_refrele(ipif);
			UDP_STAT(us, udp_ire_null);
			ip_output(connp, mp, q, IP_WPUT);
			return;
		}
		/* Extra hold on behalf of conn_ire_cache; see below. */
		IRE_REFHOLD_NOTR(ire);

		mutex_enter(&connp->conn_lock);
		if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL &&
		    !(ire->ire_marks & IRE_MARK_CONDEMNED)) {
			irb_t		*irb = ire->ire_bucket;

			/*
			 * IRE's created for non-connection oriented transports
			 * are normally initialized with IRE_MARK_TEMPORARY set
			 * in the ire_marks. These IRE's are preferentially
			 * reaped when the hash chain length in the cache
			 * bucket exceeds the maximum value specified in
			 * ip[6]_ire_max_bucket_cnt. This can severely affect
			 * UDP performance if IRE cache entries that we need
			 * to reuse are continually removed. To remedy this,
			 * when we cache the IRE in the conn_t, we remove the
			 * IRE_MARK_TEMPORARY bit from the ire_marks if it was
			 * set.
			 */
			if (ire->ire_marks & IRE_MARK_TEMPORARY) {
				rw_enter(&irb->irb_lock, RW_WRITER);
				/* Re-check now that irb_lock is held. */
				if (ire->ire_marks & IRE_MARK_TEMPORARY) {
					ire->ire_marks &= ~IRE_MARK_TEMPORARY;
					irb->irb_tmp_ire_cnt--;
				}
				rw_exit(&irb->irb_lock);
			}
			connp->conn_ire_cache = ire;
			cached = B_TRUE;
		}
		mutex_exit(&connp->conn_lock);

		/*
		 * We can continue to use the ire but since it was not
		 * cached, we should drop the extra reference.
		 */
		if (!cached)
			IRE_REFRELE_NOTR(ire);
	}
	ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION);
	ASSERT(!CLASSD(dst) || ipif != NULL);

	/*
	 * Check if we can take the fast-path.
	 * Note that "incomplete" ire's (where the link-layer for next hop
	 * is not resolved, or where the fast-path header in nce_fp_mp is not
	 * available yet) are sent down the legacy (slow) path
	 *
	 * ire_fp_mp is assigned inside the condition; the final MBLKL()
	 * test is only reached once that assignment produced a non-NULL
	 * value.
	 */
	if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) ||
	    (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) ||
	    (ire->ire_max_frag < ntohs(ipha->ipha_length)) ||
	    ((ire->ire_nce == NULL) ||
	    ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) ||
	    connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) {
		if (ipif != NULL)
			ipif_refrele(ipif);
		UDP_STAT(us, udp_ip_ire_send);
		IRE_REFRELE(ire);
		ip_output(connp, mp, q, IP_WPUT);
		return;
	}

	/* Fill in a source address if the caller left it unspecified. */
	if (src == INADDR_ANY && !connp->conn_unspec_src) {
		if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC))
			ipha->ipha_src = ipif->ipif_src_addr;
		else
			ipha->ipha_src = ire->ire_src_addr;
	}

	if (ipif != NULL)
		ipif_refrele(ipif);

	/* udp_xmit() releases the hold on ire. */
	udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid);
}

/*
 * Fast-path transmit of an IPv4/UDP packet with a simple IP header.
 *
 * Finishes the IP header (ident, fragment flags, checksums), prepends the
 * link-layer header taken from ire->ire_nce->nce_fp_mp and passes the
 * packet to the driver, either through the ill's direct transmit function
 * or with putnext() on ire->ire_stq.  mp is consumed and the caller's hold
 * on ire is released on every return path.
 */
static void
udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid)
{
	ipaddr_t src, dst;
	ill_t	*ill;
	mblk_t	*ire_fp_mp;
	uint_t	ire_fp_mp_len;
	uint16_t *up;
	uint32_t cksum, hcksum_txflags;
	queue_t	*dev_q;
	udp_t	*udp = connp->conn_udp;
	ipha_t	*ipha = (ipha_t *)mp->b_rptr;
	udp_stack_t	*us = udp->udp_us;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	boolean_t ll_multicast = B_FALSE;
	boolean_t direct_send;

	dev_q = ire->ire_stq->q_next;
	ASSERT(dev_q != NULL);

	ill = ire_to_ill(ire);
	ASSERT(ill != NULL);

	/*
	 * For the direct send case, if resetting of conn_direct_blocked
	 * was missed, it is still ok because the putq() would enable
	 * the queue and write service will drain it out.
	 */
	direct_send = ILL_DIRECT_CAPABLE(ill);

	/*
	 * is queue flow controlled?
	 * If so the packet is either queued on the conn's write queue or
	 * dropped, depending on ips_ip_output_queue.
	 */
	if ((!direct_send) && (q->q_first != NULL || connp->conn_draining ||
	    DEV_Q_FLOW_BLOCKED(dev_q))) {
		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
		if (ipst->ips_ip_output_queue) {
			DTRACE_PROBE1(udp__xmit__putq, conn_t *, connp);
			(void) putq(connp->conn_wq, mp);
		} else {
			freemsg(mp);
		}
		ire_refrele(ire);
		return;
	}

	ire_fp_mp = ire->ire_nce->nce_fp_mp;
	ire_fp_mp_len = MBLKL(ire_fp_mp);
	ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len);

	dst = ipha->ipha_dst;
	src = ipha->ipha_src;


	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);

	/* Take the next ident from the ire; byte-swap it off big-endian. */
	ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1);
#ifndef _BIG_ENDIAN
	ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8);
#endif

	if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
		ASSERT(ill->ill_hcksum_capab != NULL);
		hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags;
	} else {
		hcksum_txflags = 0;
	}

	/* pseudo-header checksum (do it in parts for IP header checksum) */
	cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF);

	ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION);
	up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
	/* A zero UDP checksum field means no UDP checksum was requested. */
	if (*up != 0) {
		IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags,
		    mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH,
		    ntohs(ipha->ipha_length), cksum);

		/* Software checksum? */
		if (DB_CKSUMFLAGS(mp) == 0) {
			UDP_STAT(us, udp_out_sw_cksum);
			UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes,
			    ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH);
		}
	}

	/* The ire's fragment flag is applied to non-multicast packets only. */
	if (!CLASSD(dst)) {
		ipha->ipha_fragment_offset_and_flags |=
		    (uint32_t)htons(ire->ire_frag_flag);
	}

	/* Calculate IP header checksum if hardware isn't capable */
	if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) {
		IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0],
		    ((uint16_t *)ipha)[4]);
	}

	if (CLASSD(dst)) {
		/* Loop the packet back if the group is joined on this ill. */
		if (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL) {
			ip_multicast_loopback(q, ill, mp,
			    connp->conn_multicast_loop ? 0 :
			    IP_FF_NO_MCAST_LOOP, zoneid);
		}

		/* If multicast TTL is 0 then we are done */
		if (ipha->ipha_ttl == 0) {
			freemsg(mp);
			ire_refrele(ire);
			return;
		}
		ll_multicast = B_TRUE;
	}

	/* Prepend the link-layer fast-path header in front of the IP header */
	ASSERT(DB_TYPE(ire_fp_mp) == M_DATA);
	mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len;
	bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len);

	UPDATE_OB_PKT_COUNT(ire);
	ire->ire_last_used_time = lbolt;

	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits);
	UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets,
	    ntohs(ipha->ipha_length));

	/*
	 * Run the physical-out firewall hooks.  mp is tested for NULL right
	 * after FW_HOOKS, i.e. the hooks may leave nothing to send.
	 */
	DTRACE_PROBE4(ip4__physical__out__start,
	    ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp);
	FW_HOOKS(ipst->ips_ip4_physical_out_event,
	    ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp,
	    ll_multicast, ipst);
	DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp);
	if (ipst->ips_ipobs_enabled && mp != NULL) {
		zoneid_t szone;

		szone = ip_get_zoneid_v4(ipha->ipha_src, mp,
		    ipst, ALL_ZONES);
		ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone,
		    ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst);
	}

	if (mp == NULL)
		goto bail;

	DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL,
	    void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill,
	    ipha_t *, ipha, ip6_t *, NULL, int, 0);

	if (direct_send) {
		uintptr_t cookie;
		ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct;

		/* A non-zero cookie from the driver means flow control. */
		cookie = idd->idd_tx_df(idd->idd_tx_dh, mp,
		    (uintptr_t)connp, 0);
		if (cookie != NULL) {
			idl_tx_list_t *idl_txl;

			/*
			 * Flow controlled.
			 */
			DTRACE_PROBE2(non__null__cookie, uintptr_t,
			    cookie, conn_t *, connp);
			idl_txl = &ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)];
			mutex_enter(&idl_txl->txl_lock);
			/*
			 * Check again after holding txl_lock to see if Tx
			 * ring is still blocked and only then insert the
			 * connp into the drain list.
			 */
			if (connp->conn_direct_blocked ||
			    (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh,
			    cookie) == 0)) {
				mutex_exit(&idl_txl->txl_lock);
				goto bail;
			}
			/*
			 * The list slot already belongs to a different
			 * cookie: only count the collision, do not insert.
			 */
			if (idl_txl->txl_cookie != NULL &&
			    idl_txl->txl_cookie != cookie) {
				DTRACE_PROBE2(udp__xmit__collision,
				    uintptr_t, cookie,
				    uintptr_t, idl_txl->txl_cookie);
				UDP_STAT(us, udp_cookie_coll);
			} else {
				connp->conn_direct_blocked = B_TRUE;
				idl_txl->txl_cookie = cookie;
				conn_drain_insert(connp, idl_txl);
				DTRACE_PROBE1(udp__xmit__insert,
				    conn_t *, connp);
			}
			mutex_exit(&idl_txl->txl_lock);
		}
	} else {
		DTRACE_PROBE1(udp__xmit__putnext, mblk_t *, mp);
		putnext(ire->ire_stq, mp);
	}
bail:
	IRE_REFRELE(ire);
}

/*
 * IPv6 counterpart of udp_update_label(): recompute the security label
 * for a datagram sent on wq to dst and store it in the endpoint's sticky
 * IPv6 options (udp_sticky_ipp / udp_label_len_v6).
 *
 * Returns 0 on success; otherwise EINVAL (mp carries no cred) or the
 * error reported by the tsol_*() helpers.
 * NOTE(review): the function is declared boolean_t but returns errno-style
 * integers like its IPv4 sibling (which is declared int) -- confirm
 * against the callers and any prototype before changing the type.
 */
static boolean_t
udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst)
{
	udp_t		*udp = Q_TO_UDP(wq);
	int		err;
	cred_t		*cred;
	cred_t		*orig_cred;
	cred_t		*effective_cred = NULL;
	uchar_t		opt_storage[TSOL_MAX_IPV6_OPTION];
	udp_stack_t	*us = udp->udp_us;

	/*
	 * All Solaris components should pass a db_credp
	 * for this message, hence we ASSERT.
5689 * On production kernels we return an error to be robust against 5690 * random streams modules sitting on top of us. 5691 */ 5692 cred = orig_cred = msg_getcred(mp, NULL); 5693 ASSERT(cred != NULL); 5694 if (cred == NULL) 5695 return (EINVAL); 5696 5697 /* 5698 * Verify the destination is allowed to receive packets at 5699 * the security label of the message data. tsol_check_dest() 5700 * may create a new effective cred for this message with a 5701 * modified label or label flags. Note that we use the 5702 * cred/label from the message to handle MLP. 5703 */ 5704 if ((err = tsol_check_dest(cred, dst, IPV6_VERSION, 5705 udp->udp_connp->conn_mac_exempt, &effective_cred)) != 0) 5706 goto done; 5707 if (effective_cred != NULL) 5708 cred = effective_cred; 5709 5710 /* 5711 * Calculate the security label to be placed in the text 5712 * of the message (if any). 5713 */ 5714 if ((err = tsol_compute_label_v6(cred, dst, opt_storage, 5715 us->us_netstack->netstack_ip)) != 0) 5716 goto done; 5717 5718 /* 5719 * Insert the security label in the cached ip options, 5720 * removing any old label that may exist. 5721 */ 5722 if ((err = tsol_update_sticky(&udp->udp_sticky_ipp, 5723 &udp->udp_label_len_v6, opt_storage)) != 0) 5724 goto done; 5725 5726 /* 5727 * Save the destination address and cred we used to 5728 * generate the security label text. 
5729 */ 5730 if (cred != udp->udp_effective_cred) { 5731 if (udp->udp_effective_cred != NULL) 5732 crfree(udp->udp_effective_cred); 5733 crhold(cred); 5734 udp->udp_effective_cred = cred; 5735 } 5736 if (orig_cred != udp->udp_last_cred) { 5737 if (udp->udp_last_cred != NULL) 5738 crfree(udp->udp_last_cred); 5739 crhold(orig_cred); 5740 udp->udp_last_cred = orig_cred; 5741 } 5742 5743 done: 5744 if (effective_cred != NULL) 5745 crfree(effective_cred); 5746 5747 if (err != 0) { 5748 DTRACE_PROBE4( 5749 tx__ip__log__drop__updatelabel__udp6, 5750 char *, "queue(1) failed to update options(2) on mp(3)", 5751 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5752 } 5753 return (err); 5754 } 5755 5756 static int 5757 udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr, 5758 pid_t pid) 5759 { 5760 udp_t *udp = connp->conn_udp; 5761 udp_stack_t *us = udp->udp_us; 5762 ipaddr_t v4dst; 5763 in_port_t dstport; 5764 boolean_t mapped_addr; 5765 struct sockaddr_storage ss; 5766 sin_t *sin; 5767 sin6_t *sin6; 5768 struct sockaddr *addr; 5769 socklen_t addrlen; 5770 int error; 5771 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5772 5773 /* M_DATA for connected socket */ 5774 5775 ASSERT(udp->udp_issocket); 5776 UDP_DBGSTAT(us, udp_data_conn); 5777 5778 mutex_enter(&connp->conn_lock); 5779 if (udp->udp_state != TS_DATA_XFER) { 5780 mutex_exit(&connp->conn_lock); 5781 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5782 UDP_STAT(us, udp_out_err_notconn); 5783 freemsg(mp); 5784 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5785 "udp_wput_end: connp %p (%S)", connp, 5786 "not-connected; address required"); 5787 return (EDESTADDRREQ); 5788 } 5789 5790 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 5791 if (mapped_addr) 5792 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 5793 5794 /* Initialize addr and addrlen as if they're passed in */ 5795 if (udp->udp_family == AF_INET) { 5796 sin = (sin_t *)&ss; 5797 sin->sin_family = AF_INET; 5798 dstport = sin->sin_port = 
udp->udp_dstport; 5799 ASSERT(mapped_addr); 5800 sin->sin_addr.s_addr = v4dst; 5801 addr = (struct sockaddr *)sin; 5802 addrlen = sizeof (*sin); 5803 } else { 5804 sin6 = (sin6_t *)&ss; 5805 sin6->sin6_family = AF_INET6; 5806 dstport = sin6->sin6_port = udp->udp_dstport; 5807 sin6->sin6_flowinfo = udp->udp_flowinfo; 5808 sin6->sin6_addr = udp->udp_v6dst; 5809 sin6->sin6_scope_id = 0; 5810 sin6->__sin6_src_id = 0; 5811 addr = (struct sockaddr *)sin6; 5812 addrlen = sizeof (*sin6); 5813 } 5814 mutex_exit(&connp->conn_lock); 5815 5816 if (mapped_addr) { 5817 /* 5818 * Handle both AF_INET and AF_INET6; the latter 5819 * for IPV4 mapped destination addresses. Note 5820 * here that both addr and addrlen point to the 5821 * corresponding struct depending on the address 5822 * family of the socket. 5823 */ 5824 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 5825 insert_spi, msg, cr, pid); 5826 } else { 5827 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid); 5828 } 5829 if (error == 0) { 5830 ASSERT(mp == NULL); 5831 return (0); 5832 } 5833 5834 UDP_STAT(us, udp_out_err_output); 5835 ASSERT(mp != NULL); 5836 if (IPCL_IS_NONSTR(connp)) { 5837 freemsg(mp); 5838 return (error); 5839 } else { 5840 /* mp is freed by the following routine */ 5841 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, 5842 (t_scalar_t)addrlen, (t_scalar_t)error); 5843 return (0); 5844 } 5845 } 5846 5847 /* ARGSUSED */ 5848 static int 5849 udp_send_not_connected(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 5850 socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid) 5851 { 5852 5853 udp_t *udp = connp->conn_udp; 5854 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5855 int error = 0; 5856 sin6_t *sin6; 5857 sin_t *sin; 5858 uint_t srcid; 5859 uint16_t port; 5860 ipaddr_t v4dst; 5861 5862 5863 ASSERT(addr != NULL); 5864 5865 switch (udp->udp_family) { 5866 case AF_INET6: 5867 sin6 = (sin6_t *)addr; 5868 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 5869 /* 5870 * Destination 
is a non-IPv4-compatible IPv6 address. 5871 * Send out an IPv6 format packet. 5872 */ 5873 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, 5874 pid); 5875 if (error != 0) 5876 goto ud_error; 5877 5878 return (0); 5879 } 5880 /* 5881 * If the local address is not zero or a mapped address 5882 * return an error. It would be possible to send an IPv4 5883 * packet but the response would never make it back to the 5884 * application since it is bound to a non-mapped address. 5885 */ 5886 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 5887 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 5888 error = EADDRNOTAVAIL; 5889 goto ud_error; 5890 } 5891 /* Send IPv4 packet without modifying udp_ipversion */ 5892 /* Extract port and ipaddr */ 5893 port = sin6->sin6_port; 5894 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 5895 srcid = sin6->__sin6_src_id; 5896 break; 5897 5898 case AF_INET: 5899 sin = (sin_t *)addr; 5900 /* Extract port and ipaddr */ 5901 port = sin->sin_port; 5902 v4dst = sin->sin_addr.s_addr; 5903 srcid = 0; 5904 break; 5905 } 5906 5907 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi, 5908 msg, cr, pid); 5909 5910 if (error == 0) { 5911 ASSERT(mp == NULL); 5912 return (0); 5913 } 5914 5915 ud_error: 5916 ASSERT(mp != NULL); 5917 5918 return (error); 5919 } 5920 5921 /* 5922 * This routine handles all messages passed downstream. It either 5923 * consumes the message or passes it downstream; it never queues a 5924 * a message. 5925 * 5926 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 5927 * is valid when we are directly beneath the stream head, and thus sockfs 5928 * is able to bypass STREAMS and directly call us, passing along the sockaddr 5929 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 5930 * connected endpoints. 
5931 */ 5932 void 5933 udp_wput(queue_t *q, mblk_t *mp) 5934 { 5935 conn_t *connp = Q_TO_CONN(q); 5936 udp_t *udp = connp->conn_udp; 5937 int error = 0; 5938 struct sockaddr *addr; 5939 socklen_t addrlen; 5940 udp_stack_t *us = udp->udp_us; 5941 5942 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 5943 "udp_wput_start: queue %p mp %p", q, mp); 5944 5945 /* 5946 * We directly handle several cases here: T_UNITDATA_REQ message 5947 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 5948 * socket. 5949 */ 5950 switch (DB_TYPE(mp)) { 5951 case M_DATA: 5952 /* 5953 * Quick check for error cases. Checks will be done again 5954 * under the lock later on 5955 */ 5956 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 5957 /* Not connected; address is required */ 5958 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5959 UDP_STAT(us, udp_out_err_notconn); 5960 freemsg(mp); 5961 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5962 "udp_wput_end: connp %p (%S)", connp, 5963 "not-connected; address required"); 5964 return; 5965 } 5966 (void) udp_send_connected(connp, mp, NULL, NULL, -1); 5967 return; 5968 5969 case M_PROTO: 5970 case M_PCPROTO: { 5971 struct T_unitdata_req *tudr; 5972 5973 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 5974 tudr = (struct T_unitdata_req *)mp->b_rptr; 5975 5976 /* Handle valid T_UNITDATA_REQ here */ 5977 if (MBLKL(mp) >= sizeof (*tudr) && 5978 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 5979 if (mp->b_cont == NULL) { 5980 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5981 "udp_wput_end: q %p (%S)", q, "badaddr"); 5982 error = EPROTO; 5983 goto ud_error; 5984 } 5985 5986 if (!MBLKIN(mp, 0, tudr->DEST_offset + 5987 tudr->DEST_length)) { 5988 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5989 "udp_wput_end: q %p (%S)", q, "badaddr"); 5990 error = EADDRNOTAVAIL; 5991 goto ud_error; 5992 } 5993 /* 5994 * If a port has not been bound to the stream, fail. 
5995 * This is not a problem when sockfs is directly 5996 * above us, because it will ensure that the socket 5997 * is first bound before allowing data to be sent. 5998 */ 5999 if (udp->udp_state == TS_UNBND) { 6000 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6001 "udp_wput_end: q %p (%S)", q, "outstate"); 6002 error = EPROTO; 6003 goto ud_error; 6004 } 6005 addr = (struct sockaddr *) 6006 &mp->b_rptr[tudr->DEST_offset]; 6007 addrlen = tudr->DEST_length; 6008 if (tudr->OPT_length != 0) 6009 UDP_STAT(us, udp_out_opt); 6010 break; 6011 } 6012 /* FALLTHRU */ 6013 } 6014 default: 6015 udp_wput_other(q, mp); 6016 return; 6017 } 6018 ASSERT(addr != NULL); 6019 6020 error = udp_send_not_connected(connp, mp, addr, addrlen, NULL, NULL, 6021 -1); 6022 if (error != 0) { 6023 ud_error: 6024 UDP_STAT(us, udp_out_err_output); 6025 ASSERT(mp != NULL); 6026 /* mp is freed by the following routine */ 6027 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6028 (t_scalar_t)error); 6029 } 6030 } 6031 6032 /* ARGSUSED */ 6033 static void 6034 udp_wput_fallback(queue_t *wq, mblk_t *mp) 6035 { 6036 #ifdef DEBUG 6037 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 6038 #endif 6039 freemsg(mp); 6040 } 6041 6042 6043 /* 6044 * udp_output_v6(): 6045 * Assumes that udp_wput did some sanity checking on the destination 6046 * address. 
6047 */ 6048 static mblk_t * 6049 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, 6050 struct nmsghdr *msg, cred_t *cr, pid_t pid) 6051 { 6052 ip6_t *ip6h; 6053 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6054 mblk_t *mp1 = mp; 6055 mblk_t *mp2; 6056 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6057 size_t ip_len; 6058 udpha_t *udph; 6059 udp_t *udp = connp->conn_udp; 6060 udp_stack_t *us = udp->udp_us; 6061 queue_t *q = connp->conn_wq; 6062 ip6_pkt_t ipp_s; /* For ancillary data options */ 6063 ip6_pkt_t *ipp = &ipp_s; 6064 ip6_pkt_t *tipp; /* temporary ipp */ 6065 uint32_t csum = 0; 6066 uint_t ignore = 0; 6067 uint_t option_exists = 0, is_sticky = 0; 6068 uint8_t *cp; 6069 uint8_t *nxthdr_ptr; 6070 in6_addr_t ip6_dst; 6071 in_port_t port; 6072 udpattrs_t attrs; 6073 boolean_t opt_present; 6074 ip6_hbh_t *hopoptsptr = NULL; 6075 uint_t hopoptslen = 0; 6076 boolean_t is_ancillary = B_FALSE; 6077 size_t sth_wroff = 0; 6078 ire_t *ire; 6079 boolean_t update_lastdst = B_FALSE; 6080 6081 *error = 0; 6082 6083 /* 6084 * If the local address is a mapped address return 6085 * an error. 6086 * It would be possible to send an IPv6 packet but the 6087 * response would never make it back to the application 6088 * since it is bound to a mapped address. 
6089 */ 6090 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6091 *error = EADDRNOTAVAIL; 6092 goto done; 6093 } 6094 6095 ipp->ipp_fields = 0; 6096 ipp->ipp_sticky_ignored = 0; 6097 6098 /* 6099 * If TPI options passed in, feed it for verification and handling 6100 */ 6101 attrs.udpattr_credset = B_FALSE; 6102 opt_present = B_FALSE; 6103 if (IPCL_IS_NONSTR(connp)) { 6104 if (msg->msg_controllen != 0) { 6105 attrs.udpattr_ipp6 = ipp; 6106 attrs.udpattr_mb = mp; 6107 6108 rw_enter(&udp->udp_rwlock, RW_WRITER); 6109 *error = process_auxiliary_options(connp, 6110 msg->msg_control, msg->msg_controllen, 6111 &attrs, &udp_opt_obj, udp_opt_set, cr); 6112 rw_exit(&udp->udp_rwlock); 6113 if (*error) 6114 goto done; 6115 ASSERT(*error == 0); 6116 opt_present = B_TRUE; 6117 } 6118 } else { 6119 if (DB_TYPE(mp) != M_DATA) { 6120 mp1 = mp->b_cont; 6121 if (((struct T_unitdata_req *) 6122 mp->b_rptr)->OPT_length != 0) { 6123 attrs.udpattr_ipp6 = ipp; 6124 attrs.udpattr_mb = mp; 6125 if (udp_unitdata_opt_process(q, mp, error, 6126 &attrs) < 0) { 6127 goto done; 6128 } 6129 ASSERT(*error == 0); 6130 opt_present = B_TRUE; 6131 } 6132 } 6133 } 6134 6135 /* 6136 * Determine whether we need to mark the mblk with the user's 6137 * credentials. 6138 * If labeled then sockfs would have already done this. 
6139 */ 6140 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 6141 ire = connp->conn_ire_cache; 6142 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || (ire == NULL) || 6143 (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) || 6144 (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) { 6145 if (cr != NULL && msg_getcred(mp, NULL) == NULL) 6146 mblk_setcred(mp, cr, pid); 6147 } 6148 6149 rw_enter(&udp->udp_rwlock, RW_READER); 6150 ignore = ipp->ipp_sticky_ignored; 6151 6152 /* mp1 points to the M_DATA mblk carrying the packet */ 6153 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6154 6155 if (sin6->sin6_scope_id != 0 && 6156 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6157 /* 6158 * IPPF_SCOPE_ID is special. It's neither a sticky 6159 * option nor ancillary data. It needs to be 6160 * explicitly set in options_exists. 6161 */ 6162 option_exists |= IPPF_SCOPE_ID; 6163 } 6164 6165 /* 6166 * Compute the destination address 6167 */ 6168 ip6_dst = sin6->sin6_addr; 6169 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6170 ip6_dst = ipv6_loopback; 6171 6172 port = sin6->sin6_port; 6173 6174 /* 6175 * Cluster and TSOL notes, Cluster check: 6176 * see comments in udp_output_v4(). 6177 */ 6178 mutex_enter(&connp->conn_lock); 6179 6180 if (cl_inet_connect2 != NULL && 6181 (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) || 6182 port != udp->udp_lastdstport)) { 6183 mutex_exit(&connp->conn_lock); 6184 *error = 0; 6185 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error); 6186 if (*error != 0) { 6187 *error = EHOSTUNREACH; 6188 rw_exit(&udp->udp_rwlock); 6189 goto done; 6190 } 6191 update_lastdst = B_TRUE; 6192 mutex_enter(&connp->conn_lock); 6193 } 6194 6195 /* 6196 * If we're not going to the same destination as last time, then 6197 * recompute the label required. This is done in a separate routine to 6198 * avoid blowing up our stack here. 
6199 * 6200 * TSOL Note: Since we are not in WRITER mode, UDP packets 6201 * to different destination may require different labels, 6202 * or worse, UDP packets to same IP address may require 6203 * different labels due to use of shared all-zones address. 6204 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6205 * and sticky ipp_hopoptslen are consistent for the current 6206 * destination and are updated atomically. 6207 */ 6208 if (is_system_labeled()) { 6209 cred_t *credp; 6210 pid_t cpid; 6211 6212 /* Using UDP MLP requires SCM_UCRED from user */ 6213 if (connp->conn_mlp_type != mlptSingle && 6214 !attrs.udpattr_credset) { 6215 DTRACE_PROBE4( 6216 tx__ip__log__info__output__udp6, 6217 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6218 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6219 *error = EINVAL; 6220 rw_exit(&udp->udp_rwlock); 6221 mutex_exit(&connp->conn_lock); 6222 goto done; 6223 } 6224 /* 6225 * update label option for this UDP socket if 6226 * - the destination has changed, 6227 * - the UDP socket is MLP, or 6228 * - the cred attached to the mblk changed. 6229 */ 6230 credp = msg_getcred(mp, &cpid); 6231 if (opt_present || 6232 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6233 connp->conn_mlp_type != mlptSingle || 6234 credp != udp->udp_last_cred) { 6235 if ((*error = udp_update_label_v6(q, mp, &ip6_dst)) 6236 != 0) { 6237 rw_exit(&udp->udp_rwlock); 6238 mutex_exit(&connp->conn_lock); 6239 goto done; 6240 } 6241 update_lastdst = B_TRUE; 6242 } 6243 /* 6244 * Attach the effective cred to the mblk to ensure future 6245 * routing decisions will be based on it's label. 6246 */ 6247 mblk_setcred(mp, udp->udp_effective_cred, cpid); 6248 } 6249 6250 if (update_lastdst) { 6251 udp->udp_v6lastdst = ip6_dst; 6252 udp->udp_lastdstport = port; 6253 } 6254 6255 /* 6256 * If there's a security label here, then we ignore any options the 6257 * user may try to set. We keep the peer's label as a hidden sticky 6258 * option. 
We make a private copy of this label before releasing the 6259 * lock so that label is kept consistent with the destination addr. 6260 */ 6261 if (udp->udp_label_len_v6 > 0) { 6262 ignore &= ~IPPF_HOPOPTS; 6263 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6264 } 6265 6266 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6267 /* No sticky options nor ancillary data. */ 6268 mutex_exit(&connp->conn_lock); 6269 goto no_options; 6270 } 6271 6272 /* 6273 * Go through the options figuring out where each is going to 6274 * come from and build two masks. The first mask indicates if 6275 * the option exists at all. The second mask indicates if the 6276 * option is sticky or ancillary. 6277 */ 6278 if (!(ignore & IPPF_HOPOPTS)) { 6279 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6280 option_exists |= IPPF_HOPOPTS; 6281 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6282 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6283 option_exists |= IPPF_HOPOPTS; 6284 is_sticky |= IPPF_HOPOPTS; 6285 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6286 hopoptsptr = kmem_alloc( 6287 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6288 if (hopoptsptr == NULL) { 6289 *error = ENOMEM; 6290 mutex_exit(&connp->conn_lock); 6291 goto done; 6292 } 6293 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6294 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6295 hopoptslen); 6296 udp_ip_hdr_len += hopoptslen; 6297 } 6298 } 6299 mutex_exit(&connp->conn_lock); 6300 6301 if (!(ignore & IPPF_RTHDR)) { 6302 if (ipp->ipp_fields & IPPF_RTHDR) { 6303 option_exists |= IPPF_RTHDR; 6304 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6305 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6306 option_exists |= IPPF_RTHDR; 6307 is_sticky |= IPPF_RTHDR; 6308 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6309 } 6310 } 6311 6312 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6313 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6314 option_exists |= IPPF_RTDSTOPTS; 6315 udp_ip_hdr_len += 
ipp->ipp_rtdstoptslen; 6316 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6317 option_exists |= IPPF_RTDSTOPTS; 6318 is_sticky |= IPPF_RTDSTOPTS; 6319 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6320 } 6321 } 6322 6323 if (!(ignore & IPPF_DSTOPTS)) { 6324 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6325 option_exists |= IPPF_DSTOPTS; 6326 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6327 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6328 option_exists |= IPPF_DSTOPTS; 6329 is_sticky |= IPPF_DSTOPTS; 6330 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6331 } 6332 } 6333 6334 if (!(ignore & IPPF_IFINDEX)) { 6335 if (ipp->ipp_fields & IPPF_IFINDEX) { 6336 option_exists |= IPPF_IFINDEX; 6337 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6338 option_exists |= IPPF_IFINDEX; 6339 is_sticky |= IPPF_IFINDEX; 6340 } 6341 } 6342 6343 if (!(ignore & IPPF_ADDR)) { 6344 if (ipp->ipp_fields & IPPF_ADDR) { 6345 option_exists |= IPPF_ADDR; 6346 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6347 option_exists |= IPPF_ADDR; 6348 is_sticky |= IPPF_ADDR; 6349 } 6350 } 6351 6352 if (!(ignore & IPPF_DONTFRAG)) { 6353 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6354 option_exists |= IPPF_DONTFRAG; 6355 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6356 option_exists |= IPPF_DONTFRAG; 6357 is_sticky |= IPPF_DONTFRAG; 6358 } 6359 } 6360 6361 if (!(ignore & IPPF_USE_MIN_MTU)) { 6362 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6363 option_exists |= IPPF_USE_MIN_MTU; 6364 } else if (udp->udp_sticky_ipp.ipp_fields & 6365 IPPF_USE_MIN_MTU) { 6366 option_exists |= IPPF_USE_MIN_MTU; 6367 is_sticky |= IPPF_USE_MIN_MTU; 6368 } 6369 } 6370 6371 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6372 option_exists |= IPPF_HOPLIMIT; 6373 /* IPV6_HOPLIMIT can never be sticky */ 6374 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6375 6376 if (!(ignore & IPPF_UNICAST_HOPS) && 6377 
(udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6378 option_exists |= IPPF_UNICAST_HOPS; 6379 is_sticky |= IPPF_UNICAST_HOPS; 6380 } 6381 6382 if (!(ignore & IPPF_MULTICAST_HOPS) && 6383 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6384 option_exists |= IPPF_MULTICAST_HOPS; 6385 is_sticky |= IPPF_MULTICAST_HOPS; 6386 } 6387 6388 if (!(ignore & IPPF_TCLASS)) { 6389 if (ipp->ipp_fields & IPPF_TCLASS) { 6390 option_exists |= IPPF_TCLASS; 6391 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6392 option_exists |= IPPF_TCLASS; 6393 is_sticky |= IPPF_TCLASS; 6394 } 6395 } 6396 6397 if (!(ignore & IPPF_NEXTHOP) && 6398 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 6399 option_exists |= IPPF_NEXTHOP; 6400 is_sticky |= IPPF_NEXTHOP; 6401 } 6402 6403 no_options: 6404 6405 /* 6406 * If any options carried in the ip6i_t were specified, we 6407 * need to account for the ip6i_t in the data we'll be sending 6408 * down. 6409 */ 6410 if (option_exists & IPPF_HAS_IP6I) 6411 udp_ip_hdr_len += sizeof (ip6i_t); 6412 6413 /* check/fix buffer config, setup pointers into it */ 6414 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6415 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6416 !OK_32PTR(ip6h)) { 6417 6418 /* Try to get everything in a single mblk next time */ 6419 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6420 udp->udp_max_hdr_len = udp_ip_hdr_len; 6421 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 6422 } 6423 6424 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 6425 if (mp2 == NULL) { 6426 *error = ENOMEM; 6427 rw_exit(&udp->udp_rwlock); 6428 goto done; 6429 } 6430 mp2->b_wptr = DB_LIM(mp2); 6431 mp2->b_cont = mp1; 6432 mp1 = mp2; 6433 if (DB_TYPE(mp) != M_DATA) 6434 mp->b_cont = mp1; 6435 else 6436 mp = mp1; 6437 6438 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6439 } 6440 mp1->b_rptr = (unsigned char *)ip6h; 6441 ip6i = (ip6i_t *)ip6h; 6442 6443 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&udp->udp_sticky_ipp : ipp) 6444 if (option_exists & IPPF_HAS_IP6I) { 6445 ip6h = (ip6_t *)&ip6i[1]; 6446 ip6i->ip6i_flags = 0; 6447 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6448 6449 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6450 if (option_exists & IPPF_SCOPE_ID) { 6451 ip6i->ip6i_flags |= IP6I_IFINDEX; 6452 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6453 } else if (option_exists & IPPF_IFINDEX) { 6454 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6455 ASSERT(tipp->ipp_ifindex != 0); 6456 ip6i->ip6i_flags |= IP6I_IFINDEX; 6457 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6458 } 6459 6460 if (option_exists & IPPF_ADDR) { 6461 /* 6462 * Enable per-packet source address verification if 6463 * IPV6_PKTINFO specified the source address. 6464 * ip6_src is set in the transport's _wput function. 6465 */ 6466 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6467 } 6468 6469 if (option_exists & IPPF_DONTFRAG) { 6470 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6471 } 6472 6473 if (option_exists & IPPF_USE_MIN_MTU) { 6474 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6475 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6476 } 6477 6478 if (option_exists & IPPF_NEXTHOP) { 6479 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6480 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6481 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6482 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6483 } 6484 6485 /* 6486 * tell IP this is an ip6i_t private header 6487 */ 6488 ip6i->ip6i_nxt = IPPROTO_RAW; 6489 } 6490 6491 /* Initialize IPv6 header */ 6492 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6493 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6494 6495 /* Set the hoplimit of the outgoing packet. */ 6496 if (option_exists & IPPF_HOPLIMIT) { 6497 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
*/ 6498 ip6h->ip6_hops = ipp->ipp_hoplimit; 6499 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6500 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6501 ip6h->ip6_hops = udp->udp_multicast_ttl; 6502 if (option_exists & IPPF_MULTICAST_HOPS) 6503 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6504 } else { 6505 ip6h->ip6_hops = udp->udp_ttl; 6506 if (option_exists & IPPF_UNICAST_HOPS) 6507 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6508 } 6509 6510 if (option_exists & IPPF_ADDR) { 6511 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6512 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6513 ip6h->ip6_src = tipp->ipp_addr; 6514 } else { 6515 /* 6516 * The source address was not set using IPV6_PKTINFO. 6517 * First look at the bound source. 6518 * If unspecified fallback to __sin6_src_id. 6519 */ 6520 ip6h->ip6_src = udp->udp_v6src; 6521 if (sin6->__sin6_src_id != 0 && 6522 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6523 ip_srcid_find_id(sin6->__sin6_src_id, 6524 &ip6h->ip6_src, connp->conn_zoneid, 6525 us->us_netstack); 6526 } 6527 } 6528 6529 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6530 cp = (uint8_t *)&ip6h[1]; 6531 6532 /* 6533 * Here's where we have to start stringing together 6534 * any extension headers in the right order: 6535 * Hop-by-hop, destination, routing, and final destination opts. 
6536 */ 6537 if (option_exists & IPPF_HOPOPTS) { 6538 /* Hop-by-hop options */ 6539 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6540 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6541 if (hopoptslen == 0) { 6542 hopoptsptr = tipp->ipp_hopopts; 6543 hopoptslen = tipp->ipp_hopoptslen; 6544 is_ancillary = B_TRUE; 6545 } 6546 6547 *nxthdr_ptr = IPPROTO_HOPOPTS; 6548 nxthdr_ptr = &hbh->ip6h_nxt; 6549 6550 bcopy(hopoptsptr, cp, hopoptslen); 6551 cp += hopoptslen; 6552 6553 if (hopoptsptr != NULL && !is_ancillary) { 6554 kmem_free(hopoptsptr, hopoptslen); 6555 hopoptsptr = NULL; 6556 hopoptslen = 0; 6557 } 6558 } 6559 /* 6560 * En-route destination options 6561 * Only do them if there's a routing header as well 6562 */ 6563 if (option_exists & IPPF_RTDSTOPTS) { 6564 ip6_dest_t *dst = (ip6_dest_t *)cp; 6565 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6566 6567 *nxthdr_ptr = IPPROTO_DSTOPTS; 6568 nxthdr_ptr = &dst->ip6d_nxt; 6569 6570 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6571 cp += tipp->ipp_rtdstoptslen; 6572 } 6573 /* 6574 * Routing header next 6575 */ 6576 if (option_exists & IPPF_RTHDR) { 6577 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6578 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6579 6580 *nxthdr_ptr = IPPROTO_ROUTING; 6581 nxthdr_ptr = &rt->ip6r_nxt; 6582 6583 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6584 cp += tipp->ipp_rthdrlen; 6585 } 6586 /* 6587 * Do ultimate destination options 6588 */ 6589 if (option_exists & IPPF_DSTOPTS) { 6590 ip6_dest_t *dest = (ip6_dest_t *)cp; 6591 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6592 6593 *nxthdr_ptr = IPPROTO_DSTOPTS; 6594 nxthdr_ptr = &dest->ip6d_nxt; 6595 6596 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6597 cp += tipp->ipp_dstoptslen; 6598 } 6599 /* 6600 * Now set the last header pointer to the proto passed in 6601 */ 6602 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6603 *nxthdr_ptr = IPPROTO_UDP; 6604 6605 /* Update UDP header */ 6606 udph = (udpha_t *)((uchar_t *)ip6i + 
udp_ip_hdr_len - UDPH_SIZE); 6607 udph->uha_dst_port = sin6->sin6_port; 6608 udph->uha_src_port = udp->udp_port; 6609 6610 /* 6611 * Copy in the destination address 6612 */ 6613 ip6h->ip6_dst = ip6_dst; 6614 6615 ip6h->ip6_vcf = 6616 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 6617 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6618 6619 if (option_exists & IPPF_TCLASS) { 6620 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6621 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6622 tipp->ipp_tclass); 6623 } 6624 rw_exit(&udp->udp_rwlock); 6625 6626 if (option_exists & IPPF_RTHDR) { 6627 ip6_rthdr_t *rth; 6628 6629 /* 6630 * Perform any processing needed for source routing. 6631 * We know that all extension headers will be in the same mblk 6632 * as the IPv6 header. 6633 */ 6634 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6635 if (rth != NULL && rth->ip6r_segleft != 0) { 6636 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6637 /* 6638 * Drop packet - only support Type 0 routing. 6639 * Notify the application as well. 6640 */ 6641 *error = EPROTO; 6642 goto done; 6643 } 6644 6645 /* 6646 * rth->ip6r_len is twice the number of 6647 * addresses in the header. Thus it must be even. 6648 */ 6649 if (rth->ip6r_len & 0x1) { 6650 *error = EPROTO; 6651 goto done; 6652 } 6653 /* 6654 * Shuffle the routing header and ip6_dst 6655 * addresses, and get the checksum difference 6656 * between the first hop (in ip6_dst) and 6657 * the destination (in the last routing hdr entry). 6658 */ 6659 csum = ip_massage_options_v6(ip6h, rth, 6660 us->us_netstack); 6661 /* 6662 * Verify that the first hop isn't a mapped address. 6663 * Routers along the path need to do this verification 6664 * for subsequent hops. 
6665 */ 6666 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6667 *error = EADDRNOTAVAIL; 6668 goto done; 6669 } 6670 6671 cp += (rth->ip6r_len + 1)*8; 6672 } 6673 } 6674 6675 /* count up length of UDP packet */ 6676 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6677 if ((mp2 = mp1->b_cont) != NULL) { 6678 do { 6679 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6680 ip_len += (uint32_t)MBLKL(mp2); 6681 } while ((mp2 = mp2->b_cont) != NULL); 6682 } 6683 6684 /* 6685 * If the size of the packet is greater than the maximum allowed by 6686 * ip, return an error. Passing this down could cause panics because 6687 * the size will have wrapped and be inconsistent with the msg size. 6688 */ 6689 if (ip_len > IP_MAXPACKET) { 6690 *error = EMSGSIZE; 6691 goto done; 6692 } 6693 6694 /* Store the UDP length. Subtract length of extension hdrs */ 6695 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 6696 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 6697 6698 /* 6699 * We make it easy for IP to include our pseudo header 6700 * by putting our length in uh_checksum, modified (if 6701 * we have a routing header) by the checksum difference 6702 * between the ultimate destination and first hop addresses. 6703 * Note: UDP over IPv6 must always checksum the packet. 
6704 */ 6705 csum += udph->uha_length; 6706 csum = (csum & 0xFFFF) + (csum >> 16); 6707 udph->uha_checksum = (uint16_t)csum; 6708 6709 #ifdef _LITTLE_ENDIAN 6710 ip_len = htons(ip_len); 6711 #endif 6712 ip6h->ip6_plen = ip_len; 6713 6714 if (DB_TYPE(mp) != M_DATA) { 6715 cred_t *cr; 6716 pid_t cpid; 6717 6718 /* Move any cred from the T_UNITDATA_REQ to the packet */ 6719 cr = msg_extractcred(mp, &cpid); 6720 if (cr != NULL) { 6721 if (mp1->b_datap->db_credp != NULL) 6722 crfree(mp1->b_datap->db_credp); 6723 mp1->b_datap->db_credp = cr; 6724 mp1->b_datap->db_cpid = cpid; 6725 } 6726 6727 ASSERT(mp != mp1); 6728 freeb(mp); 6729 } 6730 6731 /* mp has been consumed and we'll return success */ 6732 ASSERT(*error == 0); 6733 mp = NULL; 6734 6735 /* We're done. Pass the packet to IP */ 6736 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 6737 ip_output_v6(connp, mp1, q, IP_WPUT); 6738 6739 done: 6740 if (sth_wroff != 0) { 6741 (void) proto_set_tx_wroff(RD(q), connp, 6742 udp->udp_max_hdr_len + us->us_wroff_extra); 6743 } 6744 if (hopoptsptr != NULL && !is_ancillary) { 6745 kmem_free(hopoptsptr, hopoptslen); 6746 hopoptsptr = NULL; 6747 } 6748 if (*error != 0) { 6749 ASSERT(mp != NULL); 6750 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6751 } 6752 return (mp); 6753 } 6754 6755 6756 static int 6757 i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6758 { 6759 sin_t *sin = (sin_t *)sa; 6760 sin6_t *sin6 = (sin6_t *)sa; 6761 6762 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6763 6764 if (udp->udp_state != TS_DATA_XFER) 6765 return (ENOTCONN); 6766 6767 switch (udp->udp_family) { 6768 case AF_INET: 6769 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6770 6771 if (*salenp < sizeof (sin_t)) 6772 return (EINVAL); 6773 6774 *salenp = sizeof (sin_t); 6775 *sin = sin_null; 6776 sin->sin_family = AF_INET; 6777 sin->sin_port = udp->udp_dstport; 6778 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 6779 break; 6780 6781 case AF_INET6: 6782 if (*salenp < sizeof (sin6_t)) 6783 
return (EINVAL); 6784 6785 *salenp = sizeof (sin6_t); 6786 *sin6 = sin6_null; 6787 sin6->sin6_family = AF_INET6; 6788 sin6->sin6_port = udp->udp_dstport; 6789 sin6->sin6_addr = udp->udp_v6dst; 6790 sin6->sin6_flowinfo = udp->udp_flowinfo; 6791 break; 6792 } 6793 6794 return (0); 6795 } 6796 6797 static int 6798 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6799 { 6800 sin_t *sin = (sin_t *)sa; 6801 sin6_t *sin6 = (sin6_t *)sa; 6802 6803 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6804 6805 switch (udp->udp_family) { 6806 case AF_INET: 6807 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6808 6809 if (*salenp < sizeof (sin_t)) 6810 return (EINVAL); 6811 6812 *salenp = sizeof (sin_t); 6813 *sin = sin_null; 6814 sin->sin_family = AF_INET; 6815 sin->sin_port = udp->udp_port; 6816 6817 /* 6818 * If udp_v6src is unspecified, we might be bound to broadcast 6819 * / multicast. Use udp_bound_v6src as local address instead 6820 * (that could also still be unspecified). 6821 */ 6822 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 6823 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6824 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 6825 } else { 6826 sin->sin_addr.s_addr = 6827 V4_PART_OF_V6(udp->udp_bound_v6src); 6828 } 6829 break; 6830 6831 case AF_INET6: 6832 if (*salenp < sizeof (sin6_t)) 6833 return (EINVAL); 6834 6835 *salenp = sizeof (sin6_t); 6836 *sin6 = sin6_null; 6837 sin6->sin6_family = AF_INET6; 6838 sin6->sin6_port = udp->udp_port; 6839 sin6->sin6_flowinfo = udp->udp_flowinfo; 6840 6841 /* 6842 * If udp_v6src is unspecified, we might be bound to broadcast 6843 * / multicast. Use udp_bound_v6src as local address instead 6844 * (that could also still be unspecified). 6845 */ 6846 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) 6847 sin6->sin6_addr = udp->udp_v6src; 6848 else 6849 sin6->sin6_addr = udp->udp_bound_v6src; 6850 break; 6851 } 6852 6853 return (0); 6854 } 6855 6856 /* 6857 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
6858 */ 6859 static void 6860 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 6861 { 6862 void *data; 6863 mblk_t *datamp = mp->b_cont; 6864 udp_t *udp = Q_TO_UDP(q); 6865 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 6866 6867 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 6868 cmdp->cb_error = EPROTO; 6869 qreply(q, mp); 6870 return; 6871 } 6872 data = datamp->b_rptr; 6873 6874 rw_enter(&udp->udp_rwlock, RW_READER); 6875 switch (cmdp->cb_cmd) { 6876 case TI_GETPEERNAME: 6877 cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len); 6878 break; 6879 case TI_GETMYNAME: 6880 cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len); 6881 break; 6882 default: 6883 cmdp->cb_error = EINVAL; 6884 break; 6885 } 6886 rw_exit(&udp->udp_rwlock); 6887 6888 qreply(q, mp); 6889 } 6890 6891 static void 6892 udp_disable_direct_sockfs(udp_t *udp) 6893 { 6894 udp->udp_issocket = B_FALSE; 6895 if (udp->udp_direct_sockfs) { 6896 /* 6897 * Disable read-side synchronous stream interface and 6898 * drain any queued data. 
6899 */ 6900 udp_rcv_drain(udp->udp_connp->conn_rq, udp, B_FALSE); 6901 ASSERT(!udp->udp_direct_sockfs); 6902 UDP_STAT(udp->udp_us, udp_sock_fallback); 6903 } 6904 } 6905 6906 static void 6907 udp_wput_other(queue_t *q, mblk_t *mp) 6908 { 6909 uchar_t *rptr = mp->b_rptr; 6910 struct datab *db; 6911 struct iocblk *iocp; 6912 cred_t *cr; 6913 conn_t *connp = Q_TO_CONN(q); 6914 udp_t *udp = connp->conn_udp; 6915 udp_stack_t *us; 6916 6917 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 6918 "udp_wput_other_start: q %p", q); 6919 6920 us = udp->udp_us; 6921 db = mp->b_datap; 6922 6923 switch (db->db_type) { 6924 case M_CMD: 6925 udp_wput_cmdblk(q, mp); 6926 return; 6927 6928 case M_PROTO: 6929 case M_PCPROTO: 6930 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 6931 freemsg(mp); 6932 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6933 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 6934 return; 6935 } 6936 switch (((t_primp_t)rptr)->type) { 6937 case T_ADDR_REQ: 6938 udp_addr_req(q, mp); 6939 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6940 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 6941 return; 6942 case O_T_BIND_REQ: 6943 case T_BIND_REQ: 6944 udp_tpi_bind(q, mp); 6945 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6946 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 6947 return; 6948 case T_CONN_REQ: 6949 udp_tpi_connect(q, mp); 6950 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6951 "udp_wput_other_end: q %p (%S)", q, "connreq"); 6952 return; 6953 case T_CAPABILITY_REQ: 6954 udp_capability_req(q, mp); 6955 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6956 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 6957 return; 6958 case T_INFO_REQ: 6959 udp_info_req(q, mp); 6960 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6961 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 6962 return; 6963 case T_UNITDATA_REQ: 6964 /* 6965 * If a T_UNITDATA_REQ gets here, the address must 6966 * be bad. Valid T_UNITDATA_REQs are handled 6967 * in udp_wput. 
6968 */ 6969 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 6970 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6971 "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); 6972 return; 6973 case T_UNBIND_REQ: 6974 udp_tpi_unbind(q, mp); 6975 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6976 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 6977 return; 6978 case T_SVR4_OPTMGMT_REQ: 6979 /* 6980 * All Solaris components should pass a db_credp 6981 * for this TPI message, hence we ASSERT. 6982 * But in case there is some other M_PROTO that looks 6983 * like a TPI message sent by some other kernel 6984 * component, we check and return an error. 6985 */ 6986 cr = msg_getcred(mp, NULL); 6987 ASSERT(cr != NULL); 6988 if (cr == NULL) { 6989 udp_err_ack(q, mp, TSYSERR, EINVAL); 6990 return; 6991 } 6992 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 6993 cr)) { 6994 (void) svr4_optcom_req(q, 6995 mp, cr, &udp_opt_obj, B_TRUE); 6996 } 6997 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6998 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6999 return; 7000 7001 case T_OPTMGMT_REQ: 7002 /* 7003 * All Solaris components should pass a db_credp 7004 * for this TPI message, hence we ASSERT. 7005 * But in case there is some other M_PROTO that looks 7006 * like a TPI message sent by some other kernel 7007 * component, we check and return an error. 7008 */ 7009 cr = msg_getcred(mp, NULL); 7010 ASSERT(cr != NULL); 7011 if (cr == NULL) { 7012 udp_err_ack(q, mp, TSYSERR, EINVAL); 7013 return; 7014 } 7015 (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); 7016 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7017 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 7018 return; 7019 7020 case T_DISCON_REQ: 7021 udp_tpi_disconnect(q, mp); 7022 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7023 "udp_wput_other_end: q %p (%S)", q, "disconreq"); 7024 return; 7025 7026 /* The following TPI message is not supported by udp. 
*/ 7027 case O_T_CONN_RES: 7028 case T_CONN_RES: 7029 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7030 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7031 "udp_wput_other_end: q %p (%S)", q, 7032 "connres/disconreq"); 7033 return; 7034 7035 /* The following 3 TPI messages are illegal for udp. */ 7036 case T_DATA_REQ: 7037 case T_EXDATA_REQ: 7038 case T_ORDREL_REQ: 7039 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7040 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7041 "udp_wput_other_end: q %p (%S)", q, 7042 "data/exdata/ordrel"); 7043 return; 7044 default: 7045 break; 7046 } 7047 break; 7048 case M_FLUSH: 7049 if (*rptr & FLUSHW) 7050 flushq(q, FLUSHDATA); 7051 break; 7052 case M_IOCTL: 7053 iocp = (struct iocblk *)mp->b_rptr; 7054 switch (iocp->ioc_cmd) { 7055 case TI_GETPEERNAME: 7056 if (udp->udp_state != TS_DATA_XFER) { 7057 /* 7058 * If a default destination address has not 7059 * been associated with the stream, then we 7060 * don't know the peer's name. 7061 */ 7062 iocp->ioc_error = ENOTCONN; 7063 iocp->ioc_count = 0; 7064 mp->b_datap->db_type = M_IOCACK; 7065 qreply(q, mp); 7066 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7067 "udp_wput_other_end: q %p (%S)", q, 7068 "getpeername"); 7069 return; 7070 } 7071 /* FALLTHRU */ 7072 case TI_GETMYNAME: { 7073 /* 7074 * For TI_GETPEERNAME and TI_GETMYNAME, we first 7075 * need to copyin the user's strbuf structure. 7076 * Processing will continue in the M_IOCDATA case 7077 * below. 
7078 */ 7079 mi_copyin(q, mp, NULL, 7080 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 7081 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7082 "udp_wput_other_end: q %p (%S)", q, "getmyname"); 7083 return; 7084 } 7085 case ND_SET: 7086 /* nd_getset performs the necessary checking */ 7087 case ND_GET: 7088 if (nd_getset(q, us->us_nd, mp)) { 7089 qreply(q, mp); 7090 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7091 "udp_wput_other_end: q %p (%S)", q, "get"); 7092 return; 7093 } 7094 break; 7095 case _SIOCSOCKFALLBACK: 7096 /* 7097 * Either sockmod is about to be popped and the 7098 * socket would now be treated as a plain stream, 7099 * or a module is about to be pushed so we could 7100 * no longer use read-side synchronous stream. 7101 * Drain any queued data and disable direct sockfs 7102 * interface from now on. 7103 */ 7104 if (!udp->udp_issocket) { 7105 DB_TYPE(mp) = M_IOCNAK; 7106 iocp->ioc_error = EINVAL; 7107 } else { 7108 udp_disable_direct_sockfs(udp); 7109 7110 DB_TYPE(mp) = M_IOCACK; 7111 iocp->ioc_error = 0; 7112 } 7113 iocp->ioc_count = 0; 7114 iocp->ioc_rval = 0; 7115 qreply(q, mp); 7116 return; 7117 default: 7118 break; 7119 } 7120 break; 7121 case M_IOCDATA: 7122 udp_wput_iocdata(q, mp); 7123 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7124 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7125 return; 7126 default: 7127 /* Unrecognized messages are passed through without change. */ 7128 break; 7129 } 7130 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7131 "udp_wput_other_end: q %p (%S)", q, "end"); 7132 ip_output(connp, mp, q, IP_WPUT); 7133 } 7134 7135 /* 7136 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7137 * messages. 7138 */ 7139 static void 7140 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7141 { 7142 mblk_t *mp1; 7143 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 7144 STRUCT_HANDLE(strbuf, sb); 7145 udp_t *udp = Q_TO_UDP(q); 7146 int error; 7147 uint_t addrlen; 7148 7149 /* Make sure it is one of ours. 
*/ 7150 switch (iocp->ioc_cmd) { 7151 case TI_GETMYNAME: 7152 case TI_GETPEERNAME: 7153 break; 7154 default: 7155 ip_output(udp->udp_connp, mp, q, IP_WPUT); 7156 return; 7157 } 7158 7159 switch (mi_copy_state(q, mp, &mp1)) { 7160 case -1: 7161 return; 7162 case MI_COPY_CASE(MI_COPY_IN, 1): 7163 break; 7164 case MI_COPY_CASE(MI_COPY_OUT, 1): 7165 /* 7166 * The address has been copied out, so now 7167 * copyout the strbuf. 7168 */ 7169 mi_copyout(q, mp); 7170 return; 7171 case MI_COPY_CASE(MI_COPY_OUT, 2): 7172 /* 7173 * The address and strbuf have been copied out. 7174 * We're done, so just acknowledge the original 7175 * M_IOCTL. 7176 */ 7177 mi_copy_done(q, mp, 0); 7178 return; 7179 default: 7180 /* 7181 * Something strange has happened, so acknowledge 7182 * the original M_IOCTL with an EPROTO error. 7183 */ 7184 mi_copy_done(q, mp, EPROTO); 7185 return; 7186 } 7187 7188 /* 7189 * Now we have the strbuf structure for TI_GETMYNAME 7190 * and TI_GETPEERNAME. Next we copyout the requested 7191 * address and then we'll copyout the strbuf. 7192 */ 7193 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 7194 addrlen = udp->udp_family == AF_INET ? 
sizeof (sin_t) : sizeof (sin6_t); 7195 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7196 mi_copy_done(q, mp, EINVAL); 7197 return; 7198 } 7199 7200 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7201 7202 if (mp1 == NULL) 7203 return; 7204 7205 rw_enter(&udp->udp_rwlock, RW_READER); 7206 switch (iocp->ioc_cmd) { 7207 case TI_GETMYNAME: 7208 error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen); 7209 break; 7210 case TI_GETPEERNAME: 7211 error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen); 7212 break; 7213 } 7214 rw_exit(&udp->udp_rwlock); 7215 7216 if (error != 0) { 7217 mi_copy_done(q, mp, error); 7218 } else { 7219 mp1->b_wptr += addrlen; 7220 STRUCT_FSET(sb, len, addrlen); 7221 7222 /* Copy out the address */ 7223 mi_copyout(q, mp); 7224 } 7225 } 7226 7227 static int 7228 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7229 udpattrs_t *udpattrs) 7230 { 7231 struct T_unitdata_req *udreqp; 7232 int is_absreq_failure; 7233 cred_t *cr; 7234 7235 ASSERT(((t_primp_t)mp->b_rptr)->type); 7236 7237 /* 7238 * All Solaris components should pass a db_credp 7239 * for this TPI message, hence we should ASSERT. 7240 * However, RPC (svc_clts_ksend) does this odd thing where it 7241 * passes the options from a T_UNITDATA_IND unchanged in a 7242 * T_UNITDATA_REQ. While that is the right thing to do for 7243 * some options, SCM_UCRED being the key one, this also makes it 7244 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 
7245 */ 7246 cr = msg_getcred(mp, NULL); 7247 if (cr == NULL) { 7248 cr = Q_TO_CONN(q)->conn_cred; 7249 } 7250 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7251 7252 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 7253 udreqp->OPT_offset, cr, &udp_opt_obj, 7254 udpattrs, &is_absreq_failure); 7255 7256 if (*errorp != 0) { 7257 /* 7258 * Note: No special action needed in this 7259 * module for "is_absreq_failure" 7260 */ 7261 return (-1); /* failure */ 7262 } 7263 ASSERT(is_absreq_failure == 0); 7264 return (0); /* success */ 7265 } 7266 7267 void 7268 udp_ddi_g_init(void) 7269 { 7270 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7271 udp_opt_obj.odb_opt_arr_cnt); 7272 7273 /* 7274 * We want to be informed each time a stack is created or 7275 * destroyed in the kernel, so we can maintain the 7276 * set of udp_stack_t's. 7277 */ 7278 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 7279 } 7280 7281 void 7282 udp_ddi_g_destroy(void) 7283 { 7284 netstack_unregister(NS_UDP); 7285 } 7286 7287 #define INET_NAME "ip" 7288 7289 /* 7290 * Initialize the UDP stack instance. 7291 */ 7292 static void * 7293 udp_stack_init(netstackid_t stackid, netstack_t *ns) 7294 { 7295 udp_stack_t *us; 7296 udpparam_t *pa; 7297 int i; 7298 int error = 0; 7299 major_t major; 7300 7301 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 7302 us->us_netstack = ns; 7303 7304 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 7305 us->us_epriv_ports[0] = 2049; 7306 us->us_epriv_ports[1] = 4045; 7307 7308 /* 7309 * The smallest anonymous port in the priviledged port range which UDP 7310 * looks for free port. Use in the option UDP_ANONPRIVBIND. 7311 */ 7312 us->us_min_anonpriv_port = 512; 7313 7314 us->us_bind_fanout_size = udp_bind_fanout_size; 7315 7316 /* Roundup variable that might have been modified in /etc/system */ 7317 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 7318 /* Not a power of two. 
Round up to nearest power of two */ 7319 for (i = 0; i < 31; i++) { 7320 if (us->us_bind_fanout_size < (1 << i)) 7321 break; 7322 } 7323 us->us_bind_fanout_size = 1 << i; 7324 } 7325 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 7326 sizeof (udp_fanout_t), KM_SLEEP); 7327 for (i = 0; i < us->us_bind_fanout_size; i++) { 7328 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7329 NULL); 7330 } 7331 7332 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 7333 7334 us->us_param_arr = pa; 7335 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 7336 7337 (void) udp_param_register(&us->us_nd, 7338 us->us_param_arr, A_CNT(udp_param_arr)); 7339 7340 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 7341 us->us_mibkp = udp_kstat_init(stackid); 7342 7343 major = mod_name_to_major(INET_NAME); 7344 error = ldi_ident_from_major(major, &us->us_ldi_ident); 7345 ASSERT(error == 0); 7346 return (us); 7347 } 7348 7349 /* 7350 * Free the UDP stack instance. 
7351 */ 7352 static void 7353 udp_stack_fini(netstackid_t stackid, void *arg) 7354 { 7355 udp_stack_t *us = (udp_stack_t *)arg; 7356 int i; 7357 7358 for (i = 0; i < us->us_bind_fanout_size; i++) { 7359 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 7360 } 7361 7362 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 7363 sizeof (udp_fanout_t)); 7364 7365 us->us_bind_fanout = NULL; 7366 7367 nd_free(&us->us_nd); 7368 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 7369 us->us_param_arr = NULL; 7370 7371 udp_kstat_fini(stackid, us->us_mibkp); 7372 us->us_mibkp = NULL; 7373 7374 udp_kstat2_fini(stackid, us->us_kstat); 7375 us->us_kstat = NULL; 7376 bzero(&us->us_statistics, sizeof (us->us_statistics)); 7377 7378 ldi_ident_release(us->us_ldi_ident); 7379 kmem_free(us, sizeof (*us)); 7380 } 7381 7382 static void * 7383 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 7384 { 7385 kstat_t *ksp; 7386 7387 udp_stat_t template = { 7388 { "udp_ip_send", KSTAT_DATA_UINT64 }, 7389 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 7390 { "udp_ire_null", KSTAT_DATA_UINT64 }, 7391 { "udp_drain", KSTAT_DATA_UINT64 }, 7392 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 7393 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 7394 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 7395 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7396 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7397 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7398 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7399 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7400 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7401 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7402 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7403 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7404 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7405 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7406 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7407 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7408 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7409 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 
7410 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7411 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7412 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 7413 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7414 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7415 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7416 #ifdef DEBUG 7417 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7418 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7419 #endif 7420 }; 7421 7422 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7423 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 7424 KSTAT_FLAG_VIRTUAL, stackid); 7425 7426 if (ksp == NULL) 7427 return (NULL); 7428 7429 bcopy(&template, us_statisticsp, sizeof (template)); 7430 ksp->ks_data = (void *)us_statisticsp; 7431 ksp->ks_private = (void *)(uintptr_t)stackid; 7432 7433 kstat_install(ksp); 7434 return (ksp); 7435 } 7436 7437 static void 7438 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 7439 { 7440 if (ksp != NULL) { 7441 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7442 kstat_delete_netstack(ksp, stackid); 7443 } 7444 } 7445 7446 static void * 7447 udp_kstat_init(netstackid_t stackid) 7448 { 7449 kstat_t *ksp; 7450 7451 udp_named_kstat_t template = { 7452 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 7453 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7454 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 7455 { "entrySize", KSTAT_DATA_INT32, 0 }, 7456 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7457 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7458 }; 7459 7460 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 7461 KSTAT_TYPE_NAMED, 7462 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 7463 7464 if (ksp == NULL || ksp->ks_data == NULL) 7465 return (NULL); 7466 7467 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7468 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7469 7470 bcopy(&template, ksp->ks_data, sizeof (template)); 7471 ksp->ks_update = udp_kstat_update; 7472 ksp->ks_private = (void 
*)(uintptr_t)stackid; 7473 7474 kstat_install(ksp); 7475 return (ksp); 7476 } 7477 7478 static void 7479 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 7480 { 7481 if (ksp != NULL) { 7482 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7483 kstat_delete_netstack(ksp, stackid); 7484 } 7485 } 7486 7487 static int 7488 udp_kstat_update(kstat_t *kp, int rw) 7489 { 7490 udp_named_kstat_t *udpkp; 7491 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 7492 netstack_t *ns; 7493 udp_stack_t *us; 7494 7495 if ((kp == NULL) || (kp->ks_data == NULL)) 7496 return (EIO); 7497 7498 if (rw == KSTAT_WRITE) 7499 return (EACCES); 7500 7501 ns = netstack_find_by_stackid(stackid); 7502 if (ns == NULL) 7503 return (-1); 7504 us = ns->netstack_udp; 7505 if (us == NULL) { 7506 netstack_rele(ns); 7507 return (-1); 7508 } 7509 udpkp = (udp_named_kstat_t *)kp->ks_data; 7510 7511 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 7512 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 7513 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 7514 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 7515 netstack_rele(ns); 7516 return (0); 7517 } 7518 7519 /* 7520 * Read-side synchronous stream info entry point, called as a 7521 * result of handling certain STREAMS ioctl operations. 
7522 */ 7523 static int 7524 udp_rinfop(queue_t *q, infod_t *dp) 7525 { 7526 mblk_t *mp; 7527 uint_t cmd = dp->d_cmd; 7528 int res = 0; 7529 int error = 0; 7530 udp_t *udp = Q_TO_UDP(q); 7531 struct stdata *stp = STREAM(q); 7532 7533 mutex_enter(&udp->udp_drain_lock); 7534 /* If shutdown on read has happened, return nothing */ 7535 mutex_enter(&stp->sd_lock); 7536 if (stp->sd_flag & STREOF) { 7537 mutex_exit(&stp->sd_lock); 7538 goto done; 7539 } 7540 mutex_exit(&stp->sd_lock); 7541 7542 if ((mp = udp->udp_rcv_list_head) == NULL) 7543 goto done; 7544 7545 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7546 7547 if (cmd & INFOD_COUNT) { 7548 /* 7549 * Return the number of messages. 7550 */ 7551 dp->d_count += udp->udp_rcv_msgcnt; 7552 res |= INFOD_COUNT; 7553 } 7554 if (cmd & INFOD_BYTES) { 7555 /* 7556 * Return size of all data messages. 7557 */ 7558 dp->d_bytes += udp->udp_rcv_cnt; 7559 res |= INFOD_BYTES; 7560 } 7561 if (cmd & INFOD_FIRSTBYTES) { 7562 /* 7563 * Return size of first data message. 7564 */ 7565 dp->d_bytes = msgdsize(mp); 7566 res |= INFOD_FIRSTBYTES; 7567 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7568 } 7569 if (cmd & INFOD_COPYOUT) { 7570 mblk_t *mp1 = mp->b_cont; 7571 int n; 7572 /* 7573 * Return data contents of first message. 7574 */ 7575 ASSERT(DB_TYPE(mp1) == M_DATA); 7576 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7577 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7578 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7579 UIO_READ, dp->d_uiop)) != 0) { 7580 goto done; 7581 } 7582 mp1 = mp1->b_cont; 7583 } 7584 res |= INFOD_COPYOUT; 7585 dp->d_cmd &= ~INFOD_COPYOUT; 7586 } 7587 done: 7588 mutex_exit(&udp->udp_drain_lock); 7589 7590 dp->d_res |= res; 7591 7592 return (error); 7593 } 7594 7595 /* 7596 * Read-side synchronous stream entry point. This is called as a result 7597 * of recv/read operation done at sockfs, and is guaranteed to execute 7598 * outside of the interrupt thread context. 
It returns a single datagram 7599 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7600 */ 7601 static int 7602 udp_rrw(queue_t *q, struiod_t *dp) 7603 { 7604 mblk_t *mp; 7605 udp_t *udp = Q_TO_UDP(q); 7606 udp_stack_t *us = udp->udp_us; 7607 7608 /* 7609 * Dequeue datagram from the head of the list and return 7610 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7611 * set/cleared depending on whether or not there's data 7612 * remaining in the list. 7613 */ 7614 mutex_enter(&udp->udp_drain_lock); 7615 if (!udp->udp_direct_sockfs) { 7616 mutex_exit(&udp->udp_drain_lock); 7617 UDP_STAT(us, udp_rrw_busy); 7618 return (EBUSY); 7619 } 7620 if ((mp = udp->udp_rcv_list_head) != NULL) { 7621 uint_t size = msgdsize(mp); 7622 7623 /* Last datagram in the list? */ 7624 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 7625 udp->udp_rcv_list_tail = NULL; 7626 mp->b_next = NULL; 7627 7628 udp->udp_rcv_cnt -= size; 7629 udp->udp_rcv_msgcnt--; 7630 UDP_STAT(us, udp_rrw_msgcnt); 7631 7632 /* No longer flow-controlling? */ 7633 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 7634 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 7635 udp->udp_drain_qfull = B_FALSE; 7636 } 7637 if (udp->udp_rcv_list_head == NULL) { 7638 /* 7639 * Either we just dequeued the last datagram or 7640 * we get here from sockfs and have nothing to 7641 * return; in this case clear RSLEEP. 7642 */ 7643 ASSERT(udp->udp_rcv_cnt == 0); 7644 ASSERT(udp->udp_rcv_msgcnt == 0); 7645 ASSERT(udp->udp_rcv_list_tail == NULL); 7646 STR_WAKEUP_CLEAR(STREAM(q)); 7647 } else { 7648 /* 7649 * More data follows; we need udp_rrw() to be 7650 * called in future to pick up the rest. 
7651 */ 7652 STR_WAKEUP_SET(STREAM(q)); 7653 } 7654 mutex_exit(&udp->udp_drain_lock); 7655 dp->d_mp = mp; 7656 return (0); 7657 } 7658 7659 /* 7660 * Enqueue a completely-built T_UNITDATA_IND message into the receive 7661 * list; this is typically executed within the interrupt thread context 7662 * and so we do things as quickly as possible. 7663 */ 7664 static void 7665 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 7666 { 7667 ASSERT(q == RD(q)); 7668 ASSERT(pkt_len == msgdsize(mp)); 7669 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 7670 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 7671 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 7672 7673 mutex_enter(&udp->udp_drain_lock); 7674 /* 7675 * Wake up and signal the receiving app; it is okay to do this 7676 * before enqueueing the mp because we are holding the drain lock. 7677 * One of the advantages of synchronous stream is the ability for 7678 * us to find out when the application performs a read on the 7679 * socket by way of udp_rrw() entry point being called. We need 7680 * to generate SIGPOLL/SIGIO for each received data in the case 7681 * of asynchronous socket just as in the strrput() case. However, 7682 * we only wake the application up when necessary, i.e. during the 7683 * first enqueue. When udp_rrw() is called, we send up a single 7684 * datagram upstream and call STR_WAKEUP_SET() again when there 7685 * are still data remaining in our receive queue. 7686 */ 7687 STR_WAKEUP_SENDSIG(STREAM(q), udp->udp_rcv_list_head); 7688 if (udp->udp_rcv_list_head == NULL) 7689 udp->udp_rcv_list_head = mp; 7690 else 7691 udp->udp_rcv_list_tail->b_next = mp; 7692 udp->udp_rcv_list_tail = mp; 7693 udp->udp_rcv_cnt += pkt_len; 7694 udp->udp_rcv_msgcnt++; 7695 7696 /* Need to flow-control? 
*/ 7697 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 7698 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 7699 udp->udp_drain_qfull = B_TRUE; 7700 7701 mutex_exit(&udp->udp_drain_lock); 7702 } 7703 7704 /* 7705 * Drain the contents of receive list to the module upstream; we do 7706 * this during close or when we fallback to the slow mode due to 7707 * sockmod being popped or a module being pushed on top of us. 7708 */ 7709 static void 7710 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 7711 { 7712 mblk_t *mp; 7713 udp_stack_t *us = udp->udp_us; 7714 7715 mutex_enter(&udp->udp_drain_lock); 7716 /* 7717 * There is no race with a concurrent udp_input() sending 7718 * up packets using putnext() after we have cleared the 7719 * udp_direct_sockfs flag but before we have completed 7720 * sending up the packets in udp_rcv_list, since we are 7721 * either a writer or we have quiesced the conn. 7722 */ 7723 udp->udp_direct_sockfs = B_FALSE; 7724 mutex_exit(&udp->udp_drain_lock); 7725 7726 if (udp->udp_rcv_list_head != NULL) 7727 UDP_STAT(us, udp_drain); 7728 7729 /* 7730 * Send up everything via putnext(); note here that we 7731 * don't need the udp_drain_lock to protect us since 7732 * nothing can enter udp_rrw() and that we currently 7733 * have exclusive access to this udp. 
7734 */ 7735 while ((mp = udp->udp_rcv_list_head) != NULL) { 7736 udp->udp_rcv_list_head = mp->b_next; 7737 mp->b_next = NULL; 7738 udp->udp_rcv_cnt -= msgdsize(mp); 7739 udp->udp_rcv_msgcnt--; 7740 if (closing) { 7741 freemsg(mp); 7742 } else { 7743 ASSERT(q == RD(q)); 7744 putnext(q, mp); 7745 } 7746 } 7747 ASSERT(udp->udp_rcv_cnt == 0); 7748 ASSERT(udp->udp_rcv_msgcnt == 0); 7749 ASSERT(udp->udp_rcv_list_head == NULL); 7750 udp->udp_rcv_list_tail = NULL; 7751 udp->udp_drain_qfull = B_FALSE; 7752 } 7753 7754 static size_t 7755 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7756 { 7757 udp_stack_t *us = udp->udp_us; 7758 7759 /* We add a bit of extra buffering */ 7760 size += size >> 1; 7761 if (size > us->us_max_buf) 7762 size = us->us_max_buf; 7763 7764 udp->udp_rcv_hiwat = size; 7765 return (size); 7766 } 7767 7768 /* 7769 * For the lower queue so that UDP can be a dummy mux. 7770 * Nobody should be sending 7771 * packets up this stream 7772 */ 7773 static void 7774 udp_lrput(queue_t *q, mblk_t *mp) 7775 { 7776 mblk_t *mp1; 7777 7778 switch (mp->b_datap->db_type) { 7779 case M_FLUSH: 7780 /* Turn around */ 7781 if (*mp->b_rptr & FLUSHW) { 7782 *mp->b_rptr &= ~FLUSHR; 7783 qreply(q, mp); 7784 return; 7785 } 7786 break; 7787 } 7788 /* Could receive messages that passed through ar_rput */ 7789 for (mp1 = mp; mp1; mp1 = mp1->b_cont) 7790 mp1->b_prev = mp1->b_next = NULL; 7791 freemsg(mp); 7792 } 7793 7794 /* 7795 * For the lower queue so that UDP can be a dummy mux. 7796 * Nobody should be sending packets down this stream. 7797 */ 7798 /* ARGSUSED */ 7799 void 7800 udp_lwput(queue_t *q, mblk_t *mp) 7801 { 7802 freemsg(mp); 7803 } 7804 7805 /* 7806 * Below routines for UDP socket module. 
7807 */ 7808 7809 static conn_t * 7810 udp_do_open(cred_t *credp, boolean_t isv6, int flags) 7811 { 7812 udp_t *udp; 7813 conn_t *connp; 7814 zoneid_t zoneid; 7815 netstack_t *ns; 7816 udp_stack_t *us; 7817 7818 ns = netstack_find_by_cred(credp); 7819 ASSERT(ns != NULL); 7820 us = ns->netstack_udp; 7821 ASSERT(us != NULL); 7822 7823 /* 7824 * For exclusive stacks we set the zoneid to zero 7825 * to make UDP operate as if in the global zone. 7826 */ 7827 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 7828 zoneid = GLOBAL_ZONEID; 7829 else 7830 zoneid = crgetzoneid(credp); 7831 7832 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 7833 7834 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 7835 if (connp == NULL) { 7836 netstack_rele(ns); 7837 return (NULL); 7838 } 7839 udp = connp->conn_udp; 7840 7841 /* 7842 * ipcl_conn_create did a netstack_hold. Undo the hold that was 7843 * done by netstack_find_by_cred() 7844 */ 7845 netstack_rele(ns); 7846 7847 rw_enter(&udp->udp_rwlock, RW_WRITER); 7848 ASSERT(connp->conn_ulp == IPPROTO_UDP); 7849 ASSERT(connp->conn_udp == udp); 7850 ASSERT(udp->udp_connp == connp); 7851 7852 /* Set the initial state of the stream and the privilege status. 
*/
	udp->udp_state = TS_UNBND;
	if (isv6) {
		udp->udp_family = AF_INET6;
		udp->udp_ipversion = IPV6_VERSION;
		udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
		udp->udp_ttl = us->us_ipv6_hoplimit;
		connp->conn_af_isv6 = B_TRUE;
		connp->conn_flags |= IPCL_ISV6;
	} else {
		udp->udp_family = AF_INET;
		udp->udp_ipversion = IPV4_VERSION;
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE;
		udp->udp_ttl = us->us_ipv4_ttl;
		connp->conn_af_isv6 = B_FALSE;
		connp->conn_flags &= ~IPCL_ISV6;
	}

	udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
	udp->udp_pending_op = -1;
	connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
	connp->conn_zoneid = zoneid;

	udp->udp_open_time = lbolt64;
	udp->udp_open_pid = curproc->p_pid;

	/*
	 * If the caller has the process-wide flag set, then default to MAC
	 * exempt mode.  This allows read-down to unlabeled hosts.
	 */
	if (getpflags(NET_MAC_AWARE, credp) != 0)
		connp->conn_mac_exempt = B_TRUE;

	connp->conn_ulp_labeled = is_system_labeled();

	udp->udp_us = us;

	connp->conn_recv = udp_input;
	crhold(credp);
	connp->conn_cred = credp;

	*((sin6_t *)&udp->udp_delayed_addr) = sin6_null;

	rw_exit(&udp->udp_rwlock);

	return (connp);
}

/*
 * Socket create entry point: build a UDP endpoint for a SOCK_DGRAM socket.
 *
 * Only SOCK_DGRAM with family AF_INET or AF_INET6 and protocol 0 or
 * IPPROTO_UDP is accepted; anything else fails with EPROTONOSUPPORT.
 * The conn_t comes from udp_do_open(); a NULL return from it is reported
 * as ENOMEM.
 *
 * On success *errorp is 0, *smodep is SM_ATOMIC, *sock_downcalls points at
 * sock_udp_downcalls and the conn_t is returned as the lower handle.
 * On failure NULL is returned and *errorp holds the error.
 */
/* ARGSUSED */
sock_lower_handle_t
udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
    uint_t *smodep, int *errorp, int flags, cred_t *credp)
{
	udp_t		*udp = NULL;
	udp_stack_t	*us;
	conn_t		*connp;
	boolean_t	isv6;

	if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
	    (proto != 0 && proto != IPPROTO_UDP)) {
		*errorp = EPROTONOSUPPORT;
		return (NULL);
	}

	if (family == AF_INET6)
		isv6 = B_TRUE;
	else
		isv6 = B_FALSE;

	connp = udp_do_open(credp, isv6, flags);
	if (connp == NULL) {
		*errorp = ENOMEM;
		return (NULL);
	}

	udp = connp->conn_udp;
	ASSERT(udp != NULL);
	us = udp->udp_us;
	ASSERT(us != NULL);

	/* Mark the endpoint as a non-STREAMS socket. */
	udp->udp_issocket = B_TRUE;
	connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET;

	/* Set flow control */
	rw_enter(&udp->udp_rwlock, RW_WRITER);
	(void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat);
	udp->udp_rcv_disply_hiwat = us->us_recv_hiwat;
	udp->udp_rcv_lowat = udp_mod_info.mi_lowat;
	udp->udp_xmit_hiwat = us->us_xmit_hiwat;
	udp->udp_xmit_lowat = us->us_xmit_lowat;

	if (udp->udp_family == AF_INET6) {
		/* Build initial header template for transmit */
		if ((*errorp = udp_build_hdrs(udp)) != 0) {
			rw_exit(&udp->udp_rwlock);
			/*
			 * NOTE(review): this path destroys the conn directly
			 * while the helper-stream failure below goes through
			 * udp_do_close(), which drops conn_ref first.  Confirm
			 * that the reference and cred taken in udp_do_open()
			 * are released correctly here.
			 */
			ipcl_conn_destroy(connp);
			return (NULL);
		}
	}
	rw_exit(&udp->udp_rwlock);

	connp->conn_flow_cntrld = B_FALSE;

	ASSERT(us->us_ldi_ident != NULL);

	if ((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) {
		ip1dbg(("udp_create: create of IP helper stream failed\n"));
		udp_do_close(connp);
		return (NULL);
	}

	/* Set the send flow control */
	connp->conn_wq->q_hiwat = us->us_xmit_hiwat;
	connp->conn_wq->q_lowat = us->us_xmit_lowat;

	/* Setup is complete; the conn is no longer incipient. */
	mutex_enter(&connp->conn_lock);
	connp->conn_state_flags &= ~CONN_INCIPIENT;
	mutex_exit(&connp->conn_lock);

	*errorp = 0;
	*smodep = SM_ATOMIC;
	*sock_downcalls = &sock_udp_downcalls;
	return ((sock_lower_handle_t)connp);
}

/*
 * Socket activate entry point: record the upper handle and upcall vector
 * on the conn, then push the protocol properties (write offset, receive
 * high-water mark, max block size, min/max packet size) up through
 * su_set_proto_props.
 */
/* ARGSUSED */
void
udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
    sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
{
	conn_t		*connp = (conn_t *)proto_handle;
	udp_t		*udp = connp->conn_udp;
	udp_stack_t	*us = udp->udp_us;
	struct sock_proto_props sopp;

	/* All Solaris components should pass a cred for this operation. */
	ASSERT(cr != NULL);

	connp->conn_upcalls = sock_upcalls;
	connp->conn_upper_handle = sock_handle;

	/*
	 * NOTE(review): sopp_maxaddrlen is filled in below but sopp_flags
	 * lists no corresponding flag - confirm the upper layer consumes it.
	 */
	sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT |
	    SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
	sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
	sopp.sopp_maxblk = INFPSZ;
	sopp.sopp_rxhiwat = udp->udp_rcv_hiwat;
	sopp.sopp_maxaddrlen = sizeof (sin6_t);
	sopp.sopp_maxpsz =
	    (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
	    UDP_MAXPACKET_IPV6;
	/* A module minimum packet size of 1 is reported upward as 0. */
	sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
	    udp_mod_info.mi_minpsz;

	(*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
	    &sopp);
}

/*
 * Common close path for an endpoint: quiesce the conn at the UDP and IP
 * levels, drain and turn off the queues for STREAMS endpoints, free the
 * per-endpoint state via udp_close_free(), release the minor number
 * (STREAMS) or the IP helper stream (sockets), then drop the last
 * reference and destroy the conn.
 */
static void
udp_do_close(conn_t *connp)
{
	udp_t	*udp;

	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
	udp = connp->conn_udp;

	udp_quiesce_conn(connp);
	ip_quiesce_conn(connp);

	if (!IPCL_IS_NONSTR(connp)) {
		/*
		 * Disable read-side synchronous stream
		 * interface and drain any queued data.
		 */
		ASSERT(connp->conn_wq != NULL);
		udp_rcv_drain(connp->conn_wq, udp, B_TRUE);
		ASSERT(!udp->udp_direct_sockfs);

		ASSERT(connp->conn_rq != NULL);
		qprocsoff(connp->conn_rq);
	}

	ASSERT(udp->udp_rcv_cnt == 0);
	ASSERT(udp->udp_rcv_msgcnt == 0);
	ASSERT(udp->udp_rcv_list_head == NULL);
	ASSERT(udp->udp_rcv_list_tail == NULL);

	udp_close_free(connp);

	/*
	 * Now we are truly single threaded on this stream, and can
	 * delete the things hanging off the connp, and finally the connp.
	 * We removed this connp from the fanout list, it cannot be
	 * accessed thru the fanouts, and we already waited for the
	 * conn_ref to drop to 0. We are already in close, so
	 * there cannot be any other thread from the top. qprocsoff
	 * has completed, and service has completed or won't run in
	 * future.
	 */
	ASSERT(connp->conn_ref == 1);
	if (!IPCL_IS_NONSTR(connp)) {
		inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
	} else {
		ip_free_helper_stream(connp);
	}

	connp->conn_ref--;
	ipcl_conn_destroy(connp);
}

/*
 * Socket close entry point; a thin wrapper around udp_do_close().
 * Always returns 0.
 */
/* ARGSUSED */
int
udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
{
	conn_t	*connp = (conn_t *)proto_handle;

	/* All Solaris components should pass a cred for this operation. */
	ASSERT(cr != NULL);

	udp_do_close(connp);
	return (0);
}

/*
 * Bind the endpoint to a local address and port.
 *
 * sa/len describe the requested address: len == 0 means "wildcard address,
 * any port" (sa is not examined in that case), otherwise len must be
 * sizeof (sin_t) or sizeof (sin6_t) and match the endpoint's family.
 * cr is used for the privileged-port and multilevel-port policy checks.
 * bind_to_req_port_only, when set together with a non-zero requested
 * port, makes the bind fail with -TADDRBUSY rather than fall back to
 * another port when the requested one is taken.
 *
 * Returns 0 on success, a positive errno (EINVAL, EAFNOSUPPORT, ENOMEM or
 * whatever udp_build_hdrs() / the IP bind routine returned), or a negated
 * TLI error (-TOUTSTATE, -TBADADDR, -TACCES, -TNOADDR, -TADDRBUSY).
 * Callers in this file convert negative values with proto_tlitosyserr().
 *
 * NOTE(review): once the endpoint has been inserted in the bind hash and
 * moved to TS_IDLE, the multilevel-port and allocb() failure returns below
 * only reset udp_pending_op; confirm that leaving the endpoint hashed in
 * TS_IDLE on those paths is intended.
 */
static int
udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
    boolean_t bind_to_req_port_only)
{
	sin_t		*sin;
	sin6_t		*sin6;
	sin6_t		sin6addr;
	in_port_t	port;		/* Host byte order */
	in_port_t	requested_port;	/* Host byte order */
	int		count;
	in6_addr_t	v6src;
	int		loopmax;
	udp_fanout_t	*udpf;
	in_port_t	lport;		/* Network byte order */
	zoneid_t	zoneid;
	udp_t		*udp;
	boolean_t	is_inaddr_any;
	mlp_type_t	addrtype, mlptype;
	udp_stack_t	*us;
	int		error = 0;
	mblk_t		*mp = NULL;

	udp = connp->conn_udp;
	us = udp->udp_us;

	/* Unlocked early check; repeated under udp_rwlock further down. */
	if (udp->udp_state != TS_UNBND) {
		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad state, %u", udp->udp_state);
		return (-TOUTSTATE);
	}

	switch (len) {
	case 0:
		/*
		 * No address supplied: build a wildcard address of the
		 * endpoint's family in the local sin6addr buffer.
		 */
		if (udp->udp_family == AF_INET) {
			sin = (sin_t *)&sin6addr;
			*sin = sin_null;
			sin->sin_family = AF_INET;
			sin->sin_addr.s_addr = INADDR_ANY;
			udp->udp_ipversion = IPV4_VERSION;
		} else {
			ASSERT(udp->udp_family == AF_INET6);
			sin6 = (sin6_t *)&sin6addr;
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			V6_SET_ZERO(sin6->sin6_addr);
			udp->udp_ipversion = IPV6_VERSION;
		}
		port = 0;
		break;

	case sizeof (sin_t):	/* Complete IPv4 address */
		sin = (sin_t *)sa;

		if (sin == NULL || !OK_32PTR((char *)sin))
			return (EINVAL);

		if (udp->udp_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			return (EAFNOSUPPORT);
		}
		port = ntohs(sin->sin_port);
		break;

	case sizeof (sin6_t):	/* complete IPv6 address */
		sin6 = (sin6_t *)sa;

		if (sin6 == NULL || !OK_32PTR((char *)sin6))
			return (EINVAL);

		if (udp->udp_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			return (EAFNOSUPPORT);
		}
		port = ntohs(sin6->sin6_port);
		break;

	default:		/* Invalid request */
		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad ADDR_length length %u", len);
		return (-TBADADDR);
	}

	requested_port = port;

	/* "Requested port only" is meaningful only for a non-zero port. */
	if (requested_port == 0 || !bind_to_req_port_only)
		bind_to_req_port_only = B_FALSE;
	else		/* T_BIND_REQ and requested_port != 0 */
		bind_to_req_port_only = B_TRUE;

	if (requested_port == 0) {
		/*
		 * If the application passed in zero for the port number, it
		 * doesn't care which port number we bind to. Get one in the
		 * valid range.
		 */
		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			port = udp_update_next_port(udp,
			    us->us_next_port_to_try, B_TRUE);
		}
	} else {
		/*
		 * If the port is in the well-known privileged range,
		 * make sure the caller was privileged.
		 */
		int i;
		boolean_t priv = B_FALSE;

		if (port < us->us_smallest_nonpriv_port) {
			priv = B_TRUE;
		} else {
			/* Also check the list of extra privileged ports. */
			for (i = 0; i < us->us_num_epriv_ports; i++) {
				if (port == us->us_epriv_ports[i]) {
					priv = B_TRUE;
					break;
				}
			}
		}

		if (priv) {
			if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
				return (-TACCES);
		}
	}

	if (port == 0)
		return (-TNOADDR);

	/*
	 * The state must be TS_UNBND. TPI mandates that users must send
	 * TPI primitives only 1 at a time and wait for the response before
	 * sending the next primitive.
	 */
	rw_enter(&udp->udp_rwlock, RW_WRITER);
	if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) {
		rw_exit(&udp->udp_rwlock);
		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad state, %u", udp->udp_state);
		return (-TOUTSTATE);
	}
	/* XXX how to remove the T_BIND_REQ? Should set it before calling */
	udp->udp_pending_op = T_BIND_REQ;
	/*
	 * Copy the source address into our udp structure. This address
	 * may still be zero; if so, IP will fill in the correct address
	 * each time an outbound packet is passed to it. Since the udp is
	 * not yet in the bind hash list, we don't grab the uf_lock to
	 * change udp_ipversion
	 */
	if (udp->udp_family == AF_INET) {
		ASSERT(sin != NULL);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
	} else {
		ASSERT(sin6 != NULL);
		v6src = sin6->sin6_addr;
		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
			/*
			 * no need to hold the uf_lock to set the udp_ipversion
			 * since we are not yet in the fanout list
			 */
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
		}
	}

	/*
	 * If udp_reuseaddr is not set, then we have to make sure that
	 * the IP address and port number the application requested
	 * (or we selected for the application) is not being used by
	 * another stream.  If another stream is already using the
	 * requested IP address and port, the behavior depends on
	 * "bind_to_req_port_only". If set the bind fails; otherwise we
	 * search for an unused port to bind to the stream.
	 *
	 * As per the BSD semantics, as modified by the Deering multicast
	 * changes, if udp_reuseaddr is set, then we allow multiple binds
	 * to the same port independent of the local IP address.
	 *
	 * This is slightly different than in SunOS 4.X which did not
	 * support IP multicast. Note that the change implemented by the
	 * Deering multicast code affects all binds - not only binding
	 * to IP multicast addresses.
	 *
	 * Note that when binding to port zero we ignore SO_REUSEADDR in
	 * order to guarantee a unique port.
	 */

	count = 0;
	if (udp->udp_anon_priv_bind) {
		/*
		 * loopmax = (IPPORT_RESERVED-1) -
		 *    us->us_min_anonpriv_port + 1
		 */
		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
	} else {
		loopmax = us->us_largest_anon_port -
		    us->us_smallest_anon_port + 1;
	}

	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
	zoneid = connp->conn_zoneid;

	/*
	 * Port search loop.  Every "break" out of this loop leaves
	 * udpf->uf_lock held; it is dropped after the hash insert (or on
	 * the udp_build_hdrs() failure path) below.
	 */
	for (;;) {
		udp_t		*udp1;
		boolean_t	found_exclbind = B_FALSE;

		/*
		 * Walk through the list of udp streams bound to
		 * requested port with the same IP address.
		 */
		lport = htons(port);
		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
		    us->us_bind_fanout_size)];
		mutex_enter(&udpf->uf_lock);
		for (udp1 = udpf->uf_udp; udp1 != NULL;
		    udp1 = udp1->udp_bind_hash) {
			if (lport != udp1->udp_port)
				continue;

			/*
			 * On a labeled system, we must treat bindings to ports
			 * on shared IP addresses by sockets with MAC exemption
			 * privilege as being in all zones, as there's
			 * otherwise no way to identify the right receiver.
			 */
			if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) ||
			    IPCL_ZONE_MATCH(connp,
			    udp1->udp_connp->conn_zoneid)) &&
			    !connp->conn_mac_exempt && \
			    !udp1->udp_connp->conn_mac_exempt)
				continue;

			/*
			 * If UDP_EXCLBIND is set for either the bound or
			 * binding endpoint, the semantics of bind
			 * is changed according to the following chart.
			 *
			 * spec = specified address (v4 or v6)
			 * unspec = unspecified address (v4 or v6)
			 * A = specified addresses are different for endpoints
			 *
			 * bound	bind to		allowed?
			 * -------------------------------------
			 * unspec	unspec		no
			 * unspec	spec		no
			 * spec		unspec		no
			 * spec		spec		yes if A
			 *
			 * For labeled systems, SO_MAC_EXEMPT behaves the same
			 * as UDP_EXCLBIND, except that zoneid is ignored.
			 */
			if (udp1->udp_exclbind || udp->udp_exclbind ||
			    udp1->udp_connp->conn_mac_exempt ||
			    connp->conn_mac_exempt) {
				if (V6_OR_V4_INADDR_ANY(
				    udp1->udp_bound_v6src) ||
				    is_inaddr_any ||
				    IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
				    &v6src)) {
					found_exclbind = B_TRUE;
					break;
				}
				continue;
			}

			/*
			 * Check ipversion to allow IPv4 and IPv6 sockets to
			 * have disjoint port number spaces.
			 */
			if (udp->udp_ipversion != udp1->udp_ipversion) {

				/*
				 * On the first time through the loop, if the
				 * user intentionally specified a
				 * particular port number, then ignore any
				 * bindings of the other protocol that may
				 * conflict. This allows the user to bind IPv6
				 * alone and get both v4 and v6, or bind
				 * both and get each separately. On subsequent
				 * times through the loop, we're checking a
				 * port that we chose (not the user) and thus
				 * we do not allow casual duplicate bindings.
				 */
				if (count == 0 && requested_port != 0)
					continue;
			}

			/*
			 * No difference depending on SO_REUSEADDR.
			 *
			 * If existing port is bound to a
			 * non-wildcard IP address and
			 * the requesting stream is bound to
			 * a distinct IP address
			 * (non-wildcard, also), keep going.
			 */
			if (!is_inaddr_any &&
			    !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
			    !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
			    &v6src)) {
				continue;
			}
			break;
		}

		/*
		 * With udp_reuseaddr set and an explicitly requested port,
		 * a conflict is acceptable unless it was an exclusive-bind
		 * conflict.
		 */
		if (!found_exclbind &&
		    (udp->udp_reuseaddr && requested_port != 0)) {
			break;
		}

		if (udp1 == NULL) {
			/*
			 * No other stream has this IP address
			 * and port number. We can use it.
			 */
			break;
		}
		mutex_exit(&udpf->uf_lock);
		if (bind_to_req_port_only) {
			/*
			 * We get here only when requested port
			 * is bound (and only first of the for()
			 * loop iteration).
			 *
			 * The semantics of this bind request
			 * require it to fail so we return from
			 * the routine (and exit the loop).
			 *
			 */
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			return (-TADDRBUSY);
		}

		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			if ((count == 0) && (requested_port != 0)) {
				/*
				 * If the application wants us to find
				 * a port, get one to start with. Set
				 * requested_port to 0, so that we will
				 * update us->us_next_port_to_try below.
				 */
				port = udp_update_next_port(udp,
				    us->us_next_port_to_try, B_TRUE);
				requested_port = 0;
			} else {
				port = udp_update_next_port(udp, port + 1,
				    B_FALSE);
			}
		}

		if (port == 0 || ++count >= loopmax) {
			/*
			 * We've tried every possible port number and
			 * there are none available, so send an error
			 * to the user.
			 */
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			return (-TNOADDR);
		}
	}

	/*
	 * Copy the source address into our udp structure.  This address
	 * may still be zero; if so, ip will fill in the correct address
	 * each time an outbound packet is passed to it.
	 * If we are binding to a broadcast or multicast address then
	 * udp_post_ip_bind_connect will clear the source address
	 * when udp_do_bind succeeds.
	 */
	udp->udp_v6src = udp->udp_bound_v6src = v6src;
	udp->udp_port = lport;
	/*
	 * Now reset the next anonymous port if the application requested
	 * an anonymous port, or we handed out the next anonymous port.
	 */
	if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
		us->us_next_port_to_try = port + 1;
	}

	/* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */
	if (udp->udp_family == AF_INET) {
		sin->sin_port = udp->udp_port;
	} else {
		sin6->sin6_port = udp->udp_port;
		/* Rebuild the header template */
		error = udp_build_hdrs(udp);
		if (error != 0) {
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			mutex_exit(&udpf->uf_lock);
			return (error);
		}
	}
	udp->udp_state = TS_IDLE;
	udp_bind_hash_insert(udpf, udp);
	mutex_exit(&udpf->uf_lock);
	rw_exit(&udp->udp_rwlock);

	if (cl_inet_bind) {
		/*
		 * Running in cluster mode - register bind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
			    IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
			    (in_port_t)udp->udp_port, NULL);
		} else {
			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
			    IPPROTO_UDP, AF_INET6,
			    (uint8_t *)&(udp->udp_v6src),
			    (in_port_t)udp->udp_port, NULL);
		}
	}

	/*
	 * Multilevel port (MLP) checks; only performed on a labeled system
	 * and only for non-anonymous ports or when conn_anon_mlp is set.
	 */
	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
	if (is_system_labeled() && (!connp->conn_anon_port ||
	    connp->conn_anon_mlp)) {
		uint16_t mlpport;
		zone_t *zone;

		zone = crgetzone(cr);
		connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
		    mlptSingle;
		addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION,
		    &v6src, us->us_netstack->netstack_ip);
		if (addrtype == mlptSingle) {
			rw_enter(&udp->udp_rwlock, RW_WRITER);
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			connp->conn_anon_port = B_FALSE;
			connp->conn_mlp_type = mlptSingle;
			return (-TNOADDR);
		}
		mlpport = connp->conn_anon_port ? PMAPPORT : port;
		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
		    addrtype);

		/*
		 * It is a coding error to attempt to bind an MLP port
		 * without first setting SOL_SOCKET/SCM_UCRED.
		 */
		if (mlptype != mlptSingle &&
		    connp->conn_mlp_type == mlptSingle) {
			rw_enter(&udp->udp_rwlock, RW_WRITER);
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			connp->conn_anon_port = B_FALSE;
			connp->conn_mlp_type = mlptSingle;
			return (EINVAL);
		}

		/*
		 * It is an access violation to attempt to bind an MLP port
		 * without NET_BINDMLP privilege.
		 */
		if (mlptype != mlptSingle &&
		    secpolicy_net_bindmlp(cr) != 0) {
			if (udp->udp_debug) {
				(void) strlog(UDP_MOD_ID, 0, 1,
				    SL_ERROR|SL_TRACE,
				    "udp_bind: no priv for multilevel port %d",
				    mlpport);
			}
			rw_enter(&udp->udp_rwlock, RW_WRITER);
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			connp->conn_anon_port = B_FALSE;
			connp->conn_mlp_type = mlptSingle;
			return (-TACCES);
		}

		/*
		 * If we're specifically binding a shared IP address and the
		 * port is MLP on shared addresses, then check to see if this
		 * zone actually owns the MLP.  Reject if not.
		 */
		if (mlptype == mlptShared && addrtype == mlptShared) {
			/*
			 * No need to handle exclusive-stack zones since
			 * ALL_ZONES only applies to the shared stack.
			 */
			zoneid_t mlpzone;

			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
			    htons(mlpport));
			if (connp->conn_zoneid != mlpzone) {
				if (udp->udp_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: attempt to bind port "
					    "%d on shared addr in zone %d "
					    "(should be %d)",
					    mlpport, connp->conn_zoneid,
					    mlpzone);
				}
				rw_enter(&udp->udp_rwlock, RW_WRITER);
				udp->udp_pending_op = -1;
				rw_exit(&udp->udp_rwlock);
				connp->conn_anon_port = B_FALSE;
				connp->conn_mlp_type = mlptSingle;
				return (-TACCES);
			}
		}
		if (connp->conn_anon_port) {
			error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
			    port, B_TRUE);
			if (error != 0) {
				if (udp->udp_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: cannot establish anon "
					    "MLP for port %d", port);
				}
				rw_enter(&udp->udp_rwlock, RW_WRITER);
				udp->udp_pending_op = -1;
				rw_exit(&udp->udp_rwlock);
				connp->conn_anon_port = B_FALSE;
				connp->conn_mlp_type = mlptSingle;
				return (-TACCES);
			}
		}
		connp->conn_mlp_type = mlptype;
	}

	if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
		/*
		 * Append a request for an IRE if udp_v6src not
		 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address).
		 */
		mp = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp) {
			rw_enter(&udp->udp_rwlock, RW_WRITER);
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			return (ENOMEM);
		}
		mp->b_wptr += sizeof (ire_t);
		mp->b_datap->db_type = IRE_DB_REQ_TYPE;
	}
	if (udp->udp_family == AF_INET6) {
		ASSERT(udp->udp_connp->conn_af_isv6);
		error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP,
		    &udp->udp_bound_v6src, udp->udp_port, B_TRUE);
	} else {
		ASSERT(!udp->udp_connp->conn_af_isv6);
		error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP,
		    V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port,
		    B_TRUE);
	}

	/*
	 * Common post-processing: clears udp_pending_op, undoes the bind
	 * when error is non-zero, and frees mp.
	 */
	(void) udp_post_ip_bind_connect(udp, mp, error);
	return (error);
}

/*
 * Socket bind entry point.  A NULL sa requests an unbind; otherwise the
 * endpoint is bound to exactly the requested port.  Negative (TLI) results
 * from the workers are converted to errno values: -TOUTSTATE becomes
 * EINVAL, everything else goes through proto_tlitosyserr().
 */
int
udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
    socklen_t len, cred_t *cr)
{
	int		error;
	conn_t		*connp;

	/* All Solaris components should pass a cred for this operation. */
	ASSERT(cr != NULL);

	connp = (conn_t *)proto_handle;

	if (sa == NULL)
		error = udp_do_unbind(connp);
	else
		error = udp_do_bind(connp, sa, len, cr, B_TRUE);

	if (error < 0) {
		if (error == -TOUTSTATE)
			error = EINVAL;
		else
			error = proto_tlitosyserr(-error);
	}

	return (error);
}

/*
 * Bind the endpoint to the wildcard address and a system-chosen port
 * (udp_do_bind() with no address).  Returns 0 or a positive errno;
 * negative TLI results are converted with proto_tlitosyserr().
 */
static int
udp_implicit_bind(conn_t *connp, cred_t *cr)
{
	int error;

	/* All Solaris components should pass a cred for this operation. */
	ASSERT(cr != NULL);

	error = udp_do_bind(connp, NULL, 0, cr, B_FALSE);
	return ((error < 0) ? proto_tlitosyserr(-error) : error);
}

/*
 * This routine removes a port number association from a stream. It
 * is called by udp_unbind and udp_tpi_unbind.
8683 */ 8684 static int 8685 udp_do_unbind(conn_t *connp) 8686 { 8687 udp_t *udp = connp->conn_udp; 8688 udp_fanout_t *udpf; 8689 udp_stack_t *us = udp->udp_us; 8690 8691 if (cl_inet_unbind != NULL) { 8692 /* 8693 * Running in cluster mode - register unbind information 8694 */ 8695 if (udp->udp_ipversion == IPV4_VERSION) { 8696 (*cl_inet_unbind)( 8697 connp->conn_netstack->netstack_stackid, 8698 IPPROTO_UDP, AF_INET, 8699 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8700 (in_port_t)udp->udp_port, NULL); 8701 } else { 8702 (*cl_inet_unbind)( 8703 connp->conn_netstack->netstack_stackid, 8704 IPPROTO_UDP, AF_INET6, 8705 (uint8_t *)&(udp->udp_v6src), 8706 (in_port_t)udp->udp_port, NULL); 8707 } 8708 } 8709 8710 rw_enter(&udp->udp_rwlock, RW_WRITER); 8711 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8712 rw_exit(&udp->udp_rwlock); 8713 return (-TOUTSTATE); 8714 } 8715 udp->udp_pending_op = T_UNBIND_REQ; 8716 rw_exit(&udp->udp_rwlock); 8717 8718 /* 8719 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 8720 * and therefore ip_unbind must never return NULL. 8721 */ 8722 ip_unbind(connp); 8723 8724 /* 8725 * Once we're unbound from IP, the pending operation may be cleared 8726 * here. 
8727 */ 8728 rw_enter(&udp->udp_rwlock, RW_WRITER); 8729 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8730 us->us_bind_fanout_size)]; 8731 8732 mutex_enter(&udpf->uf_lock); 8733 udp_bind_hash_remove(udp, B_TRUE); 8734 V6_SET_ZERO(udp->udp_v6src); 8735 V6_SET_ZERO(udp->udp_bound_v6src); 8736 udp->udp_port = 0; 8737 mutex_exit(&udpf->uf_lock); 8738 8739 udp->udp_pending_op = -1; 8740 udp->udp_state = TS_UNBND; 8741 if (udp->udp_family == AF_INET6) 8742 (void) udp_build_hdrs(udp); 8743 rw_exit(&udp->udp_rwlock); 8744 8745 return (0); 8746 } 8747 8748 static int 8749 udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error) 8750 { 8751 ire_t *ire; 8752 udp_fanout_t *udpf; 8753 udp_stack_t *us = udp->udp_us; 8754 8755 ASSERT(udp->udp_pending_op != -1); 8756 rw_enter(&udp->udp_rwlock, RW_WRITER); 8757 if (error == 0) { 8758 /* For udp_do_connect() success */ 8759 /* udp_do_bind() success will do nothing in here */ 8760 /* 8761 * If a broadcast/multicast address was bound, set 8762 * the source address to 0. 8763 * This ensures no datagrams with broadcast address 8764 * as source address are emitted (which would violate 8765 * RFC1122 - Hosts requirements) 8766 * 8767 * Note that when connecting the returned IRE is 8768 * for the destination address and we only perform 8769 * the broadcast check for the source address (it 8770 * is OK to connect to a broadcast/multicast address.) 8771 */ 8772 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 8773 ire = (ire_t *)ire_mp->b_rptr; 8774 8775 /* 8776 * Note: we get IRE_BROADCAST for IPv6 to "mark" a 8777 * multicast local address. 8778 */ 8779 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8780 us->us_bind_fanout_size)]; 8781 if (ire->ire_type == IRE_BROADCAST && 8782 udp->udp_state != TS_DATA_XFER) { 8783 ASSERT(udp->udp_pending_op == T_BIND_REQ || 8784 udp->udp_pending_op == O_T_BIND_REQ); 8785 /* 8786 * This was just a local bind to a broadcast 8787 * addr. 
8788 */ 8789 mutex_enter(&udpf->uf_lock); 8790 V6_SET_ZERO(udp->udp_v6src); 8791 mutex_exit(&udpf->uf_lock); 8792 if (udp->udp_family == AF_INET6) 8793 (void) udp_build_hdrs(udp); 8794 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8795 if (udp->udp_family == AF_INET6) 8796 (void) udp_build_hdrs(udp); 8797 } 8798 } 8799 } else { 8800 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8801 us->us_bind_fanout_size)]; 8802 mutex_enter(&udpf->uf_lock); 8803 8804 if (udp->udp_state == TS_DATA_XFER) { 8805 /* Connect failed */ 8806 /* Revert back to the bound source */ 8807 udp->udp_v6src = udp->udp_bound_v6src; 8808 udp->udp_state = TS_IDLE; 8809 } else { 8810 /* For udp_do_bind() failed */ 8811 V6_SET_ZERO(udp->udp_v6src); 8812 V6_SET_ZERO(udp->udp_bound_v6src); 8813 udp->udp_state = TS_UNBND; 8814 udp_bind_hash_remove(udp, B_TRUE); 8815 udp->udp_port = 0; 8816 } 8817 mutex_exit(&udpf->uf_lock); 8818 if (udp->udp_family == AF_INET6) 8819 (void) udp_build_hdrs(udp); 8820 } 8821 udp->udp_pending_op = -1; 8822 rw_exit(&udp->udp_rwlock); 8823 if (ire_mp != NULL) 8824 freeb(ire_mp); 8825 return (error); 8826 } 8827 8828 /* 8829 * It associates a default destination address with the stream. 
8830 */ 8831 static int 8832 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 8833 cred_t *cr) 8834 { 8835 sin6_t *sin6; 8836 sin_t *sin; 8837 in6_addr_t v6dst; 8838 ipaddr_t v4dst; 8839 uint16_t dstport; 8840 uint32_t flowinfo; 8841 mblk_t *ire_mp; 8842 udp_fanout_t *udpf; 8843 udp_t *udp, *udp1; 8844 ushort_t ipversion; 8845 udp_stack_t *us; 8846 int error; 8847 8848 udp = connp->conn_udp; 8849 us = udp->udp_us; 8850 8851 /* 8852 * Address has been verified by the caller 8853 */ 8854 switch (len) { 8855 default: 8856 /* 8857 * Should never happen 8858 */ 8859 return (EINVAL); 8860 8861 case sizeof (sin_t): 8862 sin = (sin_t *)sa; 8863 v4dst = sin->sin_addr.s_addr; 8864 dstport = sin->sin_port; 8865 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8866 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8867 ipversion = IPV4_VERSION; 8868 break; 8869 8870 case sizeof (sin6_t): 8871 sin6 = (sin6_t *)sa; 8872 v6dst = sin6->sin6_addr; 8873 dstport = sin6->sin6_port; 8874 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 8875 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 8876 ipversion = IPV4_VERSION; 8877 flowinfo = 0; 8878 } else { 8879 ipversion = IPV6_VERSION; 8880 flowinfo = sin6->sin6_flowinfo; 8881 } 8882 break; 8883 } 8884 8885 if (dstport == 0) 8886 return (-TBADADDR); 8887 8888 rw_enter(&udp->udp_rwlock, RW_WRITER); 8889 8890 /* 8891 * This UDP must have bound to a port already before doing a connect. 8892 * TPI mandates that users must send TPI primitives only 1 at a time 8893 * and wait for the response before sending the next primitive. 
8894 */ 8895 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8896 rw_exit(&udp->udp_rwlock); 8897 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8898 "udp_connect: bad state, %u", udp->udp_state); 8899 return (-TOUTSTATE); 8900 } 8901 udp->udp_pending_op = T_CONN_REQ; 8902 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 8903 8904 if (ipversion == IPV4_VERSION) { 8905 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8906 udp->udp_ip_snd_options_len; 8907 } else { 8908 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8909 } 8910 8911 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8912 us->us_bind_fanout_size)]; 8913 8914 mutex_enter(&udpf->uf_lock); 8915 if (udp->udp_state == TS_DATA_XFER) { 8916 /* Already connected - clear out state */ 8917 udp->udp_v6src = udp->udp_bound_v6src; 8918 udp->udp_state = TS_IDLE; 8919 } 8920 8921 /* 8922 * Create a default IP header with no IP options. 8923 */ 8924 udp->udp_dstport = dstport; 8925 udp->udp_ipversion = ipversion; 8926 if (ipversion == IPV4_VERSION) { 8927 /* 8928 * Interpret a zero destination to mean loopback. 8929 * Update the T_CONN_REQ (sin/sin6) since it is used to 8930 * generate the T_CONN_CON. 8931 */ 8932 if (v4dst == INADDR_ANY) { 8933 v4dst = htonl(INADDR_LOOPBACK); 8934 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8935 if (udp->udp_family == AF_INET) { 8936 sin->sin_addr.s_addr = v4dst; 8937 } else { 8938 sin6->sin6_addr = v6dst; 8939 } 8940 } 8941 udp->udp_v6dst = v6dst; 8942 udp->udp_flowinfo = 0; 8943 8944 /* 8945 * If the destination address is multicast and 8946 * an outgoing multicast interface has been set, 8947 * use the address of that interface as our 8948 * source address if no source address has been set. 
8949 */ 8950 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 8951 CLASSD(v4dst) && 8952 udp->udp_multicast_if_addr != INADDR_ANY) { 8953 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 8954 &udp->udp_v6src); 8955 } 8956 } else { 8957 ASSERT(udp->udp_ipversion == IPV6_VERSION); 8958 /* 8959 * Interpret a zero destination to mean loopback. 8960 * Update the T_CONN_REQ (sin/sin6) since it is used to 8961 * generate the T_CONN_CON. 8962 */ 8963 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 8964 v6dst = ipv6_loopback; 8965 sin6->sin6_addr = v6dst; 8966 } 8967 udp->udp_v6dst = v6dst; 8968 udp->udp_flowinfo = flowinfo; 8969 /* 8970 * If the destination address is multicast and 8971 * an outgoing multicast interface has been set, 8972 * then the ip bind logic will pick the correct source 8973 * address (i.e. matching the outgoing multicast interface). 8974 */ 8975 } 8976 8977 /* 8978 * Verify that the src/port/dst/port is unique for all 8979 * connections in TS_DATA_XFER 8980 */ 8981 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 8982 if (udp1->udp_state != TS_DATA_XFER) 8983 continue; 8984 if (udp->udp_port != udp1->udp_port || 8985 udp->udp_ipversion != udp1->udp_ipversion || 8986 dstport != udp1->udp_dstport || 8987 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 8988 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) || 8989 !(IPCL_ZONE_MATCH(udp->udp_connp, 8990 udp1->udp_connp->conn_zoneid) || 8991 IPCL_ZONE_MATCH(udp1->udp_connp, 8992 udp->udp_connp->conn_zoneid))) 8993 continue; 8994 mutex_exit(&udpf->uf_lock); 8995 udp->udp_pending_op = -1; 8996 rw_exit(&udp->udp_rwlock); 8997 return (-TBADADDR); 8998 } 8999 9000 if (cl_inet_connect2 != NULL) { 9001 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error); 9002 if (error != 0) { 9003 mutex_exit(&udpf->uf_lock); 9004 udp->udp_pending_op = -1; 9005 rw_exit(&udp->udp_rwlock); 9006 return (-TBADADDR); 9007 } 9008 } 9009 9010 udp->udp_state = TS_DATA_XFER; 9011 
mutex_exit(&udpf->uf_lock); 9012 9013 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 9014 if (ire_mp == NULL) { 9015 mutex_enter(&udpf->uf_lock); 9016 udp->udp_state = TS_IDLE; 9017 udp->udp_pending_op = -1; 9018 mutex_exit(&udpf->uf_lock); 9019 rw_exit(&udp->udp_rwlock); 9020 return (ENOMEM); 9021 } 9022 9023 rw_exit(&udp->udp_rwlock); 9024 9025 ire_mp->b_wptr += sizeof (ire_t); 9026 ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE; 9027 9028 if (udp->udp_family == AF_INET) { 9029 error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP, 9030 &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port, 9031 V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport, 9032 B_TRUE, B_TRUE, cr); 9033 } else { 9034 error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP, 9035 &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst, 9036 &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE, cr); 9037 } 9038 9039 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 9040 } 9041 9042 /* ARGSUSED */ 9043 static int 9044 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 9045 socklen_t len, sock_connid_t *id, cred_t *cr) 9046 { 9047 conn_t *connp = (conn_t *)proto_handle; 9048 udp_t *udp = connp->conn_udp; 9049 int error; 9050 boolean_t did_bind = B_FALSE; 9051 9052 /* All Solaris components should pass a cred for this operation. */ 9053 ASSERT(cr != NULL); 9054 9055 if (sa == NULL) { 9056 /* 9057 * Disconnect 9058 * Make sure we are connected 9059 */ 9060 if (udp->udp_state != TS_DATA_XFER) 9061 return (EINVAL); 9062 9063 error = udp_disconnect(connp); 9064 return (error); 9065 } 9066 9067 error = proto_verify_ip_addr(udp->udp_family, sa, len); 9068 if (error != 0) 9069 goto done; 9070 9071 /* do an implicit bind if necessary */ 9072 if (udp->udp_state == TS_UNBND) { 9073 error = udp_implicit_bind(connp, cr); 9074 /* 9075 * We could be racing with an actual bind, in which case 9076 * we would see EPROTO. We cross our fingers and try 9077 * to connect. 
9078 */ 9079 if (!(error == 0 || error == EPROTO)) 9080 goto done; 9081 did_bind = B_TRUE; 9082 } 9083 /* 9084 * set SO_DGRAM_ERRIND 9085 */ 9086 udp->udp_dgram_errind = B_TRUE; 9087 9088 error = udp_do_connect(connp, sa, len, cr); 9089 9090 if (error != 0 && did_bind) { 9091 int unbind_err; 9092 9093 unbind_err = udp_do_unbind(connp); 9094 ASSERT(unbind_err == 0); 9095 } 9096 9097 if (error == 0) { 9098 *id = 0; 9099 (*connp->conn_upcalls->su_connected) 9100 (connp->conn_upper_handle, 0, NULL, -1); 9101 } else if (error < 0) { 9102 error = proto_tlitosyserr(-error); 9103 } 9104 9105 done: 9106 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 9107 /* 9108 * No need to hold locks to set state 9109 * after connect failure socket state is undefined 9110 * We set the state only to imitate old sockfs behavior 9111 */ 9112 udp->udp_state = TS_IDLE; 9113 } 9114 return (error); 9115 } 9116 9117 /* ARGSUSED */ 9118 int 9119 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 9120 cred_t *cr) 9121 { 9122 conn_t *connp = (conn_t *)proto_handle; 9123 udp_t *udp = connp->conn_udp; 9124 udp_stack_t *us = udp->udp_us; 9125 int error = 0; 9126 9127 ASSERT(DB_TYPE(mp) == M_DATA); 9128 9129 /* All Solaris components should pass a cred for this operation. */ 9130 ASSERT(cr != NULL); 9131 9132 /* If labeled then sockfs should have already set db_credp */ 9133 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 9134 9135 /* 9136 * If the socket is connected and no change in destination 9137 */ 9138 if (msg->msg_namelen == 0) { 9139 error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid); 9140 if (error == EDESTADDRREQ) 9141 return (error); 9142 else 9143 return (udp->udp_dgram_errind ? error : 0); 9144 } 9145 9146 /* 9147 * Do an implicit bind if necessary. 9148 */ 9149 if (udp->udp_state == TS_UNBND) { 9150 error = udp_implicit_bind(connp, cr); 9151 /* 9152 * We could be racing with an actual bind, in which case 9153 * we would see EPROTO. 
We cross our fingers and try 9154 * to send. 9155 */ 9156 if (!(error == 0 || error == EPROTO)) { 9157 freemsg(mp); 9158 return (error); 9159 } 9160 } 9161 9162 rw_enter(&udp->udp_rwlock, RW_WRITER); 9163 9164 if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) { 9165 rw_exit(&udp->udp_rwlock); 9166 freemsg(mp); 9167 return (EISCONN); 9168 } 9169 9170 9171 if (udp->udp_delayed_error != 0) { 9172 boolean_t match; 9173 9174 error = udp->udp_delayed_error; 9175 match = B_FALSE; 9176 udp->udp_delayed_error = 0; 9177 switch (udp->udp_family) { 9178 case AF_INET: { 9179 /* Compare just IP address and port */ 9180 sin_t *sin1 = (sin_t *)msg->msg_name; 9181 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 9182 9183 if (msg->msg_namelen == sizeof (sin_t) && 9184 sin1->sin_port == sin2->sin_port && 9185 sin1->sin_addr.s_addr == sin2->sin_addr.s_addr) 9186 match = B_TRUE; 9187 9188 break; 9189 } 9190 case AF_INET6: { 9191 sin6_t *sin1 = (sin6_t *)msg->msg_name; 9192 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 9193 9194 if (msg->msg_namelen == sizeof (sin6_t) && 9195 sin1->sin6_port == sin2->sin6_port && 9196 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 9197 &sin2->sin6_addr)) 9198 match = B_TRUE; 9199 break; 9200 } 9201 default: 9202 ASSERT(0); 9203 } 9204 9205 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 9206 9207 if (match) { 9208 rw_exit(&udp->udp_rwlock); 9209 freemsg(mp); 9210 return (error); 9211 } 9212 } 9213 9214 error = proto_verify_ip_addr(udp->udp_family, 9215 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 9216 rw_exit(&udp->udp_rwlock); 9217 9218 if (error != 0) { 9219 freemsg(mp); 9220 return (error); 9221 } 9222 9223 error = udp_send_not_connected(connp, mp, 9224 (struct sockaddr *)msg->msg_name, msg->msg_namelen, msg, cr, 9225 curproc->p_pid); 9226 if (error != 0) { 9227 UDP_STAT(us, udp_out_err_output); 9228 freemsg(mp); 9229 } 9230 return (udp->udp_dgram_errind ? 
error : 0); 9231 } 9232 9233 int 9234 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 9235 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb) 9236 { 9237 conn_t *connp = (conn_t *)proto_handle; 9238 udp_t *udp; 9239 struct T_capability_ack tca; 9240 struct sockaddr_in6 laddr, faddr; 9241 socklen_t laddrlen, faddrlen; 9242 short opts; 9243 struct stroptions *stropt; 9244 mblk_t *stropt_mp; 9245 int error; 9246 9247 udp = connp->conn_udp; 9248 9249 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 9250 9251 /* 9252 * setup the fallback stream that was allocated 9253 */ 9254 connp->conn_dev = (dev_t)RD(q)->q_ptr; 9255 connp->conn_minor_arena = WR(q)->q_ptr; 9256 9257 RD(q)->q_ptr = WR(q)->q_ptr = connp; 9258 9259 WR(q)->q_qinfo = &udp_winit; 9260 9261 connp->conn_rq = RD(q); 9262 connp->conn_wq = WR(q); 9263 9264 /* Notify stream head about options before sending up data */ 9265 stropt_mp->b_datap->db_type = M_SETOPTS; 9266 stropt_mp->b_wptr += sizeof (*stropt); 9267 stropt = (struct stroptions *)stropt_mp->b_rptr; 9268 stropt->so_flags = SO_WROFF | SO_HIWAT; 9269 stropt->so_wroff = 9270 (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra); 9271 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 9272 putnext(RD(q), stropt_mp); 9273 9274 /* 9275 * Free the helper stream 9276 */ 9277 ip_free_helper_stream(connp); 9278 9279 if (!direct_sockfs) 9280 udp_disable_direct_sockfs(udp); 9281 9282 /* 9283 * Collect the information needed to sync with the sonode 9284 */ 9285 udp_do_capability_ack(udp, &tca, TC1_INFO); 9286 9287 laddrlen = faddrlen = sizeof (sin6_t); 9288 (void) udp_getsockname((sock_lower_handle_t)connp, 9289 (struct sockaddr *)&laddr, &laddrlen, CRED()); 9290 error = udp_getpeername((sock_lower_handle_t)connp, 9291 (struct sockaddr *)&faddr, &faddrlen, CRED()); 9292 if (error != 0) 9293 faddrlen = 0; 9294 9295 opts = 0; 9296 if (udp->udp_dgram_errind) 9297 opts |= SO_DGRAM_ERRIND; 9298 if (udp->udp_dontroute) 9299 
opts |= SO_DONTROUTE; 9300 9301 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 9302 (struct sockaddr *)&laddr, laddrlen, 9303 (struct sockaddr *)&faddr, faddrlen, opts); 9304 9305 mutex_enter(&udp->udp_recv_lock); 9306 /* 9307 * Attempts to send data up during fallback will result in it being 9308 * queued in udp_t. Now we push up any queued packets. 9309 */ 9310 while (udp->udp_fallback_queue_head != NULL) { 9311 mblk_t *mp; 9312 mp = udp->udp_fallback_queue_head; 9313 udp->udp_fallback_queue_head = mp->b_next; 9314 mutex_exit(&udp->udp_recv_lock); 9315 mp->b_next = NULL; 9316 putnext(RD(q), mp); 9317 mutex_enter(&udp->udp_recv_lock); 9318 } 9319 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 9320 /* 9321 * No longer a streams less socket 9322 */ 9323 rw_enter(&udp->udp_rwlock, RW_WRITER); 9324 connp->conn_flags &= ~IPCL_NONSTR; 9325 rw_exit(&udp->udp_rwlock); 9326 9327 mutex_exit(&udp->udp_recv_lock); 9328 9329 ASSERT(connp->conn_ref >= 1); 9330 9331 return (0); 9332 } 9333 9334 static int 9335 udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9336 { 9337 sin_t *sin = (sin_t *)sa; 9338 sin6_t *sin6 = (sin6_t *)sa; 9339 9340 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9341 ASSERT(udp != NULL); 9342 9343 if (udp->udp_state != TS_DATA_XFER) 9344 return (ENOTCONN); 9345 9346 switch (udp->udp_family) { 9347 case AF_INET: 9348 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9349 9350 if (*salenp < sizeof (sin_t)) 9351 return (EINVAL); 9352 9353 *salenp = sizeof (sin_t); 9354 *sin = sin_null; 9355 sin->sin_family = AF_INET; 9356 sin->sin_port = udp->udp_dstport; 9357 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 9358 break; 9359 case AF_INET6: 9360 if (*salenp < sizeof (sin6_t)) 9361 return (EINVAL); 9362 9363 *salenp = sizeof (sin6_t); 9364 *sin6 = sin6_null; 9365 sin6->sin6_family = AF_INET6; 9366 sin6->sin6_port = udp->udp_dstport; 9367 sin6->sin6_addr = udp->udp_v6dst; 9368 sin6->sin6_flowinfo = udp->udp_flowinfo; 9369 break; 9370 
} 9371 9372 return (0); 9373 } 9374 9375 /* ARGSUSED */ 9376 int 9377 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9378 socklen_t *salenp, cred_t *cr) 9379 { 9380 conn_t *connp = (conn_t *)proto_handle; 9381 udp_t *udp = connp->conn_udp; 9382 int error; 9383 9384 /* All Solaris components should pass a cred for this operation. */ 9385 ASSERT(cr != NULL); 9386 9387 ASSERT(udp != NULL); 9388 9389 rw_enter(&udp->udp_rwlock, RW_READER); 9390 9391 error = udp_do_getpeername(udp, sa, salenp); 9392 9393 rw_exit(&udp->udp_rwlock); 9394 9395 return (error); 9396 } 9397 9398 static int 9399 udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9400 { 9401 sin_t *sin = (sin_t *)sa; 9402 sin6_t *sin6 = (sin6_t *)sa; 9403 9404 ASSERT(udp != NULL); 9405 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9406 9407 switch (udp->udp_family) { 9408 case AF_INET: 9409 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9410 9411 if (*salenp < sizeof (sin_t)) 9412 return (EINVAL); 9413 9414 *salenp = sizeof (sin_t); 9415 *sin = sin_null; 9416 sin->sin_family = AF_INET; 9417 if (udp->udp_state == TS_UNBND) { 9418 break; 9419 } 9420 sin->sin_port = udp->udp_port; 9421 9422 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 9423 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9424 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 9425 } else { 9426 /* 9427 * INADDR_ANY 9428 * udp_v6src is not set, we might be bound to 9429 * broadcast/multicast. 
Use udp_bound_v6src as 9430 * local address instead (that could 9431 * also still be INADDR_ANY) 9432 */ 9433 sin->sin_addr.s_addr = 9434 V4_PART_OF_V6(udp->udp_bound_v6src); 9435 } 9436 break; 9437 9438 case AF_INET6: 9439 if (*salenp < sizeof (sin6_t)) 9440 return (EINVAL); 9441 9442 *salenp = sizeof (sin6_t); 9443 *sin6 = sin6_null; 9444 sin6->sin6_family = AF_INET6; 9445 if (udp->udp_state == TS_UNBND) { 9446 break; 9447 } 9448 sin6->sin6_port = udp->udp_port; 9449 9450 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9451 sin6->sin6_addr = udp->udp_v6src; 9452 } else { 9453 /* 9454 * UNSPECIFIED 9455 * udp_v6src is not set, we might be bound to 9456 * broadcast/multicast. Use udp_bound_v6src as 9457 * local address instead (that could 9458 * also still be UNSPECIFIED) 9459 */ 9460 sin6->sin6_addr = udp->udp_bound_v6src; 9461 } 9462 } 9463 return (0); 9464 } 9465 9466 /* ARGSUSED */ 9467 int 9468 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9469 socklen_t *salenp, cred_t *cr) 9470 { 9471 conn_t *connp = (conn_t *)proto_handle; 9472 udp_t *udp = connp->conn_udp; 9473 int error; 9474 9475 /* All Solaris components should pass a cred for this operation. */ 9476 ASSERT(cr != NULL); 9477 9478 ASSERT(udp != NULL); 9479 rw_enter(&udp->udp_rwlock, RW_READER); 9480 9481 error = udp_do_getsockname(udp, sa, salenp); 9482 9483 rw_exit(&udp->udp_rwlock); 9484 9485 return (error); 9486 } 9487 9488 int 9489 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9490 void *optvalp, socklen_t *optlen, cred_t *cr) 9491 { 9492 conn_t *connp = (conn_t *)proto_handle; 9493 udp_t *udp = connp->conn_udp; 9494 int error; 9495 t_uscalar_t max_optbuf_len; 9496 void *optvalp_buf; 9497 int len; 9498 9499 /* All Solaris components should pass a cred for this operation. 
*/ 9500 ASSERT(cr != NULL); 9501 9502 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 9503 udp_opt_obj.odb_opt_des_arr, 9504 udp_opt_obj.odb_opt_arr_cnt, 9505 udp_opt_obj.odb_topmost_tpiprovider, 9506 B_FALSE, B_TRUE, cr); 9507 if (error != 0) { 9508 if (error < 0) 9509 error = proto_tlitosyserr(-error); 9510 return (error); 9511 } 9512 9513 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 9514 rw_enter(&udp->udp_rwlock, RW_READER); 9515 len = udp_opt_get(connp, level, option_name, optvalp_buf); 9516 rw_exit(&udp->udp_rwlock); 9517 9518 if (len < 0) { 9519 /* 9520 * Pass on to IP 9521 */ 9522 kmem_free(optvalp_buf, max_optbuf_len); 9523 return (ip_get_options(connp, level, option_name, 9524 optvalp, optlen, cr)); 9525 } else { 9526 /* 9527 * update optlen and copy option value 9528 */ 9529 t_uscalar_t size = MIN(len, *optlen); 9530 bcopy(optvalp_buf, optvalp, size); 9531 bcopy(&size, optlen, sizeof (size)); 9532 9533 kmem_free(optvalp_buf, max_optbuf_len); 9534 return (0); 9535 } 9536 } 9537 9538 int 9539 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9540 const void *optvalp, socklen_t optlen, cred_t *cr) 9541 { 9542 conn_t *connp = (conn_t *)proto_handle; 9543 udp_t *udp = connp->conn_udp; 9544 int error; 9545 9546 /* All Solaris components should pass a cred for this operation. 
*/ 9547 ASSERT(cr != NULL); 9548 9549 error = proto_opt_check(level, option_name, optlen, NULL, 9550 udp_opt_obj.odb_opt_des_arr, 9551 udp_opt_obj.odb_opt_arr_cnt, 9552 udp_opt_obj.odb_topmost_tpiprovider, 9553 B_TRUE, B_FALSE, cr); 9554 9555 if (error != 0) { 9556 if (error < 0) 9557 error = proto_tlitosyserr(-error); 9558 return (error); 9559 } 9560 9561 rw_enter(&udp->udp_rwlock, RW_WRITER); 9562 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 9563 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 9564 NULL, cr); 9565 rw_exit(&udp->udp_rwlock); 9566 9567 if (error < 0) { 9568 /* 9569 * Pass on to ip 9570 */ 9571 error = ip_set_options(connp, level, option_name, optvalp, 9572 optlen, cr); 9573 } 9574 9575 return (error); 9576 } 9577 9578 void 9579 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 9580 { 9581 conn_t *connp = (conn_t *)proto_handle; 9582 udp_t *udp = connp->conn_udp; 9583 9584 mutex_enter(&udp->udp_recv_lock); 9585 connp->conn_flow_cntrld = B_FALSE; 9586 mutex_exit(&udp->udp_recv_lock); 9587 } 9588 9589 /* ARGSUSED */ 9590 int 9591 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 9592 { 9593 conn_t *connp = (conn_t *)proto_handle; 9594 9595 /* All Solaris components should pass a cred for this operation. */ 9596 ASSERT(cr != NULL); 9597 9598 /* shut down the send side */ 9599 if (how != SHUT_RD) 9600 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9601 SOCK_OPCTL_SHUT_SEND, 0); 9602 /* shut down the recv side */ 9603 if (how != SHUT_WR) 9604 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9605 SOCK_OPCTL_SHUT_RECV, 0); 9606 return (0); 9607 } 9608 9609 int 9610 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 9611 int mode, int32_t *rvalp, cred_t *cr) 9612 { 9613 conn_t *connp = (conn_t *)proto_handle; 9614 int error; 9615 9616 /* All Solaris components should pass a cred for this operation. 
*/ 9617 ASSERT(cr != NULL); 9618 9619 switch (cmd) { 9620 case ND_SET: 9621 case ND_GET: 9622 case _SIOCSOCKFALLBACK: 9623 case TI_GETPEERNAME: 9624 case TI_GETMYNAME: 9625 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 9626 cmd)); 9627 error = EINVAL; 9628 break; 9629 default: 9630 /* 9631 * Pass on to IP using helper stream 9632 */ 9633 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 9634 cmd, arg, mode, cr, rvalp); 9635 break; 9636 } 9637 return (error); 9638 } 9639 9640 /* ARGSUSED */ 9641 int 9642 udp_accept(sock_lower_handle_t lproto_handle, 9643 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 9644 cred_t *cr) 9645 { 9646 return (EOPNOTSUPP); 9647 } 9648 9649 /* ARGSUSED */ 9650 int 9651 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 9652 { 9653 return (EOPNOTSUPP); 9654 } 9655 9656 sock_downcalls_t sock_udp_downcalls = { 9657 udp_activate, /* sd_activate */ 9658 udp_accept, /* sd_accept */ 9659 udp_bind, /* sd_bind */ 9660 udp_listen, /* sd_listen */ 9661 udp_connect, /* sd_connect */ 9662 udp_getpeername, /* sd_getpeername */ 9663 udp_getsockname, /* sd_getsockname */ 9664 udp_getsockopt, /* sd_getsockopt */ 9665 udp_setsockopt, /* sd_setsockopt */ 9666 udp_send, /* sd_send */ 9667 NULL, /* sd_send_uio */ 9668 NULL, /* sd_recv_uio */ 9669 NULL, /* sd_poll */ 9670 udp_shutdown, /* sd_shutdown */ 9671 udp_clr_flowctrl, /* sd_setflowctrl */ 9672 udp_ioctl, /* sd_ioctl */ 9673 udp_close /* sd_close */ 9674 }; 9675